{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.57763671875e-05, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "model_forward_time": 1.0319106578826904, "step": 0 }, { "epoch": 0, "step": 0, "training_step_time": 1.5901906490325928 }, { "epoch": 1.52587890625e-09, "model_forward_time": 0.028571128845214844, "step": 1 }, { "epoch": 1.52587890625e-09, "step": 1, "training_step_time": 0.22123479843139648 }, { "epoch": 3.0517578125e-09, "model_forward_time": 0.02512216567993164, "step": 2 }, { "epoch": 3.0517578125e-09, "step": 2, "training_step_time": 0.11980462074279785 }, { "epoch": 4.57763671875e-09, "model_forward_time": 0.02453923225402832, "step": 3 }, { "epoch": 4.57763671875e-09, "step": 3, "training_step_time": 0.12976622581481934 }, { "epoch": 6.103515625e-09, "model_forward_time": 0.02606821060180664, "step": 4 }, { "epoch": 6.103515625e-09, "step": 4, "training_step_time": 0.10907411575317383 }, { "epoch": 7.62939453125e-09, "model_forward_time": 0.026157140731811523, "step": 5 }, { "epoch": 7.62939453125e-09, "step": 5, "training_step_time": 0.11301732063293457 }, { "epoch": 9.1552734375e-09, "model_forward_time": 0.02605152130126953, "step": 6 }, { "epoch": 9.1552734375e-09, "step": 6, "training_step_time": 0.12163853645324707 }, { "epoch": 1.068115234375e-08, "model_forward_time": 0.025647640228271484, "step": 7 }, { "epoch": 1.068115234375e-08, "step": 7, "training_step_time": 0.11937880516052246 }, { "epoch": 1.220703125e-08, "model_forward_time": 0.02511310577392578, "step": 8 }, { "epoch": 1.220703125e-08, "step": 8, "training_step_time": 0.11348962783813477 }, { "epoch": 1.373291015625e-08, "model_forward_time": 0.025819778442382812, "step": 9 }, { "epoch": 1.373291015625e-08, "step": 9, "training_step_time": 0.11962628364562988 }, { "epoch": 1.52587890625e-08, "grad_norm": 6.067507266998291, "learning_rate": 6.666666666666667e-07, "loss": 1.2252, "step": 10 }, { "epoch": 1.52587890625e-08, "model_forward_time": 0.028791189193725586, "step": 10 }, { "epoch": 1.52587890625e-08, "step": 10, "training_step_time": 0.12380552291870117 }, { "epoch": 1.678466796875e-08, "model_forward_time": 0.025547266006469727, "step": 11 }, { "epoch": 1.678466796875e-08, "step": 11, "training_step_time": 0.11302661895751953 }, { "epoch": 1.8310546875e-08, "model_forward_time": 0.026027679443359375, "step": 12 }, { "epoch": 1.8310546875e-08, "step": 12, "training_step_time": 0.11699748039245605 }, { "epoch": 1.983642578125e-08, "model_forward_time": 0.025533676147460938, "step": 13 }, { "epoch": 1.983642578125e-08, "step": 13, "training_step_time": 0.10940051078796387 }, { "epoch": 2.13623046875e-08, "model_forward_time": 0.025338411331176758, "step": 14 }, { "epoch": 2.13623046875e-08, "step": 14, "training_step_time": 0.16063308715820312 }, { "epoch": 2.288818359375e-08, "model_forward_time": 0.024246692657470703, "step": 15 }, { "epoch": 2.288818359375e-08, "step": 15, "training_step_time": 0.14876723289489746 }, { "epoch": 2.44140625e-08, "model_forward_time": 0.024797916412353516, "step": 16 }, { "epoch": 2.44140625e-08, "step": 16, "training_step_time": 0.16344356536865234 }, { "epoch": 2.593994140625e-08, "model_forward_time": 0.024475812911987305, "step": 17 }, { "epoch": 2.593994140625e-08, "step": 17, "training_step_time": 0.17724943161010742 }, { "epoch": 2.74658203125e-08, "model_forward_time": 0.02528214454650879, "step": 18 }, { "epoch": 2.74658203125e-08, "step": 18, "training_step_time": 0.15558290481567383 }, { "epoch": 2.899169921875e-08, "model_forward_time": 0.02420949935913086, "step": 19 }, { "epoch": 2.899169921875e-08, "step": 19, "training_step_time": 0.15831494331359863 }, { "epoch": 3.0517578125e-08, "grad_norm": 4.578792572021484, "learning_rate": 1.3333333333333334e-06, "loss": 1.2077, "step": 20 }, { "epoch": 3.0517578125e-08, "model_forward_time": 0.024424314498901367, "step": 20 }, { "epoch": 3.0517578125e-08, "step": 20, "training_step_time": 0.10786700248718262 }, { "epoch": 3.204345703125e-08, "model_forward_time": 0.02824997901916504, "step": 21 }, { "epoch": 3.204345703125e-08, "step": 21, "training_step_time": 0.1069478988647461 }, { "epoch": 3.35693359375e-08, "model_forward_time": 0.02910923957824707, "step": 22 }, { "epoch": 3.35693359375e-08, "step": 22, "training_step_time": 0.11163830757141113 }, { "epoch": 3.509521484375e-08, "model_forward_time": 0.029635906219482422, "step": 23 }, { "epoch": 3.509521484375e-08, "step": 23, "training_step_time": 0.11366629600524902 }, { "epoch": 3.662109375e-08, "model_forward_time": 0.02586197853088379, "step": 24 }, { "epoch": 3.662109375e-08, "step": 24, "training_step_time": 0.10776495933532715 }, { "epoch": 3.814697265625e-08, "model_forward_time": 0.02928924560546875, "step": 25 }, { "epoch": 3.814697265625e-08, "step": 25, "training_step_time": 0.11644697189331055 }, { "epoch": 3.96728515625e-08, "model_forward_time": 0.02590775489807129, "step": 26 }, { "epoch": 3.96728515625e-08, "step": 26, "training_step_time": 0.11479544639587402 }, { "epoch": 4.119873046875e-08, "model_forward_time": 0.026750564575195312, "step": 27 }, { "epoch": 4.119873046875e-08, "step": 27, "training_step_time": 0.151716947555542 }, { "epoch": 4.2724609375e-08, "model_forward_time": 0.025519847869873047, "step": 28 }, { "epoch": 4.2724609375e-08, "step": 28, "training_step_time": 0.11794662475585938 }, { "epoch": 4.425048828125e-08, "model_forward_time": 0.02509760856628418, "step": 29 }, { "epoch": 4.425048828125e-08, "step": 29, "training_step_time": 0.10537981986999512 }, { "epoch": 4.57763671875e-08, "grad_norm": 3.404512882232666, "learning_rate": 2.0000000000000003e-06, "loss": 1.1347, "step": 30 }, { "epoch": 4.57763671875e-08, "model_forward_time": 0.026080846786499023, "step": 30 }, { "epoch": 4.57763671875e-08, "step": 30, "training_step_time": 0.10661101341247559 }, { "epoch": 4.730224609375e-08, "model_forward_time": 0.029267549514770508, "step": 31 }, { "epoch": 4.730224609375e-08, "step": 31, "training_step_time": 0.11377143859863281 }, { "epoch": 4.8828125e-08, "model_forward_time": 0.02588963508605957, "step": 32 }, { "epoch": 4.8828125e-08, "step": 32, "training_step_time": 0.16434931755065918 }, { "epoch": 5.035400390625e-08, "model_forward_time": 0.025550127029418945, "step": 33 }, { "epoch": 5.035400390625e-08, "step": 33, "training_step_time": 0.13167810440063477 }, { "epoch": 5.18798828125e-08, "model_forward_time": 0.02539205551147461, "step": 34 }, { "epoch": 5.18798828125e-08, "step": 34, "training_step_time": 0.11022114753723145 }, { "epoch": 5.340576171875e-08, "model_forward_time": 0.026363134384155273, "step": 35 }, { "epoch": 5.340576171875e-08, "step": 35, "training_step_time": 0.11951398849487305 }, { "epoch": 5.4931640625e-08, "model_forward_time": 0.02743220329284668, "step": 36 }, { "epoch": 5.4931640625e-08, "step": 36, "training_step_time": 0.1782093048095703 }, { "epoch": 5.645751953125e-08, "model_forward_time": 0.025634288787841797, "step": 37 }, { "epoch": 5.645751953125e-08, "step": 37, "training_step_time": 0.1223607063293457 }, { "epoch": 5.79833984375e-08, "model_forward_time": 0.027596235275268555, "step": 38 }, { "epoch": 5.79833984375e-08, "step": 38, "training_step_time": 0.10950684547424316 }, { "epoch": 5.950927734375e-08, "model_forward_time": 0.02748847007751465, "step": 39 }, { "epoch": 5.950927734375e-08, "step": 39, "training_step_time": 0.1121678352355957 }, { "epoch": 6.103515625e-08, "grad_norm": 3.3540265560150146, "learning_rate": 2.666666666666667e-06, "loss": 1.1275, "step": 40 }, { "epoch": 6.103515625e-08, "model_forward_time": 0.02660655975341797, "step": 40 }, { "epoch": 6.103515625e-08, "step": 40, "training_step_time": 0.1085958480834961 }, { "epoch": 6.256103515625e-08, "model_forward_time": 0.026361942291259766, "step": 41 }, { "epoch": 6.256103515625e-08, "step": 41, "training_step_time": 0.10995984077453613 }, { "epoch": 6.40869140625e-08, "model_forward_time": 0.025968313217163086, "step": 42 }, { "epoch": 6.40869140625e-08, "step": 42, "training_step_time": 0.11075258255004883 }, { "epoch": 6.561279296875e-08, "model_forward_time": 0.025856733322143555, "step": 43 }, { "epoch": 6.561279296875e-08, "step": 43, "training_step_time": 0.10966372489929199 }, { "epoch": 6.7138671875e-08, "model_forward_time": 0.02678656578063965, "step": 44 }, { "epoch": 6.7138671875e-08, "step": 44, "training_step_time": 0.10839462280273438 }, { "epoch": 6.866455078125e-08, "model_forward_time": 0.026245832443237305, "step": 45 }, { "epoch": 6.866455078125e-08, "step": 45, "training_step_time": 0.10392260551452637 }, { "epoch": 7.01904296875e-08, "model_forward_time": 0.025958776473999023, "step": 46 }, { "epoch": 7.01904296875e-08, "step": 46, "training_step_time": 0.10898923873901367 }, { "epoch": 7.171630859375e-08, "model_forward_time": 0.02625560760498047, "step": 47 }, { "epoch": 7.171630859375e-08, "step": 47, "training_step_time": 0.10556769371032715 }, { "epoch": 7.32421875e-08, "model_forward_time": 0.026059389114379883, "step": 48 }, { "epoch": 7.32421875e-08, "step": 48, "training_step_time": 0.10772347450256348 }, { "epoch": 7.476806640625e-08, "model_forward_time": 0.026355743408203125, "step": 49 }, { "epoch": 7.476806640625e-08, "step": 49, "training_step_time": 0.10431146621704102 }, { "epoch": 7.62939453125e-08, "grad_norm": 5.17725133895874, "learning_rate": 3.3333333333333333e-06, "loss": 1.1185, "step": 50 }, { "epoch": 7.62939453125e-08, "model_forward_time": 0.0267946720123291, "step": 50 }, { "epoch": 7.62939453125e-08, "step": 50, "training_step_time": 0.10547614097595215 }, { "epoch": 7.781982421875e-08, "model_forward_time": 0.026325225830078125, "step": 51 }, { "epoch": 7.781982421875e-08, "step": 51, "training_step_time": 0.10610151290893555 }, { "epoch": 7.9345703125e-08, "model_forward_time": 0.02591419219970703, "step": 52 }, { "epoch": 7.9345703125e-08, "step": 52, "training_step_time": 0.10524606704711914 }, { "epoch": 8.087158203125e-08, "model_forward_time": 0.025983810424804688, "step": 53 }, { "epoch": 8.087158203125e-08, "step": 53, "training_step_time": 0.10518741607666016 }, { "epoch": 8.23974609375e-08, "model_forward_time": 0.026117563247680664, "step": 54 }, { "epoch": 8.23974609375e-08, "step": 54, "training_step_time": 0.10506796836853027 }, { "epoch": 8.392333984375e-08, "model_forward_time": 0.027858734130859375, "step": 55 }, { "epoch": 8.392333984375e-08, "step": 55, "training_step_time": 0.1100766658782959 }, { "epoch": 8.544921875e-08, "model_forward_time": 0.02553868293762207, "step": 56 }, { "epoch": 8.544921875e-08, "step": 56, "training_step_time": 0.10886931419372559 }, { "epoch": 8.697509765625e-08, "model_forward_time": 0.025895118713378906, "step": 57 }, { "epoch": 8.697509765625e-08, "step": 57, "training_step_time": 0.1085052490234375 }, { "epoch": 8.85009765625e-08, "model_forward_time": 0.026247262954711914, "step": 58 }, { "epoch": 8.85009765625e-08, "step": 58, "training_step_time": 0.11170578002929688 }, { "epoch": 9.002685546875e-08, "model_forward_time": 0.02557516098022461, "step": 59 }, { "epoch": 9.002685546875e-08, "step": 59, "training_step_time": 0.10515999794006348 }, { "epoch": 9.1552734375e-08, "grad_norm": 2.941340923309326, "learning_rate": 4.000000000000001e-06, "loss": 1.1342, "step": 60 }, { "epoch": 9.1552734375e-08, "model_forward_time": 0.027450084686279297, "step": 60 }, { "epoch": 9.1552734375e-08, "step": 60, "training_step_time": 0.11174392700195312 }, { "epoch": 9.307861328125e-08, "model_forward_time": 0.027616024017333984, "step": 61 }, { "epoch": 9.307861328125e-08, "step": 61, "training_step_time": 0.1144108772277832 }, { "epoch": 9.46044921875e-08, "model_forward_time": 0.026006698608398438, "step": 62 }, { "epoch": 9.46044921875e-08, "step": 62, "training_step_time": 0.1659221649169922 }, { "epoch": 9.613037109375e-08, "model_forward_time": 0.025553226470947266, "step": 63 }, { "epoch": 9.613037109375e-08, "step": 63, "training_step_time": 0.18537402153015137 }, { "epoch": 9.765625e-08, "model_forward_time": 0.025078296661376953, "step": 64 }, { "epoch": 9.765625e-08, "step": 64, "training_step_time": 0.17681241035461426 }, { "epoch": 9.918212890625e-08, "model_forward_time": 0.02532052993774414, "step": 65 }, { "epoch": 9.918212890625e-08, "step": 65, "training_step_time": 0.16931819915771484 }, { "epoch": 1.007080078125e-07, "model_forward_time": 0.02660965919494629, "step": 66 }, { "epoch": 1.007080078125e-07, "step": 66, "training_step_time": 0.12088370323181152 }, { "epoch": 1.0223388671875e-07, "model_forward_time": 0.02500319480895996, "step": 67 }, { "epoch": 1.0223388671875e-07, "step": 67, "training_step_time": 0.1154329776763916 }, { "epoch": 1.03759765625e-07, "model_forward_time": 0.025840282440185547, "step": 68 }, { "epoch": 1.03759765625e-07, "step": 68, "training_step_time": 0.10432648658752441 }, { "epoch": 1.0528564453125e-07, "model_forward_time": 0.02615952491760254, "step": 69 }, { "epoch": 1.0528564453125e-07, "step": 69, "training_step_time": 0.1107783317565918 }, { "epoch": 1.068115234375e-07, "grad_norm": 2.9935081005096436, "learning_rate": 4.666666666666667e-06, "loss": 1.1149, "step": 70 }, { "epoch": 1.068115234375e-07, "model_forward_time": 0.026018619537353516, "step": 70 }, { "epoch": 1.068115234375e-07, "step": 70, "training_step_time": 0.10485291481018066 }, { "epoch": 1.0833740234375e-07, "model_forward_time": 0.026154279708862305, "step": 71 }, { "epoch": 1.0833740234375e-07, "step": 71, "training_step_time": 0.11275053024291992 }, { "epoch": 1.0986328125e-07, "model_forward_time": 0.025632143020629883, "step": 72 }, { "epoch": 1.0986328125e-07, "step": 72, "training_step_time": 0.1080777645111084 }, { "epoch": 1.1138916015625e-07, "model_forward_time": 0.02599644660949707, "step": 73 }, { "epoch": 1.1138916015625e-07, "step": 73, "training_step_time": 0.10730195045471191 }, { "epoch": 1.129150390625e-07, "model_forward_time": 0.025754928588867188, "step": 74 }, { "epoch": 1.129150390625e-07, "step": 74, "training_step_time": 0.12090826034545898 }, { "epoch": 1.1444091796875e-07, "model_forward_time": 0.02681899070739746, "step": 75 }, { "epoch": 1.1444091796875e-07, "step": 75, "training_step_time": 0.1279304027557373 }, { "epoch": 1.15966796875e-07, "model_forward_time": 0.026015043258666992, "step": 76 }, { "epoch": 1.15966796875e-07, "step": 76, "training_step_time": 0.1549975872039795 }, { "epoch": 1.1749267578125e-07, "model_forward_time": 0.02524256706237793, "step": 77 }, { "epoch": 1.1749267578125e-07, "step": 77, "training_step_time": 0.16358280181884766 }, { "epoch": 1.190185546875e-07, "model_forward_time": 0.025020122528076172, "step": 78 }, { "epoch": 1.190185546875e-07, "step": 78, "training_step_time": 0.16798901557922363 }, { "epoch": 1.2054443359375e-07, "model_forward_time": 0.02513861656188965, "step": 79 }, { "epoch": 1.2054443359375e-07, "step": 79, "training_step_time": 0.1597733497619629 }, { "epoch": 1.220703125e-07, "grad_norm": 2.160930871963501, "learning_rate": 5.333333333333334e-06, "loss": 1.0985, "step": 80 }, { "epoch": 1.220703125e-07, "model_forward_time": 0.0246734619140625, "step": 80 }, { "epoch": 1.220703125e-07, "step": 80, "training_step_time": 0.11692357063293457 }, { "epoch": 1.2359619140625e-07, "model_forward_time": 0.02527141571044922, "step": 81 }, { "epoch": 1.2359619140625e-07, "step": 81, "training_step_time": 0.10662055015563965 }, { "epoch": 1.251220703125e-07, "model_forward_time": 0.028119802474975586, "step": 82 }, { "epoch": 1.251220703125e-07, "step": 82, "training_step_time": 0.11094474792480469 }, { "epoch": 1.2664794921875e-07, "model_forward_time": 0.02660393714904785, "step": 83 }, { "epoch": 1.2664794921875e-07, "step": 83, "training_step_time": 0.2058873176574707 }, { "epoch": 1.28173828125e-07, "model_forward_time": 0.02728438377380371, "step": 84 }, { "epoch": 1.28173828125e-07, "step": 84, "training_step_time": 0.11332988739013672 }, { "epoch": 1.2969970703125e-07, "model_forward_time": 0.025354385375976562, "step": 85 }, { "epoch": 1.2969970703125e-07, "step": 85, "training_step_time": 0.1064918041229248 }, { "epoch": 1.312255859375e-07, "model_forward_time": 0.02603292465209961, "step": 86 }, { "epoch": 1.312255859375e-07, "step": 86, "training_step_time": 0.10985732078552246 }, { "epoch": 1.3275146484375e-07, "model_forward_time": 0.02564835548400879, "step": 87 }, { "epoch": 1.3275146484375e-07, "step": 87, "training_step_time": 0.10858607292175293 }, { "epoch": 1.3427734375e-07, "model_forward_time": 0.025949716567993164, "step": 88 }, { "epoch": 1.3427734375e-07, "step": 88, "training_step_time": 0.1072390079498291 }, { "epoch": 1.3580322265625e-07, "model_forward_time": 0.029630661010742188, "step": 89 }, { "epoch": 1.3580322265625e-07, "step": 89, "training_step_time": 0.11060118675231934 }, { "epoch": 1.373291015625e-07, "grad_norm": 2.017200231552124, "learning_rate": 6e-06, "loss": 1.1021, "step": 90 }, { "epoch": 1.373291015625e-07, "model_forward_time": 0.025426387786865234, "step": 90 }, { "epoch": 1.373291015625e-07, "step": 90, "training_step_time": 0.10535740852355957 }, { "epoch": 1.3885498046875e-07, "model_forward_time": 0.025750398635864258, "step": 91 }, { "epoch": 1.3885498046875e-07, "step": 91, "training_step_time": 0.10763072967529297 }, { "epoch": 1.40380859375e-07, "model_forward_time": 0.026279449462890625, "step": 92 }, { "epoch": 1.40380859375e-07, "step": 92, "training_step_time": 0.10863423347473145 }, { "epoch": 1.4190673828125e-07, "model_forward_time": 0.02568960189819336, "step": 93 }, { "epoch": 1.4190673828125e-07, "step": 93, "training_step_time": 0.1126718521118164 }, { "epoch": 1.434326171875e-07, "model_forward_time": 0.025844812393188477, "step": 94 }, { "epoch": 1.434326171875e-07, "step": 94, "training_step_time": 0.10429811477661133 }, { "epoch": 1.4495849609375e-07, "model_forward_time": 0.025232315063476562, "step": 95 }, { "epoch": 1.4495849609375e-07, "step": 95, "training_step_time": 0.10566186904907227 }, { "epoch": 1.46484375e-07, "model_forward_time": 0.025871753692626953, "step": 96 }, { "epoch": 1.46484375e-07, "step": 96, "training_step_time": 0.10646677017211914 }, { "epoch": 1.4801025390625e-07, "model_forward_time": 0.025615692138671875, "step": 97 }, { "epoch": 1.4801025390625e-07, "step": 97, "training_step_time": 0.1059727668762207 }, { "epoch": 1.495361328125e-07, "model_forward_time": 0.025202035903930664, "step": 98 }, { "epoch": 1.495361328125e-07, "step": 98, "training_step_time": 0.10299944877624512 }, { "epoch": 1.5106201171875e-07, "model_forward_time": 0.025269746780395508, "step": 99 }, { "epoch": 1.5106201171875e-07, "step": 99, "training_step_time": 0.10377955436706543 }, { "epoch": 1.52587890625e-07, "grad_norm": 2.2146012783050537, "learning_rate": 6.666666666666667e-06, "loss": 1.1055, "step": 100 }, { "epoch": 1.52587890625e-07, "model_forward_time": 0.025841474533081055, "step": 100 }, { "epoch": 1.52587890625e-07, "step": 100, "training_step_time": 0.10974383354187012 }, { "epoch": 1.5411376953125e-07, "model_forward_time": 0.025582075119018555, "step": 101 }, { "epoch": 1.5411376953125e-07, "step": 101, "training_step_time": 0.10512566566467285 }, { "epoch": 1.556396484375e-07, "model_forward_time": 0.02594161033630371, "step": 102 }, { "epoch": 1.556396484375e-07, "step": 102, "training_step_time": 0.1070563793182373 }, { "epoch": 1.5716552734375e-07, "model_forward_time": 0.025438785552978516, "step": 103 }, { "epoch": 1.5716552734375e-07, "step": 103, "training_step_time": 0.10385584831237793 }, { "epoch": 1.5869140625e-07, "model_forward_time": 0.025485754013061523, "step": 104 }, { "epoch": 1.5869140625e-07, "step": 104, "training_step_time": 0.10833215713500977 }, { "epoch": 1.6021728515625e-07, "model_forward_time": 0.025707244873046875, "step": 105 }, { "epoch": 1.6021728515625e-07, "step": 105, "training_step_time": 0.1103818416595459 }, { "epoch": 1.617431640625e-07, "model_forward_time": 0.025554418563842773, "step": 106 }, { "epoch": 1.617431640625e-07, "step": 106, "training_step_time": 0.1063225269317627 }, { "epoch": 1.6326904296875e-07, "model_forward_time": 0.02517247200012207, "step": 107 }, { "epoch": 1.6326904296875e-07, "step": 107, "training_step_time": 0.11746406555175781 }, { "epoch": 1.64794921875e-07, "model_forward_time": 0.02513718605041504, "step": 108 }, { "epoch": 1.64794921875e-07, "step": 108, "training_step_time": 0.10857868194580078 }, { "epoch": 1.6632080078125e-07, "model_forward_time": 0.025552749633789062, "step": 109 }, { "epoch": 1.6632080078125e-07, "step": 109, "training_step_time": 0.19937992095947266 }, { "epoch": 1.678466796875e-07, "grad_norm": 3.705718755722046, "learning_rate": 7.333333333333334e-06, "loss": 1.0923, "step": 110 }, { "epoch": 1.678466796875e-07, "model_forward_time": 0.024410247802734375, "step": 110 }, { "epoch": 1.678466796875e-07, "step": 110, "training_step_time": 0.20553088188171387 }, { "epoch": 1.6937255859375e-07, "model_forward_time": 0.025051116943359375, "step": 111 }, { "epoch": 1.6937255859375e-07, "step": 111, "training_step_time": 0.11757636070251465 }, { "epoch": 1.708984375e-07, "model_forward_time": 0.024251699447631836, "step": 112 }, { "epoch": 1.708984375e-07, "step": 112, "training_step_time": 0.1045370101928711 }, { "epoch": 1.7242431640625e-07, "model_forward_time": 0.02516627311706543, "step": 113 }, { "epoch": 1.7242431640625e-07, "step": 113, "training_step_time": 0.21181869506835938 }, { "epoch": 1.739501953125e-07, "model_forward_time": 0.024795055389404297, "step": 114 }, { "epoch": 1.739501953125e-07, "step": 114, "training_step_time": 0.1017763614654541 }, { "epoch": 1.7547607421875e-07, "model_forward_time": 0.024666547775268555, "step": 115 }, { "epoch": 1.7547607421875e-07, "step": 115, "training_step_time": 0.10803914070129395 }, { "epoch": 1.77001953125e-07, "model_forward_time": 0.024348974227905273, "step": 116 }, { "epoch": 1.77001953125e-07, "step": 116, "training_step_time": 0.10791945457458496 }, { "epoch": 1.7852783203125e-07, "model_forward_time": 0.025171995162963867, "step": 117 }, { "epoch": 1.7852783203125e-07, "step": 117, "training_step_time": 0.10659551620483398 }, { "epoch": 1.800537109375e-07, "model_forward_time": 0.025080204010009766, "step": 118 }, { "epoch": 1.800537109375e-07, "step": 118, "training_step_time": 0.10491943359375 }, { "epoch": 1.8157958984375e-07, "model_forward_time": 0.025238513946533203, "step": 119 }, { "epoch": 1.8157958984375e-07, "step": 119, "training_step_time": 0.175095796585083 }, { "epoch": 1.8310546875e-07, "grad_norm": 2.5202622413635254, "learning_rate": 8.000000000000001e-06, "loss": 1.0777, "step": 120 }, { "epoch": 1.8310546875e-07, "model_forward_time": 0.025106430053710938, "step": 120 }, { "epoch": 1.8310546875e-07, "step": 120, "training_step_time": 0.11800432205200195 }, { "epoch": 1.8463134765625e-07, "model_forward_time": 0.024784088134765625, "step": 121 }, { "epoch": 1.8463134765625e-07, "step": 121, "training_step_time": 0.1031346321105957 }, { "epoch": 1.861572265625e-07, "model_forward_time": 0.026371479034423828, "step": 122 }, { "epoch": 1.861572265625e-07, "step": 122, "training_step_time": 0.10760188102722168 }, { "epoch": 1.8768310546875e-07, "model_forward_time": 0.025882720947265625, "step": 123 }, { "epoch": 1.8768310546875e-07, "step": 123, "training_step_time": 0.10884928703308105 }, { "epoch": 1.89208984375e-07, "model_forward_time": 0.024898767471313477, "step": 124 }, { "epoch": 1.89208984375e-07, "step": 124, "training_step_time": 0.1766490936279297 }, { "epoch": 1.9073486328125e-07, "model_forward_time": 0.024541854858398438, "step": 125 }, { "epoch": 1.9073486328125e-07, "step": 125, "training_step_time": 0.10937190055847168 }, { "epoch": 1.922607421875e-07, "model_forward_time": 0.024924755096435547, "step": 126 }, { "epoch": 1.922607421875e-07, "step": 126, "training_step_time": 0.10846853256225586 }, { "epoch": 1.9378662109375e-07, "model_forward_time": 0.02446126937866211, "step": 127 }, { "epoch": 1.9378662109375e-07, "step": 127, "training_step_time": 0.20496296882629395 }, { "epoch": 1.953125e-07, "model_forward_time": 0.023757457733154297, "step": 128 }, { "epoch": 1.953125e-07, "step": 128, "training_step_time": 0.10406255722045898 }, { "epoch": 1.9683837890625e-07, "model_forward_time": 0.024302959442138672, "step": 129 }, { "epoch": 1.9683837890625e-07, "step": 129, "training_step_time": 0.11093688011169434 }, { "epoch": 1.983642578125e-07, "grad_norm": 3.2971274852752686, "learning_rate": 8.666666666666668e-06, "loss": 1.087, "step": 130 }, { "epoch": 1.983642578125e-07, "model_forward_time": 0.025298118591308594, "step": 130 }, { "epoch": 1.983642578125e-07, "step": 130, "training_step_time": 0.1963181495666504 }, { "epoch": 1.9989013671875e-07, "model_forward_time": 0.024568796157836914, "step": 131 }, { "epoch": 1.9989013671875e-07, "step": 131, "training_step_time": 0.10664820671081543 }, { "epoch": 2.01416015625e-07, "model_forward_time": 0.024847030639648438, "step": 132 }, { "epoch": 2.01416015625e-07, "step": 132, "training_step_time": 0.10486078262329102 }, { "epoch": 2.0294189453125e-07, "model_forward_time": 0.025294065475463867, "step": 133 }, { "epoch": 2.0294189453125e-07, "step": 133, "training_step_time": 0.10913729667663574 }, { "epoch": 2.044677734375e-07, "model_forward_time": 0.02522587776184082, "step": 134 }, { "epoch": 2.044677734375e-07, "step": 134, "training_step_time": 0.11135077476501465 }, { "epoch": 2.0599365234375e-07, "model_forward_time": 0.025368452072143555, "step": 135 }, { "epoch": 2.0599365234375e-07, "step": 135, "training_step_time": 0.10720133781433105 }, { "epoch": 2.0751953125e-07, "model_forward_time": 0.025255441665649414, "step": 136 }, { "epoch": 2.0751953125e-07, "step": 136, "training_step_time": 0.10765504837036133 }, { "epoch": 2.0904541015625e-07, "model_forward_time": 0.025819778442382812, "step": 137 }, { "epoch": 2.0904541015625e-07, "step": 137, "training_step_time": 0.10836267471313477 }, { "epoch": 2.105712890625e-07, "model_forward_time": 0.025656461715698242, "step": 138 }, { "epoch": 2.105712890625e-07, "step": 138, "training_step_time": 0.10537075996398926 }, { "epoch": 2.1209716796875e-07, "model_forward_time": 0.024921894073486328, "step": 139 }, { "epoch": 2.1209716796875e-07, "step": 139, "training_step_time": 0.10823225975036621 }, { "epoch": 2.13623046875e-07, "grad_norm": 2.060201406478882, "learning_rate": 9.333333333333334e-06, "loss": 1.1007, "step": 140 }, { "epoch": 2.13623046875e-07, "model_forward_time": 0.025678396224975586, "step": 140 }, { "epoch": 2.13623046875e-07, "step": 140, "training_step_time": 0.10832762718200684 }, { "epoch": 2.1514892578125e-07, "model_forward_time": 0.02512669563293457, "step": 141 }, { "epoch": 2.1514892578125e-07, "step": 141, "training_step_time": 0.11315178871154785 }, { "epoch": 2.166748046875e-07, "model_forward_time": 0.02522563934326172, "step": 142 }, { "epoch": 2.166748046875e-07, "step": 142, "training_step_time": 0.1073002815246582 }, { "epoch": 2.1820068359375e-07, "model_forward_time": 0.025441646575927734, "step": 143 }, { "epoch": 2.1820068359375e-07, "step": 143, "training_step_time": 0.10685944557189941 }, { "epoch": 2.197265625e-07, "model_forward_time": 0.02531743049621582, "step": 144 }, { "epoch": 2.197265625e-07, "step": 144, "training_step_time": 0.10764336585998535 }, { "epoch": 2.2125244140625e-07, "model_forward_time": 0.025152206420898438, "step": 145 }, { "epoch": 2.2125244140625e-07, "step": 145, "training_step_time": 0.11270904541015625 }, { "epoch": 2.227783203125e-07, "model_forward_time": 0.027230024337768555, "step": 146 }, { "epoch": 2.227783203125e-07, "step": 146, "training_step_time": 0.11264300346374512 }, { "epoch": 2.2430419921875e-07, "model_forward_time": 0.025463104248046875, "step": 147 }, { "epoch": 2.2430419921875e-07, "step": 147, "training_step_time": 0.11224055290222168 }, { "epoch": 2.25830078125e-07, "model_forward_time": 0.025217294692993164, "step": 148 }, { "epoch": 2.25830078125e-07, "step": 148, "training_step_time": 0.11015510559082031 }, { "epoch": 2.2735595703125e-07, "model_forward_time": 0.026349782943725586, "step": 149 }, { "epoch": 2.2735595703125e-07, "step": 149, "training_step_time": 0.10867691040039062 }, { "epoch": 2.288818359375e-07, "grad_norm": 1.9003688097000122, "learning_rate": 1e-05, "loss": 1.0665, "step": 150 }, { "epoch": 2.288818359375e-07, "model_forward_time": 0.02533888816833496, "step": 150 }, { "epoch": 2.288818359375e-07, "step": 150, "training_step_time": 0.10929298400878906 }, { "epoch": 2.3040771484375e-07, "model_forward_time": 0.025371074676513672, "step": 151 }, { "epoch": 2.3040771484375e-07, "step": 151, "training_step_time": 0.10710024833679199 }, { "epoch": 2.3193359375e-07, "model_forward_time": 0.024374961853027344, "step": 152 }, { "epoch": 2.3193359375e-07, "step": 152, "training_step_time": 0.18261027336120605 }, { "epoch": 2.3345947265625e-07, "model_forward_time": 0.024361848831176758, "step": 153 }, { "epoch": 2.3345947265625e-07, "step": 153, "training_step_time": 0.10788965225219727 }, { "epoch": 2.349853515625e-07, "model_forward_time": 0.024384498596191406, "step": 154 }, { "epoch": 2.349853515625e-07, "step": 154, "training_step_time": 0.20012164115905762 }, { "epoch": 2.3651123046875e-07, "model_forward_time": 0.024213552474975586, "step": 155 }, { "epoch": 2.3651123046875e-07, "step": 155, "training_step_time": 0.1328420639038086 }, { "epoch": 2.38037109375e-07, "model_forward_time": 0.02397465705871582, "step": 156 }, { "epoch": 2.38037109375e-07, "step": 156, "training_step_time": 0.14797019958496094 }, { "epoch": 2.3956298828125e-07, "model_forward_time": 0.024573564529418945, "step": 157 }, { "epoch": 2.3956298828125e-07, "step": 157, "training_step_time": 0.17415404319763184 }, { "epoch": 2.410888671875e-07, "model_forward_time": 0.024271011352539062, "step": 158 }, { "epoch": 2.410888671875e-07, "step": 158, "training_step_time": 0.15529704093933105 }, { "epoch": 2.4261474609375e-07, "model_forward_time": 0.024670839309692383, "step": 159 }, { "epoch": 2.4261474609375e-07, "step": 159, "training_step_time": 0.10644721984863281 }, { "epoch": 2.44140625e-07, "grad_norm": 2.8054840564727783, "learning_rate": 1.0666666666666667e-05, "loss": 1.1201, "step": 160 }, { "epoch": 2.44140625e-07, "model_forward_time": 0.02489161491394043, "step": 160 }, { "epoch": 2.44140625e-07, "step": 160, "training_step_time": 0.1070854663848877 }, { "epoch": 2.4566650390625e-07, "model_forward_time": 0.025034427642822266, "step": 161 }, { "epoch": 2.4566650390625e-07, "step": 161, "training_step_time": 0.10844230651855469 }, { "epoch": 2.471923828125e-07, "model_forward_time": 0.02515697479248047, "step": 162 }, { "epoch": 2.471923828125e-07, "step": 162, "training_step_time": 0.10387110710144043 }, { "epoch": 2.4871826171875e-07, "model_forward_time": 0.02525186538696289, "step": 163 }, { "epoch": 2.4871826171875e-07, "step": 163, "training_step_time": 0.1042320728302002 }, { "epoch": 2.50244140625e-07, "model_forward_time": 0.025216341018676758, "step": 164 }, { "epoch": 2.50244140625e-07, "step": 164, "training_step_time": 0.10904598236083984 }, { "epoch": 2.5177001953125e-07, "model_forward_time": 0.025246381759643555, "step": 165 }, { "epoch": 2.5177001953125e-07, "step": 165, "training_step_time": 0.20741724967956543 }, { "epoch": 2.532958984375e-07, "model_forward_time": 0.024412155151367188, "step": 166 }, { "epoch": 2.532958984375e-07, "step": 166, "training_step_time": 0.10574102401733398 }, { "epoch": 2.5482177734375e-07, "model_forward_time": 0.024666786193847656, "step": 167 }, { "epoch": 2.5482177734375e-07, "step": 167, "training_step_time": 0.10556483268737793 }, { "epoch": 2.5634765625e-07, "model_forward_time": 0.025375843048095703, "step": 168 }, { "epoch": 2.5634765625e-07, "step": 168, "training_step_time": 0.11011004447937012 }, { "epoch": 2.5787353515625e-07, "model_forward_time": 0.025117874145507812, "step": 169 }, { "epoch": 2.5787353515625e-07, "step": 169, "training_step_time": 0.1689453125 }, { "epoch": 2.593994140625e-07, "grad_norm": 1.4153610467910767, "learning_rate": 1.1333333333333334e-05, "loss": 1.0695, "step": 170 }, { "epoch": 2.593994140625e-07, "model_forward_time": 0.024693965911865234, "step": 170 }, { "epoch": 2.593994140625e-07, "step": 170, "training_step_time": 0.10523772239685059 }, { "epoch": 2.6092529296875e-07, "model_forward_time": 0.025215864181518555, "step": 171 }, { "epoch": 2.6092529296875e-07, "step": 171, "training_step_time": 0.10704302787780762 }, { "epoch": 2.62451171875e-07, "model_forward_time": 0.025417804718017578, "step": 172 }, { "epoch": 2.62451171875e-07, "step": 172, "training_step_time": 0.20757031440734863 }, { "epoch": 2.6397705078125e-07, "model_forward_time": 0.024976253509521484, "step": 173 }, { "epoch": 2.6397705078125e-07, "step": 173, "training_step_time": 0.10101461410522461 }, { "epoch": 2.655029296875e-07, "model_forward_time": 0.02486872673034668, "step": 174 }, { "epoch": 2.655029296875e-07, "step": 174, "training_step_time": 0.1085202693939209 }, { "epoch": 2.6702880859375e-07, "model_forward_time": 0.025509119033813477, "step": 175 }, { "epoch": 2.6702880859375e-07, "step": 175, "training_step_time": 0.20956778526306152 }, { "epoch": 2.685546875e-07, "model_forward_time": 0.024161815643310547, "step": 176 }, { "epoch": 2.685546875e-07, "step": 176, "training_step_time": 0.10477256774902344 }, { "epoch": 2.7008056640625e-07, "model_forward_time": 0.024560928344726562, "step": 177 }, { "epoch": 2.7008056640625e-07, "step": 177, "training_step_time": 0.10026764869689941 }, { "epoch": 2.716064453125e-07, "model_forward_time": 0.025931358337402344, "step": 178 }, { "epoch": 2.716064453125e-07, "step": 178, "training_step_time": 0.10803103446960449 }, { "epoch": 2.7313232421875e-07, "model_forward_time": 0.025135040283203125, "step": 179 }, { "epoch": 2.7313232421875e-07, "step": 179, "training_step_time": 0.10832476615905762 }, { "epoch": 2.74658203125e-07, "grad_norm": 1.289467215538025, "learning_rate": 1.2e-05, "loss": 1.059, "step": 180 }, { "epoch": 2.74658203125e-07, "model_forward_time": 0.02535223960876465, "step": 180 }, { "epoch": 2.74658203125e-07, "step": 180, "training_step_time": 0.10510659217834473 }, { "epoch": 2.7618408203125e-07, "model_forward_time": 0.024930715560913086, "step": 181 }, { "epoch": 2.7618408203125e-07, "step": 181, "training_step_time": 0.10405206680297852 }, { "epoch": 2.777099609375e-07, "model_forward_time": 0.027380943298339844, "step": 182 }, { "epoch": 2.777099609375e-07, "step": 182, "training_step_time": 0.10753679275512695 }, { "epoch": 2.7923583984375e-07, "model_forward_time": 0.025535106658935547, "step": 183 }, { "epoch": 2.7923583984375e-07, "step": 183, "training_step_time": 0.10632085800170898 }, { "epoch": 2.8076171875e-07, "model_forward_time": 0.02551746368408203, "step": 184 }, { "epoch": 2.8076171875e-07, "step": 184, "training_step_time": 0.10328292846679688 }, { "epoch": 2.8228759765625e-07, "model_forward_time": 0.025577068328857422, "step": 185 }, { "epoch": 2.8228759765625e-07, "step": 185, "training_step_time": 0.10737752914428711 }, { "epoch": 2.838134765625e-07, "model_forward_time": 0.025345325469970703, "step": 186 }, { "epoch": 2.838134765625e-07, "step": 186, "training_step_time": 0.10521984100341797 }, { "epoch": 2.8533935546875e-07, "model_forward_time": 0.02513742446899414, "step": 187 }, { "epoch": 2.8533935546875e-07, "step": 187, "training_step_time": 0.10754847526550293 }, { "epoch": 2.86865234375e-07, "model_forward_time": 0.025455236434936523, "step": 188 }, { "epoch": 2.86865234375e-07, "step": 188, "training_step_time": 0.10693955421447754 }, { "epoch": 2.8839111328125e-07, "model_forward_time": 0.025736331939697266, "step": 189 }, { "epoch": 2.8839111328125e-07, "step": 189, "training_step_time": 0.10713648796081543 }, { "epoch": 2.899169921875e-07, "grad_norm": 1.8123581409454346, "learning_rate": 1.2666666666666668e-05, "loss": 1.042, "step": 190 }, { "epoch": 2.899169921875e-07, "model_forward_time": 0.025921106338500977, "step": 190 }, { "epoch": 2.899169921875e-07, "step": 190, "training_step_time": 0.1091923713684082 }, { "epoch": 2.9144287109375e-07, "model_forward_time": 0.025386333465576172, "step": 191 }, { "epoch": 2.9144287109375e-07, "step": 191, "training_step_time": 0.10807228088378906 }, { "epoch": 2.9296875e-07, "model_forward_time": 0.026799678802490234, "step": 192 }, { "epoch": 2.9296875e-07, "step": 192, "training_step_time": 0.10712909698486328 }, { "epoch": 2.9449462890625e-07, "model_forward_time": 0.025065183639526367, "step": 193 }, { "epoch": 2.9449462890625e-07, "step": 193, "training_step_time": 0.10766220092773438 }, { "epoch": 2.960205078125e-07, "model_forward_time": 0.02542567253112793, "step": 194 }, { "epoch": 2.960205078125e-07, "step": 194, "training_step_time": 0.10897135734558105 }, { "epoch": 2.9754638671875e-07, "model_forward_time": 0.02498006820678711, "step": 195 }, { "epoch": 2.9754638671875e-07, "step": 195, "training_step_time": 0.10578083992004395 }, { "epoch": 2.99072265625e-07, "model_forward_time": 0.025472640991210938, "step": 196 }, { "epoch": 2.99072265625e-07, "step": 196, "training_step_time": 0.10597109794616699 }, { "epoch": 3.0059814453125e-07, "model_forward_time": 0.025658845901489258, "step": 197 }, { "epoch": 3.0059814453125e-07, "step": 197, "training_step_time": 0.10854721069335938 }, { "epoch": 3.021240234375e-07, "model_forward_time": 0.02532052993774414, "step": 198 }, { "epoch": 3.021240234375e-07, "step": 198, "training_step_time": 0.11224126815795898 }, { "epoch": 3.0364990234375e-07, "model_forward_time": 0.025629758834838867, "step": 199 }, { "epoch": 3.0364990234375e-07, "step": 199, "training_step_time": 0.10959386825561523 }, { "epoch": 3.0517578125e-07, "grad_norm": 2.2768733501434326, "learning_rate": 1.3333333333333333e-05, "loss": 1.0852, "step": 200 }, { "epoch": 3.0517578125e-07, "model_forward_time": 0.026217937469482422, "step": 200 }, { "epoch": 3.0517578125e-07, "step": 200, "training_step_time": 0.20072722434997559 }, { "epoch": 3.0670166015625e-07, "model_forward_time": 0.024330854415893555, "step": 201 }, { "epoch": 3.0670166015625e-07, "step": 201, "training_step_time": 0.1308908462524414 }, { "epoch": 3.082275390625e-07, "model_forward_time": 0.025192737579345703, "step": 202 }, { "epoch": 3.082275390625e-07, "step": 202, "training_step_time": 0.14418363571166992 }, { "epoch": 3.0975341796875e-07, "model_forward_time": 0.02505040168762207, "step": 203 }, { "epoch": 3.0975341796875e-07, "step": 203, "training_step_time": 0.1721487045288086 }, { "epoch": 3.11279296875e-07, "model_forward_time": 0.024411439895629883, "step": 204 }, { "epoch": 3.11279296875e-07, "step": 204, "training_step_time": 0.11894965171813965 }, { "epoch": 3.1280517578125e-07, "model_forward_time": 0.0242464542388916, "step": 205 }, { "epoch": 3.1280517578125e-07, "step": 205, "training_step_time": 0.11566329002380371 }, { "epoch": 3.143310546875e-07, "model_forward_time": 0.02573990821838379, "step": 206 }, { "epoch": 3.143310546875e-07, "step": 206, "training_step_time": 0.10853457450866699 }, { "epoch": 3.1585693359375e-07, "model_forward_time": 0.02544426918029785, "step": 207 }, { "epoch": 3.1585693359375e-07, "step": 207, "training_step_time": 0.11241364479064941 }, { "epoch": 3.173828125e-07, "model_forward_time": 0.025177001953125, "step": 208 }, { "epoch": 3.173828125e-07, "step": 208, "training_step_time": 0.10628056526184082 }, { "epoch": 3.1890869140625e-07, "model_forward_time": 0.025434017181396484, "step": 209 }, { "epoch": 3.1890869140625e-07, "step": 209, "training_step_time": 0.1055746078491211 }, { "epoch": 3.204345703125e-07, "grad_norm": 2.139814853668213, "learning_rate": 1.4000000000000001e-05, "loss": 1.0817, "step": 210 }, { "epoch": 3.204345703125e-07, "model_forward_time": 0.025125503540039062, "step": 210 }, { "epoch": 3.204345703125e-07, "step": 210, "training_step_time": 0.10978817939758301 }, { "epoch": 3.2196044921875e-07, "model_forward_time": 0.025634765625, "step": 211 }, { "epoch": 3.2196044921875e-07, "step": 211, "training_step_time": 0.1635749340057373 }, { "epoch": 3.23486328125e-07, "model_forward_time": 0.024929046630859375, "step": 212 }, { "epoch": 3.23486328125e-07, "step": 212, "training_step_time": 0.14980244636535645 }, { "epoch": 3.2501220703125e-07, "model_forward_time": 0.025199174880981445, "step": 213 }, { "epoch": 3.2501220703125e-07, "step": 213, "training_step_time": 0.1106722354888916 }, { "epoch": 3.265380859375e-07, "model_forward_time": 0.0253293514251709, "step": 214 }, { "epoch": 3.265380859375e-07, "step": 214, "training_step_time": 0.10408616065979004 }, { "epoch": 3.2806396484375e-07, "model_forward_time": 0.02529764175415039, "step": 215 }, { "epoch": 3.2806396484375e-07, "step": 215, "training_step_time": 0.1448988914489746 }, { "epoch": 3.2958984375e-07, "model_forward_time": 0.02536630630493164, "step": 216 }, { "epoch": 3.2958984375e-07, "step": 216, "training_step_time": 0.14667034149169922 }, { "epoch": 3.3111572265625e-07, "model_forward_time": 0.0243074893951416, "step": 217 }, { "epoch": 3.3111572265625e-07, "step": 217, "training_step_time": 0.10565853118896484 }, { "epoch": 3.326416015625e-07, "model_forward_time": 0.02464008331298828, "step": 218 }, { "epoch": 3.326416015625e-07, "step": 218, "training_step_time": 0.15975522994995117 }, { "epoch": 3.3416748046875e-07, "model_forward_time": 0.024682998657226562, "step": 219 }, { "epoch": 3.3416748046875e-07, "step": 219, "training_step_time": 0.15303301811218262 }, { "epoch": 3.35693359375e-07, "grad_norm": 1.3726691007614136, "learning_rate": 1.4666666666666668e-05, "loss": 1.0744, "step": 220 }, { "epoch": 3.35693359375e-07, "model_forward_time": 0.025442838668823242, "step": 220 }, { "epoch": 3.35693359375e-07, "step": 220, "training_step_time": 0.1061403751373291 }, { "epoch": 3.3721923828125e-07, "model_forward_time": 0.024499893188476562, "step": 221 }, { "epoch": 3.3721923828125e-07, "step": 221, "training_step_time": 0.20609641075134277 }, { "epoch": 3.387451171875e-07, "model_forward_time": 0.024770736694335938, "step": 222 }, { "epoch": 3.387451171875e-07, "step": 222, "training_step_time": 0.10871124267578125 }, { "epoch": 3.4027099609375e-07, "model_forward_time": 0.024731159210205078, "step": 223 }, { "epoch": 3.4027099609375e-07, "step": 223, "training_step_time": 0.1062014102935791 }, { "epoch": 3.41796875e-07, "model_forward_time": 0.025137901306152344, "step": 224 }, { "epoch": 3.41796875e-07, "step": 224, "training_step_time": 0.10837364196777344 }, { "epoch": 3.4332275390625e-07, "model_forward_time": 0.025831937789916992, "step": 225 }, { "epoch": 3.4332275390625e-07, "step": 225, "training_step_time": 0.10949540138244629 }, { "epoch": 3.448486328125e-07, "model_forward_time": 0.025206327438354492, "step": 226 }, { "epoch": 3.448486328125e-07, "step": 226, "training_step_time": 0.11171412467956543 }, { "epoch": 3.4637451171875e-07, "model_forward_time": 0.02624988555908203, "step": 227 }, { "epoch": 3.4637451171875e-07, "step": 227, "training_step_time": 0.11604642868041992 }, { "epoch": 3.47900390625e-07, "model_forward_time": 0.026447534561157227, "step": 228 }, { "epoch": 3.47900390625e-07, "step": 228, "training_step_time": 0.11378717422485352 }, { "epoch": 3.4942626953125e-07, "model_forward_time": 0.026441335678100586, "step": 229 }, { "epoch": 3.4942626953125e-07, "step": 229, "training_step_time": 0.11003470420837402 }, { "epoch": 3.509521484375e-07, "grad_norm": 3.7555150985717773, "learning_rate": 1.5333333333333334e-05, "loss": 1.0905, "step": 230 }, { "epoch": 3.509521484375e-07, "model_forward_time": 0.02584552764892578, "step": 230 }, { "epoch": 3.509521484375e-07, "step": 230, "training_step_time": 0.10566830635070801 }, { "epoch": 3.5247802734375e-07, "model_forward_time": 0.025495290756225586, "step": 231 }, { "epoch": 3.5247802734375e-07, "step": 231, "training_step_time": 0.11129093170166016 }, { "epoch": 3.5400390625e-07, "model_forward_time": 0.02561187744140625, "step": 232 }, { "epoch": 3.5400390625e-07, "step": 232, "training_step_time": 0.10994148254394531 }, { "epoch": 3.5552978515625e-07, "model_forward_time": 0.02617359161376953, "step": 233 }, { "epoch": 3.5552978515625e-07, "step": 233, "training_step_time": 0.10953950881958008 }, { "epoch": 3.570556640625e-07, "model_forward_time": 0.025429964065551758, "step": 234 }, { "epoch": 3.570556640625e-07, "step": 234, "training_step_time": 0.10425972938537598 }, { "epoch": 3.5858154296875e-07, "model_forward_time": 0.025571823120117188, "step": 235 }, { "epoch": 3.5858154296875e-07, "step": 235, "training_step_time": 0.10515356063842773 }, { "epoch": 3.60107421875e-07, "model_forward_time": 0.02615976333618164, "step": 236 }, { "epoch": 3.60107421875e-07, "step": 236, "training_step_time": 0.10771536827087402 }, { "epoch": 3.6163330078125e-07, "model_forward_time": 0.026056766510009766, "step": 237 }, { "epoch": 3.6163330078125e-07, "step": 237, "training_step_time": 0.10839581489562988 }, { "epoch": 3.631591796875e-07, "model_forward_time": 0.025864839553833008, "step": 238 }, { "epoch": 3.631591796875e-07, "step": 238, "training_step_time": 0.1052699089050293 }, { "epoch": 3.6468505859375e-07, "model_forward_time": 0.02572035789489746, "step": 239 }, { "epoch": 3.6468505859375e-07, "step": 239, "training_step_time": 0.10449099540710449 }, { "epoch": 3.662109375e-07, "grad_norm": 1.8919063806533813, "learning_rate": 1.6000000000000003e-05, "loss": 1.0872, "step": 240 }, { "epoch": 3.662109375e-07, "model_forward_time": 0.02666187286376953, "step": 240 }, { "epoch": 3.662109375e-07, "step": 240, "training_step_time": 0.10679841041564941 }, { "epoch": 3.6773681640625e-07, "model_forward_time": 0.026041507720947266, "step": 241 }, { "epoch": 3.6773681640625e-07, "step": 241, "training_step_time": 0.10630631446838379 }, { "epoch": 3.692626953125e-07, "model_forward_time": 0.025934457778930664, "step": 242 }, { "epoch": 3.692626953125e-07, "step": 242, "training_step_time": 0.1068878173828125 }, { "epoch": 3.7078857421875e-07, "model_forward_time": 0.026000499725341797, "step": 243 }, { "epoch": 3.7078857421875e-07, "step": 243, "training_step_time": 0.1785283088684082 }, { "epoch": 3.72314453125e-07, "model_forward_time": 0.025198698043823242, "step": 244 }, { "epoch": 3.72314453125e-07, "step": 244, "training_step_time": 0.1068873405456543 }, { "epoch": 3.7384033203125e-07, "model_forward_time": 0.025313377380371094, "step": 245 }, { "epoch": 3.7384033203125e-07, "step": 245, "training_step_time": 0.21465086936950684 }, { "epoch": 3.753662109375e-07, "model_forward_time": 0.025408267974853516, "step": 246 }, { "epoch": 3.753662109375e-07, "step": 246, "training_step_time": 0.17536520957946777 }, { "epoch": 3.7689208984375e-07, "model_forward_time": 0.027303457260131836, "step": 247 }, { "epoch": 3.7689208984375e-07, "step": 247, "training_step_time": 0.14746475219726562 }, { "epoch": 3.7841796875e-07, "model_forward_time": 0.026276111602783203, "step": 248 }, { "epoch": 3.7841796875e-07, "step": 248, "training_step_time": 0.12691950798034668 }, { "epoch": 3.7994384765625e-07, "model_forward_time": 0.024385452270507812, "step": 249 }, { "epoch": 3.7994384765625e-07, "step": 249, "training_step_time": 0.11448836326599121 }, { "epoch": 3.814697265625e-07, "grad_norm": 3.5808424949645996, "learning_rate": 1.6666666666666667e-05, "loss": 1.0672, "step": 250 }, { "epoch": 3.814697265625e-07, "model_forward_time": 0.025554418563842773, "step": 250 }, { "epoch": 3.814697265625e-07, "step": 250, "training_step_time": 0.11302661895751953 }, { "epoch": 3.8299560546875e-07, "model_forward_time": 0.025761842727661133, "step": 251 }, { "epoch": 3.8299560546875e-07, "step": 251, "training_step_time": 0.11169314384460449 }, { "epoch": 3.84521484375e-07, "model_forward_time": 0.02521657943725586, "step": 252 }, { "epoch": 3.84521484375e-07, "step": 252, "training_step_time": 0.1096949577331543 }, { "epoch": 3.8604736328125e-07, "model_forward_time": 0.025208473205566406, "step": 253 }, { "epoch": 3.8604736328125e-07, "step": 253, "training_step_time": 0.10798931121826172 }, { "epoch": 3.875732421875e-07, "model_forward_time": 0.025070667266845703, "step": 254 }, { "epoch": 3.875732421875e-07, "step": 254, "training_step_time": 0.10565304756164551 }, { "epoch": 3.8909912109375e-07, "model_forward_time": 0.025752544403076172, "step": 255 }, { "epoch": 3.8909912109375e-07, "step": 255, "training_step_time": 0.1708359718322754 }, { "epoch": 3.90625e-07, "model_forward_time": 0.02519512176513672, "step": 256 }, { "epoch": 3.90625e-07, "step": 256, "training_step_time": 0.12517976760864258 }, { "epoch": 3.9215087890625e-07, "model_forward_time": 0.024870872497558594, "step": 257 }, { "epoch": 3.9215087890625e-07, "step": 257, "training_step_time": 0.1068265438079834 }, { "epoch": 3.936767578125e-07, "model_forward_time": 0.029593944549560547, "step": 258 }, { "epoch": 3.936767578125e-07, "step": 258, "training_step_time": 0.10765361785888672 }, { "epoch": 3.9520263671875e-07, "model_forward_time": 0.02561044692993164, "step": 259 }, { "epoch": 3.9520263671875e-07, "step": 259, "training_step_time": 0.10733556747436523 }, { "epoch": 3.96728515625e-07, "grad_norm": 12.873846054077148, "learning_rate": 1.7333333333333336e-05, "loss": 1.1058, "step": 260 }, { "epoch": 3.96728515625e-07, "model_forward_time": 0.02564263343811035, "step": 260 }, { "epoch": 3.96728515625e-07, "step": 260, "training_step_time": 0.1722714900970459 }, { "epoch": 3.9825439453125e-07, "model_forward_time": 0.024820327758789062, "step": 261 }, { "epoch": 3.9825439453125e-07, "step": 261, "training_step_time": 0.10611677169799805 }, { "epoch": 3.997802734375e-07, "model_forward_time": 0.025095462799072266, "step": 262 }, { "epoch": 3.997802734375e-07, "step": 262, "training_step_time": 0.10972094535827637 }, { "epoch": 4.0130615234375e-07, "model_forward_time": 0.026111125946044922, "step": 263 }, { "epoch": 4.0130615234375e-07, "step": 263, "training_step_time": 0.1816272735595703 }, { "epoch": 4.0283203125e-07, "model_forward_time": 0.0251309871673584, "step": 264 }, { "epoch": 4.0283203125e-07, "step": 264, "training_step_time": 0.12833762168884277 }, { "epoch": 4.0435791015625e-07, "model_forward_time": 0.026359081268310547, "step": 265 }, { "epoch": 4.0435791015625e-07, "step": 265, "training_step_time": 0.11410975456237793 }, { "epoch": 4.058837890625e-07, "model_forward_time": 0.026052474975585938, "step": 266 }, { "epoch": 4.058837890625e-07, "step": 266, "training_step_time": 0.10692667961120605 }, { "epoch": 4.0740966796875e-07, "model_forward_time": 0.02548813819885254, "step": 267 }, { "epoch": 4.0740966796875e-07, "step": 267, "training_step_time": 0.19517922401428223 }, { "epoch": 4.08935546875e-07, "model_forward_time": 0.02492523193359375, "step": 268 }, { "epoch": 4.08935546875e-07, "step": 268, "training_step_time": 0.10407400131225586 }, { "epoch": 4.1046142578125e-07, "model_forward_time": 0.02457261085510254, "step": 269 }, { "epoch": 4.1046142578125e-07, "step": 269, "training_step_time": 0.10704469680786133 }, { "epoch": 4.119873046875e-07, "grad_norm": 2.260190010070801, "learning_rate": 1.8e-05, "loss": 1.1271, "step": 270 }, { "epoch": 4.119873046875e-07, "model_forward_time": 0.025186777114868164, "step": 270 }, { "epoch": 4.119873046875e-07, "step": 270, "training_step_time": 0.10310244560241699 }, { "epoch": 4.1351318359375e-07, "model_forward_time": 0.025582551956176758, "step": 271 }, { "epoch": 4.1351318359375e-07, "step": 271, "training_step_time": 0.10373687744140625 }, { "epoch": 4.150390625e-07, "model_forward_time": 0.025163650512695312, "step": 272 }, { "epoch": 4.150390625e-07, "step": 272, "training_step_time": 0.10535073280334473 }, { "epoch": 4.1656494140625e-07, "model_forward_time": 0.02567458152770996, "step": 273 }, { "epoch": 4.1656494140625e-07, "step": 273, "training_step_time": 0.10604214668273926 }, { "epoch": 4.180908203125e-07, "model_forward_time": 0.02625131607055664, "step": 274 }, { "epoch": 4.180908203125e-07, "step": 274, "training_step_time": 0.10631108283996582 }, { "epoch": 4.1961669921875e-07, "model_forward_time": 0.025867223739624023, "step": 275 }, { "epoch": 4.1961669921875e-07, "step": 275, "training_step_time": 0.10478878021240234 }, { "epoch": 4.21142578125e-07, "model_forward_time": 0.0256192684173584, "step": 276 }, { "epoch": 4.21142578125e-07, "step": 276, "training_step_time": 0.10482239723205566 }, { "epoch": 4.2266845703125e-07, "model_forward_time": 0.025732755661010742, "step": 277 }, { "epoch": 4.2266845703125e-07, "step": 277, "training_step_time": 0.10580730438232422 }, { "epoch": 4.241943359375e-07, "model_forward_time": 0.02573704719543457, "step": 278 }, { "epoch": 4.241943359375e-07, "step": 278, "training_step_time": 0.10732054710388184 }, { "epoch": 4.2572021484375e-07, "model_forward_time": 0.025740385055541992, "step": 279 }, { "epoch": 4.2572021484375e-07, "step": 279, "training_step_time": 0.10444331169128418 }, { "epoch": 4.2724609375e-07, "grad_norm": 2.123913288116455, "learning_rate": 1.866666666666667e-05, "loss": 1.0774, "step": 280 }, { "epoch": 4.2724609375e-07, "model_forward_time": 0.025602102279663086, "step": 280 }, { "epoch": 4.2724609375e-07, "step": 280, "training_step_time": 0.10497379302978516 }, { "epoch": 4.2877197265625e-07, "model_forward_time": 0.025228023529052734, "step": 281 }, { "epoch": 4.2877197265625e-07, "step": 281, "training_step_time": 0.10668349266052246 }, { "epoch": 4.302978515625e-07, "model_forward_time": 0.02588510513305664, "step": 282 }, { "epoch": 4.302978515625e-07, "step": 282, "training_step_time": 0.11024165153503418 }, { "epoch": 4.3182373046875e-07, "model_forward_time": 0.02536606788635254, "step": 283 }, { "epoch": 4.3182373046875e-07, "step": 283, "training_step_time": 0.10803508758544922 }, { "epoch": 4.33349609375e-07, "model_forward_time": 0.025258541107177734, "step": 284 }, { "epoch": 4.33349609375e-07, "step": 284, "training_step_time": 0.10373044013977051 }, { "epoch": 4.3487548828125e-07, "model_forward_time": 0.025148630142211914, "step": 285 }, { "epoch": 4.3487548828125e-07, "step": 285, "training_step_time": 0.1060020923614502 }, { "epoch": 4.364013671875e-07, "model_forward_time": 0.025150537490844727, "step": 286 }, { "epoch": 4.364013671875e-07, "step": 286, "training_step_time": 0.10913610458374023 }, { "epoch": 4.3792724609375e-07, "model_forward_time": 0.025230884552001953, "step": 287 }, { "epoch": 4.3792724609375e-07, "step": 287, "training_step_time": 0.10769248008728027 }, { "epoch": 4.39453125e-07, "model_forward_time": 0.02524399757385254, "step": 288 }, { "epoch": 4.39453125e-07, "step": 288, "training_step_time": 0.11211633682250977 }, { "epoch": 4.4097900390625e-07, "model_forward_time": 0.025160551071166992, "step": 289 }, { "epoch": 4.4097900390625e-07, "step": 289, "training_step_time": 0.10717606544494629 }, { "epoch": 4.425048828125e-07, "grad_norm": 1.9012972116470337, "learning_rate": 1.9333333333333333e-05, "loss": 1.0857, "step": 290 }, { "epoch": 4.425048828125e-07, "model_forward_time": 0.025286436080932617, "step": 290 }, { "epoch": 4.425048828125e-07, "step": 290, "training_step_time": 0.1056680679321289 }, { "epoch": 4.4403076171875e-07, "model_forward_time": 0.025116682052612305, "step": 291 }, { "epoch": 4.4403076171875e-07, "step": 291, "training_step_time": 0.1090090274810791 }, { "epoch": 4.45556640625e-07, "model_forward_time": 0.025147199630737305, "step": 292 }, { "epoch": 4.45556640625e-07, "step": 292, "training_step_time": 0.20943307876586914 }, { "epoch": 4.4708251953125e-07, "model_forward_time": 0.025385379791259766, "step": 293 }, { "epoch": 4.4708251953125e-07, "step": 293, "training_step_time": 0.15311980247497559 }, { "epoch": 4.486083984375e-07, "model_forward_time": 0.025290489196777344, "step": 294 }, { "epoch": 4.486083984375e-07, "step": 294, "training_step_time": 0.17037463188171387 }, { "epoch": 4.5013427734375e-07, "model_forward_time": 0.025030851364135742, "step": 295 }, { "epoch": 4.5013427734375e-07, "step": 295, "training_step_time": 0.16298389434814453 }, { "epoch": 4.5166015625e-07, "model_forward_time": 0.02533555030822754, "step": 296 }, { "epoch": 4.5166015625e-07, "step": 296, "training_step_time": 0.11112737655639648 }, { "epoch": 4.5318603515625e-07, "model_forward_time": 0.024981260299682617, "step": 297 }, { "epoch": 4.5318603515625e-07, "step": 297, "training_step_time": 0.10686969757080078 }, { "epoch": 4.547119140625e-07, "model_forward_time": 0.02520132064819336, "step": 298 }, { "epoch": 4.547119140625e-07, "step": 298, "training_step_time": 0.10563445091247559 }, { "epoch": 4.5623779296875e-07, "model_forward_time": 0.025437355041503906, "step": 299 }, { "epoch": 4.5623779296875e-07, "step": 299, "training_step_time": 0.1054999828338623 }, { "epoch": 4.57763671875e-07, "grad_norm": 1.348624587059021, "learning_rate": 2e-05, "loss": 1.0482, "step": 300 }, { "epoch": 4.57763671875e-07, "model_forward_time": 0.025567054748535156, "step": 300 }, { "epoch": 4.57763671875e-07, "step": 300, "training_step_time": 0.10553264617919922 }, { "epoch": 4.5928955078125e-07, "model_forward_time": 0.025410175323486328, "step": 301 }, { "epoch": 4.5928955078125e-07, "step": 301, "training_step_time": 0.11336088180541992 }, { "epoch": 4.608154296875e-07, "model_forward_time": 0.025510311126708984, "step": 302 }, { "epoch": 4.608154296875e-07, "step": 302, "training_step_time": 0.10550570487976074 }, { "epoch": 4.6234130859375e-07, "model_forward_time": 0.0255124568939209, "step": 303 }, { "epoch": 4.6234130859375e-07, "step": 303, "training_step_time": 0.12787961959838867 }, { "epoch": 4.638671875e-07, "model_forward_time": 0.0253751277923584, "step": 304 }, { "epoch": 4.638671875e-07, "step": 304, "training_step_time": 0.10251665115356445 }, { "epoch": 4.6539306640625e-07, "model_forward_time": 0.025760173797607422, "step": 305 }, { "epoch": 4.6539306640625e-07, "step": 305, "training_step_time": 0.10862398147583008 }, { "epoch": 4.669189453125e-07, "model_forward_time": 0.02561497688293457, "step": 306 }, { "epoch": 4.669189453125e-07, "step": 306, "training_step_time": 0.10998392105102539 }, { "epoch": 4.6844482421875e-07, "model_forward_time": 0.025400400161743164, "step": 307 }, { "epoch": 4.6844482421875e-07, "step": 307, "training_step_time": 0.16315722465515137 }, { "epoch": 4.69970703125e-07, "model_forward_time": 0.024495601654052734, "step": 308 }, { "epoch": 4.69970703125e-07, "step": 308, "training_step_time": 0.10726475715637207 }, { "epoch": 4.7149658203125e-07, "model_forward_time": 0.024821758270263672, "step": 309 }, { "epoch": 4.7149658203125e-07, "step": 309, "training_step_time": 0.10771346092224121 }, { "epoch": 4.730224609375e-07, "grad_norm": 1.6407850980758667, "learning_rate": 2.0666666666666666e-05, "loss": 1.0763, "step": 310 }, { "epoch": 4.730224609375e-07, "model_forward_time": 0.024608850479125977, "step": 310 }, { "epoch": 4.730224609375e-07, "step": 310, "training_step_time": 0.10258984565734863 }, { "epoch": 4.7454833984375e-07, "model_forward_time": 0.0257415771484375, "step": 311 }, { "epoch": 4.7454833984375e-07, "step": 311, "training_step_time": 0.10520243644714355 }, { "epoch": 4.7607421875e-07, "model_forward_time": 0.025485754013061523, "step": 312 }, { "epoch": 4.7607421875e-07, "step": 312, "training_step_time": 0.21076297760009766 }, { "epoch": 4.7760009765625e-07, "model_forward_time": 0.02455306053161621, "step": 313 }, { "epoch": 4.7760009765625e-07, "step": 313, "training_step_time": 0.10784077644348145 }, { "epoch": 4.791259765625e-07, "model_forward_time": 0.02463364601135254, "step": 314 }, { "epoch": 4.791259765625e-07, "step": 314, "training_step_time": 0.10654139518737793 }, { "epoch": 4.8065185546875e-07, "model_forward_time": 0.025342464447021484, "step": 315 }, { "epoch": 4.8065185546875e-07, "step": 315, "training_step_time": 0.20109272003173828 }, { "epoch": 4.82177734375e-07, "model_forward_time": 0.024034500122070312, "step": 316 }, { "epoch": 4.82177734375e-07, "step": 316, "training_step_time": 0.10872626304626465 }, { "epoch": 4.8370361328125e-07, "model_forward_time": 0.025923967361450195, "step": 317 }, { "epoch": 4.8370361328125e-07, "step": 317, "training_step_time": 0.10268092155456543 }, { "epoch": 4.852294921875e-07, "model_forward_time": 0.0257265567779541, "step": 318 }, { "epoch": 4.852294921875e-07, "step": 318, "training_step_time": 0.10426831245422363 }, { "epoch": 4.8675537109375e-07, "model_forward_time": 0.025304079055786133, "step": 319 }, { "epoch": 4.8675537109375e-07, "step": 319, "training_step_time": 0.10774612426757812 }, { "epoch": 4.8828125e-07, "grad_norm": 2.499673843383789, "learning_rate": 2.1333333333333335e-05, "loss": 1.072, "step": 320 }, { "epoch": 4.8828125e-07, "model_forward_time": 0.02578425407409668, "step": 320 }, { "epoch": 4.8828125e-07, "step": 320, "training_step_time": 0.10923504829406738 }, { "epoch": 4.8980712890625e-07, "model_forward_time": 0.02539992332458496, "step": 321 }, { "epoch": 4.8980712890625e-07, "step": 321, "training_step_time": 0.11181998252868652 }, { "epoch": 4.913330078125e-07, "model_forward_time": 0.025756359100341797, "step": 322 }, { "epoch": 4.913330078125e-07, "step": 322, "training_step_time": 0.1070413589477539 }, { "epoch": 4.9285888671875e-07, "model_forward_time": 0.027256488800048828, "step": 323 }, { "epoch": 4.9285888671875e-07, "step": 323, "training_step_time": 0.11181211471557617 }, { "epoch": 4.94384765625e-07, "model_forward_time": 0.025214672088623047, "step": 324 }, { "epoch": 4.94384765625e-07, "step": 324, "training_step_time": 0.10771632194519043 }, { "epoch": 4.9591064453125e-07, "model_forward_time": 0.025455474853515625, "step": 325 }, { "epoch": 4.9591064453125e-07, "step": 325, "training_step_time": 0.10553407669067383 }, { "epoch": 4.974365234375e-07, "model_forward_time": 0.026754140853881836, "step": 326 }, { "epoch": 4.974365234375e-07, "step": 326, "training_step_time": 0.10756516456604004 }, { "epoch": 4.9896240234375e-07, "model_forward_time": 0.025522232055664062, "step": 327 }, { "epoch": 4.9896240234375e-07, "step": 327, "training_step_time": 0.10539746284484863 }, { "epoch": 5.0048828125e-07, "model_forward_time": 0.02558135986328125, "step": 328 }, { "epoch": 5.0048828125e-07, "step": 328, "training_step_time": 0.10802626609802246 }, { "epoch": 5.0201416015625e-07, "model_forward_time": 0.024978160858154297, "step": 329 }, { "epoch": 5.0201416015625e-07, "step": 329, "training_step_time": 0.10753297805786133 }, { "epoch": 5.035400390625e-07, "grad_norm": 1.2639737129211426, "learning_rate": 2.2000000000000003e-05, "loss": 1.05, "step": 330 }, { "epoch": 5.035400390625e-07, "model_forward_time": 0.02574777603149414, "step": 330 }, { "epoch": 5.035400390625e-07, "step": 330, "training_step_time": 0.10523700714111328 }, { "epoch": 5.0506591796875e-07, "model_forward_time": 0.025259733200073242, "step": 331 }, { "epoch": 5.0506591796875e-07, "step": 331, "training_step_time": 0.10706496238708496 }, { "epoch": 5.06591796875e-07, "model_forward_time": 0.025692224502563477, "step": 332 }, { "epoch": 5.06591796875e-07, "step": 332, "training_step_time": 0.1057133674621582 }, { "epoch": 5.0811767578125e-07, "model_forward_time": 0.025745868682861328, "step": 333 }, { "epoch": 5.0811767578125e-07, "step": 333, "training_step_time": 0.10755729675292969 }, { "epoch": 5.096435546875e-07, "model_forward_time": 0.024929046630859375, "step": 334 }, { "epoch": 5.096435546875e-07, "step": 334, "training_step_time": 0.10422444343566895 }, { "epoch": 5.1116943359375e-07, "model_forward_time": 0.025197267532348633, "step": 335 }, { "epoch": 5.1116943359375e-07, "step": 335, "training_step_time": 0.10595965385437012 }, { "epoch": 5.126953125e-07, "model_forward_time": 0.02550029754638672, "step": 336 }, { "epoch": 5.126953125e-07, "step": 336, "training_step_time": 0.10481834411621094 }, { "epoch": 5.1422119140625e-07, "model_forward_time": 0.02560877799987793, "step": 337 }, { "epoch": 5.1422119140625e-07, "step": 337, "training_step_time": 0.11575055122375488 }, { "epoch": 5.157470703125e-07, "model_forward_time": 0.02524399757385254, "step": 338 }, { "epoch": 5.157470703125e-07, "step": 338, "training_step_time": 0.1834111213684082 }, { "epoch": 5.1727294921875e-07, "model_forward_time": 0.02512645721435547, "step": 339 }, { "epoch": 5.1727294921875e-07, "step": 339, "training_step_time": 0.12683677673339844 }, { "epoch": 5.18798828125e-07, "grad_norm": 1.6825470924377441, "learning_rate": 2.2666666666666668e-05, "loss": 1.05, "step": 340 }, { "epoch": 5.18798828125e-07, "model_forward_time": 0.025676727294921875, "step": 340 }, { "epoch": 5.18798828125e-07, "step": 340, "training_step_time": 0.14236116409301758 }, { "epoch": 5.2032470703125e-07, "model_forward_time": 0.02499103546142578, "step": 341 }, { "epoch": 5.2032470703125e-07, "step": 341, "training_step_time": 0.18015098571777344 }, { "epoch": 5.218505859375e-07, "model_forward_time": 0.027096033096313477, "step": 342 }, { "epoch": 5.218505859375e-07, "step": 342, "training_step_time": 0.18549227714538574 }, { "epoch": 5.2337646484375e-07, "model_forward_time": 0.024728059768676758, "step": 343 }, { "epoch": 5.2337646484375e-07, "step": 343, "training_step_time": 0.18679380416870117 }, { "epoch": 5.2490234375e-07, "model_forward_time": 0.0243222713470459, "step": 344 }, { "epoch": 5.2490234375e-07, "step": 344, "training_step_time": 0.1028437614440918 }, { "epoch": 5.2642822265625e-07, "model_forward_time": 0.02447366714477539, "step": 345 }, { "epoch": 5.2642822265625e-07, "step": 345, "training_step_time": 0.10272026062011719 }, { "epoch": 5.279541015625e-07, "model_forward_time": 0.025460243225097656, "step": 346 }, { "epoch": 5.279541015625e-07, "step": 346, "training_step_time": 0.10613131523132324 }, { "epoch": 5.2947998046875e-07, "model_forward_time": 0.02540898323059082, "step": 347 }, { "epoch": 5.2947998046875e-07, "step": 347, "training_step_time": 0.10527825355529785 }, { "epoch": 5.31005859375e-07, "model_forward_time": 0.02550983428955078, "step": 348 }, { "epoch": 5.31005859375e-07, "step": 348, "training_step_time": 0.11838769912719727 }, { "epoch": 5.3253173828125e-07, "model_forward_time": 0.025302886962890625, "step": 349 }, { "epoch": 5.3253173828125e-07, "step": 349, "training_step_time": 0.12597060203552246 }, { "epoch": 5.340576171875e-07, "grad_norm": 2.2448384761810303, "learning_rate": 2.3333333333333336e-05, "loss": 1.0383, "step": 350 }, { "epoch": 5.340576171875e-07, "model_forward_time": 0.025449514389038086, "step": 350 }, { "epoch": 5.340576171875e-07, "step": 350, "training_step_time": 0.10931801795959473 }, { "epoch": 5.3558349609375e-07, "model_forward_time": 0.025323152542114258, "step": 351 }, { "epoch": 5.3558349609375e-07, "step": 351, "training_step_time": 0.20163488388061523 }, { "epoch": 5.37109375e-07, "model_forward_time": 0.025542020797729492, "step": 352 }, { "epoch": 5.37109375e-07, "step": 352, "training_step_time": 0.16962862014770508 }, { "epoch": 5.3863525390625e-07, "model_forward_time": 0.025031566619873047, "step": 353 }, { "epoch": 5.3863525390625e-07, "step": 353, "training_step_time": 0.11136651039123535 }, { "epoch": 5.401611328125e-07, "model_forward_time": 0.024865388870239258, "step": 354 }, { "epoch": 5.401611328125e-07, "step": 354, "training_step_time": 0.10259413719177246 }, { "epoch": 5.4168701171875e-07, "model_forward_time": 0.025754451751708984, "step": 355 }, { "epoch": 5.4168701171875e-07, "step": 355, "training_step_time": 0.10812711715698242 }, { "epoch": 5.43212890625e-07, "model_forward_time": 0.025992870330810547, "step": 356 }, { "epoch": 5.43212890625e-07, "step": 356, "training_step_time": 0.10624837875366211 }, { "epoch": 5.4473876953125e-07, "model_forward_time": 0.029392719268798828, "step": 357 }, { "epoch": 5.4473876953125e-07, "step": 357, "training_step_time": 0.16276955604553223 }, { "epoch": 5.462646484375e-07, "model_forward_time": 0.02474355697631836, "step": 358 }, { "epoch": 5.462646484375e-07, "step": 358, "training_step_time": 0.16207194328308105 }, { "epoch": 5.4779052734375e-07, "model_forward_time": 0.02451801300048828, "step": 359 }, { "epoch": 5.4779052734375e-07, "step": 359, "training_step_time": 0.10290765762329102 }, { "epoch": 5.4931640625e-07, "grad_norm": 3.320141315460205, "learning_rate": 2.4e-05, "loss": 0.971, "step": 360 }, { "epoch": 5.4931640625e-07, "model_forward_time": 0.025188207626342773, "step": 360 }, { "epoch": 5.4931640625e-07, "step": 360, "training_step_time": 0.20246171951293945 }, { "epoch": 5.5084228515625e-07, "model_forward_time": 0.025125980377197266, "step": 361 }, { "epoch": 5.5084228515625e-07, "step": 361, "training_step_time": 0.11040949821472168 }, { "epoch": 5.523681640625e-07, "model_forward_time": 0.024952173233032227, "step": 362 }, { "epoch": 5.523681640625e-07, "step": 362, "training_step_time": 0.10251593589782715 }, { "epoch": 5.5389404296875e-07, "model_forward_time": 0.025482654571533203, "step": 363 }, { "epoch": 5.5389404296875e-07, "step": 363, "training_step_time": 0.10642600059509277 }, { "epoch": 5.55419921875e-07, "model_forward_time": 0.026999473571777344, "step": 364 }, { "epoch": 5.55419921875e-07, "step": 364, "training_step_time": 0.10745072364807129 }, { "epoch": 5.5694580078125e-07, "model_forward_time": 0.02547168731689453, "step": 365 }, { "epoch": 5.5694580078125e-07, "step": 365, "training_step_time": 0.10812687873840332 }, { "epoch": 5.584716796875e-07, "model_forward_time": 0.025760173797607422, "step": 366 }, { "epoch": 5.584716796875e-07, "step": 366, "training_step_time": 0.1073451042175293 }, { "epoch": 5.5999755859375e-07, "model_forward_time": 0.025269508361816406, "step": 367 }, { "epoch": 5.5999755859375e-07, "step": 367, "training_step_time": 0.1066281795501709 }, { "epoch": 5.615234375e-07, "model_forward_time": 0.025445938110351562, "step": 368 }, { "epoch": 5.615234375e-07, "step": 368, "training_step_time": 0.10691142082214355 }, { "epoch": 5.6304931640625e-07, "model_forward_time": 0.025239229202270508, "step": 369 }, { "epoch": 5.6304931640625e-07, "step": 369, "training_step_time": 0.11104178428649902 }, { "epoch": 5.645751953125e-07, "grad_norm": 2.2641561031341553, "learning_rate": 2.466666666666667e-05, "loss": 0.9609, "step": 370 }, { "epoch": 5.645751953125e-07, "model_forward_time": 0.025769710540771484, "step": 370 }, { "epoch": 5.645751953125e-07, "step": 370, "training_step_time": 0.11025595664978027 }, { "epoch": 5.6610107421875e-07, "model_forward_time": 0.025458097457885742, "step": 371 }, { "epoch": 5.6610107421875e-07, "step": 371, "training_step_time": 0.10838937759399414 }, { "epoch": 5.67626953125e-07, "model_forward_time": 0.025336265563964844, "step": 372 }, { "epoch": 5.67626953125e-07, "step": 372, "training_step_time": 0.10942554473876953 }, { "epoch": 5.6915283203125e-07, "model_forward_time": 0.026434898376464844, "step": 373 }, { "epoch": 5.6915283203125e-07, "step": 373, "training_step_time": 0.10899901390075684 }, { "epoch": 5.706787109375e-07, "model_forward_time": 0.02594923973083496, "step": 374 }, { "epoch": 5.706787109375e-07, "step": 374, "training_step_time": 0.1111440658569336 }, { "epoch": 5.7220458984375e-07, "model_forward_time": 0.025658845901489258, "step": 375 }, { "epoch": 5.7220458984375e-07, "step": 375, "training_step_time": 0.1086115837097168 }, { "epoch": 5.7373046875e-07, "model_forward_time": 0.025714874267578125, "step": 376 }, { "epoch": 5.7373046875e-07, "step": 376, "training_step_time": 0.11087250709533691 }, { "epoch": 5.7525634765625e-07, "model_forward_time": 0.02591109275817871, "step": 377 }, { "epoch": 5.7525634765625e-07, "step": 377, "training_step_time": 0.11223721504211426 }, { "epoch": 5.767822265625e-07, "model_forward_time": 0.026454687118530273, "step": 378 }, { "epoch": 5.767822265625e-07, "step": 378, "training_step_time": 0.10787200927734375 }, { "epoch": 5.7830810546875e-07, "model_forward_time": 0.025312423706054688, "step": 379 }, { "epoch": 5.7830810546875e-07, "step": 379, "training_step_time": 0.11315321922302246 }, { "epoch": 5.79833984375e-07, "grad_norm": 3.570995330810547, "learning_rate": 2.5333333333333337e-05, "loss": 0.8392, "step": 380 }, { "epoch": 5.79833984375e-07, "model_forward_time": 0.02517867088317871, "step": 380 }, { "epoch": 5.79833984375e-07, "step": 380, "training_step_time": 0.10422563552856445 }, { "epoch": 5.8135986328125e-07, "model_forward_time": 0.0268707275390625, "step": 381 }, { "epoch": 5.8135986328125e-07, "step": 381, "training_step_time": 0.14196085929870605 }, { "epoch": 5.828857421875e-07, "model_forward_time": 0.025556564331054688, "step": 382 }, { "epoch": 5.828857421875e-07, "step": 382, "training_step_time": 0.10823917388916016 }, { "epoch": 5.8441162109375e-07, "model_forward_time": 0.025725364685058594, "step": 383 }, { "epoch": 5.8441162109375e-07, "step": 383, "training_step_time": 0.19945335388183594 }, { "epoch": 5.859375e-07, "model_forward_time": 0.024982213973999023, "step": 384 }, { "epoch": 5.859375e-07, "step": 384, "training_step_time": 0.14513134956359863 }, { "epoch": 5.8746337890625e-07, "model_forward_time": 0.025201797485351562, "step": 385 }, { "epoch": 5.8746337890625e-07, "step": 385, "training_step_time": 0.1823711395263672 }, { "epoch": 5.889892578125e-07, "model_forward_time": 0.0253298282623291, "step": 386 }, { "epoch": 5.889892578125e-07, "step": 386, "training_step_time": 0.10529184341430664 }, { "epoch": 5.9051513671875e-07, "model_forward_time": 0.024760007858276367, "step": 387 }, { "epoch": 5.9051513671875e-07, "step": 387, "training_step_time": 0.10816597938537598 }, { "epoch": 5.92041015625e-07, "model_forward_time": 0.025652170181274414, "step": 388 }, { "epoch": 5.92041015625e-07, "step": 388, "training_step_time": 0.10868072509765625 }, { "epoch": 5.9356689453125e-07, "model_forward_time": 0.02540111541748047, "step": 389 }, { "epoch": 5.9356689453125e-07, "step": 389, "training_step_time": 0.11518192291259766 }, { "epoch": 5.950927734375e-07, "grad_norm": 3.672109365463257, "learning_rate": 2.6000000000000002e-05, "loss": 0.7457, "step": 390 }, { "epoch": 5.950927734375e-07, "model_forward_time": 0.02523350715637207, "step": 390 }, { "epoch": 5.950927734375e-07, "step": 390, "training_step_time": 0.10444092750549316 }, { "epoch": 5.9661865234375e-07, "model_forward_time": 0.02541184425354004, "step": 391 }, { "epoch": 5.9661865234375e-07, "step": 391, "training_step_time": 0.1040034294128418 }, { "epoch": 5.9814453125e-07, "model_forward_time": 0.025331974029541016, "step": 392 }, { "epoch": 5.9814453125e-07, "step": 392, "training_step_time": 0.1037437915802002 }, { "epoch": 5.9967041015625e-07, "model_forward_time": 0.02550196647644043, "step": 393 }, { "epoch": 5.9967041015625e-07, "step": 393, "training_step_time": 0.15365886688232422 }, { "epoch": 6.011962890625e-07, "model_forward_time": 0.02547001838684082, "step": 394 }, { "epoch": 6.011962890625e-07, "step": 394, "training_step_time": 0.11852598190307617 }, { "epoch": 6.0272216796875e-07, "model_forward_time": 0.027571678161621094, "step": 395 }, { "epoch": 6.0272216796875e-07, "step": 395, "training_step_time": 0.11009788513183594 }, { "epoch": 6.04248046875e-07, "model_forward_time": 0.025635480880737305, "step": 396 }, { "epoch": 6.04248046875e-07, "step": 396, "training_step_time": 0.10553550720214844 }, { "epoch": 6.0577392578125e-07, "model_forward_time": 0.025054931640625, "step": 397 }, { "epoch": 6.0577392578125e-07, "step": 397, "training_step_time": 0.10789656639099121 }, { "epoch": 6.072998046875e-07, "model_forward_time": 0.02522754669189453, "step": 398 }, { "epoch": 6.072998046875e-07, "step": 398, "training_step_time": 0.16695785522460938 }, { "epoch": 6.0882568359375e-07, "model_forward_time": 0.02484440803527832, "step": 399 }, { "epoch": 6.0882568359375e-07, "step": 399, "training_step_time": 0.10576224327087402 }, { "epoch": 6.103515625e-07, "grad_norm": 4.7100019454956055, "learning_rate": 2.6666666666666667e-05, "loss": 0.6335, "step": 400 }, { "epoch": 6.103515625e-07, "model_forward_time": 0.025056123733520508, "step": 400 }, { "epoch": 6.103515625e-07, "step": 400, "training_step_time": 0.1040499210357666 }, { "epoch": 6.1187744140625e-07, "model_forward_time": 0.0249481201171875, "step": 401 }, { "epoch": 6.1187744140625e-07, "step": 401, "training_step_time": 0.10461187362670898 }, { "epoch": 6.134033203125e-07, "model_forward_time": 0.025114059448242188, "step": 402 }, { "epoch": 6.134033203125e-07, "step": 402, "training_step_time": 0.10811042785644531 }, { "epoch": 6.1492919921875e-07, "model_forward_time": 0.025277137756347656, "step": 403 }, { "epoch": 6.1492919921875e-07, "step": 403, "training_step_time": 0.19456076622009277 }, { "epoch": 6.16455078125e-07, "model_forward_time": 0.025078535079956055, "step": 404 }, { "epoch": 6.16455078125e-07, "step": 404, "training_step_time": 0.1071159839630127 }, { "epoch": 6.1798095703125e-07, "model_forward_time": 0.024881839752197266, "step": 405 }, { "epoch": 6.1798095703125e-07, "step": 405, "training_step_time": 0.10774731636047363 }, { "epoch": 6.195068359375e-07, "model_forward_time": 0.025405406951904297, "step": 406 }, { "epoch": 6.195068359375e-07, "step": 406, "training_step_time": 0.10582637786865234 }, { "epoch": 6.2103271484375e-07, "model_forward_time": 0.02497267723083496, "step": 407 }, { "epoch": 6.2103271484375e-07, "step": 407, "training_step_time": 0.20045137405395508 }, { "epoch": 6.2255859375e-07, "model_forward_time": 0.024927377700805664, "step": 408 }, { "epoch": 6.2255859375e-07, "step": 408, "training_step_time": 0.10573315620422363 }, { "epoch": 6.2408447265625e-07, "model_forward_time": 0.02510809898376465, "step": 409 }, { "epoch": 6.2408447265625e-07, "step": 409, "training_step_time": 0.10335302352905273 }, { "epoch": 6.256103515625e-07, "grad_norm": 3.612847089767456, "learning_rate": 2.733333333333333e-05, "loss": 0.524, "step": 410 }, { "epoch": 6.256103515625e-07, "model_forward_time": 0.025872468948364258, "step": 410 }, { "epoch": 6.256103515625e-07, "step": 410, "training_step_time": 0.10776209831237793 }, { "epoch": 6.2713623046875e-07, "model_forward_time": 0.025758743286132812, "step": 411 }, { "epoch": 6.2713623046875e-07, "step": 411, "training_step_time": 0.10558581352233887 }, { "epoch": 6.28662109375e-07, "model_forward_time": 0.02567291259765625, "step": 412 }, { "epoch": 6.28662109375e-07, "step": 412, "training_step_time": 0.1086130142211914 }, { "epoch": 6.3018798828125e-07, "model_forward_time": 0.025747060775756836, "step": 413 }, { "epoch": 6.3018798828125e-07, "step": 413, "training_step_time": 0.11069107055664062 }, { "epoch": 6.317138671875e-07, "model_forward_time": 0.02549123764038086, "step": 414 }, { "epoch": 6.317138671875e-07, "step": 414, "training_step_time": 0.10621237754821777 }, { "epoch": 6.3323974609375e-07, "model_forward_time": 0.02505326271057129, "step": 415 }, { "epoch": 6.3323974609375e-07, "step": 415, "training_step_time": 0.10746026039123535 }, { "epoch": 6.34765625e-07, "model_forward_time": 0.025353193283081055, "step": 416 }, { "epoch": 6.34765625e-07, "step": 416, "training_step_time": 0.10498046875 }, { "epoch": 6.3629150390625e-07, "model_forward_time": 0.026362180709838867, "step": 417 }, { "epoch": 6.3629150390625e-07, "step": 417, "training_step_time": 0.10766363143920898 }, { "epoch": 6.378173828125e-07, "model_forward_time": 0.025040388107299805, "step": 418 }, { "epoch": 6.378173828125e-07, "step": 418, "training_step_time": 0.10718846321105957 }, { "epoch": 6.3934326171875e-07, "model_forward_time": 0.025144577026367188, "step": 419 }, { "epoch": 6.3934326171875e-07, "step": 419, "training_step_time": 0.10942196846008301 }, { "epoch": 6.40869140625e-07, "grad_norm": 4.447332859039307, "learning_rate": 2.8000000000000003e-05, "loss": 0.4736, "step": 420 }, { "epoch": 6.40869140625e-07, "model_forward_time": 0.025609254837036133, "step": 420 }, { "epoch": 6.40869140625e-07, "step": 420, "training_step_time": 0.11010408401489258 }, { "epoch": 6.4239501953125e-07, "model_forward_time": 0.02518439292907715, "step": 421 }, { "epoch": 6.4239501953125e-07, "step": 421, "training_step_time": 0.10423970222473145 }, { "epoch": 6.439208984375e-07, "model_forward_time": 0.0253753662109375, "step": 422 }, { "epoch": 6.439208984375e-07, "step": 422, "training_step_time": 0.10927891731262207 }, { "epoch": 6.4544677734375e-07, "model_forward_time": 0.025458335876464844, "step": 423 }, { "epoch": 6.4544677734375e-07, "step": 423, "training_step_time": 0.10473871231079102 }, { "epoch": 6.4697265625e-07, "model_forward_time": 0.0249636173248291, "step": 424 }, { "epoch": 6.4697265625e-07, "step": 424, "training_step_time": 0.10613298416137695 }, { "epoch": 6.4849853515625e-07, "model_forward_time": 0.025556564331054688, "step": 425 }, { "epoch": 6.4849853515625e-07, "step": 425, "training_step_time": 0.10716867446899414 }, { "epoch": 6.500244140625e-07, "model_forward_time": 0.0252993106842041, "step": 426 }, { "epoch": 6.500244140625e-07, "step": 426, "training_step_time": 0.10672450065612793 }, { "epoch": 6.5155029296875e-07, "model_forward_time": 0.025406837463378906, "step": 427 }, { "epoch": 6.5155029296875e-07, "step": 427, "training_step_time": 0.10779905319213867 }, { "epoch": 6.53076171875e-07, "model_forward_time": 0.025732994079589844, "step": 428 }, { "epoch": 6.53076171875e-07, "step": 428, "training_step_time": 0.11041879653930664 }, { "epoch": 6.5460205078125e-07, "model_forward_time": 0.025339603424072266, "step": 429 }, { "epoch": 6.5460205078125e-07, "step": 429, "training_step_time": 0.10991930961608887 }, { "epoch": 6.561279296875e-07, "grad_norm": 5.554104328155518, "learning_rate": 2.8666666666666668e-05, "loss": 0.4451, "step": 430 }, { "epoch": 6.561279296875e-07, "model_forward_time": 0.02515697479248047, "step": 430 }, { "epoch": 6.561279296875e-07, "step": 430, "training_step_time": 0.17908143997192383 }, { "epoch": 6.5765380859375e-07, "model_forward_time": 0.02454686164855957, "step": 431 }, { "epoch": 6.5765380859375e-07, "step": 431, "training_step_time": 0.1193380355834961 }, { "epoch": 6.591796875e-07, "model_forward_time": 0.024424076080322266, "step": 432 }, { "epoch": 6.591796875e-07, "step": 432, "training_step_time": 0.11492109298706055 }, { "epoch": 6.6070556640625e-07, "model_forward_time": 0.025267362594604492, "step": 433 }, { "epoch": 6.6070556640625e-07, "step": 433, "training_step_time": 0.21300530433654785 }, { "epoch": 6.622314453125e-07, "model_forward_time": 0.024451732635498047, "step": 434 }, { "epoch": 6.622314453125e-07, "step": 434, "training_step_time": 0.10646843910217285 }, { "epoch": 6.6375732421875e-07, "model_forward_time": 0.024306058883666992, "step": 435 }, { "epoch": 6.6375732421875e-07, "step": 435, "training_step_time": 0.10809659957885742 }, { "epoch": 6.65283203125e-07, "model_forward_time": 0.025240421295166016, "step": 436 }, { "epoch": 6.65283203125e-07, "step": 436, "training_step_time": 0.10815548896789551 }, { "epoch": 6.6680908203125e-07, "model_forward_time": 0.025450706481933594, "step": 437 }, { "epoch": 6.6680908203125e-07, "step": 437, "training_step_time": 0.10820126533508301 }, { "epoch": 6.683349609375e-07, "model_forward_time": 0.026177406311035156, "step": 438 }, { "epoch": 6.683349609375e-07, "step": 438, "training_step_time": 0.10497260093688965 }, { "epoch": 6.6986083984375e-07, "model_forward_time": 0.025262832641601562, "step": 439 }, { "epoch": 6.6986083984375e-07, "step": 439, "training_step_time": 0.10571670532226562 }, { "epoch": 6.7138671875e-07, "grad_norm": 4.159182548522949, "learning_rate": 2.9333333333333336e-05, "loss": 0.3914, "step": 440 }, { "epoch": 6.7138671875e-07, "model_forward_time": 0.024928808212280273, "step": 440 }, { "epoch": 6.7138671875e-07, "step": 440, "training_step_time": 0.10728621482849121 }, { "epoch": 6.7291259765625e-07, "model_forward_time": 0.025130271911621094, "step": 441 }, { "epoch": 6.7291259765625e-07, "step": 441, "training_step_time": 0.11249804496765137 }, { "epoch": 6.744384765625e-07, "model_forward_time": 0.025495052337646484, "step": 442 }, { "epoch": 6.744384765625e-07, "step": 442, "training_step_time": 0.12575984001159668 }, { "epoch": 6.7596435546875e-07, "model_forward_time": 0.025611400604248047, "step": 443 }, { "epoch": 6.7596435546875e-07, "step": 443, "training_step_time": 0.1971604824066162 }, { "epoch": 6.77490234375e-07, "model_forward_time": 0.02467823028564453, "step": 444 }, { "epoch": 6.77490234375e-07, "step": 444, "training_step_time": 0.10952949523925781 }, { "epoch": 6.7901611328125e-07, "model_forward_time": 0.024302959442138672, "step": 445 }, { "epoch": 6.7901611328125e-07, "step": 445, "training_step_time": 0.1618640422821045 }, { "epoch": 6.805419921875e-07, "model_forward_time": 0.024413585662841797, "step": 446 }, { "epoch": 6.805419921875e-07, "step": 446, "training_step_time": 0.10695552825927734 }, { "epoch": 6.8206787109375e-07, "model_forward_time": 0.024196863174438477, "step": 447 }, { "epoch": 6.8206787109375e-07, "step": 447, "training_step_time": 0.10132122039794922 }, { "epoch": 6.8359375e-07, "model_forward_time": 0.02486586570739746, "step": 448 }, { "epoch": 6.8359375e-07, "step": 448, "training_step_time": 0.10365796089172363 }, { "epoch": 6.8511962890625e-07, "model_forward_time": 0.025591373443603516, "step": 449 }, { "epoch": 6.8511962890625e-07, "step": 449, "training_step_time": 0.1045384407043457 }, { "epoch": 6.866455078125e-07, "grad_norm": 3.1207408905029297, "learning_rate": 3e-05, "loss": 0.3128, "step": 450 }, { "epoch": 6.866455078125e-07, "model_forward_time": 0.024986743927001953, "step": 450 }, { "epoch": 6.866455078125e-07, "step": 450, "training_step_time": 0.1294558048248291 }, { "epoch": 6.8817138671875e-07, "model_forward_time": 0.024494409561157227, "step": 451 }, { "epoch": 6.8817138671875e-07, "step": 451, "training_step_time": 0.12233781814575195 }, { "epoch": 6.89697265625e-07, "model_forward_time": 0.025536537170410156, "step": 452 }, { "epoch": 6.89697265625e-07, "step": 452, "training_step_time": 0.1077566146850586 }, { "epoch": 6.9122314453125e-07, "model_forward_time": 0.025454044342041016, "step": 453 }, { "epoch": 6.9122314453125e-07, "step": 453, "training_step_time": 0.10560798645019531 }, { "epoch": 6.927490234375e-07, "model_forward_time": 0.02541351318359375, "step": 454 }, { "epoch": 6.927490234375e-07, "step": 454, "training_step_time": 0.19307446479797363 }, { "epoch": 6.9427490234375e-07, "model_forward_time": 0.024988174438476562, "step": 455 }, { "epoch": 6.9427490234375e-07, "step": 455, "training_step_time": 0.1047065258026123 }, { "epoch": 6.9580078125e-07, "model_forward_time": 0.024570703506469727, "step": 456 }, { "epoch": 6.9580078125e-07, "step": 456, "training_step_time": 0.10119986534118652 }, { "epoch": 6.9732666015625e-07, "model_forward_time": 0.024893760681152344, "step": 457 }, { "epoch": 6.9732666015625e-07, "step": 457, "training_step_time": 0.1074984073638916 }, { "epoch": 6.988525390625e-07, "model_forward_time": 0.02536916732788086, "step": 458 }, { "epoch": 6.988525390625e-07, "step": 458, "training_step_time": 0.11094975471496582 }, { "epoch": 7.0037841796875e-07, "model_forward_time": 0.024385929107666016, "step": 459 }, { "epoch": 7.0037841796875e-07, "step": 459, "training_step_time": 0.10678339004516602 }, { "epoch": 7.01904296875e-07, "grad_norm": 1.8441038131713867, "learning_rate": 3.066666666666667e-05, "loss": 0.2592, "step": 460 }, { "epoch": 7.01904296875e-07, "model_forward_time": 0.024442672729492188, "step": 460 }, { "epoch": 7.01904296875e-07, "step": 460, "training_step_time": 0.11397433280944824 }, { "epoch": 7.0343017578125e-07, "model_forward_time": 0.02572154998779297, "step": 461 }, { "epoch": 7.0343017578125e-07, "step": 461, "training_step_time": 0.10576844215393066 }, { "epoch": 7.049560546875e-07, "model_forward_time": 0.025360107421875, "step": 462 }, { "epoch": 7.049560546875e-07, "step": 462, "training_step_time": 0.10760259628295898 }, { "epoch": 7.0648193359375e-07, "model_forward_time": 0.025515317916870117, "step": 463 }, { "epoch": 7.0648193359375e-07, "step": 463, "training_step_time": 0.10414266586303711 }, { "epoch": 7.080078125e-07, "model_forward_time": 0.02516627311706543, "step": 464 }, { "epoch": 7.080078125e-07, "step": 464, "training_step_time": 0.1067345142364502 }, { "epoch": 7.0953369140625e-07, "model_forward_time": 0.025510787963867188, "step": 465 }, { "epoch": 7.0953369140625e-07, "step": 465, "training_step_time": 0.10615801811218262 }, { "epoch": 7.110595703125e-07, "model_forward_time": 0.02575373649597168, "step": 466 }, { "epoch": 7.110595703125e-07, "step": 466, "training_step_time": 0.11486959457397461 }, { "epoch": 7.1258544921875e-07, "model_forward_time": 0.02523636817932129, "step": 467 }, { "epoch": 7.1258544921875e-07, "step": 467, "training_step_time": 0.10658001899719238 }, { "epoch": 7.14111328125e-07, "model_forward_time": 0.025739192962646484, "step": 468 }, { "epoch": 7.14111328125e-07, "step": 468, "training_step_time": 0.10756826400756836 }, { "epoch": 7.1563720703125e-07, "model_forward_time": 0.025443553924560547, "step": 469 }, { "epoch": 7.1563720703125e-07, "step": 469, "training_step_time": 0.1066594123840332 }, { "epoch": 7.171630859375e-07, "grad_norm": 2.409151792526245, "learning_rate": 3.1333333333333334e-05, "loss": 0.235, "step": 470 }, { "epoch": 7.171630859375e-07, "model_forward_time": 0.025129079818725586, "step": 470 }, { "epoch": 7.171630859375e-07, "step": 470, "training_step_time": 0.1112515926361084 }, { "epoch": 7.1868896484375e-07, "model_forward_time": 0.025244951248168945, "step": 471 }, { "epoch": 7.1868896484375e-07, "step": 471, "training_step_time": 0.10385394096374512 }, { "epoch": 7.2021484375e-07, "model_forward_time": 0.02536773681640625, "step": 472 }, { "epoch": 7.2021484375e-07, "step": 472, "training_step_time": 0.10448050498962402 }, { "epoch": 7.2174072265625e-07, "model_forward_time": 0.025127649307250977, "step": 473 }, { "epoch": 7.2174072265625e-07, "step": 473, "training_step_time": 0.10458064079284668 }, { "epoch": 7.232666015625e-07, "model_forward_time": 0.0251767635345459, "step": 474 }, { "epoch": 7.232666015625e-07, "step": 474, "training_step_time": 0.10782837867736816 }, { "epoch": 7.2479248046875e-07, "model_forward_time": 0.0252840518951416, "step": 475 }, { "epoch": 7.2479248046875e-07, "step": 475, "training_step_time": 0.10557746887207031 }, { "epoch": 7.26318359375e-07, "model_forward_time": 0.025723934173583984, "step": 476 }, { "epoch": 7.26318359375e-07, "step": 476, "training_step_time": 0.13434576988220215 }, { "epoch": 7.2784423828125e-07, "model_forward_time": 0.026075124740600586, "step": 477 }, { "epoch": 7.2784423828125e-07, "step": 477, "training_step_time": 0.10866594314575195 }, { "epoch": 7.293701171875e-07, "model_forward_time": 0.025175094604492188, "step": 478 }, { "epoch": 7.293701171875e-07, "step": 478, "training_step_time": 0.19896578788757324 }, { "epoch": 7.3089599609375e-07, "model_forward_time": 0.0244600772857666, "step": 479 }, { "epoch": 7.3089599609375e-07, "step": 479, "training_step_time": 0.1251230239868164 }, { "epoch": 7.32421875e-07, "grad_norm": 2.1561572551727295, "learning_rate": 3.2000000000000005e-05, "loss": 0.2503, "step": 480 }, { "epoch": 7.32421875e-07, "model_forward_time": 0.02434682846069336, "step": 480 }, { "epoch": 7.32421875e-07, "step": 480, "training_step_time": 0.19761943817138672 }, { "epoch": 7.3394775390625e-07, "model_forward_time": 0.027183055877685547, "step": 481 }, { "epoch": 7.3394775390625e-07, "step": 481, "training_step_time": 0.1759166717529297 }, { "epoch": 7.354736328125e-07, "model_forward_time": 0.024228334426879883, "step": 482 }, { "epoch": 7.354736328125e-07, "step": 482, "training_step_time": 0.12036585807800293 }, { "epoch": 7.3699951171875e-07, "model_forward_time": 0.024077892303466797, "step": 483 }, { "epoch": 7.3699951171875e-07, "step": 483, "training_step_time": 0.11739826202392578 }, { "epoch": 7.38525390625e-07, "model_forward_time": 0.025698184967041016, "step": 484 }, { "epoch": 7.38525390625e-07, "step": 484, "training_step_time": 0.10518527030944824 }, { "epoch": 7.4005126953125e-07, "model_forward_time": 0.025905132293701172, "step": 485 }, { "epoch": 7.4005126953125e-07, "step": 485, "training_step_time": 0.1078939437866211 }, { "epoch": 7.415771484375e-07, "model_forward_time": 0.02554917335510254, "step": 486 }, { "epoch": 7.415771484375e-07, "step": 486, "training_step_time": 0.11050152778625488 }, { "epoch": 7.4310302734375e-07, "model_forward_time": 0.025230884552001953, "step": 487 }, { "epoch": 7.4310302734375e-07, "step": 487, "training_step_time": 0.11203527450561523 }, { "epoch": 7.4462890625e-07, "model_forward_time": 0.02565479278564453, "step": 488 }, { "epoch": 7.4462890625e-07, "step": 488, "training_step_time": 0.10901045799255371 }, { "epoch": 7.4615478515625e-07, "model_forward_time": 0.025597333908081055, "step": 489 }, { "epoch": 7.4615478515625e-07, "step": 489, "training_step_time": 0.10728049278259277 }, { "epoch": 7.476806640625e-07, "grad_norm": 2.016129493713379, "learning_rate": 3.266666666666667e-05, "loss": 0.263, "step": 490 }, { "epoch": 7.476806640625e-07, "model_forward_time": 0.025243043899536133, "step": 490 }, { "epoch": 7.476806640625e-07, "step": 490, "training_step_time": 0.19534730911254883 }, { "epoch": 7.4920654296875e-07, "model_forward_time": 0.02427816390991211, "step": 491 }, { "epoch": 7.4920654296875e-07, "step": 491, "training_step_time": 0.10170364379882812 }, { "epoch": 7.50732421875e-07, "model_forward_time": 0.024798870086669922, "step": 492 }, { "epoch": 7.50732421875e-07, "step": 492, "training_step_time": 0.18979692459106445 }, { "epoch": 7.5225830078125e-07, "model_forward_time": 0.024420499801635742, "step": 493 }, { "epoch": 7.5225830078125e-07, "step": 493, "training_step_time": 0.10238790512084961 }, { "epoch": 7.537841796875e-07, "model_forward_time": 0.024649620056152344, "step": 494 }, { "epoch": 7.537841796875e-07, "step": 494, "training_step_time": 0.11126470565795898 }, { "epoch": 7.5531005859375e-07, "model_forward_time": 0.025481700897216797, "step": 495 }, { "epoch": 7.5531005859375e-07, "step": 495, "training_step_time": 0.10887026786804199 }, { "epoch": 7.568359375e-07, "model_forward_time": 0.026036739349365234, "step": 496 }, { "epoch": 7.568359375e-07, "step": 496, "training_step_time": 0.10697698593139648 }, { "epoch": 7.5836181640625e-07, "model_forward_time": 0.02568531036376953, "step": 497 }, { "epoch": 7.5836181640625e-07, "step": 497, "training_step_time": 0.20721054077148438 }, { "epoch": 7.598876953125e-07, "model_forward_time": 0.02572941780090332, "step": 498 }, { "epoch": 7.598876953125e-07, "step": 498, "training_step_time": 0.1076967716217041 }, { "epoch": 7.6141357421875e-07, "model_forward_time": 0.024526357650756836, "step": 499 }, { "epoch": 7.6141357421875e-07, "step": 499, "training_step_time": 0.10853838920593262 }, { "epoch": 7.62939453125e-07, "grad_norm": 3.2406880855560303, "learning_rate": 3.3333333333333335e-05, "loss": 0.2272, "step": 500 }, { "epoch": 7.62939453125e-07, "model_forward_time": 0.025294780731201172, "step": 500 }, { "epoch": 7.62939453125e-07, "step": 500, "training_step_time": 0.19763755798339844 }, { "epoch": 7.6446533203125e-07, "model_forward_time": 0.02438831329345703, "step": 501 }, { "epoch": 7.6446533203125e-07, "step": 501, "training_step_time": 0.1108393669128418 }, { "epoch": 7.659912109375e-07, "model_forward_time": 0.024283170700073242, "step": 502 }, { "epoch": 7.659912109375e-07, "step": 502, "training_step_time": 0.10459184646606445 }, { "epoch": 7.6751708984375e-07, "model_forward_time": 0.025375843048095703, "step": 503 }, { "epoch": 7.6751708984375e-07, "step": 503, "training_step_time": 0.10614657402038574 }, { "epoch": 7.6904296875e-07, "model_forward_time": 0.025356292724609375, "step": 504 }, { "epoch": 7.6904296875e-07, "step": 504, "training_step_time": 0.10770487785339355 }, { "epoch": 7.7056884765625e-07, "model_forward_time": 0.025294780731201172, "step": 505 }, { "epoch": 7.7056884765625e-07, "step": 505, "training_step_time": 0.10694241523742676 }, { "epoch": 7.720947265625e-07, "model_forward_time": 0.02536463737487793, "step": 506 }, { "epoch": 7.720947265625e-07, "step": 506, "training_step_time": 0.10753917694091797 }, { "epoch": 7.7362060546875e-07, "model_forward_time": 0.025621414184570312, "step": 507 }, { "epoch": 7.7362060546875e-07, "step": 507, "training_step_time": 0.11487579345703125 }, { "epoch": 7.75146484375e-07, "model_forward_time": 0.025267839431762695, "step": 508 }, { "epoch": 7.75146484375e-07, "step": 508, "training_step_time": 0.10576748847961426 }, { "epoch": 7.7667236328125e-07, "model_forward_time": 0.025400638580322266, "step": 509 }, { "epoch": 7.7667236328125e-07, "step": 509, "training_step_time": 0.10762190818786621 }, { "epoch": 7.781982421875e-07, "grad_norm": 2.047429084777832, "learning_rate": 3.4000000000000007e-05, "loss": 0.284, "step": 510 }, { "epoch": 7.781982421875e-07, "model_forward_time": 0.025446176528930664, "step": 510 }, { "epoch": 7.781982421875e-07, "step": 510, "training_step_time": 0.11067008972167969 }, { "epoch": 7.7972412109375e-07, "model_forward_time": 0.030070781707763672, "step": 511 }, { "epoch": 7.7972412109375e-07, "step": 511, "training_step_time": 0.12609171867370605 }, { "epoch": 7.8125e-07, "model_forward_time": 0.025276660919189453, "step": 512 }, { "epoch": 7.8125e-07, "step": 512, "training_step_time": 0.10894942283630371 }, { "epoch": 7.8277587890625e-07, "model_forward_time": 0.02492499351501465, "step": 513 }, { "epoch": 7.8277587890625e-07, "step": 513, "training_step_time": 0.10774707794189453 }, { "epoch": 7.843017578125e-07, "model_forward_time": 0.025327444076538086, "step": 514 }, { "epoch": 7.843017578125e-07, "step": 514, "training_step_time": 0.10639381408691406 }, { "epoch": 7.8582763671875e-07, "model_forward_time": 0.025114059448242188, "step": 515 }, { "epoch": 7.8582763671875e-07, "step": 515, "training_step_time": 0.11034345626831055 }, { "epoch": 7.87353515625e-07, "model_forward_time": 0.025187969207763672, "step": 516 }, { "epoch": 7.87353515625e-07, "step": 516, "training_step_time": 0.10793042182922363 }, { "epoch": 7.8887939453125e-07, "model_forward_time": 0.025238752365112305, "step": 517 }, { "epoch": 7.8887939453125e-07, "step": 517, "training_step_time": 0.10804891586303711 }, { "epoch": 7.904052734375e-07, "model_forward_time": 0.025426626205444336, "step": 518 }, { "epoch": 7.904052734375e-07, "step": 518, "training_step_time": 0.10785818099975586 }, { "epoch": 7.9193115234375e-07, "model_forward_time": 0.025222063064575195, "step": 519 }, { "epoch": 7.9193115234375e-07, "step": 519, "training_step_time": 0.11051344871520996 }, { "epoch": 7.9345703125e-07, "grad_norm": 3.525778293609619, "learning_rate": 3.466666666666667e-05, "loss": 0.2519, "step": 520 }, { "epoch": 7.9345703125e-07, "model_forward_time": 0.02473926544189453, "step": 520 }, { "epoch": 7.9345703125e-07, "step": 520, "training_step_time": 0.10759711265563965 }, { "epoch": 7.9498291015625e-07, "model_forward_time": 0.02552938461303711, "step": 521 }, { "epoch": 7.9498291015625e-07, "step": 521, "training_step_time": 0.13205361366271973 }, { "epoch": 7.965087890625e-07, "model_forward_time": 0.025357484817504883, "step": 522 }, { "epoch": 7.965087890625e-07, "step": 522, "training_step_time": 0.10655045509338379 }, { "epoch": 7.9803466796875e-07, "model_forward_time": 0.025282859802246094, "step": 523 }, { "epoch": 7.9803466796875e-07, "step": 523, "training_step_time": 0.20685696601867676 }, { "epoch": 7.99560546875e-07, "model_forward_time": 0.02485370635986328, "step": 524 }, { "epoch": 7.99560546875e-07, "step": 524, "training_step_time": 0.12297701835632324 }, { "epoch": 8.0108642578125e-07, "model_forward_time": 0.024420738220214844, "step": 525 }, { "epoch": 8.0108642578125e-07, "step": 525, "training_step_time": 0.1862623691558838 }, { "epoch": 8.026123046875e-07, "model_forward_time": 0.028551578521728516, "step": 526 }, { "epoch": 8.026123046875e-07, "step": 526, "training_step_time": 0.11381125450134277 }, { "epoch": 8.0413818359375e-07, "model_forward_time": 0.025136470794677734, "step": 527 }, { "epoch": 8.0413818359375e-07, "step": 527, "training_step_time": 0.11017727851867676 }, { "epoch": 8.056640625e-07, "model_forward_time": 0.025244474411010742, "step": 528 }, { "epoch": 8.056640625e-07, "step": 528, "training_step_time": 0.19591426849365234 }, { "epoch": 8.0718994140625e-07, "model_forward_time": 0.02447223663330078, "step": 529 }, { "epoch": 8.0718994140625e-07, "step": 529, "training_step_time": 0.10188841819763184 }, { "epoch": 8.087158203125e-07, "grad_norm": 3.6519904136657715, "learning_rate": 3.5333333333333336e-05, "loss": 0.2759, "step": 530 }, { "epoch": 8.087158203125e-07, "model_forward_time": 0.02434825897216797, "step": 530 }, { "epoch": 8.087158203125e-07, "step": 530, "training_step_time": 0.10427713394165039 }, { "epoch": 8.1024169921875e-07, "model_forward_time": 0.02560281753540039, "step": 531 }, { "epoch": 8.1024169921875e-07, "step": 531, "training_step_time": 0.1071784496307373 }, { "epoch": 8.11767578125e-07, "model_forward_time": 0.024807453155517578, "step": 532 }, { "epoch": 8.11767578125e-07, "step": 532, "training_step_time": 0.11983394622802734 }, { "epoch": 8.1329345703125e-07, "model_forward_time": 0.025418519973754883, "step": 533 }, { "epoch": 8.1329345703125e-07, "step": 533, "training_step_time": 0.12928223609924316 }, { "epoch": 8.148193359375e-07, "model_forward_time": 0.02558135986328125, "step": 534 }, { "epoch": 8.148193359375e-07, "step": 534, "training_step_time": 0.10668778419494629 }, { "epoch": 8.1634521484375e-07, "model_forward_time": 0.025340795516967773, "step": 535 }, { "epoch": 8.1634521484375e-07, "step": 535, "training_step_time": 0.10706448554992676 }, { "epoch": 8.1787109375e-07, "model_forward_time": 0.025357484817504883, "step": 536 }, { "epoch": 8.1787109375e-07, "step": 536, "training_step_time": 0.10590600967407227 }, { "epoch": 8.1939697265625e-07, "model_forward_time": 0.02528095245361328, "step": 537 }, { "epoch": 8.1939697265625e-07, "step": 537, "training_step_time": 0.17280125617980957 }, { "epoch": 8.209228515625e-07, "model_forward_time": 0.024541139602661133, "step": 538 }, { "epoch": 8.209228515625e-07, "step": 538, "training_step_time": 0.10708093643188477 }, { "epoch": 8.2244873046875e-07, "model_forward_time": 0.025463104248046875, "step": 539 }, { "epoch": 8.2244873046875e-07, "step": 539, "training_step_time": 0.11112284660339355 }, { "epoch": 8.23974609375e-07, "grad_norm": 2.5824191570281982, "learning_rate": 3.6e-05, "loss": 0.2215, "step": 540 }, { "epoch": 8.23974609375e-07, "model_forward_time": 0.025034427642822266, "step": 540 }, { "epoch": 8.23974609375e-07, "step": 540, "training_step_time": 0.11156415939331055 }, { "epoch": 8.2550048828125e-07, "model_forward_time": 0.02607583999633789, "step": 541 }, { "epoch": 8.2550048828125e-07, "step": 541, "training_step_time": 0.11567115783691406 }, { "epoch": 8.270263671875e-07, "model_forward_time": 0.025719881057739258, "step": 542 }, { "epoch": 8.270263671875e-07, "step": 542, "training_step_time": 0.10983848571777344 }, { "epoch": 8.2855224609375e-07, "model_forward_time": 0.02523946762084961, "step": 543 }, { "epoch": 8.2855224609375e-07, "step": 543, "training_step_time": 0.21196937561035156 }, { "epoch": 8.30078125e-07, "model_forward_time": 0.024362802505493164, "step": 544 }, { "epoch": 8.30078125e-07, "step": 544, "training_step_time": 0.11173820495605469 }, { "epoch": 8.3160400390625e-07, "model_forward_time": 0.0249788761138916, "step": 545 }, { "epoch": 8.3160400390625e-07, "step": 545, "training_step_time": 0.1525402069091797 }, { "epoch": 8.331298828125e-07, "model_forward_time": 0.025024890899658203, "step": 546 }, { "epoch": 8.331298828125e-07, "step": 546, "training_step_time": 0.15052437782287598 }, { "epoch": 8.3465576171875e-07, "model_forward_time": 0.024741411209106445, "step": 547 }, { "epoch": 8.3465576171875e-07, "step": 547, "training_step_time": 0.10706686973571777 }, { "epoch": 8.36181640625e-07, "model_forward_time": 0.028111696243286133, "step": 548 }, { "epoch": 8.36181640625e-07, "step": 548, "training_step_time": 0.11016035079956055 }, { "epoch": 8.3770751953125e-07, "model_forward_time": 0.02521371841430664, "step": 549 }, { "epoch": 8.3770751953125e-07, "step": 549, "training_step_time": 0.10727596282958984 }, { "epoch": 8.392333984375e-07, "grad_norm": 2.894325017929077, "learning_rate": 3.6666666666666666e-05, "loss": 0.2651, "step": 550 }, { "epoch": 8.392333984375e-07, "model_forward_time": 0.024968624114990234, "step": 550 }, { "epoch": 8.392333984375e-07, "step": 550, "training_step_time": 0.10645294189453125 }, { "epoch": 8.4075927734375e-07, "model_forward_time": 0.024770259857177734, "step": 551 }, { "epoch": 8.4075927734375e-07, "step": 551, "training_step_time": 0.10753011703491211 }, { "epoch": 8.4228515625e-07, "model_forward_time": 0.025012731552124023, "step": 552 }, { "epoch": 8.4228515625e-07, "step": 552, "training_step_time": 0.10584521293640137 }, { "epoch": 8.4381103515625e-07, "model_forward_time": 0.025426387786865234, "step": 553 }, { "epoch": 8.4381103515625e-07, "step": 553, "training_step_time": 0.11516141891479492 }, { "epoch": 8.453369140625e-07, "model_forward_time": 0.025162458419799805, "step": 554 }, { "epoch": 8.453369140625e-07, "step": 554, "training_step_time": 0.11070060729980469 }, { "epoch": 8.4686279296875e-07, "model_forward_time": 0.025748014450073242, "step": 555 }, { "epoch": 8.4686279296875e-07, "step": 555, "training_step_time": 0.10787487030029297 }, { "epoch": 8.48388671875e-07, "model_forward_time": 0.025131940841674805, "step": 556 }, { "epoch": 8.48388671875e-07, "step": 556, "training_step_time": 0.1057441234588623 }, { "epoch": 8.4991455078125e-07, "model_forward_time": 0.025133848190307617, "step": 557 }, { "epoch": 8.4991455078125e-07, "step": 557, "training_step_time": 0.10863590240478516 }, { "epoch": 8.514404296875e-07, "model_forward_time": 0.025732755661010742, "step": 558 }, { "epoch": 8.514404296875e-07, "step": 558, "training_step_time": 0.11274504661560059 }, { "epoch": 8.5296630859375e-07, "model_forward_time": 0.02541661262512207, "step": 559 }, { "epoch": 8.5296630859375e-07, "step": 559, "training_step_time": 0.10598015785217285 }, { "epoch": 8.544921875e-07, "grad_norm": 1.9275555610656738, "learning_rate": 3.733333333333334e-05, "loss": 0.2481, "step": 560 }, { "epoch": 8.544921875e-07, "model_forward_time": 0.02519822120666504, "step": 560 }, { "epoch": 8.544921875e-07, "step": 560, "training_step_time": 0.10515832901000977 }, { "epoch": 8.5601806640625e-07, "model_forward_time": 0.02512836456298828, "step": 561 }, { "epoch": 8.5601806640625e-07, "step": 561, "training_step_time": 0.1047215461730957 }, { "epoch": 8.575439453125e-07, "model_forward_time": 0.025547504425048828, "step": 562 }, { "epoch": 8.575439453125e-07, "step": 562, "training_step_time": 0.10657382011413574 }, { "epoch": 8.5906982421875e-07, "model_forward_time": 0.026045799255371094, "step": 563 }, { "epoch": 8.5906982421875e-07, "step": 563, "training_step_time": 0.10592174530029297 }, { "epoch": 8.60595703125e-07, "model_forward_time": 0.025443077087402344, "step": 564 }, { "epoch": 8.60595703125e-07, "step": 564, "training_step_time": 0.1064920425415039 }, { "epoch": 8.6212158203125e-07, "model_forward_time": 0.02847743034362793, "step": 565 }, { "epoch": 8.6212158203125e-07, "step": 565, "training_step_time": 0.11181163787841797 }, { "epoch": 8.636474609375e-07, "model_forward_time": 0.025569677352905273, "step": 566 }, { "epoch": 8.636474609375e-07, "step": 566, "training_step_time": 0.10891580581665039 }, { "epoch": 8.6517333984375e-07, "model_forward_time": 0.028377056121826172, "step": 567 }, { "epoch": 8.6517333984375e-07, "step": 567, "training_step_time": 0.1757512092590332 }, { "epoch": 8.6669921875e-07, "model_forward_time": 0.02479839324951172, "step": 568 }, { "epoch": 8.6669921875e-07, "step": 568, "training_step_time": 0.10520267486572266 }, { "epoch": 8.6822509765625e-07, "model_forward_time": 0.02490973472595215, "step": 569 }, { "epoch": 8.6822509765625e-07, "step": 569, "training_step_time": 0.20324015617370605 }, { "epoch": 8.697509765625e-07, "grad_norm": 2.22090220451355, "learning_rate": 3.8e-05, "loss": 0.2211, "step": 570 }, { "epoch": 8.697509765625e-07, "model_forward_time": 0.024982452392578125, "step": 570 }, { "epoch": 8.697509765625e-07, "step": 570, "training_step_time": 0.1636977195739746 }, { "epoch": 8.7127685546875e-07, "model_forward_time": 0.024549245834350586, "step": 571 }, { "epoch": 8.7127685546875e-07, "step": 571, "training_step_time": 0.16640734672546387 }, { "epoch": 8.72802734375e-07, "model_forward_time": 0.024205923080444336, "step": 572 }, { "epoch": 8.72802734375e-07, "step": 572, "training_step_time": 0.17044806480407715 }, { "epoch": 8.7432861328125e-07, "model_forward_time": 0.02430582046508789, "step": 573 }, { "epoch": 8.7432861328125e-07, "step": 573, "training_step_time": 0.18944811820983887 }, { "epoch": 8.758544921875e-07, "model_forward_time": 0.025097370147705078, "step": 574 }, { "epoch": 8.758544921875e-07, "step": 574, "training_step_time": 0.10761809349060059 }, { "epoch": 8.7738037109375e-07, "model_forward_time": 0.02455282211303711, "step": 575 }, { "epoch": 8.7738037109375e-07, "step": 575, "training_step_time": 0.10445213317871094 }, { "epoch": 8.7890625e-07, "model_forward_time": 0.02567315101623535, "step": 576 }, { "epoch": 8.7890625e-07, "step": 576, "training_step_time": 0.10565567016601562 }, { "epoch": 8.8043212890625e-07, "model_forward_time": 0.025488615036010742, "step": 577 }, { "epoch": 8.8043212890625e-07, "step": 577, "training_step_time": 0.15418624877929688 }, { "epoch": 8.819580078125e-07, "model_forward_time": 0.02508997917175293, "step": 578 }, { "epoch": 8.819580078125e-07, "step": 578, "training_step_time": 0.12519001960754395 }, { "epoch": 8.8348388671875e-07, "model_forward_time": 0.024870872497558594, "step": 579 }, { "epoch": 8.8348388671875e-07, "step": 579, "training_step_time": 0.1559433937072754 }, { "epoch": 8.85009765625e-07, "grad_norm": 1.7533249855041504, "learning_rate": 3.866666666666667e-05, "loss": 0.2078, "step": 580 }, { "epoch": 8.85009765625e-07, "model_forward_time": 0.0251157283782959, "step": 580 }, { "epoch": 8.85009765625e-07, "step": 580, "training_step_time": 0.15197062492370605 }, { "epoch": 8.8653564453125e-07, "model_forward_time": 0.025473833084106445, "step": 581 }, { "epoch": 8.8653564453125e-07, "step": 581, "training_step_time": 0.14590120315551758 }, { "epoch": 8.880615234375e-07, "model_forward_time": 0.025167226791381836, "step": 582 }, { "epoch": 8.880615234375e-07, "step": 582, "training_step_time": 0.13872075080871582 }, { "epoch": 8.8958740234375e-07, "model_forward_time": 0.02444767951965332, "step": 583 }, { "epoch": 8.8958740234375e-07, "step": 583, "training_step_time": 0.10766482353210449 }, { "epoch": 8.9111328125e-07, "model_forward_time": 0.025127649307250977, "step": 584 }, { "epoch": 8.9111328125e-07, "step": 584, "training_step_time": 0.10739350318908691 }, { "epoch": 8.9263916015625e-07, "model_forward_time": 0.02514481544494629, "step": 585 }, { "epoch": 8.9263916015625e-07, "step": 585, "training_step_time": 0.10929989814758301 }, { "epoch": 8.941650390625e-07, "model_forward_time": 0.025516748428344727, "step": 586 }, { "epoch": 8.941650390625e-07, "step": 586, "training_step_time": 0.11003756523132324 }, { "epoch": 8.9569091796875e-07, "model_forward_time": 0.02533721923828125, "step": 587 }, { "epoch": 8.9569091796875e-07, "step": 587, "training_step_time": 0.20212364196777344 }, { "epoch": 8.97216796875e-07, "model_forward_time": 0.02463555335998535, "step": 588 }, { "epoch": 8.97216796875e-07, "step": 588, "training_step_time": 0.11271810531616211 }, { "epoch": 8.9874267578125e-07, "model_forward_time": 0.024624109268188477, "step": 589 }, { "epoch": 8.9874267578125e-07, "step": 589, "training_step_time": 0.10683798789978027 }, { "epoch": 9.002685546875e-07, "grad_norm": 1.4527404308319092, "learning_rate": 3.933333333333333e-05, "loss": 0.1965, "step": 590 }, { "epoch": 9.002685546875e-07, "model_forward_time": 0.025640249252319336, "step": 590 }, { "epoch": 9.002685546875e-07, "step": 590, "training_step_time": 0.19601798057556152 }, { "epoch": 9.0179443359375e-07, "model_forward_time": 0.024501800537109375, "step": 591 }, { "epoch": 9.0179443359375e-07, "step": 591, "training_step_time": 0.10685420036315918 }, { "epoch": 9.033203125e-07, "model_forward_time": 0.024839401245117188, "step": 592 }, { "epoch": 9.033203125e-07, "step": 592, "training_step_time": 0.10669231414794922 }, { "epoch": 9.0484619140625e-07, "model_forward_time": 0.02524399757385254, "step": 593 }, { "epoch": 9.0484619140625e-07, "step": 593, "training_step_time": 0.1076211929321289 }, { "epoch": 9.063720703125e-07, "model_forward_time": 0.025626420974731445, "step": 594 }, { "epoch": 9.063720703125e-07, "step": 594, "training_step_time": 0.11132454872131348 }, { "epoch": 9.0789794921875e-07, "model_forward_time": 0.02546215057373047, "step": 595 }, { "epoch": 9.0789794921875e-07, "step": 595, "training_step_time": 0.11248183250427246 }, { "epoch": 9.09423828125e-07, "model_forward_time": 0.025142192840576172, "step": 596 }, { "epoch": 9.09423828125e-07, "step": 596, "training_step_time": 0.11154317855834961 }, { "epoch": 9.1094970703125e-07, "model_forward_time": 0.025377273559570312, "step": 597 }, { "epoch": 9.1094970703125e-07, "step": 597, "training_step_time": 0.10977506637573242 }, { "epoch": 9.124755859375e-07, "model_forward_time": 0.02558445930480957, "step": 598 }, { "epoch": 9.124755859375e-07, "step": 598, "training_step_time": 0.1071016788482666 }, { "epoch": 9.1400146484375e-07, "model_forward_time": 0.025009632110595703, "step": 599 }, { "epoch": 9.1400146484375e-07, "step": 599, "training_step_time": 0.10792064666748047 }, { "epoch": 9.1552734375e-07, "grad_norm": 1.198233723640442, "learning_rate": 4e-05, "loss": 0.2007, "step": 600 }, { "epoch": 9.1552734375e-07, "model_forward_time": 0.026806116104125977, "step": 600 }, { "epoch": 9.1552734375e-07, "step": 600, "training_step_time": 0.10895800590515137 }, { "epoch": 9.1705322265625e-07, "model_forward_time": 0.02562117576599121, "step": 601 }, { "epoch": 9.1705322265625e-07, "step": 601, "training_step_time": 0.11057209968566895 }, { "epoch": 9.185791015625e-07, "model_forward_time": 0.025543689727783203, "step": 602 }, { "epoch": 9.185791015625e-07, "step": 602, "training_step_time": 0.10936522483825684 }, { "epoch": 9.2010498046875e-07, "model_forward_time": 0.025423049926757812, "step": 603 }, { "epoch": 9.2010498046875e-07, "step": 603, "training_step_time": 0.11207270622253418 }, { "epoch": 9.21630859375e-07, "model_forward_time": 0.025262117385864258, "step": 604 }, { "epoch": 9.21630859375e-07, "step": 604, "training_step_time": 0.10583043098449707 }, { "epoch": 9.2315673828125e-07, "model_forward_time": 0.02632451057434082, "step": 605 }, { "epoch": 9.2315673828125e-07, "step": 605, "training_step_time": 0.10900664329528809 }, { "epoch": 9.246826171875e-07, "model_forward_time": 0.025408506393432617, "step": 606 }, { "epoch": 9.246826171875e-07, "step": 606, "training_step_time": 0.10456633567810059 }, { "epoch": 9.2620849609375e-07, "model_forward_time": 0.025197505950927734, "step": 607 }, { "epoch": 9.2620849609375e-07, "step": 607, "training_step_time": 0.10456705093383789 }, { "epoch": 9.27734375e-07, "model_forward_time": 0.02599024772644043, "step": 608 }, { "epoch": 9.27734375e-07, "step": 608, "training_step_time": 0.10758280754089355 }, { "epoch": 9.2926025390625e-07, "model_forward_time": 0.026047706604003906, "step": 609 }, { "epoch": 9.2926025390625e-07, "step": 609, "training_step_time": 0.10677289962768555 }, { "epoch": 9.307861328125e-07, "grad_norm": 1.9473224878311157, "learning_rate": 4.066666666666667e-05, "loss": 0.2071, "step": 610 }, { "epoch": 9.307861328125e-07, "model_forward_time": 0.02504706382751465, "step": 610 }, { "epoch": 9.307861328125e-07, "step": 610, "training_step_time": 0.10535693168640137 }, { "epoch": 9.3231201171875e-07, "model_forward_time": 0.025238752365112305, "step": 611 }, { "epoch": 9.3231201171875e-07, "step": 611, "training_step_time": 0.16564345359802246 }, { "epoch": 9.33837890625e-07, "model_forward_time": 0.02473926544189453, "step": 612 }, { "epoch": 9.33837890625e-07, "step": 612, "training_step_time": 0.11058211326599121 }, { "epoch": 9.3536376953125e-07, "model_forward_time": 0.024482250213623047, "step": 613 }, { "epoch": 9.3536376953125e-07, "step": 613, "training_step_time": 0.20367789268493652 }, { "epoch": 9.368896484375e-07, "model_forward_time": 0.02383112907409668, "step": 614 }, { "epoch": 9.368896484375e-07, "step": 614, "training_step_time": 0.18216753005981445 }, { "epoch": 9.3841552734375e-07, "model_forward_time": 0.023954391479492188, "step": 615 }, { "epoch": 9.3841552734375e-07, "step": 615, "training_step_time": 0.19987225532531738 }, { "epoch": 9.3994140625e-07, "model_forward_time": 0.024993896484375, "step": 616 }, { "epoch": 9.3994140625e-07, "step": 616, "training_step_time": 0.10952091217041016 }, { "epoch": 9.4146728515625e-07, "model_forward_time": 0.02446436882019043, "step": 617 }, { "epoch": 9.4146728515625e-07, "step": 617, "training_step_time": 0.10840129852294922 }, { "epoch": 9.429931640625e-07, "model_forward_time": 0.02523493766784668, "step": 618 }, { "epoch": 9.429931640625e-07, "step": 618, "training_step_time": 0.11473250389099121 }, { "epoch": 9.4451904296875e-07, "model_forward_time": 0.02508068084716797, "step": 619 }, { "epoch": 9.4451904296875e-07, "step": 619, "training_step_time": 0.12007665634155273 }, { "epoch": 9.46044921875e-07, "grad_norm": 2.3724663257598877, "learning_rate": 4.133333333333333e-05, "loss": 0.2242, "step": 620 }, { "epoch": 9.46044921875e-07, "model_forward_time": 0.025259733200073242, "step": 620 }, { "epoch": 9.46044921875e-07, "step": 620, "training_step_time": 0.11275982856750488 }, { "epoch": 9.4757080078125e-07, "model_forward_time": 0.026327848434448242, "step": 621 }, { "epoch": 9.4757080078125e-07, "step": 621, "training_step_time": 0.1233530044555664 }, { "epoch": 9.490966796875e-07, "model_forward_time": 0.025542020797729492, "step": 622 }, { "epoch": 9.490966796875e-07, "step": 622, "training_step_time": 0.2064199447631836 }, { "epoch": 9.5062255859375e-07, "model_forward_time": 0.024581193923950195, "step": 623 }, { "epoch": 9.5062255859375e-07, "step": 623, "training_step_time": 0.10877299308776855 }, { "epoch": 9.521484375e-07, "model_forward_time": 0.0247189998626709, "step": 624 }, { "epoch": 9.521484375e-07, "step": 624, "training_step_time": 0.10849666595458984 }, { "epoch": 9.5367431640625e-07, "model_forward_time": 0.024769067764282227, "step": 625 }, { "epoch": 9.5367431640625e-07, "step": 625, "training_step_time": 0.15665078163146973 }, { "epoch": 9.552001953125e-07, "model_forward_time": 0.024300813674926758, "step": 626 }, { "epoch": 9.552001953125e-07, "step": 626, "training_step_time": 0.1331627368927002 }, { "epoch": 9.5672607421875e-07, "model_forward_time": 0.024785280227661133, "step": 627 }, { "epoch": 9.5672607421875e-07, "step": 627, "training_step_time": 0.10618805885314941 }, { "epoch": 9.58251953125e-07, "model_forward_time": 0.025282859802246094, "step": 628 }, { "epoch": 9.58251953125e-07, "step": 628, "training_step_time": 0.11105489730834961 }, { "epoch": 9.5977783203125e-07, "model_forward_time": 0.0250856876373291, "step": 629 }, { "epoch": 9.5977783203125e-07, "step": 629, "training_step_time": 0.1077427864074707 }, { "epoch": 9.613037109375e-07, "grad_norm": 2.2837870121002197, "learning_rate": 4.2e-05, "loss": 0.2133, "step": 630 }, { "epoch": 9.613037109375e-07, "model_forward_time": 0.025562047958374023, "step": 630 }, { "epoch": 9.613037109375e-07, "step": 630, "training_step_time": 0.11121892929077148 }, { "epoch": 9.6282958984375e-07, "model_forward_time": 0.0256350040435791, "step": 631 }, { "epoch": 9.6282958984375e-07, "step": 631, "training_step_time": 0.10900688171386719 }, { "epoch": 9.6435546875e-07, "model_forward_time": 0.02538442611694336, "step": 632 }, { "epoch": 9.6435546875e-07, "step": 632, "training_step_time": 0.20986294746398926 }, { "epoch": 9.6588134765625e-07, "model_forward_time": 0.025243759155273438, "step": 633 }, { "epoch": 9.6588134765625e-07, "step": 633, "training_step_time": 0.10717272758483887 }, { "epoch": 9.674072265625e-07, "model_forward_time": 0.024759769439697266, "step": 634 }, { "epoch": 9.674072265625e-07, "step": 634, "training_step_time": 0.11149835586547852 }, { "epoch": 9.6893310546875e-07, "model_forward_time": 0.025780677795410156, "step": 635 }, { "epoch": 9.6893310546875e-07, "step": 635, "training_step_time": 0.20018362998962402 }, { "epoch": 9.70458984375e-07, "model_forward_time": 0.024682998657226562, "step": 636 }, { "epoch": 9.70458984375e-07, "step": 636, "training_step_time": 0.1079702377319336 }, { "epoch": 9.7198486328125e-07, "model_forward_time": 0.024491548538208008, "step": 637 }, { "epoch": 9.7198486328125e-07, "step": 637, "training_step_time": 0.1093893051147461 }, { "epoch": 9.735107421875e-07, "model_forward_time": 0.025505781173706055, "step": 638 }, { "epoch": 9.735107421875e-07, "step": 638, "training_step_time": 0.11040329933166504 }, { "epoch": 9.7503662109375e-07, "model_forward_time": 0.025437593460083008, "step": 639 }, { "epoch": 9.7503662109375e-07, "step": 639, "training_step_time": 0.10527706146240234 }, { "epoch": 9.765625e-07, "grad_norm": 1.5601938962936401, "learning_rate": 4.266666666666667e-05, "loss": 0.22, "step": 640 }, { "epoch": 9.765625e-07, "model_forward_time": 0.025427818298339844, "step": 640 }, { "epoch": 9.765625e-07, "step": 640, "training_step_time": 0.10495877265930176 }, { "epoch": 9.7808837890625e-07, "model_forward_time": 0.025714874267578125, "step": 641 }, { "epoch": 9.7808837890625e-07, "step": 641, "training_step_time": 0.10501790046691895 }, { "epoch": 9.796142578125e-07, "model_forward_time": 0.02526378631591797, "step": 642 }, { "epoch": 9.796142578125e-07, "step": 642, "training_step_time": 0.10541796684265137 }, { "epoch": 9.8114013671875e-07, "model_forward_time": 0.02615833282470703, "step": 643 }, { "epoch": 9.8114013671875e-07, "step": 643, "training_step_time": 0.10561585426330566 }, { "epoch": 9.82666015625e-07, "model_forward_time": 0.02657938003540039, "step": 644 }, { "epoch": 9.82666015625e-07, "step": 644, "training_step_time": 0.10756278038024902 }, { "epoch": 9.8419189453125e-07, "model_forward_time": 0.025615692138671875, "step": 645 }, { "epoch": 9.8419189453125e-07, "step": 645, "training_step_time": 0.11654090881347656 }, { "epoch": 9.857177734375e-07, "model_forward_time": 0.025333642959594727, "step": 646 }, { "epoch": 9.857177734375e-07, "step": 646, "training_step_time": 0.10930633544921875 }, { "epoch": 9.8724365234375e-07, "model_forward_time": 0.02539658546447754, "step": 647 }, { "epoch": 9.8724365234375e-07, "step": 647, "training_step_time": 0.10354161262512207 }, { "epoch": 9.8876953125e-07, "model_forward_time": 0.027733325958251953, "step": 648 }, { "epoch": 9.8876953125e-07, "step": 648, "training_step_time": 0.11020636558532715 }, { "epoch": 9.9029541015625e-07, "model_forward_time": 0.026613473892211914, "step": 649 }, { "epoch": 9.9029541015625e-07, "step": 649, "training_step_time": 0.10996341705322266 }, { "epoch": 9.918212890625e-07, "grad_norm": 2.4662177562713623, "learning_rate": 4.3333333333333334e-05, "loss": 0.2189, "step": 650 }, { "epoch": 9.918212890625e-07, "model_forward_time": 0.025188922882080078, "step": 650 }, { "epoch": 9.918212890625e-07, "step": 650, "training_step_time": 0.10556530952453613 }, { "epoch": 9.9334716796875e-07, "model_forward_time": 0.025713205337524414, "step": 651 }, { "epoch": 9.9334716796875e-07, "step": 651, "training_step_time": 0.10782814025878906 }, { "epoch": 9.94873046875e-07, "model_forward_time": 0.025780916213989258, "step": 652 }, { "epoch": 9.94873046875e-07, "step": 652, "training_step_time": 0.10580801963806152 }, { "epoch": 9.9639892578125e-07, "model_forward_time": 0.025099754333496094, "step": 653 }, { "epoch": 9.9639892578125e-07, "step": 653, "training_step_time": 0.10844945907592773 }, { "epoch": 9.979248046875e-07, "model_forward_time": 0.02531743049621582, "step": 654 }, { "epoch": 9.979248046875e-07, "step": 654, "training_step_time": 0.10998678207397461 }, { "epoch": 9.9945068359375e-07, "model_forward_time": 0.02523660659790039, "step": 655 }, { "epoch": 9.9945068359375e-07, "step": 655, "training_step_time": 0.1447737216949463 }, { "epoch": 1.0009765625e-06, "model_forward_time": 0.024217844009399414, "step": 656 }, { "epoch": 1.0009765625e-06, "step": 656, "training_step_time": 0.1732311248779297 }, { "epoch": 1.00250244140625e-06, "model_forward_time": 0.02477884292602539, "step": 657 }, { "epoch": 1.00250244140625e-06, "step": 657, "training_step_time": 0.15137910842895508 }, { "epoch": 1.0040283203125e-06, "model_forward_time": 0.025076627731323242, "step": 658 }, { "epoch": 1.0040283203125e-06, "step": 658, "training_step_time": 0.1648862361907959 }, { "epoch": 1.00555419921875e-06, "model_forward_time": 0.024783849716186523, "step": 659 }, { "epoch": 1.00555419921875e-06, "step": 659, "training_step_time": 0.12984800338745117 }, { "epoch": 1.007080078125e-06, "grad_norm": 1.6586493253707886, "learning_rate": 4.4000000000000006e-05, "loss": 0.2396, "step": 660 }, { "epoch": 1.007080078125e-06, "model_forward_time": 0.02385234832763672, "step": 660 }, { "epoch": 1.007080078125e-06, "step": 660, "training_step_time": 0.19134187698364258 }, { "epoch": 1.00860595703125e-06, "model_forward_time": 0.024228334426879883, "step": 661 }, { "epoch": 1.00860595703125e-06, "step": 661, "training_step_time": 0.1558971405029297 }, { "epoch": 1.0101318359375e-06, "model_forward_time": 0.024208545684814453, "step": 662 }, { "epoch": 1.0101318359375e-06, "step": 662, "training_step_time": 0.10957694053649902 }, { "epoch": 1.01165771484375e-06, "model_forward_time": 0.024410247802734375, "step": 663 }, { "epoch": 1.01165771484375e-06, "step": 663, "training_step_time": 0.11977529525756836 }, { "epoch": 1.01318359375e-06, "model_forward_time": 0.024786949157714844, "step": 664 }, { "epoch": 1.01318359375e-06, "step": 664, "training_step_time": 0.10622620582580566 }, { "epoch": 1.01470947265625e-06, "model_forward_time": 0.025542497634887695, "step": 665 }, { "epoch": 1.01470947265625e-06, "step": 665, "training_step_time": 0.10407471656799316 }, { "epoch": 1.0162353515625e-06, "model_forward_time": 0.02534937858581543, "step": 666 }, { "epoch": 1.0162353515625e-06, "step": 666, "training_step_time": 0.16604065895080566 }, { "epoch": 1.01776123046875e-06, "model_forward_time": 0.024855375289916992, "step": 667 }, { "epoch": 1.01776123046875e-06, "step": 667, "training_step_time": 0.12375545501708984 }, { "epoch": 1.019287109375e-06, "model_forward_time": 0.024580955505371094, "step": 668 }, { "epoch": 1.019287109375e-06, "step": 668, "training_step_time": 0.10639286041259766 }, { "epoch": 1.02081298828125e-06, "model_forward_time": 0.02517104148864746, "step": 669 }, { "epoch": 1.02081298828125e-06, "step": 669, "training_step_time": 0.11187219619750977 }, { "epoch": 1.0223388671875e-06, "grad_norm": 0.8760672807693481, "learning_rate": 4.466666666666667e-05, "loss": 0.1729, "step": 670 }, { "epoch": 1.0223388671875e-06, "model_forward_time": 0.025316715240478516, "step": 670 }, { "epoch": 1.0223388671875e-06, "step": 670, "training_step_time": 0.23067688941955566 }, { "epoch": 1.02386474609375e-06, "model_forward_time": 0.025102615356445312, "step": 671 }, { "epoch": 1.02386474609375e-06, "step": 671, "training_step_time": 0.21572446823120117 }, { "epoch": 1.025390625e-06, "model_forward_time": 0.024440526962280273, "step": 672 }, { "epoch": 1.025390625e-06, "step": 672, "training_step_time": 0.19784760475158691 }, { "epoch": 1.02691650390625e-06, "model_forward_time": 0.024303197860717773, "step": 673 }, { "epoch": 1.02691650390625e-06, "step": 673, "training_step_time": 0.18396782875061035 }, { "epoch": 1.0284423828125e-06, "model_forward_time": 0.024264097213745117, "step": 674 }, { "epoch": 1.0284423828125e-06, "step": 674, "training_step_time": 0.16685247421264648 }, { "epoch": 1.02996826171875e-06, "model_forward_time": 0.024719953536987305, "step": 675 }, { "epoch": 1.02996826171875e-06, "step": 675, "training_step_time": 0.11927175521850586 }, { "epoch": 1.031494140625e-06, "model_forward_time": 0.02491021156311035, "step": 676 }, { "epoch": 1.031494140625e-06, "step": 676, "training_step_time": 0.10755157470703125 }, { "epoch": 1.03302001953125e-06, "model_forward_time": 0.02597975730895996, "step": 677 }, { "epoch": 1.03302001953125e-06, "step": 677, "training_step_time": 0.10407495498657227 }, { "epoch": 1.0345458984375e-06, "model_forward_time": 0.025519132614135742, "step": 678 }, { "epoch": 1.0345458984375e-06, "step": 678, "training_step_time": 0.20398902893066406 }, { "epoch": 1.03607177734375e-06, "model_forward_time": 0.02556300163269043, "step": 679 }, { "epoch": 1.03607177734375e-06, "step": 679, "training_step_time": 0.10717654228210449 }, { "epoch": 1.03759765625e-06, "grad_norm": 1.6215095520019531, "learning_rate": 4.5333333333333335e-05, "loss": 0.1979, "step": 680 }, { "epoch": 1.03759765625e-06, "model_forward_time": 0.0251615047454834, "step": 680 }, { "epoch": 1.03759765625e-06, "step": 680, "training_step_time": 0.1102452278137207 }, { "epoch": 1.03912353515625e-06, "model_forward_time": 0.025139570236206055, "step": 681 }, { "epoch": 1.03912353515625e-06, "step": 681, "training_step_time": 0.10706734657287598 }, { "epoch": 1.0406494140625e-06, "model_forward_time": 0.025552988052368164, "step": 682 }, { "epoch": 1.0406494140625e-06, "step": 682, "training_step_time": 0.10704517364501953 }, { "epoch": 1.04217529296875e-06, "model_forward_time": 0.025260448455810547, "step": 683 }, { "epoch": 1.04217529296875e-06, "step": 683, "training_step_time": 0.10667943954467773 }, { "epoch": 1.043701171875e-06, "model_forward_time": 0.026265621185302734, "step": 684 }, { "epoch": 1.043701171875e-06, "step": 684, "training_step_time": 0.10892581939697266 }, { "epoch": 1.04522705078125e-06, "model_forward_time": 0.025774478912353516, "step": 685 }, { "epoch": 1.04522705078125e-06, "step": 685, "training_step_time": 0.10874509811401367 }, { "epoch": 1.0467529296875e-06, "model_forward_time": 0.025040626525878906, "step": 686 }, { "epoch": 1.0467529296875e-06, "step": 686, "training_step_time": 0.10829544067382812 }, { "epoch": 1.04827880859375e-06, "model_forward_time": 0.025452852249145508, "step": 687 }, { "epoch": 1.04827880859375e-06, "step": 687, "training_step_time": 0.11186695098876953 }, { "epoch": 1.0498046875e-06, "model_forward_time": 0.025548219680786133, "step": 688 }, { "epoch": 1.0498046875e-06, "step": 688, "training_step_time": 0.14309239387512207 }, { "epoch": 1.05133056640625e-06, "model_forward_time": 0.026767253875732422, "step": 689 }, { "epoch": 1.05133056640625e-06, "step": 689, "training_step_time": 0.13017725944519043 }, { "epoch": 1.0528564453125e-06, "grad_norm": 1.5063233375549316, "learning_rate": 4.600000000000001e-05, "loss": 0.1865, "step": 690 }, { "epoch": 1.0528564453125e-06, "model_forward_time": 0.025287389755249023, "step": 690 }, { "epoch": 1.0528564453125e-06, "step": 690, "training_step_time": 0.1168820858001709 }, { "epoch": 1.05438232421875e-06, "model_forward_time": 0.025073766708374023, "step": 691 }, { "epoch": 1.05438232421875e-06, "step": 691, "training_step_time": 0.11841344833374023 }, { "epoch": 1.055908203125e-06, "model_forward_time": 0.025400400161743164, "step": 692 }, { "epoch": 1.055908203125e-06, "step": 692, "training_step_time": 0.1200706958770752 }, { "epoch": 1.05743408203125e-06, "model_forward_time": 0.024979829788208008, "step": 693 }, { "epoch": 1.05743408203125e-06, "step": 693, "training_step_time": 0.1238718032836914 }, { "epoch": 1.0589599609375e-06, "model_forward_time": 0.025542736053466797, "step": 694 }, { "epoch": 1.0589599609375e-06, "step": 694, "training_step_time": 0.11020517349243164 }, { "epoch": 1.06048583984375e-06, "model_forward_time": 0.02611565589904785, "step": 695 }, { "epoch": 1.06048583984375e-06, "step": 695, "training_step_time": 0.11434555053710938 }, { "epoch": 1.06201171875e-06, "model_forward_time": 0.025331497192382812, "step": 696 }, { "epoch": 1.06201171875e-06, "step": 696, "training_step_time": 0.11041998863220215 }, { "epoch": 1.06353759765625e-06, "model_forward_time": 0.025292158126831055, "step": 697 }, { "epoch": 1.06353759765625e-06, "step": 697, "training_step_time": 0.10665297508239746 }, { "epoch": 1.0650634765625e-06, "model_forward_time": 0.02544569969177246, "step": 698 }, { "epoch": 1.0650634765625e-06, "step": 698, "training_step_time": 0.10870766639709473 }, { "epoch": 1.06658935546875e-06, "model_forward_time": 0.025304079055786133, "step": 699 }, { "epoch": 1.06658935546875e-06, "step": 699, "training_step_time": 0.11166167259216309 }, { "epoch": 1.068115234375e-06, "grad_norm": 1.679792046546936, "learning_rate": 4.666666666666667e-05, "loss": 0.1993, "step": 700 }, { "epoch": 1.068115234375e-06, "model_forward_time": 0.024828195571899414, "step": 700 }, { "epoch": 1.068115234375e-06, "step": 700, "training_step_time": 0.15088129043579102 }, { "epoch": 1.06964111328125e-06, "model_forward_time": 0.025300025939941406, "step": 701 }, { "epoch": 1.06964111328125e-06, "step": 701, "training_step_time": 0.15264678001403809 }, { "epoch": 1.0711669921875e-06, "model_forward_time": 0.025058984756469727, "step": 702 }, { "epoch": 1.0711669921875e-06, "step": 702, "training_step_time": 0.1292116641998291 }, { "epoch": 1.07269287109375e-06, "model_forward_time": 0.024895429611206055, "step": 703 }, { "epoch": 1.07269287109375e-06, "step": 703, "training_step_time": 0.1650996208190918 }, { "epoch": 1.07421875e-06, "model_forward_time": 0.0276033878326416, "step": 704 }, { "epoch": 1.07421875e-06, "step": 704, "training_step_time": 0.12588858604431152 }, { "epoch": 1.07574462890625e-06, "model_forward_time": 0.02512216567993164, "step": 705 }, { "epoch": 1.07574462890625e-06, "step": 705, "training_step_time": 0.18714165687561035 }, { "epoch": 1.0772705078125e-06, "model_forward_time": 0.024817466735839844, "step": 706 }, { "epoch": 1.0772705078125e-06, "step": 706, "training_step_time": 0.12075018882751465 }, { "epoch": 1.07879638671875e-06, "model_forward_time": 0.024956703186035156, "step": 707 }, { "epoch": 1.07879638671875e-06, "step": 707, "training_step_time": 0.11095571517944336 }, { "epoch": 1.080322265625e-06, "model_forward_time": 0.025403499603271484, "step": 708 }, { "epoch": 1.080322265625e-06, "step": 708, "training_step_time": 0.10695981979370117 }, { "epoch": 1.08184814453125e-06, "model_forward_time": 0.02510380744934082, "step": 709 }, { "epoch": 1.08184814453125e-06, "step": 709, "training_step_time": 0.10754847526550293 }, { "epoch": 1.0833740234375e-06, "grad_norm": 1.7046579122543335, "learning_rate": 4.7333333333333336e-05, "loss": 0.1636, "step": 710 }, { "epoch": 1.0833740234375e-06, "model_forward_time": 0.025192737579345703, "step": 710 }, { "epoch": 1.0833740234375e-06, "step": 710, "training_step_time": 0.1233370304107666 }, { "epoch": 1.08489990234375e-06, "model_forward_time": 0.024378299713134766, "step": 711 }, { "epoch": 1.08489990234375e-06, "step": 711, "training_step_time": 0.10846662521362305 }, { "epoch": 1.08642578125e-06, "model_forward_time": 0.025356054306030273, "step": 712 }, { "epoch": 1.08642578125e-06, "step": 712, "training_step_time": 0.10688662528991699 }, { "epoch": 1.08795166015625e-06, "model_forward_time": 0.025412559509277344, "step": 713 }, { "epoch": 1.08795166015625e-06, "step": 713, "training_step_time": 0.10759639739990234 }, { "epoch": 1.0894775390625e-06, "model_forward_time": 0.02541494369506836, "step": 714 }, { "epoch": 1.0894775390625e-06, "step": 714, "training_step_time": 0.17981958389282227 }, { "epoch": 1.09100341796875e-06, "model_forward_time": 0.02480030059814453, "step": 715 }, { "epoch": 1.09100341796875e-06, "step": 715, "training_step_time": 0.10857892036437988 }, { "epoch": 1.092529296875e-06, "model_forward_time": 0.024381637573242188, "step": 716 }, { "epoch": 1.092529296875e-06, "step": 716, "training_step_time": 0.10985398292541504 }, { "epoch": 1.09405517578125e-06, "model_forward_time": 0.02529120445251465, "step": 717 }, { "epoch": 1.09405517578125e-06, "step": 717, "training_step_time": 0.10835027694702148 }, { "epoch": 1.0955810546875e-06, "model_forward_time": 0.025506973266601562, "step": 718 }, { "epoch": 1.0955810546875e-06, "step": 718, "training_step_time": 0.1086118221282959 }, { "epoch": 1.09710693359375e-06, "model_forward_time": 0.02535557746887207, "step": 719 }, { "epoch": 1.09710693359375e-06, "step": 719, "training_step_time": 0.1255626678466797 }, { "epoch": 1.0986328125e-06, "grad_norm": 1.6197246313095093, "learning_rate": 4.8e-05, "loss": 0.172, "step": 720 }, { "epoch": 1.0986328125e-06, "model_forward_time": 0.024336814880371094, "step": 720 }, { "epoch": 1.0986328125e-06, "step": 720, "training_step_time": 0.1492152214050293 }, { "epoch": 1.10015869140625e-06, "model_forward_time": 0.02407240867614746, "step": 721 }, { "epoch": 1.10015869140625e-06, "step": 721, "training_step_time": 0.18077611923217773 }, { "epoch": 1.1016845703125e-06, "model_forward_time": 0.024986982345581055, "step": 722 }, { "epoch": 1.1016845703125e-06, "step": 722, "training_step_time": 0.12772655487060547 }, { "epoch": 1.10321044921875e-06, "model_forward_time": 0.025682449340820312, "step": 723 }, { "epoch": 1.10321044921875e-06, "step": 723, "training_step_time": 0.19490957260131836 }, { "epoch": 1.104736328125e-06, "model_forward_time": 0.024980783462524414, "step": 724 }, { "epoch": 1.104736328125e-06, "step": 724, "training_step_time": 0.21149587631225586 }, { "epoch": 1.10626220703125e-06, "model_forward_time": 0.02501392364501953, "step": 725 }, { "epoch": 1.10626220703125e-06, "step": 725, "training_step_time": 0.11554503440856934 }, { "epoch": 1.1077880859375e-06, "model_forward_time": 0.025061368942260742, "step": 726 }, { "epoch": 1.1077880859375e-06, "step": 726, "training_step_time": 0.10441207885742188 }, { "epoch": 1.10931396484375e-06, "model_forward_time": 0.025601625442504883, "step": 727 }, { "epoch": 1.10931396484375e-06, "step": 727, "training_step_time": 0.10971212387084961 }, { "epoch": 1.11083984375e-06, "model_forward_time": 0.02520751953125, "step": 728 }, { "epoch": 1.11083984375e-06, "step": 728, "training_step_time": 0.10643768310546875 }, { "epoch": 1.11236572265625e-06, "model_forward_time": 0.02508068084716797, "step": 729 }, { "epoch": 1.11236572265625e-06, "step": 729, "training_step_time": 0.11068010330200195 }, { "epoch": 1.1138916015625e-06, "grad_norm": 1.5712406635284424, "learning_rate": 4.866666666666667e-05, "loss": 0.1658, "step": 730 }, { "epoch": 1.1138916015625e-06, "model_forward_time": 0.025307893753051758, "step": 730 }, { "epoch": 1.1138916015625e-06, "step": 730, "training_step_time": 0.11095976829528809 }, { "epoch": 1.11541748046875e-06, "model_forward_time": 0.02530050277709961, "step": 731 }, { "epoch": 1.11541748046875e-06, "step": 731, "training_step_time": 0.11368393898010254 }, { "epoch": 1.116943359375e-06, "model_forward_time": 0.025435447692871094, "step": 732 }, { "epoch": 1.116943359375e-06, "step": 732, "training_step_time": 0.11254763603210449 }, { "epoch": 1.11846923828125e-06, "model_forward_time": 0.025848865509033203, "step": 733 }, { "epoch": 1.11846923828125e-06, "step": 733, "training_step_time": 0.11185479164123535 }, { "epoch": 1.1199951171875e-06, "model_forward_time": 0.025411128997802734, "step": 734 }, { "epoch": 1.1199951171875e-06, "step": 734, "training_step_time": 0.10872364044189453 }, { "epoch": 1.12152099609375e-06, "model_forward_time": 0.025285005569458008, "step": 735 }, { "epoch": 1.12152099609375e-06, "step": 735, "training_step_time": 0.10886693000793457 }, { "epoch": 1.123046875e-06, "model_forward_time": 0.025758028030395508, "step": 736 }, { "epoch": 1.123046875e-06, "step": 736, "training_step_time": 0.11107420921325684 }, { "epoch": 1.12457275390625e-06, "model_forward_time": 0.02525019645690918, "step": 737 }, { "epoch": 1.12457275390625e-06, "step": 737, "training_step_time": 0.10840344429016113 }, { "epoch": 1.1260986328125e-06, "model_forward_time": 0.0252840518951416, "step": 738 }, { "epoch": 1.1260986328125e-06, "step": 738, "training_step_time": 0.11023712158203125 }, { "epoch": 1.12762451171875e-06, "model_forward_time": 0.025641918182373047, "step": 739 }, { "epoch": 1.12762451171875e-06, "step": 739, "training_step_time": 0.11357975006103516 }, { "epoch": 1.129150390625e-06, "grad_norm": 1.2396936416625977, "learning_rate": 4.933333333333334e-05, "loss": 0.1443, "step": 740 }, { "epoch": 1.129150390625e-06, "model_forward_time": 0.02529597282409668, "step": 740 }, { "epoch": 1.129150390625e-06, "step": 740, "training_step_time": 0.11376833915710449 }, { "epoch": 1.13067626953125e-06, "model_forward_time": 0.025099754333496094, "step": 741 }, { "epoch": 1.13067626953125e-06, "step": 741, "training_step_time": 0.1101534366607666 }, { "epoch": 1.1322021484375e-06, "model_forward_time": 0.025180578231811523, "step": 742 }, { "epoch": 1.1322021484375e-06, "step": 742, "training_step_time": 0.11317563056945801 }, { "epoch": 1.13372802734375e-06, "model_forward_time": 0.02505660057067871, "step": 743 }, { "epoch": 1.13372802734375e-06, "step": 743, "training_step_time": 0.10860848426818848 }, { "epoch": 1.13525390625e-06, "model_forward_time": 0.02516913414001465, "step": 744 }, { "epoch": 1.13525390625e-06, "step": 744, "training_step_time": 0.12061500549316406 }, { "epoch": 1.13677978515625e-06, "model_forward_time": 0.025065183639526367, "step": 745 }, { "epoch": 1.13677978515625e-06, "step": 745, "training_step_time": 0.10983467102050781 }, { "epoch": 1.1383056640625e-06, "model_forward_time": 0.025249004364013672, "step": 746 }, { "epoch": 1.1383056640625e-06, "step": 746, "training_step_time": 0.20649242401123047 }, { "epoch": 1.13983154296875e-06, "model_forward_time": 0.025673627853393555, "step": 747 }, { "epoch": 1.13983154296875e-06, "step": 747, "training_step_time": 0.18613719940185547 }, { "epoch": 1.141357421875e-06, "model_forward_time": 0.02463388442993164, "step": 748 }, { "epoch": 1.141357421875e-06, "step": 748, "training_step_time": 0.18962574005126953 }, { "epoch": 1.14288330078125e-06, "model_forward_time": 0.024750471115112305, "step": 749 }, { "epoch": 1.14288330078125e-06, "step": 749, "training_step_time": 0.1727430820465088 }, { "epoch": 1.1444091796875e-06, "grad_norm": 1.2854273319244385, "learning_rate": 5e-05, "loss": 0.1799, "step": 750 }, { "epoch": 1.1444091796875e-06, "model_forward_time": 0.02433037757873535, "step": 750 }, { "epoch": 1.1444091796875e-06, "step": 750, "training_step_time": 0.11752486228942871 }, { "epoch": 1.14593505859375e-06, "model_forward_time": 0.024219036102294922, "step": 751 }, { "epoch": 1.14593505859375e-06, "step": 751, "training_step_time": 0.11133933067321777 }, { "epoch": 1.1474609375e-06, "model_forward_time": 0.025148391723632812, "step": 752 }, { "epoch": 1.1474609375e-06, "step": 752, "training_step_time": 0.10635495185852051 }, { "epoch": 1.14898681640625e-06, "model_forward_time": 0.02523064613342285, "step": 753 }, { "epoch": 1.14898681640625e-06, "step": 753, "training_step_time": 0.14212560653686523 }, { "epoch": 1.1505126953125e-06, "model_forward_time": 0.02510857582092285, "step": 754 }, { "epoch": 1.1505126953125e-06, "step": 754, "training_step_time": 0.12310910224914551 }, { "epoch": 1.15203857421875e-06, "model_forward_time": 0.026286840438842773, "step": 755 }, { "epoch": 1.15203857421875e-06, "step": 755, "training_step_time": 0.15250372886657715 }, { "epoch": 1.153564453125e-06, "model_forward_time": 0.02438831329345703, "step": 756 }, { "epoch": 1.153564453125e-06, "step": 756, "training_step_time": 0.16698646545410156 }, { "epoch": 1.15509033203125e-06, "model_forward_time": 0.024314165115356445, "step": 757 }, { "epoch": 1.15509033203125e-06, "step": 757, "training_step_time": 0.17577767372131348 }, { "epoch": 1.1566162109375e-06, "model_forward_time": 0.024173974990844727, "step": 758 }, { "epoch": 1.1566162109375e-06, "step": 758, "training_step_time": 0.1104731559753418 }, { "epoch": 1.15814208984375e-06, "model_forward_time": 0.025225162506103516, "step": 759 }, { "epoch": 1.15814208984375e-06, "step": 759, "training_step_time": 0.10592007637023926 }, { "epoch": 1.15966796875e-06, "grad_norm": 1.571570634841919, "learning_rate": 5.0666666666666674e-05, "loss": 0.1561, "step": 760 }, { "epoch": 1.15966796875e-06, "model_forward_time": 0.02786540985107422, "step": 760 }, { "epoch": 1.15966796875e-06, "step": 760, "training_step_time": 0.11235785484313965 }, { "epoch": 1.16119384765625e-06, "model_forward_time": 0.02533888816833496, "step": 761 }, { "epoch": 1.16119384765625e-06, "step": 761, "training_step_time": 0.10991120338439941 }, { "epoch": 1.1627197265625e-06, "model_forward_time": 0.025574922561645508, "step": 762 }, { "epoch": 1.1627197265625e-06, "step": 762, "training_step_time": 0.10943102836608887 }, { "epoch": 1.16424560546875e-06, "model_forward_time": 0.025574207305908203, "step": 763 }, { "epoch": 1.16424560546875e-06, "step": 763, "training_step_time": 0.10944700241088867 }, { "epoch": 1.165771484375e-06, "model_forward_time": 0.02519369125366211, "step": 764 }, { "epoch": 1.165771484375e-06, "step": 764, "training_step_time": 0.10709524154663086 }, { "epoch": 1.16729736328125e-06, "model_forward_time": 0.025970935821533203, "step": 765 }, { "epoch": 1.16729736328125e-06, "step": 765, "training_step_time": 0.18401122093200684 }, { "epoch": 1.1688232421875e-06, "model_forward_time": 0.02460455894470215, "step": 766 }, { "epoch": 1.1688232421875e-06, "step": 766, "training_step_time": 0.11684060096740723 }, { "epoch": 1.17034912109375e-06, "model_forward_time": 0.025210857391357422, "step": 767 }, { "epoch": 1.17034912109375e-06, "step": 767, "training_step_time": 0.11130547523498535 }, { "epoch": 1.171875e-06, "model_forward_time": 0.02577352523803711, "step": 768 }, { "epoch": 1.171875e-06, "step": 768, "training_step_time": 0.10989856719970703 }, { "epoch": 1.17340087890625e-06, "model_forward_time": 0.02577948570251465, "step": 769 }, { "epoch": 1.17340087890625e-06, "step": 769, "training_step_time": 0.19843149185180664 }, { "epoch": 1.1749267578125e-06, "grad_norm": 2.9330291748046875, "learning_rate": 5.133333333333333e-05, "loss": 0.2266, "step": 770 }, { "epoch": 1.1749267578125e-06, "model_forward_time": 0.024728775024414062, "step": 770 }, { "epoch": 1.1749267578125e-06, "step": 770, "training_step_time": 0.11417913436889648 }, { "epoch": 1.17645263671875e-06, "model_forward_time": 0.02676701545715332, "step": 771 }, { "epoch": 1.17645263671875e-06, "step": 771, "training_step_time": 0.10785508155822754 }, { "epoch": 1.177978515625e-06, "model_forward_time": 0.02538323402404785, "step": 772 }, { "epoch": 1.177978515625e-06, "step": 772, "training_step_time": 0.10771942138671875 }, { "epoch": 1.17950439453125e-06, "model_forward_time": 0.025602340698242188, "step": 773 }, { "epoch": 1.17950439453125e-06, "step": 773, "training_step_time": 0.10840415954589844 }, { "epoch": 1.1810302734375e-06, "model_forward_time": 0.025500774383544922, "step": 774 }, { "epoch": 1.1810302734375e-06, "step": 774, "training_step_time": 0.10975027084350586 }, { "epoch": 1.18255615234375e-06, "model_forward_time": 0.025704622268676758, "step": 775 }, { "epoch": 1.18255615234375e-06, "step": 775, "training_step_time": 0.10821175575256348 }, { "epoch": 1.18408203125e-06, "model_forward_time": 0.025078296661376953, "step": 776 }, { "epoch": 1.18408203125e-06, "step": 776, "training_step_time": 0.10561895370483398 }, { "epoch": 1.18560791015625e-06, "model_forward_time": 0.0257108211517334, "step": 777 }, { "epoch": 1.18560791015625e-06, "step": 777, "training_step_time": 0.11000680923461914 }, { "epoch": 1.1871337890625e-06, "model_forward_time": 0.025235891342163086, "step": 778 }, { "epoch": 1.1871337890625e-06, "step": 778, "training_step_time": 0.10790348052978516 }, { "epoch": 1.18865966796875e-06, "model_forward_time": 0.02523207664489746, "step": 779 }, { "epoch": 1.18865966796875e-06, "step": 779, "training_step_time": 0.10610651969909668 }, { "epoch": 1.190185546875e-06, "grad_norm": 1.3401633501052856, "learning_rate": 5.2000000000000004e-05, "loss": 0.2208, "step": 780 }, { "epoch": 1.190185546875e-06, "model_forward_time": 0.0252072811126709, "step": 780 }, { "epoch": 1.190185546875e-06, "step": 780, "training_step_time": 0.11088132858276367 }, { "epoch": 1.19171142578125e-06, "model_forward_time": 0.025643587112426758, "step": 781 }, { "epoch": 1.19171142578125e-06, "step": 781, "training_step_time": 0.10860586166381836 }, { "epoch": 1.1932373046875e-06, "model_forward_time": 0.024716615676879883, "step": 782 }, { "epoch": 1.1932373046875e-06, "step": 782, "training_step_time": 0.10994243621826172 }, { "epoch": 1.19476318359375e-06, "model_forward_time": 0.02512526512145996, "step": 783 }, { "epoch": 1.19476318359375e-06, "step": 783, "training_step_time": 0.10535097122192383 }, { "epoch": 1.1962890625e-06, "model_forward_time": 0.02538323402404785, "step": 784 }, { "epoch": 1.1962890625e-06, "step": 784, "training_step_time": 0.10880637168884277 }, { "epoch": 1.19781494140625e-06, "model_forward_time": 0.025083541870117188, "step": 785 }, { "epoch": 1.19781494140625e-06, "step": 785, "training_step_time": 0.10922622680664062 }, { "epoch": 1.1993408203125e-06, "model_forward_time": 0.024970531463623047, "step": 786 }, { "epoch": 1.1993408203125e-06, "step": 786, "training_step_time": 0.10834074020385742 }, { "epoch": 1.20086669921875e-06, "model_forward_time": 0.02490854263305664, "step": 787 }, { "epoch": 1.20086669921875e-06, "step": 787, "training_step_time": 0.10458636283874512 }, { "epoch": 1.202392578125e-06, "model_forward_time": 0.02508544921875, "step": 788 }, { "epoch": 1.202392578125e-06, "step": 788, "training_step_time": 0.10519790649414062 }, { "epoch": 1.20391845703125e-06, "model_forward_time": 0.024126768112182617, "step": 789 }, { "epoch": 1.20391845703125e-06, "step": 789, "training_step_time": 0.13147473335266113 }, { "epoch": 1.2054443359375e-06, "grad_norm": 0.9743676781654358, "learning_rate": 5.266666666666666e-05, "loss": 0.178, "step": 790 }, { "epoch": 1.2054443359375e-06, "model_forward_time": 0.025664091110229492, "step": 790 }, { "epoch": 1.2054443359375e-06, "step": 790, "training_step_time": 0.16959929466247559 }, { "epoch": 1.20697021484375e-06, "model_forward_time": 0.024892807006835938, "step": 791 }, { "epoch": 1.20697021484375e-06, "step": 791, "training_step_time": 0.17504000663757324 }, { "epoch": 1.20849609375e-06, "model_forward_time": 0.02520012855529785, "step": 792 }, { "epoch": 1.20849609375e-06, "step": 792, "training_step_time": 0.16748404502868652 }, { "epoch": 1.21002197265625e-06, "model_forward_time": 0.02446293830871582, "step": 793 }, { "epoch": 1.21002197265625e-06, "step": 793, "training_step_time": 0.14552044868469238 }, { "epoch": 1.2115478515625e-06, "model_forward_time": 0.02431631088256836, "step": 794 }, { "epoch": 1.2115478515625e-06, "step": 794, "training_step_time": 0.15413522720336914 }, { "epoch": 1.21307373046875e-06, "model_forward_time": 0.02455592155456543, "step": 795 }, { "epoch": 1.21307373046875e-06, "step": 795, "training_step_time": 0.11932706832885742 }, { "epoch": 1.214599609375e-06, "model_forward_time": 0.02487492561340332, "step": 796 }, { "epoch": 1.214599609375e-06, "step": 796, "training_step_time": 0.10904192924499512 }, { "epoch": 1.21612548828125e-06, "model_forward_time": 0.025246620178222656, "step": 797 }, { "epoch": 1.21612548828125e-06, "step": 797, "training_step_time": 0.10794281959533691 }, { "epoch": 1.2176513671875e-06, "model_forward_time": 0.025601863861083984, "step": 798 }, { "epoch": 1.2176513671875e-06, "step": 798, "training_step_time": 0.1671278476715088 }, { "epoch": 1.21917724609375e-06, "model_forward_time": 0.02472829818725586, "step": 799 }, { "epoch": 1.21917724609375e-06, "step": 799, "training_step_time": 0.11611461639404297 }, { "epoch": 1.220703125e-06, "grad_norm": 1.5637664794921875, "learning_rate": 5.333333333333333e-05, "loss": 0.1663, "step": 800 }, { "epoch": 1.220703125e-06, "model_forward_time": 0.023726463317871094, "step": 800 }, { "epoch": 1.220703125e-06, "step": 800, "training_step_time": 0.2040729522705078 }, { "epoch": 1.22222900390625e-06, "model_forward_time": 0.0251157283782959, "step": 801 }, { "epoch": 1.22222900390625e-06, "step": 801, "training_step_time": 0.10689282417297363 }, { "epoch": 1.2237548828125e-06, "model_forward_time": 0.024749040603637695, "step": 802 }, { "epoch": 1.2237548828125e-06, "step": 802, "training_step_time": 0.17762041091918945 }, { "epoch": 1.22528076171875e-06, "model_forward_time": 0.025236129760742188, "step": 803 }, { "epoch": 1.22528076171875e-06, "step": 803, "training_step_time": 0.11282753944396973 }, { "epoch": 1.226806640625e-06, "model_forward_time": 0.024543046951293945, "step": 804 }, { "epoch": 1.226806640625e-06, "step": 804, "training_step_time": 0.10494351387023926 }, { "epoch": 1.22833251953125e-06, "model_forward_time": 0.025599956512451172, "step": 805 }, { "epoch": 1.22833251953125e-06, "step": 805, "training_step_time": 0.1090855598449707 }, { "epoch": 1.2298583984375e-06, "model_forward_time": 0.025291919708251953, "step": 806 }, { "epoch": 1.2298583984375e-06, "step": 806, "training_step_time": 0.11017894744873047 }, { "epoch": 1.23138427734375e-06, "model_forward_time": 0.02561354637145996, "step": 807 }, { "epoch": 1.23138427734375e-06, "step": 807, "training_step_time": 0.10650086402893066 }, { "epoch": 1.23291015625e-06, "model_forward_time": 0.025461196899414062, "step": 808 }, { "epoch": 1.23291015625e-06, "step": 808, "training_step_time": 0.10768532752990723 }, { "epoch": 1.23443603515625e-06, "model_forward_time": 0.025017499923706055, "step": 809 }, { "epoch": 1.23443603515625e-06, "step": 809, "training_step_time": 0.11121582984924316 }, { "epoch": 1.2359619140625e-06, "grad_norm": 1.7824689149856567, "learning_rate": 5.4000000000000005e-05, "loss": 0.1815, "step": 810 }, { "epoch": 1.2359619140625e-06, "model_forward_time": 0.026363849639892578, "step": 810 }, { "epoch": 1.2359619140625e-06, "step": 810, "training_step_time": 0.10753965377807617 }, { "epoch": 1.23748779296875e-06, "model_forward_time": 0.02601027488708496, "step": 811 }, { "epoch": 1.23748779296875e-06, "step": 811, "training_step_time": 0.20797109603881836 }, { "epoch": 1.239013671875e-06, "model_forward_time": 0.02433919906616211, "step": 812 }, { "epoch": 1.239013671875e-06, "step": 812, "training_step_time": 0.10780191421508789 }, { "epoch": 1.24053955078125e-06, "model_forward_time": 0.02543783187866211, "step": 813 }, { "epoch": 1.24053955078125e-06, "step": 813, "training_step_time": 0.1097879409790039 }, { "epoch": 1.2420654296875e-06, "model_forward_time": 0.025086402893066406, "step": 814 }, { "epoch": 1.2420654296875e-06, "step": 814, "training_step_time": 0.1974022388458252 }, { "epoch": 1.24359130859375e-06, "model_forward_time": 0.024779796600341797, "step": 815 }, { "epoch": 1.24359130859375e-06, "step": 815, "training_step_time": 0.10794305801391602 }, { "epoch": 1.2451171875e-06, "model_forward_time": 0.02465224266052246, "step": 816 }, { "epoch": 1.2451171875e-06, "step": 816, "training_step_time": 0.1103818416595459 }, { "epoch": 1.24664306640625e-06, "model_forward_time": 0.02538776397705078, "step": 817 }, { "epoch": 1.24664306640625e-06, "step": 817, "training_step_time": 0.10701918601989746 }, { "epoch": 1.2481689453125e-06, "model_forward_time": 0.02614760398864746, "step": 818 }, { "epoch": 1.2481689453125e-06, "step": 818, "training_step_time": 0.10727715492248535 }, { "epoch": 1.24969482421875e-06, "model_forward_time": 0.02506542205810547, "step": 819 }, { "epoch": 1.24969482421875e-06, "step": 819, "training_step_time": 0.11271452903747559 }, { "epoch": 1.251220703125e-06, "grad_norm": 0.9247801899909973, "learning_rate": 5.466666666666666e-05, "loss": 0.1907, "step": 820 }, { "epoch": 1.251220703125e-06, "model_forward_time": 0.026698589324951172, "step": 820 }, { "epoch": 1.251220703125e-06, "step": 820, "training_step_time": 0.11146426200866699 }, { "epoch": 1.25274658203125e-06, "model_forward_time": 0.02523946762084961, "step": 821 }, { "epoch": 1.25274658203125e-06, "step": 821, "training_step_time": 0.11313247680664062 }, { "epoch": 1.2542724609375e-06, "model_forward_time": 0.02542877197265625, "step": 822 }, { "epoch": 1.2542724609375e-06, "step": 822, "training_step_time": 0.10615134239196777 }, { "epoch": 1.25579833984375e-06, "model_forward_time": 0.02531576156616211, "step": 823 }, { "epoch": 1.25579833984375e-06, "step": 823, "training_step_time": 0.10747337341308594 }, { "epoch": 1.25732421875e-06, "model_forward_time": 0.025635242462158203, "step": 824 }, { "epoch": 1.25732421875e-06, "step": 824, "training_step_time": 0.10699796676635742 }, { "epoch": 1.25885009765625e-06, "model_forward_time": 0.02538895606994629, "step": 825 }, { "epoch": 1.25885009765625e-06, "step": 825, "training_step_time": 0.1090703010559082 }, { "epoch": 1.2603759765625e-06, "model_forward_time": 0.02551889419555664, "step": 826 }, { "epoch": 1.2603759765625e-06, "step": 826, "training_step_time": 0.10820293426513672 }, { "epoch": 1.26190185546875e-06, "model_forward_time": 0.025539875030517578, "step": 827 }, { "epoch": 1.26190185546875e-06, "step": 827, "training_step_time": 0.10791134834289551 }, { "epoch": 1.263427734375e-06, "model_forward_time": 0.025384902954101562, "step": 828 }, { "epoch": 1.263427734375e-06, "step": 828, "training_step_time": 0.10955405235290527 }, { "epoch": 1.26495361328125e-06, "model_forward_time": 0.025703907012939453, "step": 829 }, { "epoch": 1.26495361328125e-06, "step": 829, "training_step_time": 0.10976171493530273 }, { "epoch": 1.2664794921875e-06, "grad_norm": 1.4237875938415527, "learning_rate": 5.5333333333333334e-05, "loss": 0.1956, "step": 830 }, { "epoch": 1.2664794921875e-06, "model_forward_time": 0.025047779083251953, "step": 830 }, { "epoch": 1.2664794921875e-06, "step": 830, "training_step_time": 0.1073763370513916 }, { "epoch": 1.26800537109375e-06, "model_forward_time": 0.0254056453704834, "step": 831 }, { "epoch": 1.26800537109375e-06, "step": 831, "training_step_time": 0.11088395118713379 }, { "epoch": 1.26953125e-06, "model_forward_time": 0.025291919708251953, "step": 832 }, { "epoch": 1.26953125e-06, "step": 832, "training_step_time": 0.10759282112121582 }, { "epoch": 1.27105712890625e-06, "model_forward_time": 0.02551865577697754, "step": 833 }, { "epoch": 1.27105712890625e-06, "step": 833, "training_step_time": 0.19334745407104492 }, { "epoch": 1.2725830078125e-06, "model_forward_time": 0.02462935447692871, "step": 834 }, { "epoch": 1.2725830078125e-06, "step": 834, "training_step_time": 0.10671043395996094 }, { "epoch": 1.27410888671875e-06, "model_forward_time": 0.02471470832824707, "step": 835 }, { "epoch": 1.27410888671875e-06, "step": 835, "training_step_time": 0.21355009078979492 }, { "epoch": 1.275634765625e-06, "model_forward_time": 0.02554607391357422, "step": 836 }, { "epoch": 1.275634765625e-06, "step": 836, "training_step_time": 0.161391019821167 }, { "epoch": 1.27716064453125e-06, "model_forward_time": 0.024538040161132812, "step": 837 }, { "epoch": 1.27716064453125e-06, "step": 837, "training_step_time": 0.1728515625 }, { "epoch": 1.2786865234375e-06, "model_forward_time": 0.024572134017944336, "step": 838 }, { "epoch": 1.2786865234375e-06, "step": 838, "training_step_time": 0.14132213592529297 }, { "epoch": 1.28021240234375e-06, "model_forward_time": 0.025064468383789062, "step": 839 }, { "epoch": 1.28021240234375e-06, "step": 839, "training_step_time": 0.20406007766723633 }, { "epoch": 1.28173828125e-06, "grad_norm": 1.0427626371383667, "learning_rate": 5.6000000000000006e-05, "loss": 0.1682, "step": 840 }, { "epoch": 1.28173828125e-06, "model_forward_time": 0.024442672729492188, "step": 840 }, { "epoch": 1.28173828125e-06, "step": 840, "training_step_time": 0.10689544677734375 }, { "epoch": 1.28326416015625e-06, "model_forward_time": 0.02499222755432129, "step": 841 }, { "epoch": 1.28326416015625e-06, "step": 841, "training_step_time": 0.1052548885345459 }, { "epoch": 1.2847900390625e-06, "model_forward_time": 0.025845766067504883, "step": 842 }, { "epoch": 1.2847900390625e-06, "step": 842, "training_step_time": 0.15422654151916504 }, { "epoch": 1.28631591796875e-06, "model_forward_time": 0.025130748748779297, "step": 843 }, { "epoch": 1.28631591796875e-06, "step": 843, "training_step_time": 0.11932563781738281 }, { "epoch": 1.287841796875e-06, "model_forward_time": 0.02488994598388672, "step": 844 }, { "epoch": 1.287841796875e-06, "step": 844, "training_step_time": 0.10901236534118652 }, { "epoch": 1.28936767578125e-06, "model_forward_time": 0.02527761459350586, "step": 845 }, { "epoch": 1.28936767578125e-06, "step": 845, "training_step_time": 0.1108694076538086 }, { "epoch": 1.2908935546875e-06, "model_forward_time": 0.025673389434814453, "step": 846 }, { "epoch": 1.2908935546875e-06, "step": 846, "training_step_time": 0.1560213565826416 }, { "epoch": 1.29241943359375e-06, "model_forward_time": 0.025255441665649414, "step": 847 }, { "epoch": 1.29241943359375e-06, "step": 847, "training_step_time": 0.12120938301086426 }, { "epoch": 1.2939453125e-06, "model_forward_time": 0.025014162063598633, "step": 848 }, { "epoch": 1.2939453125e-06, "step": 848, "training_step_time": 0.10799694061279297 }, { "epoch": 1.29547119140625e-06, "model_forward_time": 0.025567293167114258, "step": 849 }, { "epoch": 1.29547119140625e-06, "step": 849, "training_step_time": 0.10947394371032715 }, { "epoch": 1.2969970703125e-06, "grad_norm": 1.135888934135437, "learning_rate": 5.666666666666667e-05, "loss": 0.1923, "step": 850 }, { "epoch": 1.2969970703125e-06, "model_forward_time": 0.025428056716918945, "step": 850 }, { "epoch": 1.2969970703125e-06, "step": 850, "training_step_time": 0.1135861873626709 }, { "epoch": 1.29852294921875e-06, "model_forward_time": 0.02511882781982422, "step": 851 }, { "epoch": 1.29852294921875e-06, "step": 851, "training_step_time": 0.10964608192443848 }, { "epoch": 1.300048828125e-06, "model_forward_time": 0.025823116302490234, "step": 852 }, { "epoch": 1.300048828125e-06, "step": 852, "training_step_time": 0.10938477516174316 }, { "epoch": 1.30157470703125e-06, "model_forward_time": 0.025175809860229492, "step": 853 }, { "epoch": 1.30157470703125e-06, "step": 853, "training_step_time": 0.10630249977111816 }, { "epoch": 1.3031005859375e-06, "model_forward_time": 0.025522708892822266, "step": 854 }, { "epoch": 1.3031005859375e-06, "step": 854, "training_step_time": 0.1079401969909668 }, { "epoch": 1.30462646484375e-06, "model_forward_time": 0.025684595108032227, "step": 855 }, { "epoch": 1.30462646484375e-06, "step": 855, "training_step_time": 0.10730624198913574 }, { "epoch": 1.30615234375e-06, "model_forward_time": 0.0256350040435791, "step": 856 }, { "epoch": 1.30615234375e-06, "step": 856, "training_step_time": 0.20493817329406738 }, { "epoch": 1.30767822265625e-06, "model_forward_time": 0.025014638900756836, "step": 857 }, { "epoch": 1.30767822265625e-06, "step": 857, "training_step_time": 0.11029052734375 }, { "epoch": 1.3092041015625e-06, "model_forward_time": 0.025072813034057617, "step": 858 }, { "epoch": 1.3092041015625e-06, "step": 858, "training_step_time": 0.10683917999267578 }, { "epoch": 1.31072998046875e-06, "model_forward_time": 0.025517940521240234, "step": 859 }, { "epoch": 1.31072998046875e-06, "step": 859, "training_step_time": 0.20279932022094727 }, { "epoch": 1.312255859375e-06, "grad_norm": 1.374436616897583, "learning_rate": 5.7333333333333336e-05, "loss": 0.1921, "step": 860 }, { "epoch": 1.312255859375e-06, "model_forward_time": 0.026016712188720703, "step": 860 }, { "epoch": 1.312255859375e-06, "step": 860, "training_step_time": 0.10799336433410645 }, { "epoch": 1.31378173828125e-06, "model_forward_time": 0.024817466735839844, "step": 861 }, { "epoch": 1.31378173828125e-06, "step": 861, "training_step_time": 0.10469913482666016 }, { "epoch": 1.3153076171875e-06, "model_forward_time": 0.02522444725036621, "step": 862 }, { "epoch": 1.3153076171875e-06, "step": 862, "training_step_time": 0.10624504089355469 }, { "epoch": 1.31683349609375e-06, "model_forward_time": 0.0252835750579834, "step": 863 }, { "epoch": 1.31683349609375e-06, "step": 863, "training_step_time": 0.10539531707763672 }, { "epoch": 1.318359375e-06, "model_forward_time": 0.025434494018554688, "step": 864 }, { "epoch": 1.318359375e-06, "step": 864, "training_step_time": 0.10926222801208496 }, { "epoch": 1.31988525390625e-06, "model_forward_time": 0.025406837463378906, "step": 865 }, { "epoch": 1.31988525390625e-06, "step": 865, "training_step_time": 0.10640454292297363 }, { "epoch": 1.3214111328125e-06, "model_forward_time": 0.025209903717041016, "step": 866 }, { "epoch": 1.3214111328125e-06, "step": 866, "training_step_time": 0.10465574264526367 }, { "epoch": 1.32293701171875e-06, "model_forward_time": 0.025572776794433594, "step": 867 }, { "epoch": 1.32293701171875e-06, "step": 867, "training_step_time": 0.10882973670959473 }, { "epoch": 1.324462890625e-06, "model_forward_time": 0.025415420532226562, "step": 868 }, { "epoch": 1.324462890625e-06, "step": 868, "training_step_time": 0.10596466064453125 }, { "epoch": 1.32598876953125e-06, "model_forward_time": 0.02488541603088379, "step": 869 }, { "epoch": 1.32598876953125e-06, "step": 869, "training_step_time": 0.10890769958496094 }, { "epoch": 1.3275146484375e-06, "grad_norm": 2.062347650527954, "learning_rate": 5.8e-05, "loss": 0.2042, "step": 870 }, { "epoch": 1.3275146484375e-06, "model_forward_time": 0.02486252784729004, "step": 870 }, { "epoch": 1.3275146484375e-06, "step": 870, "training_step_time": 0.1050269603729248 }, { "epoch": 1.32904052734375e-06, "model_forward_time": 0.02487969398498535, "step": 871 }, { "epoch": 1.32904052734375e-06, "step": 871, "training_step_time": 0.10508346557617188 }, { "epoch": 1.33056640625e-06, "model_forward_time": 0.025488615036010742, "step": 872 }, { "epoch": 1.33056640625e-06, "step": 872, "training_step_time": 0.1053617000579834 }, { "epoch": 1.33209228515625e-06, "model_forward_time": 0.028691768646240234, "step": 873 }, { "epoch": 1.33209228515625e-06, "step": 873, "training_step_time": 0.10998988151550293 }, { "epoch": 1.3336181640625e-06, "model_forward_time": 0.026524066925048828, "step": 874 }, { "epoch": 1.3336181640625e-06, "step": 874, "training_step_time": 0.1125645637512207 }, { "epoch": 1.33514404296875e-06, "model_forward_time": 0.025743961334228516, "step": 875 }, { "epoch": 1.33514404296875e-06, "step": 875, "training_step_time": 0.10617494583129883 }, { "epoch": 1.336669921875e-06, "model_forward_time": 0.025171279907226562, "step": 876 }, { "epoch": 1.336669921875e-06, "step": 876, "training_step_time": 0.11585307121276855 }, { "epoch": 1.33819580078125e-06, "model_forward_time": 0.02558159828186035, "step": 877 }, { "epoch": 1.33819580078125e-06, "step": 877, "training_step_time": 0.10683536529541016 }, { "epoch": 1.3397216796875e-06, "model_forward_time": 0.02523636817932129, "step": 878 }, { "epoch": 1.3397216796875e-06, "step": 878, "training_step_time": 0.19900131225585938 }, { "epoch": 1.34124755859375e-06, "model_forward_time": 0.02442336082458496, "step": 879 }, { "epoch": 1.34124755859375e-06, "step": 879, "training_step_time": 0.10666322708129883 }, { "epoch": 1.3427734375e-06, "grad_norm": 1.5996571779251099, "learning_rate": 5.866666666666667e-05, "loss": 0.1628, "step": 880 }, { "epoch": 1.3427734375e-06, "model_forward_time": 0.024828195571899414, "step": 880 }, { "epoch": 1.3427734375e-06, "step": 880, "training_step_time": 0.19969630241394043 }, { "epoch": 1.34429931640625e-06, "model_forward_time": 0.02455878257751465, "step": 881 }, { "epoch": 1.34429931640625e-06, "step": 881, "training_step_time": 0.11535906791687012 }, { "epoch": 1.3458251953125e-06, "model_forward_time": 0.025022268295288086, "step": 882 }, { "epoch": 1.3458251953125e-06, "step": 882, "training_step_time": 0.21144580841064453 }, { "epoch": 1.34735107421875e-06, "model_forward_time": 0.02456045150756836, "step": 883 }, { "epoch": 1.34735107421875e-06, "step": 883, "training_step_time": 0.15330243110656738 }, { "epoch": 1.348876953125e-06, "model_forward_time": 0.025060415267944336, "step": 884 }, { "epoch": 1.348876953125e-06, "step": 884, "training_step_time": 0.20932865142822266 }, { "epoch": 1.35040283203125e-06, "model_forward_time": 0.024442195892333984, "step": 885 }, { "epoch": 1.35040283203125e-06, "step": 885, "training_step_time": 0.10668277740478516 }, { "epoch": 1.3519287109375e-06, "model_forward_time": 0.0246737003326416, "step": 886 }, { "epoch": 1.3519287109375e-06, "step": 886, "training_step_time": 0.10990262031555176 }, { "epoch": 1.35345458984375e-06, "model_forward_time": 0.025329113006591797, "step": 887 }, { "epoch": 1.35345458984375e-06, "step": 887, "training_step_time": 0.12615203857421875 }, { "epoch": 1.35498046875e-06, "model_forward_time": 0.028086423873901367, "step": 888 }, { "epoch": 1.35498046875e-06, "step": 888, "training_step_time": 0.11855888366699219 }, { "epoch": 1.35650634765625e-06, "model_forward_time": 0.025999069213867188, "step": 889 }, { "epoch": 1.35650634765625e-06, "step": 889, "training_step_time": 0.11162257194519043 }, { "epoch": 1.3580322265625e-06, "grad_norm": 1.142017126083374, "learning_rate": 5.9333333333333343e-05, "loss": 0.1634, "step": 890 }, { "epoch": 1.3580322265625e-06, "model_forward_time": 0.02574610710144043, "step": 890 }, { "epoch": 1.3580322265625e-06, "step": 890, "training_step_time": 0.19820380210876465 }, { "epoch": 1.35955810546875e-06, "model_forward_time": 0.0249178409576416, "step": 891 }, { "epoch": 1.35955810546875e-06, "step": 891, "training_step_time": 0.17315220832824707 }, { "epoch": 1.361083984375e-06, "model_forward_time": 0.024877071380615234, "step": 892 }, { "epoch": 1.361083984375e-06, "step": 892, "training_step_time": 0.1156160831451416 }, { "epoch": 1.36260986328125e-06, "model_forward_time": 0.025287866592407227, "step": 893 }, { "epoch": 1.36260986328125e-06, "step": 893, "training_step_time": 0.1058206558227539 }, { "epoch": 1.3641357421875e-06, "model_forward_time": 0.025472164154052734, "step": 894 }, { "epoch": 1.3641357421875e-06, "step": 894, "training_step_time": 0.10944414138793945 }, { "epoch": 1.36566162109375e-06, "model_forward_time": 0.025201797485351562, "step": 895 }, { "epoch": 1.36566162109375e-06, "step": 895, "training_step_time": 0.10701942443847656 }, { "epoch": 1.3671875e-06, "model_forward_time": 0.025209665298461914, "step": 896 }, { "epoch": 1.3671875e-06, "step": 896, "training_step_time": 0.10886192321777344 }, { "epoch": 1.36871337890625e-06, "model_forward_time": 0.025374650955200195, "step": 897 }, { "epoch": 1.36871337890625e-06, "step": 897, "training_step_time": 0.10753178596496582 }, { "epoch": 1.3702392578125e-06, "model_forward_time": 0.025010347366333008, "step": 898 }, { "epoch": 1.3702392578125e-06, "step": 898, "training_step_time": 0.10671615600585938 }, { "epoch": 1.37176513671875e-06, "model_forward_time": 0.025665283203125, "step": 899 }, { "epoch": 1.37176513671875e-06, "step": 899, "training_step_time": 0.10579776763916016 }, { "epoch": 1.373291015625e-06, "grad_norm": 1.4356186389923096, "learning_rate": 6e-05, "loss": 0.1725, "step": 900 }, { "epoch": 1.373291015625e-06, "model_forward_time": 0.025333642959594727, "step": 900 }, { "epoch": 1.373291015625e-06, "step": 900, "training_step_time": 0.20978879928588867 }, { "epoch": 1.37481689453125e-06, "model_forward_time": 0.025175809860229492, "step": 901 }, { "epoch": 1.37481689453125e-06, "step": 901, "training_step_time": 0.1122903823852539 }, { "epoch": 1.3763427734375e-06, "model_forward_time": 0.02466297149658203, "step": 902 }, { "epoch": 1.3763427734375e-06, "step": 902, "training_step_time": 0.1049649715423584 }, { "epoch": 1.37786865234375e-06, "model_forward_time": 0.025486230850219727, "step": 903 }, { "epoch": 1.37786865234375e-06, "step": 903, "training_step_time": 0.20108819007873535 }, { "epoch": 1.37939453125e-06, "model_forward_time": 0.024318456649780273, "step": 904 }, { "epoch": 1.37939453125e-06, "step": 904, "training_step_time": 0.10948443412780762 }, { "epoch": 1.38092041015625e-06, "model_forward_time": 0.024786710739135742, "step": 905 }, { "epoch": 1.38092041015625e-06, "step": 905, "training_step_time": 0.10420083999633789 }, { "epoch": 1.3824462890625e-06, "model_forward_time": 0.025238752365112305, "step": 906 }, { "epoch": 1.3824462890625e-06, "step": 906, "training_step_time": 0.10785579681396484 }, { "epoch": 1.38397216796875e-06, "model_forward_time": 0.0261232852935791, "step": 907 }, { "epoch": 1.38397216796875e-06, "step": 907, "training_step_time": 0.10701894760131836 }, { "epoch": 1.385498046875e-06, "model_forward_time": 0.025119304656982422, "step": 908 }, { "epoch": 1.385498046875e-06, "step": 908, "training_step_time": 0.10891938209533691 }, { "epoch": 1.38702392578125e-06, "model_forward_time": 0.02426433563232422, "step": 909 }, { "epoch": 1.38702392578125e-06, "step": 909, "training_step_time": 0.11050534248352051 }, { "epoch": 1.3885498046875e-06, "grad_norm": 1.1422182321548462, "learning_rate": 6.066666666666667e-05, "loss": 0.1749, "step": 910 }, { "epoch": 1.3885498046875e-06, "model_forward_time": 0.02512979507446289, "step": 910 }, { "epoch": 1.3885498046875e-06, "step": 910, "training_step_time": 0.10527372360229492 }, { "epoch": 1.39007568359375e-06, "model_forward_time": 0.0250089168548584, "step": 911 }, { "epoch": 1.39007568359375e-06, "step": 911, "training_step_time": 0.11338305473327637 }, { "epoch": 1.3916015625e-06, "model_forward_time": 0.024873971939086914, "step": 912 }, { "epoch": 1.3916015625e-06, "step": 912, "training_step_time": 0.11277055740356445 }, { "epoch": 1.39312744140625e-06, "model_forward_time": 0.025377511978149414, "step": 913 }, { "epoch": 1.39312744140625e-06, "step": 913, "training_step_time": 0.10681915283203125 }, { "epoch": 1.3946533203125e-06, "model_forward_time": 0.025179147720336914, "step": 914 }, { "epoch": 1.3946533203125e-06, "step": 914, "training_step_time": 0.1075286865234375 }, { "epoch": 1.39617919921875e-06, "model_forward_time": 0.025724172592163086, "step": 915 }, { "epoch": 1.39617919921875e-06, "step": 915, "training_step_time": 0.10797691345214844 }, { "epoch": 1.397705078125e-06, "model_forward_time": 0.02528071403503418, "step": 916 }, { "epoch": 1.397705078125e-06, "step": 916, "training_step_time": 0.10988545417785645 }, { "epoch": 1.39923095703125e-06, "model_forward_time": 0.025521039962768555, "step": 917 }, { "epoch": 1.39923095703125e-06, "step": 917, "training_step_time": 0.1135554313659668 }, { "epoch": 1.4007568359375e-06, "model_forward_time": 0.025025367736816406, "step": 918 }, { "epoch": 1.4007568359375e-06, "step": 918, "training_step_time": 0.11817789077758789 }, { "epoch": 1.40228271484375e-06, "model_forward_time": 0.02422356605529785, "step": 919 }, { "epoch": 1.40228271484375e-06, "step": 919, "training_step_time": 0.12440180778503418 }, { "epoch": 1.40380859375e-06, "grad_norm": 1.2643885612487793, "learning_rate": 6.133333333333334e-05, "loss": 0.154, "step": 920 }, { "epoch": 1.40380859375e-06, "model_forward_time": 0.02406764030456543, "step": 920 }, { "epoch": 1.40380859375e-06, "step": 920, "training_step_time": 0.12764978408813477 }, { "epoch": 1.40533447265625e-06, "model_forward_time": 0.02401137351989746, "step": 921 }, { "epoch": 1.40533447265625e-06, "step": 921, "training_step_time": 0.11670327186584473 }, { "epoch": 1.4068603515625e-06, "model_forward_time": 0.02414560317993164, "step": 922 }, { "epoch": 1.4068603515625e-06, "step": 922, "training_step_time": 0.1206510066986084 }, { "epoch": 1.40838623046875e-06, "model_forward_time": 0.02466607093811035, "step": 923 }, { "epoch": 1.40838623046875e-06, "step": 923, "training_step_time": 0.11596298217773438 }, { "epoch": 1.409912109375e-06, "model_forward_time": 0.025465965270996094, "step": 924 }, { "epoch": 1.409912109375e-06, "step": 924, "training_step_time": 0.19401812553405762 }, { "epoch": 1.41143798828125e-06, "model_forward_time": 0.02424764633178711, "step": 925 }, { "epoch": 1.41143798828125e-06, "step": 925, "training_step_time": 0.18452143669128418 }, { "epoch": 1.4129638671875e-06, "model_forward_time": 0.024307727813720703, "step": 926 }, { "epoch": 1.4129638671875e-06, "step": 926, "training_step_time": 0.1534273624420166 }, { "epoch": 1.41448974609375e-06, "model_forward_time": 0.024196386337280273, "step": 927 }, { "epoch": 1.41448974609375e-06, "step": 927, "training_step_time": 0.12951970100402832 }, { "epoch": 1.416015625e-06, "model_forward_time": 0.02430891990661621, "step": 928 }, { "epoch": 1.416015625e-06, "step": 928, "training_step_time": 0.2087705135345459 }, { "epoch": 1.41754150390625e-06, "model_forward_time": 0.024546146392822266, "step": 929 }, { "epoch": 1.41754150390625e-06, "step": 929, "training_step_time": 0.12098336219787598 }, { "epoch": 1.4190673828125e-06, "grad_norm": 0.938713788986206, "learning_rate": 6.2e-05, "loss": 0.1516, "step": 930 }, { "epoch": 1.4190673828125e-06, "model_forward_time": 0.024710416793823242, "step": 930 }, { "epoch": 1.4190673828125e-06, "step": 930, "training_step_time": 0.10630536079406738 }, { "epoch": 1.42059326171875e-06, "model_forward_time": 0.025505542755126953, "step": 931 }, { "epoch": 1.42059326171875e-06, "step": 931, "training_step_time": 0.10724806785583496 }, { "epoch": 1.422119140625e-06, "model_forward_time": 0.025040864944458008, "step": 932 }, { "epoch": 1.422119140625e-06, "step": 932, "training_step_time": 0.10668301582336426 }, { "epoch": 1.42364501953125e-06, "model_forward_time": 0.025459766387939453, "step": 933 }, { "epoch": 1.42364501953125e-06, "step": 933, "training_step_time": 0.16424107551574707 }, { "epoch": 1.4251708984375e-06, "model_forward_time": 0.024644851684570312, "step": 934 }, { "epoch": 1.4251708984375e-06, "step": 934, "training_step_time": 0.15659189224243164 }, { "epoch": 1.42669677734375e-06, "model_forward_time": 0.024046659469604492, "step": 935 }, { "epoch": 1.42669677734375e-06, "step": 935, "training_step_time": 0.10995745658874512 }, { "epoch": 1.42822265625e-06, "model_forward_time": 0.024933576583862305, "step": 936 }, { "epoch": 1.42822265625e-06, "step": 936, "training_step_time": 0.10501241683959961 }, { "epoch": 1.42974853515625e-06, "model_forward_time": 0.024575233459472656, "step": 937 }, { "epoch": 1.42974853515625e-06, "step": 937, "training_step_time": 0.17602777481079102 }, { "epoch": 1.4312744140625e-06, "model_forward_time": 0.02487921714782715, "step": 938 }, { "epoch": 1.4312744140625e-06, "step": 938, "training_step_time": 0.10854315757751465 }, { "epoch": 1.43280029296875e-06, "model_forward_time": 0.02436995506286621, "step": 939 }, { "epoch": 1.43280029296875e-06, "step": 939, "training_step_time": 0.10376501083374023 }, { "epoch": 1.434326171875e-06, "grad_norm": 1.2495813369750977, "learning_rate": 6.266666666666667e-05, "loss": 0.1659, "step": 940 }, { "epoch": 1.434326171875e-06, "model_forward_time": 0.025046825408935547, "step": 940 }, { "epoch": 1.434326171875e-06, "step": 940, "training_step_time": 0.1043097972869873 }, { "epoch": 1.43585205078125e-06, "model_forward_time": 0.02518749237060547, "step": 941 }, { "epoch": 1.43585205078125e-06, "step": 941, "training_step_time": 0.10824966430664062 }, { "epoch": 1.4373779296875e-06, "model_forward_time": 0.025079727172851562, "step": 942 }, { "epoch": 1.4373779296875e-06, "step": 942, "training_step_time": 0.1077127456665039 }, { "epoch": 1.43890380859375e-06, "model_forward_time": 0.025205373764038086, "step": 943 }, { "epoch": 1.43890380859375e-06, "step": 943, "training_step_time": 0.11058306694030762 }, { "epoch": 1.4404296875e-06, "model_forward_time": 0.026085615158081055, "step": 944 }, { "epoch": 1.4404296875e-06, "step": 944, "training_step_time": 0.11014008522033691 }, { "epoch": 1.44195556640625e-06, "model_forward_time": 0.025360584259033203, "step": 945 }, { "epoch": 1.44195556640625e-06, "step": 945, "training_step_time": 0.19955921173095703 }, { "epoch": 1.4434814453125e-06, "model_forward_time": 0.024649620056152344, "step": 946 }, { "epoch": 1.4434814453125e-06, "step": 946, "training_step_time": 0.10422062873840332 }, { "epoch": 1.44500732421875e-06, "model_forward_time": 0.024888038635253906, "step": 947 }, { "epoch": 1.44500732421875e-06, "step": 947, "training_step_time": 0.10775303840637207 }, { "epoch": 1.446533203125e-06, "model_forward_time": 0.025457382202148438, "step": 948 }, { "epoch": 1.446533203125e-06, "step": 948, "training_step_time": 0.20613670349121094 }, { "epoch": 1.44805908203125e-06, "model_forward_time": 0.024790525436401367, "step": 949 }, { "epoch": 1.44805908203125e-06, "step": 949, "training_step_time": 0.10842728614807129 }, { "epoch": 1.4495849609375e-06, "grad_norm": 1.6541813611984253, "learning_rate": 6.333333333333333e-05, "loss": 0.1667, "step": 950 }, { "epoch": 1.4495849609375e-06, "model_forward_time": 0.02460169792175293, "step": 950 }, { "epoch": 1.4495849609375e-06, "step": 950, "training_step_time": 0.10522675514221191 }, { "epoch": 1.45111083984375e-06, "model_forward_time": 0.02578425407409668, "step": 951 }, { "epoch": 1.45111083984375e-06, "step": 951, "training_step_time": 0.10966944694519043 }, { "epoch": 1.45263671875e-06, "model_forward_time": 0.02548074722290039, "step": 952 }, { "epoch": 1.45263671875e-06, "step": 952, "training_step_time": 0.11618733406066895 }, { "epoch": 1.45416259765625e-06, "model_forward_time": 0.024392127990722656, "step": 953 }, { "epoch": 1.45416259765625e-06, "step": 953, "training_step_time": 0.11213970184326172 }, { "epoch": 1.4556884765625e-06, "model_forward_time": 0.0252225399017334, "step": 954 }, { "epoch": 1.4556884765625e-06, "step": 954, "training_step_time": 0.1082155704498291 }, { "epoch": 1.45721435546875e-06, "model_forward_time": 0.025261402130126953, "step": 955 }, { "epoch": 1.45721435546875e-06, "step": 955, "training_step_time": 0.10611438751220703 }, { "epoch": 1.458740234375e-06, "model_forward_time": 0.025281429290771484, "step": 956 }, { "epoch": 1.458740234375e-06, "step": 956, "training_step_time": 0.1083526611328125 }, { "epoch": 1.46026611328125e-06, "model_forward_time": 0.025420188903808594, "step": 957 }, { "epoch": 1.46026611328125e-06, "step": 957, "training_step_time": 0.10825753211975098 }, { "epoch": 1.4617919921875e-06, "model_forward_time": 0.025020599365234375, "step": 958 }, { "epoch": 1.4617919921875e-06, "step": 958, "training_step_time": 0.10666775703430176 }, { "epoch": 1.46331787109375e-06, "model_forward_time": 0.025125980377197266, "step": 959 }, { "epoch": 1.46331787109375e-06, "step": 959, "training_step_time": 0.18561768531799316 }, { "epoch": 1.46484375e-06, "grad_norm": 1.3631620407104492, "learning_rate": 6.400000000000001e-05, "loss": 0.1641, "step": 960 }, { "epoch": 1.46484375e-06, "model_forward_time": 0.027225971221923828, "step": 960 }, { "epoch": 1.46484375e-06, "step": 960, "training_step_time": 0.20401239395141602 }, { "epoch": 1.46636962890625e-06, "model_forward_time": 0.02450084686279297, "step": 961 }, { "epoch": 1.46636962890625e-06, "step": 961, "training_step_time": 0.19464325904846191 }, { "epoch": 1.4678955078125e-06, "model_forward_time": 0.02423882484436035, "step": 962 }, { "epoch": 1.4678955078125e-06, "step": 962, "training_step_time": 0.18244552612304688 }, { "epoch": 1.46942138671875e-06, "model_forward_time": 0.024138927459716797, "step": 963 }, { "epoch": 1.46942138671875e-06, "step": 963, "training_step_time": 0.17023897171020508 }, { "epoch": 1.470947265625e-06, "model_forward_time": 0.024628400802612305, "step": 964 }, { "epoch": 1.470947265625e-06, "step": 964, "training_step_time": 0.11276507377624512 }, { "epoch": 1.47247314453125e-06, "model_forward_time": 0.024641036987304688, "step": 965 }, { "epoch": 1.47247314453125e-06, "step": 965, "training_step_time": 0.10498809814453125 }, { "epoch": 1.4739990234375e-06, "model_forward_time": 0.025558948516845703, "step": 966 }, { "epoch": 1.4739990234375e-06, "step": 966, "training_step_time": 0.20514297485351562 }, { "epoch": 1.47552490234375e-06, "model_forward_time": 0.024341106414794922, "step": 967 }, { "epoch": 1.47552490234375e-06, "step": 967, "training_step_time": 0.12818026542663574 }, { "epoch": 1.47705078125e-06, "model_forward_time": 0.02411651611328125, "step": 968 }, { "epoch": 1.47705078125e-06, "step": 968, "training_step_time": 0.10740494728088379 }, { "epoch": 1.47857666015625e-06, "model_forward_time": 0.025133371353149414, "step": 969 }, { "epoch": 1.47857666015625e-06, "step": 969, "training_step_time": 0.11165308952331543 }, { "epoch": 1.4801025390625e-06, "grad_norm": 1.1530307531356812, "learning_rate": 6.466666666666666e-05, "loss": 0.1916, "step": 970 }, { "epoch": 1.4801025390625e-06, "model_forward_time": 0.02537703514099121, "step": 970 }, { "epoch": 1.4801025390625e-06, "step": 970, "training_step_time": 0.16168856620788574 }, { "epoch": 1.48162841796875e-06, "model_forward_time": 0.024684667587280273, "step": 971 }, { "epoch": 1.48162841796875e-06, "step": 971, "training_step_time": 0.210524320602417 }, { "epoch": 1.483154296875e-06, "model_forward_time": 0.02805352210998535, "step": 972 }, { "epoch": 1.483154296875e-06, "step": 972, "training_step_time": 0.12474703788757324 }, { "epoch": 1.48468017578125e-06, "model_forward_time": 0.024551868438720703, "step": 973 }, { "epoch": 1.48468017578125e-06, "step": 973, "training_step_time": 0.11638402938842773 }, { "epoch": 1.4862060546875e-06, "model_forward_time": 0.024555206298828125, "step": 974 }, { "epoch": 1.4862060546875e-06, "step": 974, "training_step_time": 0.11339402198791504 }, { "epoch": 1.48773193359375e-06, "model_forward_time": 0.024006366729736328, "step": 975 }, { "epoch": 1.48773193359375e-06, "step": 975, "training_step_time": 0.11186575889587402 }, { "epoch": 1.4892578125e-06, "model_forward_time": 0.025236129760742188, "step": 976 }, { "epoch": 1.4892578125e-06, "step": 976, "training_step_time": 0.12916326522827148 }, { "epoch": 1.49078369140625e-06, "model_forward_time": 0.024791717529296875, "step": 977 }, { "epoch": 1.49078369140625e-06, "step": 977, "training_step_time": 0.15468549728393555 }, { "epoch": 1.4923095703125e-06, "model_forward_time": 0.024564743041992188, "step": 978 }, { "epoch": 1.4923095703125e-06, "step": 978, "training_step_time": 0.1587238311767578 }, { "epoch": 1.49383544921875e-06, "model_forward_time": 0.024112462997436523, "step": 979 }, { "epoch": 1.49383544921875e-06, "step": 979, "training_step_time": 0.1819000244140625 }, { "epoch": 1.495361328125e-06, "grad_norm": 0.9992648363113403, "learning_rate": 6.533333333333334e-05, "loss": 0.1338, "step": 980 }, { "epoch": 1.495361328125e-06, "model_forward_time": 0.024796724319458008, "step": 980 }, { "epoch": 1.495361328125e-06, "step": 980, "training_step_time": 0.10483264923095703 }, { "epoch": 1.49688720703125e-06, "model_forward_time": 0.024901866912841797, "step": 981 }, { "epoch": 1.49688720703125e-06, "step": 981, "training_step_time": 0.1039280891418457 }, { "epoch": 1.4984130859375e-06, "model_forward_time": 0.025199413299560547, "step": 982 }, { "epoch": 1.4984130859375e-06, "step": 982, "training_step_time": 0.10642552375793457 }, { "epoch": 1.49993896484375e-06, "model_forward_time": 0.025878429412841797, "step": 983 }, { "epoch": 1.49993896484375e-06, "step": 983, "training_step_time": 0.10968661308288574 }, { "epoch": 1.50146484375e-06, "model_forward_time": 0.025981426239013672, "step": 984 }, { "epoch": 1.50146484375e-06, "step": 984, "training_step_time": 0.10543394088745117 }, { "epoch": 1.50299072265625e-06, "model_forward_time": 0.025553226470947266, "step": 985 }, { "epoch": 1.50299072265625e-06, "step": 985, "training_step_time": 0.10822367668151855 }, { "epoch": 1.5045166015625e-06, "model_forward_time": 0.025400638580322266, "step": 986 }, { "epoch": 1.5045166015625e-06, "step": 986, "training_step_time": 0.1064004898071289 }, { "epoch": 1.50604248046875e-06, "model_forward_time": 0.024796485900878906, "step": 987 }, { "epoch": 1.50604248046875e-06, "step": 987, "training_step_time": 0.16751623153686523 }, { "epoch": 1.507568359375e-06, "model_forward_time": 0.024163246154785156, "step": 988 }, { "epoch": 1.507568359375e-06, "step": 988, "training_step_time": 0.15427350997924805 }, { "epoch": 1.50909423828125e-06, "model_forward_time": 0.02545905113220215, "step": 989 }, { "epoch": 1.50909423828125e-06, "step": 989, "training_step_time": 0.11192631721496582 }, { "epoch": 1.5106201171875e-06, "grad_norm": 1.400564193725586, "learning_rate": 6.6e-05, "loss": 0.1444, "step": 990 }, { "epoch": 1.5106201171875e-06, "model_forward_time": 0.02580571174621582, "step": 990 }, { "epoch": 1.5106201171875e-06, "step": 990, "training_step_time": 0.20200586318969727 }, { "epoch": 1.51214599609375e-06, "model_forward_time": 0.024875402450561523, "step": 991 }, { "epoch": 1.51214599609375e-06, "step": 991, "training_step_time": 0.10776424407958984 }, { "epoch": 1.513671875e-06, "model_forward_time": 0.02468729019165039, "step": 992 }, { "epoch": 1.513671875e-06, "step": 992, "training_step_time": 0.10306096076965332 }, { "epoch": 1.51519775390625e-06, "model_forward_time": 0.025013446807861328, "step": 993 }, { "epoch": 1.51519775390625e-06, "step": 993, "training_step_time": 0.10837674140930176 }, { "epoch": 1.5167236328125e-06, "model_forward_time": 0.02664804458618164, "step": 994 }, { "epoch": 1.5167236328125e-06, "step": 994, "training_step_time": 0.10871458053588867 }, { "epoch": 1.51824951171875e-06, "model_forward_time": 0.025187015533447266, "step": 995 }, { "epoch": 1.51824951171875e-06, "step": 995, "training_step_time": 0.10617589950561523 }, { "epoch": 1.519775390625e-06, "model_forward_time": 0.02538156509399414, "step": 996 }, { "epoch": 1.519775390625e-06, "step": 996, "training_step_time": 0.10701131820678711 }, { "epoch": 1.52130126953125e-06, "model_forward_time": 0.025536775588989258, "step": 997 }, { "epoch": 1.52130126953125e-06, "step": 997, "training_step_time": 0.1121518611907959 }, { "epoch": 1.5228271484375e-06, "model_forward_time": 0.02524590492248535, "step": 998 }, { "epoch": 1.5228271484375e-06, "step": 998, "training_step_time": 0.1093595027923584 }, { "epoch": 1.52435302734375e-06, "model_forward_time": 0.02507781982421875, "step": 999 }, { "epoch": 1.52435302734375e-06, "step": 999, "training_step_time": 0.15689635276794434 }, { "epoch": 1.52587890625e-06, "grad_norm": 1.3624902963638306, "learning_rate": 6.666666666666667e-05, "loss": 0.146, "step": 1000 }, { "epoch": 1.52587890625e-06, "model_forward_time": 0.025603532791137695, "step": 1000 }, { "epoch": 1.52587890625e-06, "step": 1000, "training_step_time": 0.1047816276550293 }, { "epoch": 1.52740478515625e-06, "model_forward_time": 0.025385141372680664, "step": 1001 }, { "epoch": 1.52740478515625e-06, "step": 1001, "training_step_time": 0.1654202938079834 }, { "epoch": 1.5289306640625e-06, "model_forward_time": 0.024989843368530273, "step": 1002 }, { "epoch": 1.5289306640625e-06, "step": 1002, "training_step_time": 0.12518787384033203 }, { "epoch": 1.53045654296875e-06, "model_forward_time": 0.024707317352294922, "step": 1003 }, { "epoch": 1.53045654296875e-06, "step": 1003, "training_step_time": 0.10969424247741699 }, { "epoch": 1.531982421875e-06, "model_forward_time": 0.025409460067749023, "step": 1004 }, { "epoch": 1.531982421875e-06, "step": 1004, "training_step_time": 0.11253118515014648 }, { "epoch": 1.53350830078125e-06, "model_forward_time": 0.025266647338867188, "step": 1005 }, { "epoch": 1.53350830078125e-06, "step": 1005, "training_step_time": 0.2053070068359375 }, { "epoch": 1.5350341796875e-06, "model_forward_time": 0.024524450302124023, "step": 1006 }, { "epoch": 1.5350341796875e-06, "step": 1006, "training_step_time": 0.12968015670776367 }, { "epoch": 1.53656005859375e-06, "model_forward_time": 0.024885177612304688, "step": 1007 }, { "epoch": 1.53656005859375e-06, "step": 1007, "training_step_time": 0.11052060127258301 }, { "epoch": 1.5380859375e-06, "model_forward_time": 0.025480270385742188, "step": 1008 }, { "epoch": 1.5380859375e-06, "step": 1008, "training_step_time": 0.11458420753479004 }, { "epoch": 1.53961181640625e-06, "model_forward_time": 0.02569413185119629, "step": 1009 }, { "epoch": 1.53961181640625e-06, "step": 1009, "training_step_time": 0.11475086212158203 }, { "epoch": 1.5411376953125e-06, "grad_norm": 1.154253602027893, "learning_rate": 6.733333333333333e-05, "loss": 0.1322, "step": 1010 }, { "epoch": 1.5411376953125e-06, "model_forward_time": 0.02691030502319336, "step": 1010 }, { "epoch": 1.5411376953125e-06, "step": 1010, "training_step_time": 0.18335223197937012 }, { "epoch": 1.54266357421875e-06, "model_forward_time": 0.02480316162109375, "step": 1011 }, { "epoch": 1.54266357421875e-06, "step": 1011, "training_step_time": 0.12211203575134277 }, { "epoch": 1.544189453125e-06, "model_forward_time": 0.024060964584350586, "step": 1012 }, { "epoch": 1.544189453125e-06, "step": 1012, "training_step_time": 0.1065986156463623 }, { "epoch": 1.54571533203125e-06, "model_forward_time": 0.0254666805267334, "step": 1013 }, { "epoch": 1.54571533203125e-06, "step": 1013, "training_step_time": 0.11172652244567871 }, { "epoch": 1.5472412109375e-06, "model_forward_time": 0.02579474449157715, "step": 1014 }, { "epoch": 1.5472412109375e-06, "step": 1014, "training_step_time": 0.11539030075073242 }, { "epoch": 1.54876708984375e-06, "model_forward_time": 0.025887250900268555, "step": 1015 }, { "epoch": 1.54876708984375e-06, "step": 1015, "training_step_time": 0.10739731788635254 }, { "epoch": 1.55029296875e-06, "model_forward_time": 0.025766849517822266, "step": 1016 }, { "epoch": 1.55029296875e-06, "step": 1016, "training_step_time": 0.11510419845581055 }, { "epoch": 1.55181884765625e-06, "model_forward_time": 0.025266408920288086, "step": 1017 }, { "epoch": 1.55181884765625e-06, "step": 1017, "training_step_time": 0.10869026184082031 }, { "epoch": 1.5533447265625e-06, "model_forward_time": 0.02482008934020996, "step": 1018 }, { "epoch": 1.5533447265625e-06, "step": 1018, "training_step_time": 0.11139249801635742 }, { "epoch": 1.55487060546875e-06, "model_forward_time": 0.027956724166870117, "step": 1019 }, { "epoch": 1.55487060546875e-06, "step": 1019, "training_step_time": 0.11034464836120605 }, { "epoch": 1.556396484375e-06, "grad_norm": 1.202590823173523, "learning_rate": 6.800000000000001e-05, "loss": 0.1665, "step": 1020 }, { "epoch": 1.556396484375e-06, "model_forward_time": 0.025843381881713867, "step": 1020 }, { "epoch": 1.556396484375e-06, "step": 1020, "training_step_time": 0.11314225196838379 }, { "epoch": 1.55792236328125e-06, "model_forward_time": 0.02661275863647461, "step": 1021 }, { "epoch": 1.55792236328125e-06, "step": 1021, "training_step_time": 0.11158967018127441 }, { "epoch": 1.5594482421875e-06, "model_forward_time": 0.02548956871032715, "step": 1022 }, { "epoch": 1.5594482421875e-06, "step": 1022, "training_step_time": 0.21105599403381348 }, { "epoch": 1.56097412109375e-06, "model_forward_time": 0.02461719512939453, "step": 1023 }, { "epoch": 1.56097412109375e-06, "step": 1023, "training_step_time": 0.11298942565917969 }, { "epoch": 1.5625e-06, "model_forward_time": 0.024887800216674805, "step": 1024 }, { "epoch": 1.5625e-06, "step": 1024, "training_step_time": 0.10465312004089355 }, { "epoch": 1.56402587890625e-06, "model_forward_time": 0.02590036392211914, "step": 1025 }, { "epoch": 1.56402587890625e-06, "step": 1025, "training_step_time": 0.10803675651550293 }, { "epoch": 1.5655517578125e-06, "model_forward_time": 0.02511882781982422, "step": 1026 }, { "epoch": 1.5655517578125e-06, "step": 1026, "training_step_time": 0.10473370552062988 }, { "epoch": 1.56707763671875e-06, "model_forward_time": 0.025392532348632812, "step": 1027 }, { "epoch": 1.56707763671875e-06, "step": 1027, "training_step_time": 0.10641360282897949 }, { "epoch": 1.568603515625e-06, "model_forward_time": 0.025468111038208008, "step": 1028 }, { "epoch": 1.568603515625e-06, "step": 1028, "training_step_time": 0.1081857681274414 }, { "epoch": 1.57012939453125e-06, "model_forward_time": 0.025626659393310547, "step": 1029 }, { "epoch": 1.57012939453125e-06, "step": 1029, "training_step_time": 0.1059579849243164 }, { "epoch": 1.5716552734375e-06, "grad_norm": 1.2344614267349243, "learning_rate": 6.866666666666666e-05, "loss": 0.1456, "step": 1030 }, { "epoch": 1.5716552734375e-06, "model_forward_time": 0.025172710418701172, "step": 1030 }, { "epoch": 1.5716552734375e-06, "step": 1030, "training_step_time": 0.10354948043823242 }, { "epoch": 1.57318115234375e-06, "model_forward_time": 0.025776386260986328, "step": 1031 }, { "epoch": 1.57318115234375e-06, "step": 1031, "training_step_time": 0.11005020141601562 }, { "epoch": 1.57470703125e-06, "model_forward_time": 0.025769472122192383, "step": 1032 }, { "epoch": 1.57470703125e-06, "step": 1032, "training_step_time": 0.10905647277832031 }, { "epoch": 1.57623291015625e-06, "model_forward_time": 0.025789737701416016, "step": 1033 }, { "epoch": 1.57623291015625e-06, "step": 1033, "training_step_time": 0.11543011665344238 }, { "epoch": 1.5777587890625e-06, "model_forward_time": 0.024952173233032227, "step": 1034 }, { "epoch": 1.5777587890625e-06, "step": 1034, "training_step_time": 0.12747621536254883 }, { "epoch": 1.57928466796875e-06, "model_forward_time": 0.0252377986907959, "step": 1035 }, { "epoch": 1.57928466796875e-06, "step": 1035, "training_step_time": 0.11939525604248047 }, { "epoch": 1.580810546875e-06, "model_forward_time": 0.02542281150817871, "step": 1036 }, { "epoch": 1.580810546875e-06, "step": 1036, "training_step_time": 0.12109208106994629 }, { "epoch": 1.58233642578125e-06, "model_forward_time": 0.025468111038208008, "step": 1037 }, { "epoch": 1.58233642578125e-06, "step": 1037, "training_step_time": 0.11703062057495117 }, { "epoch": 1.5838623046875e-06, "model_forward_time": 0.02591395378112793, "step": 1038 }, { "epoch": 1.5838623046875e-06, "step": 1038, "training_step_time": 0.11315035820007324 }, { "epoch": 1.58538818359375e-06, "model_forward_time": 0.024561643600463867, "step": 1039 }, { "epoch": 1.58538818359375e-06, "step": 1039, "training_step_time": 0.10950875282287598 }, { "epoch": 1.5869140625e-06, "grad_norm": 1.288399577140808, "learning_rate": 6.933333333333334e-05, "loss": 0.1432, "step": 1040 }, { "epoch": 1.5869140625e-06, "model_forward_time": 0.02623152732849121, "step": 1040 }, { "epoch": 1.5869140625e-06, "step": 1040, "training_step_time": 0.1795346736907959 }, { "epoch": 1.58843994140625e-06, "model_forward_time": 0.024980783462524414, "step": 1041 }, { "epoch": 1.58843994140625e-06, "step": 1041, "training_step_time": 0.10783100128173828 }, { "epoch": 1.5899658203125e-06, "model_forward_time": 0.024764537811279297, "step": 1042 }, { "epoch": 1.5899658203125e-06, "step": 1042, "training_step_time": 0.1115577220916748 }, { "epoch": 1.59149169921875e-06, "model_forward_time": 0.026003599166870117, "step": 1043 }, { "epoch": 1.59149169921875e-06, "step": 1043, "training_step_time": 0.12750792503356934 }, { "epoch": 1.593017578125e-06, "model_forward_time": 0.025630712509155273, "step": 1044 }, { "epoch": 1.593017578125e-06, "step": 1044, "training_step_time": 0.1301584243774414 }, { "epoch": 1.59454345703125e-06, "model_forward_time": 0.025237321853637695, "step": 1045 }, { "epoch": 1.59454345703125e-06, "step": 1045, "training_step_time": 0.2173008918762207 }, { "epoch": 1.5960693359375e-06, "model_forward_time": 0.0244443416595459, "step": 1046 }, { "epoch": 1.5960693359375e-06, "step": 1046, "training_step_time": 0.10609722137451172 }, { "epoch": 1.59759521484375e-06, "model_forward_time": 0.024541854858398438, "step": 1047 }, { "epoch": 1.59759521484375e-06, "step": 1047, "training_step_time": 0.18325495719909668 }, { "epoch": 1.59912109375e-06, "model_forward_time": 0.02502131462097168, "step": 1048 }, { "epoch": 1.59912109375e-06, "step": 1048, "training_step_time": 0.12955331802368164 }, { "epoch": 1.60064697265625e-06, "model_forward_time": 0.024990558624267578, "step": 1049 }, { "epoch": 1.60064697265625e-06, "step": 1049, "training_step_time": 0.12085366249084473 }, { "epoch": 1.6021728515625e-06, "grad_norm": 1.2722276449203491, "learning_rate": 7e-05, "loss": 0.14, "step": 1050 }, { "epoch": 1.6021728515625e-06, "model_forward_time": 0.02550959587097168, "step": 1050 }, { "epoch": 1.6021728515625e-06, "step": 1050, "training_step_time": 0.10591650009155273 }, { "epoch": 1.60369873046875e-06, "model_forward_time": 0.025501251220703125, "step": 1051 }, { "epoch": 1.60369873046875e-06, "step": 1051, "training_step_time": 0.1157996654510498 }, { "epoch": 1.605224609375e-06, "model_forward_time": 0.025095701217651367, "step": 1052 }, { "epoch": 1.605224609375e-06, "step": 1052, "training_step_time": 0.11974263191223145 }, { "epoch": 1.60675048828125e-06, "model_forward_time": 0.0254056453704834, "step": 1053 }, { "epoch": 1.60675048828125e-06, "step": 1053, "training_step_time": 0.11528444290161133 }, { "epoch": 1.6082763671875e-06, "model_forward_time": 0.025166034698486328, "step": 1054 }, { "epoch": 1.6082763671875e-06, "step": 1054, "training_step_time": 0.1144866943359375 }, { "epoch": 1.60980224609375e-06, "model_forward_time": 0.026715517044067383, "step": 1055 }, { "epoch": 1.60980224609375e-06, "step": 1055, "training_step_time": 0.11703324317932129 }, { "epoch": 1.611328125e-06, "model_forward_time": 0.025484561920166016, "step": 1056 }, { "epoch": 1.611328125e-06, "step": 1056, "training_step_time": 0.1573629379272461 }, { "epoch": 1.61285400390625e-06, "model_forward_time": 0.02785181999206543, "step": 1057 }, { "epoch": 1.61285400390625e-06, "step": 1057, "training_step_time": 0.15215301513671875 }, { "epoch": 1.6143798828125e-06, "model_forward_time": 0.02455615997314453, "step": 1058 }, { "epoch": 1.6143798828125e-06, "step": 1058, "training_step_time": 0.10976409912109375 }, { "epoch": 1.61590576171875e-06, "model_forward_time": 0.024673938751220703, "step": 1059 }, { "epoch": 1.61590576171875e-06, "step": 1059, "training_step_time": 0.10842204093933105 }, { "epoch": 1.617431640625e-06, "grad_norm": 0.8888778686523438, "learning_rate": 7.066666666666667e-05, "loss": 0.1744, "step": 1060 }, { "epoch": 1.617431640625e-06, "model_forward_time": 0.025425195693969727, "step": 1060 }, { "epoch": 1.617431640625e-06, "step": 1060, "training_step_time": 0.11020565032958984 }, { "epoch": 1.61895751953125e-06, "model_forward_time": 0.02549910545349121, "step": 1061 }, { "epoch": 1.61895751953125e-06, "step": 1061, "training_step_time": 0.10889887809753418 }, { "epoch": 1.6204833984375e-06, "model_forward_time": 0.02554917335510254, "step": 1062 }, { "epoch": 1.6204833984375e-06, "step": 1062, "training_step_time": 0.10643172264099121 }, { "epoch": 1.62200927734375e-06, "model_forward_time": 0.027194738388061523, "step": 1063 }, { "epoch": 1.62200927734375e-06, "step": 1063, "training_step_time": 0.10718798637390137 }, { "epoch": 1.62353515625e-06, "model_forward_time": 0.024444580078125, "step": 1064 }, { "epoch": 1.62353515625e-06, "step": 1064, "training_step_time": 0.10753321647644043 }, { "epoch": 1.62506103515625e-06, "model_forward_time": 0.02469921112060547, "step": 1065 }, { "epoch": 1.62506103515625e-06, "step": 1065, "training_step_time": 0.11422371864318848 }, { "epoch": 1.6265869140625e-06, "model_forward_time": 0.02574777603149414, "step": 1066 }, { "epoch": 1.6265869140625e-06, "step": 1066, "training_step_time": 0.11976003646850586 }, { "epoch": 1.62811279296875e-06, "model_forward_time": 0.025679349899291992, "step": 1067 }, { "epoch": 1.62811279296875e-06, "step": 1067, "training_step_time": 0.10972070693969727 }, { "epoch": 1.629638671875e-06, "model_forward_time": 0.02556157112121582, "step": 1068 }, { "epoch": 1.629638671875e-06, "step": 1068, "training_step_time": 0.210374116897583 }, { "epoch": 1.63116455078125e-06, "model_forward_time": 0.024676799774169922, "step": 1069 }, { "epoch": 1.63116455078125e-06, "step": 1069, "training_step_time": 0.1168832778930664 }, { "epoch": 1.6326904296875e-06, "grad_norm": 1.361880898475647, "learning_rate": 7.133333333333334e-05, "loss": 0.1801, "step": 1070 }, { "epoch": 1.6326904296875e-06, "model_forward_time": 0.02513289451599121, "step": 1070 }, { "epoch": 1.6326904296875e-06, "step": 1070, "training_step_time": 0.10729861259460449 }, { "epoch": 1.63421630859375e-06, "model_forward_time": 0.02518439292907715, "step": 1071 }, { "epoch": 1.63421630859375e-06, "step": 1071, "training_step_time": 0.10757255554199219 }, { "epoch": 1.6357421875e-06, "model_forward_time": 0.025432109832763672, "step": 1072 }, { "epoch": 1.6357421875e-06, "step": 1072, "training_step_time": 0.10678672790527344 }, { "epoch": 1.63726806640625e-06, "model_forward_time": 0.02492070198059082, "step": 1073 }, { "epoch": 1.63726806640625e-06, "step": 1073, "training_step_time": 0.10640501976013184 }, { "epoch": 1.6387939453125e-06, "model_forward_time": 0.02562880516052246, "step": 1074 }, { "epoch": 1.6387939453125e-06, "step": 1074, "training_step_time": 0.1062004566192627 }, { "epoch": 1.64031982421875e-06, "model_forward_time": 0.025224685668945312, "step": 1075 }, { "epoch": 1.64031982421875e-06, "step": 1075, "training_step_time": 0.10606169700622559 }, { "epoch": 1.641845703125e-06, "model_forward_time": 0.026064157485961914, "step": 1076 }, { "epoch": 1.641845703125e-06, "step": 1076, "training_step_time": 0.10569143295288086 }, { "epoch": 1.64337158203125e-06, "model_forward_time": 0.025818347930908203, "step": 1077 }, { "epoch": 1.64337158203125e-06, "step": 1077, "training_step_time": 0.10859990119934082 }, { "epoch": 1.6448974609375e-06, "model_forward_time": 0.02905130386352539, "step": 1078 }, { "epoch": 1.6448974609375e-06, "step": 1078, "training_step_time": 0.11166000366210938 }, { "epoch": 1.64642333984375e-06, "model_forward_time": 0.025181293487548828, "step": 1079 }, { "epoch": 1.64642333984375e-06, "step": 1079, "training_step_time": 0.1067955493927002 }, { "epoch": 1.64794921875e-06, "grad_norm": 1.405672311782837, "learning_rate": 7.2e-05, "loss": 0.209, "step": 1080 }, { "epoch": 1.64794921875e-06, "model_forward_time": 0.025452852249145508, "step": 1080 }, { "epoch": 1.64794921875e-06, "step": 1080, "training_step_time": 0.10531067848205566 }, { "epoch": 1.64947509765625e-06, "model_forward_time": 0.025574922561645508, "step": 1081 }, { "epoch": 1.64947509765625e-06, "step": 1081, "training_step_time": 0.12761259078979492 }, { "epoch": 1.6510009765625e-06, "model_forward_time": 0.02525782585144043, "step": 1082 }, { "epoch": 1.6510009765625e-06, "step": 1082, "training_step_time": 0.16738271713256836 }, { "epoch": 1.65252685546875e-06, "model_forward_time": 0.026235580444335938, "step": 1083 }, { "epoch": 1.65252685546875e-06, "step": 1083, "training_step_time": 0.15385866165161133 }, { "epoch": 1.654052734375e-06, "model_forward_time": 0.023982524871826172, "step": 1084 }, { "epoch": 1.654052734375e-06, "step": 1084, "training_step_time": 0.14533233642578125 }, { "epoch": 1.65557861328125e-06, "model_forward_time": 0.026013612747192383, "step": 1085 }, { "epoch": 1.65557861328125e-06, "step": 1085, "training_step_time": 0.12836217880249023 }, { "epoch": 1.6571044921875e-06, "model_forward_time": 0.024506807327270508, "step": 1086 }, { "epoch": 1.6571044921875e-06, "step": 1086, "training_step_time": 0.13121747970581055 }, { "epoch": 1.65863037109375e-06, "model_forward_time": 0.02429056167602539, "step": 1087 }, { "epoch": 1.65863037109375e-06, "step": 1087, "training_step_time": 0.1856839656829834 }, { "epoch": 1.66015625e-06, "model_forward_time": 0.024260520935058594, "step": 1088 }, { "epoch": 1.66015625e-06, "step": 1088, "training_step_time": 0.20662331581115723 }, { "epoch": 1.66168212890625e-06, "model_forward_time": 0.024363279342651367, "step": 1089 }, { "epoch": 1.66168212890625e-06, "step": 1089, "training_step_time": 0.16556668281555176 }, { "epoch": 1.6632080078125e-06, "grad_norm": 0.9802555441856384, "learning_rate": 7.266666666666667e-05, "loss": 0.192, "step": 1090 }, { "epoch": 1.6632080078125e-06, "model_forward_time": 0.024334430694580078, "step": 1090 }, { "epoch": 1.6632080078125e-06, "step": 1090, "training_step_time": 0.17417645454406738 }, { "epoch": 1.66473388671875e-06, "model_forward_time": 0.024724483489990234, "step": 1091 }, { "epoch": 1.66473388671875e-06, "step": 1091, "training_step_time": 0.16185903549194336 }, { "epoch": 1.666259765625e-06, "model_forward_time": 0.02400684356689453, "step": 1092 }, { "epoch": 1.666259765625e-06, "step": 1092, "training_step_time": 0.12656760215759277 }, { "epoch": 1.66778564453125e-06, "model_forward_time": 0.024321317672729492, "step": 1093 }, { "epoch": 1.66778564453125e-06, "step": 1093, "training_step_time": 0.11409902572631836 }, { "epoch": 1.6693115234375e-06, "model_forward_time": 0.024447917938232422, "step": 1094 }, { "epoch": 1.6693115234375e-06, "step": 1094, "training_step_time": 0.10630130767822266 }, { "epoch": 1.67083740234375e-06, "model_forward_time": 0.025160789489746094, "step": 1095 }, { "epoch": 1.67083740234375e-06, "step": 1095, "training_step_time": 0.16701292991638184 }, { "epoch": 1.67236328125e-06, "model_forward_time": 0.025110483169555664, "step": 1096 }, { "epoch": 1.67236328125e-06, "step": 1096, "training_step_time": 0.1374666690826416 }, { "epoch": 1.67388916015625e-06, "model_forward_time": 0.024764299392700195, "step": 1097 }, { "epoch": 1.67388916015625e-06, "step": 1097, "training_step_time": 0.11067771911621094 }, { "epoch": 1.6754150390625e-06, "model_forward_time": 0.025705575942993164, "step": 1098 }, { "epoch": 1.6754150390625e-06, "step": 1098, "training_step_time": 0.1098320484161377 }, { "epoch": 1.67694091796875e-06, "model_forward_time": 0.025183439254760742, "step": 1099 }, { "epoch": 1.67694091796875e-06, "step": 1099, "training_step_time": 0.11371445655822754 }, { "epoch": 1.678466796875e-06, "grad_norm": 1.2202184200286865, "learning_rate": 7.333333333333333e-05, "loss": 0.1265, "step": 1100 }, { "epoch": 1.678466796875e-06, "model_forward_time": 0.025087356567382812, "step": 1100 }, { "epoch": 1.678466796875e-06, "step": 1100, "training_step_time": 0.1554563045501709 }, { "epoch": 1.67999267578125e-06, "model_forward_time": 0.02444601058959961, "step": 1101 }, { "epoch": 1.67999267578125e-06, "step": 1101, "training_step_time": 0.14522957801818848 }, { "epoch": 1.6815185546875e-06, "model_forward_time": 0.02455759048461914, "step": 1102 }, { "epoch": 1.6815185546875e-06, "step": 1102, "training_step_time": 0.10157060623168945 }, { "epoch": 1.68304443359375e-06, "model_forward_time": 0.025468826293945312, "step": 1103 }, { "epoch": 1.68304443359375e-06, "step": 1103, "training_step_time": 0.11110997200012207 }, { "epoch": 1.6845703125e-06, "model_forward_time": 0.02529621124267578, "step": 1104 }, { "epoch": 1.6845703125e-06, "step": 1104, "training_step_time": 0.10544061660766602 }, { "epoch": 1.68609619140625e-06, "model_forward_time": 0.02568531036376953, "step": 1105 }, { "epoch": 1.68609619140625e-06, "step": 1105, "training_step_time": 0.1126554012298584 }, { "epoch": 1.6876220703125e-06, "model_forward_time": 0.02559518814086914, "step": 1106 }, { "epoch": 1.6876220703125e-06, "step": 1106, "training_step_time": 0.11029887199401855 }, { "epoch": 1.68914794921875e-06, "model_forward_time": 0.0261080265045166, "step": 1107 }, { "epoch": 1.68914794921875e-06, "step": 1107, "training_step_time": 0.10719037055969238 }, { "epoch": 1.690673828125e-06, "model_forward_time": 0.025641202926635742, "step": 1108 }, { "epoch": 1.690673828125e-06, "step": 1108, "training_step_time": 0.16606760025024414 }, { "epoch": 1.69219970703125e-06, "model_forward_time": 0.024934053421020508, "step": 1109 }, { "epoch": 1.69219970703125e-06, "step": 1109, "training_step_time": 0.1658623218536377 }, { "epoch": 1.6937255859375e-06, "grad_norm": 1.5171630382537842, "learning_rate": 7.4e-05, "loss": 0.2076, "step": 1110 }, { "epoch": 1.6937255859375e-06, "model_forward_time": 0.025086641311645508, "step": 1110 }, { "epoch": 1.6937255859375e-06, "step": 1110, "training_step_time": 0.10617804527282715 }, { "epoch": 1.69525146484375e-06, "model_forward_time": 0.025186777114868164, "step": 1111 }, { "epoch": 1.69525146484375e-06, "step": 1111, "training_step_time": 0.1072835922241211 }, { "epoch": 1.69677734375e-06, "model_forward_time": 0.02573680877685547, "step": 1112 }, { "epoch": 1.69677734375e-06, "step": 1112, "training_step_time": 0.11734127998352051 }, { "epoch": 1.69830322265625e-06, "model_forward_time": 0.025831222534179688, "step": 1113 }, { "epoch": 1.69830322265625e-06, "step": 1113, "training_step_time": 0.11211228370666504 }, { "epoch": 1.6998291015625e-06, "model_forward_time": 0.026093721389770508, "step": 1114 }, { "epoch": 1.6998291015625e-06, "step": 1114, "training_step_time": 0.1062004566192627 }, { "epoch": 1.70135498046875e-06, "model_forward_time": 0.025349140167236328, "step": 1115 }, { "epoch": 1.70135498046875e-06, "step": 1115, "training_step_time": 0.1083824634552002 }, { "epoch": 1.702880859375e-06, "model_forward_time": 0.025402307510375977, "step": 1116 }, { "epoch": 1.702880859375e-06, "step": 1116, "training_step_time": 0.10837841033935547 }, { "epoch": 1.70440673828125e-06, "model_forward_time": 0.025722980499267578, "step": 1117 }, { "epoch": 1.70440673828125e-06, "step": 1117, "training_step_time": 0.10738134384155273 }, { "epoch": 1.7059326171875e-06, "model_forward_time": 0.025147676467895508, "step": 1118 }, { "epoch": 1.7059326171875e-06, "step": 1118, "training_step_time": 0.10828781127929688 }, { "epoch": 1.70745849609375e-06, "model_forward_time": 0.025195837020874023, "step": 1119 }, { "epoch": 1.70745849609375e-06, "step": 1119, "training_step_time": 0.10542535781860352 }, { "epoch": 1.708984375e-06, "grad_norm": 1.1591230630874634, "learning_rate": 7.466666666666667e-05, "loss": 0.1457, "step": 1120 }, { "epoch": 1.708984375e-06, "model_forward_time": 0.025481700897216797, "step": 1120 }, { "epoch": 1.708984375e-06, "step": 1120, "training_step_time": 0.10607600212097168 }, { "epoch": 1.71051025390625e-06, "model_forward_time": 0.026244640350341797, "step": 1121 }, { "epoch": 1.71051025390625e-06, "step": 1121, "training_step_time": 0.10785222053527832 }, { "epoch": 1.7120361328125e-06, "model_forward_time": 0.02579784393310547, "step": 1122 }, { "epoch": 1.7120361328125e-06, "step": 1122, "training_step_time": 0.10676932334899902 }, { "epoch": 1.71356201171875e-06, "model_forward_time": 0.02545166015625, "step": 1123 }, { "epoch": 1.71356201171875e-06, "step": 1123, "training_step_time": 0.10704469680786133 }, { "epoch": 1.715087890625e-06, "model_forward_time": 0.0267026424407959, "step": 1124 }, { "epoch": 1.715087890625e-06, "step": 1124, "training_step_time": 0.10695505142211914 }, { "epoch": 1.71661376953125e-06, "model_forward_time": 0.025321483612060547, "step": 1125 }, { "epoch": 1.71661376953125e-06, "step": 1125, "training_step_time": 0.10326433181762695 }, { "epoch": 1.7181396484375e-06, "model_forward_time": 0.02545642852783203, "step": 1126 }, { "epoch": 1.7181396484375e-06, "step": 1126, "training_step_time": 0.10436439514160156 }, { "epoch": 1.71966552734375e-06, "model_forward_time": 0.027254104614257812, "step": 1127 }, { "epoch": 1.71966552734375e-06, "step": 1127, "training_step_time": 0.11226606369018555 }, { "epoch": 1.72119140625e-06, "model_forward_time": 0.02574944496154785, "step": 1128 }, { "epoch": 1.72119140625e-06, "step": 1128, "training_step_time": 0.1058351993560791 }, { "epoch": 1.72271728515625e-06, "model_forward_time": 0.025252103805541992, "step": 1129 }, { "epoch": 1.72271728515625e-06, "step": 1129, "training_step_time": 0.10646653175354004 }, { "epoch": 1.7242431640625e-06, "grad_norm": 1.320570707321167, "learning_rate": 7.533333333333334e-05, "loss": 0.1719, "step": 1130 }, { "epoch": 1.7242431640625e-06, "model_forward_time": 0.025857210159301758, "step": 1130 }, { "epoch": 1.7242431640625e-06, "step": 1130, "training_step_time": 0.10575461387634277 }, { "epoch": 1.72576904296875e-06, "model_forward_time": 0.025391578674316406, "step": 1131 }, { "epoch": 1.72576904296875e-06, "step": 1131, "training_step_time": 0.19349002838134766 }, { "epoch": 1.727294921875e-06, "model_forward_time": 0.02381610870361328, "step": 1132 }, { "epoch": 1.727294921875e-06, "step": 1132, "training_step_time": 0.10652017593383789 }, { "epoch": 1.72882080078125e-06, "model_forward_time": 0.024476289749145508, "step": 1133 }, { "epoch": 1.72882080078125e-06, "step": 1133, "training_step_time": 0.1157069206237793 }, { "epoch": 1.7303466796875e-06, "model_forward_time": 0.025134563446044922, "step": 1134 }, { "epoch": 1.7303466796875e-06, "step": 1134, "training_step_time": 0.17812681198120117 }, { "epoch": 1.73187255859375e-06, "model_forward_time": 0.02457880973815918, "step": 1135 }, { "epoch": 1.73187255859375e-06, "step": 1135, "training_step_time": 0.19104671478271484 }, { "epoch": 1.7333984375e-06, "model_forward_time": 0.024499893188476562, "step": 1136 }, { "epoch": 1.7333984375e-06, "step": 1136, "training_step_time": 0.15276217460632324 }, { "epoch": 1.73492431640625e-06, "model_forward_time": 0.024434566497802734, "step": 1137 }, { "epoch": 1.73492431640625e-06, "step": 1137, "training_step_time": 0.16435503959655762 }, { "epoch": 1.7364501953125e-06, "model_forward_time": 0.02401137351989746, "step": 1138 }, { "epoch": 1.7364501953125e-06, "step": 1138, "training_step_time": 0.13677048683166504 }, { "epoch": 1.73797607421875e-06, "model_forward_time": 0.024253368377685547, "step": 1139 }, { "epoch": 1.73797607421875e-06, "step": 1139, "training_step_time": 0.11883234977722168 }, { "epoch": 1.739501953125e-06, "grad_norm": 1.3508999347686768, "learning_rate": 7.6e-05, "loss": 0.199, "step": 1140 }, { "epoch": 1.739501953125e-06, "model_forward_time": 0.025207996368408203, "step": 1140 }, { "epoch": 1.739501953125e-06, "step": 1140, "training_step_time": 0.10568594932556152 }, { "epoch": 1.74102783203125e-06, "model_forward_time": 0.025411605834960938, "step": 1141 }, { "epoch": 1.74102783203125e-06, "step": 1141, "training_step_time": 0.16100406646728516 }, { "epoch": 1.7425537109375e-06, "model_forward_time": 0.02485799789428711, "step": 1142 }, { "epoch": 1.7425537109375e-06, "step": 1142, "training_step_time": 0.1254122257232666 }, { "epoch": 1.74407958984375e-06, "model_forward_time": 0.024767398834228516, "step": 1143 }, { "epoch": 1.74407958984375e-06, "step": 1143, "training_step_time": 0.1106865406036377 }, { "epoch": 1.74560546875e-06, "model_forward_time": 0.025363683700561523, "step": 1144 }, { "epoch": 1.74560546875e-06, "step": 1144, "training_step_time": 0.11659932136535645 }, { "epoch": 1.74713134765625e-06, "model_forward_time": 0.025749921798706055, "step": 1145 }, { "epoch": 1.74713134765625e-06, "step": 1145, "training_step_time": 0.11061739921569824 }, { "epoch": 1.7486572265625e-06, "model_forward_time": 0.02597355842590332, "step": 1146 }, { "epoch": 1.7486572265625e-06, "step": 1146, "training_step_time": 0.11140155792236328 }, { "epoch": 1.75018310546875e-06, "model_forward_time": 0.02596569061279297, "step": 1147 }, { "epoch": 1.75018310546875e-06, "step": 1147, "training_step_time": 0.20169734954833984 }, { "epoch": 1.751708984375e-06, "model_forward_time": 0.024764060974121094, "step": 1148 }, { "epoch": 1.751708984375e-06, "step": 1148, "training_step_time": 0.10485363006591797 }, { "epoch": 1.75323486328125e-06, "model_forward_time": 0.024885892868041992, "step": 1149 }, { "epoch": 1.75323486328125e-06, "step": 1149, "training_step_time": 0.10564446449279785 }, { "epoch": 1.7547607421875e-06, "grad_norm": 1.5766886472702026, "learning_rate": 7.666666666666667e-05, "loss": 0.1969, "step": 1150 }, { "epoch": 1.7547607421875e-06, "model_forward_time": 0.025374650955200195, "step": 1150 }, { "epoch": 1.7547607421875e-06, "step": 1150, "training_step_time": 0.10841631889343262 }, { "epoch": 1.75628662109375e-06, "model_forward_time": 0.02546095848083496, "step": 1151 }, { "epoch": 1.75628662109375e-06, "step": 1151, "training_step_time": 0.10930299758911133 }, { "epoch": 1.7578125e-06, "model_forward_time": 0.025404930114746094, "step": 1152 }, { "epoch": 1.7578125e-06, "step": 1152, "training_step_time": 0.1210775375366211 }, { "epoch": 1.75933837890625e-06, "model_forward_time": 0.025130271911621094, "step": 1153 }, { "epoch": 1.75933837890625e-06, "step": 1153, "training_step_time": 0.1463611125946045 }, { "epoch": 1.7608642578125e-06, "model_forward_time": 0.025393962860107422, "step": 1154 }, { "epoch": 1.7608642578125e-06, "step": 1154, "training_step_time": 0.18546247482299805 }, { "epoch": 1.76239013671875e-06, "model_forward_time": 0.025324106216430664, "step": 1155 }, { "epoch": 1.76239013671875e-06, "step": 1155, "training_step_time": 0.12699103355407715 }, { "epoch": 1.763916015625e-06, "model_forward_time": 0.024593114852905273, "step": 1156 }, { "epoch": 1.763916015625e-06, "step": 1156, "training_step_time": 0.12203669548034668 }, { "epoch": 1.76544189453125e-06, "model_forward_time": 0.025598526000976562, "step": 1157 }, { "epoch": 1.76544189453125e-06, "step": 1157, "training_step_time": 0.19469904899597168 }, { "epoch": 1.7669677734375e-06, "model_forward_time": 0.02519679069519043, "step": 1158 }, { "epoch": 1.7669677734375e-06, "step": 1158, "training_step_time": 0.11834096908569336 }, { "epoch": 1.76849365234375e-06, "model_forward_time": 0.023514270782470703, "step": 1159 }, { "epoch": 1.76849365234375e-06, "step": 1159, "training_step_time": 0.10878109931945801 }, { "epoch": 1.77001953125e-06, "grad_norm": 0.8673734664916992, "learning_rate": 7.733333333333333e-05, "loss": 0.1694, "step": 1160 }, { "epoch": 1.77001953125e-06, "model_forward_time": 0.025103330612182617, "step": 1160 }, { "epoch": 1.77001953125e-06, "step": 1160, "training_step_time": 0.1074070930480957 }, { "epoch": 1.77154541015625e-06, "model_forward_time": 0.02619624137878418, "step": 1161 }, { "epoch": 1.77154541015625e-06, "step": 1161, "training_step_time": 0.11041069030761719 }, { "epoch": 1.7730712890625e-06, "model_forward_time": 0.025389432907104492, "step": 1162 }, { "epoch": 1.7730712890625e-06, "step": 1162, "training_step_time": 0.10895061492919922 }, { "epoch": 1.77459716796875e-06, "model_forward_time": 0.02507305145263672, "step": 1163 }, { "epoch": 1.77459716796875e-06, "step": 1163, "training_step_time": 0.10828685760498047 }, { "epoch": 1.776123046875e-06, "model_forward_time": 0.025153160095214844, "step": 1164 }, { "epoch": 1.776123046875e-06, "step": 1164, "training_step_time": 0.10617661476135254 }, { "epoch": 1.77764892578125e-06, "model_forward_time": 0.024858713150024414, "step": 1165 }, { "epoch": 1.77764892578125e-06, "step": 1165, "training_step_time": 0.10968232154846191 }, { "epoch": 1.7791748046875e-06, "model_forward_time": 0.02523970603942871, "step": 1166 }, { "epoch": 1.7791748046875e-06, "step": 1166, "training_step_time": 0.1078341007232666 }, { "epoch": 1.78070068359375e-06, "model_forward_time": 0.025442838668823242, "step": 1167 }, { "epoch": 1.78070068359375e-06, "step": 1167, "training_step_time": 0.10771560668945312 }, { "epoch": 1.7822265625e-06, "model_forward_time": 0.025145292282104492, "step": 1168 }, { "epoch": 1.7822265625e-06, "step": 1168, "training_step_time": 0.10511112213134766 }, { "epoch": 1.78375244140625e-06, "model_forward_time": 0.025131702423095703, "step": 1169 }, { "epoch": 1.78375244140625e-06, "step": 1169, "training_step_time": 0.10630130767822266 }, { "epoch": 1.7852783203125e-06, "grad_norm": 1.313318133354187, "learning_rate": 7.800000000000001e-05, "loss": 0.1877, "step": 1170 }, { "epoch": 1.7852783203125e-06, "model_forward_time": 0.025074243545532227, "step": 1170 }, { "epoch": 1.7852783203125e-06, "step": 1170, "training_step_time": 0.10598230361938477 }, { "epoch": 1.78680419921875e-06, "model_forward_time": 0.02524423599243164, "step": 1171 }, { "epoch": 1.78680419921875e-06, "step": 1171, "training_step_time": 0.10527491569519043 }, { "epoch": 1.788330078125e-06, "model_forward_time": 0.025203466415405273, "step": 1172 }, { "epoch": 1.788330078125e-06, "step": 1172, "training_step_time": 0.10583162307739258 }, { "epoch": 1.78985595703125e-06, "model_forward_time": 0.024821758270263672, "step": 1173 }, { "epoch": 1.78985595703125e-06, "step": 1173, "training_step_time": 0.10391569137573242 }, { "epoch": 1.7913818359375e-06, "model_forward_time": 0.02537059783935547, "step": 1174 }, { "epoch": 1.7913818359375e-06, "step": 1174, "training_step_time": 0.10833501815795898 }, { "epoch": 1.79290771484375e-06, "model_forward_time": 0.025058746337890625, "step": 1175 }, { "epoch": 1.79290771484375e-06, "step": 1175, "training_step_time": 0.10681462287902832 }, { "epoch": 1.79443359375e-06, "model_forward_time": 0.025298118591308594, "step": 1176 }, { "epoch": 1.79443359375e-06, "step": 1176, "training_step_time": 0.17254924774169922 }, { "epoch": 1.79595947265625e-06, "model_forward_time": 0.024231672286987305, "step": 1177 }, { "epoch": 1.79595947265625e-06, "step": 1177, "training_step_time": 0.14313292503356934 }, { "epoch": 1.7974853515625e-06, "model_forward_time": 0.02443671226501465, "step": 1178 }, { "epoch": 1.7974853515625e-06, "step": 1178, "training_step_time": 0.11342692375183105 }, { "epoch": 1.79901123046875e-06, "model_forward_time": 0.0251615047454834, "step": 1179 }, { "epoch": 1.79901123046875e-06, "step": 1179, "training_step_time": 0.1950817108154297 }, { "epoch": 1.800537109375e-06, "grad_norm": 0.5958669781684875, "learning_rate": 7.866666666666666e-05, "loss": 0.1539, "step": 1180 }, { "epoch": 1.800537109375e-06, "model_forward_time": 0.02401566505432129, "step": 1180 }, { "epoch": 1.800537109375e-06, "step": 1180, "training_step_time": 0.16553854942321777 }, { "epoch": 1.80206298828125e-06, "model_forward_time": 0.024207592010498047, "step": 1181 }, { "epoch": 1.80206298828125e-06, "step": 1181, "training_step_time": 0.16862750053405762 }, { "epoch": 1.8035888671875e-06, "model_forward_time": 0.02448248863220215, "step": 1182 }, { "epoch": 1.8035888671875e-06, "step": 1182, "training_step_time": 0.16234207153320312 }, { "epoch": 1.80511474609375e-06, "model_forward_time": 0.023937225341796875, "step": 1183 }, { "epoch": 1.80511474609375e-06, "step": 1183, "training_step_time": 0.1292862892150879 }, { "epoch": 1.806640625e-06, "model_forward_time": 0.024244070053100586, "step": 1184 }, { "epoch": 1.806640625e-06, "step": 1184, "training_step_time": 0.11646246910095215 }, { "epoch": 1.80816650390625e-06, "model_forward_time": 0.025247812271118164, "step": 1185 }, { "epoch": 1.80816650390625e-06, "step": 1185, "training_step_time": 0.10264897346496582 }, { "epoch": 1.8096923828125e-06, "model_forward_time": 0.024966955184936523, "step": 1186 }, { "epoch": 1.8096923828125e-06, "step": 1186, "training_step_time": 0.10508108139038086 }, { "epoch": 1.81121826171875e-06, "model_forward_time": 0.025043964385986328, "step": 1187 }, { "epoch": 1.81121826171875e-06, "step": 1187, "training_step_time": 0.1116480827331543 }, { "epoch": 1.812744140625e-06, "model_forward_time": 0.02576756477355957, "step": 1188 }, { "epoch": 1.812744140625e-06, "step": 1188, "training_step_time": 0.12711548805236816 }, { "epoch": 1.81427001953125e-06, "model_forward_time": 0.025079011917114258, "step": 1189 }, { "epoch": 1.81427001953125e-06, "step": 1189, "training_step_time": 0.129302978515625 }, { "epoch": 1.8157958984375e-06, "grad_norm": 1.3377705812454224, "learning_rate": 7.933333333333334e-05, "loss": 0.1747, "step": 1190 }, { "epoch": 1.8157958984375e-06, "model_forward_time": 0.02483820915222168, "step": 1190 }, { "epoch": 1.8157958984375e-06, "step": 1190, "training_step_time": 0.15600013732910156 }, { "epoch": 1.81732177734375e-06, "model_forward_time": 0.024726152420043945, "step": 1191 }, { "epoch": 1.81732177734375e-06, "step": 1191, "training_step_time": 0.15784597396850586 }, { "epoch": 1.81884765625e-06, "model_forward_time": 0.024142742156982422, "step": 1192 }, { "epoch": 1.81884765625e-06, "step": 1192, "training_step_time": 0.19258451461791992 }, { "epoch": 1.82037353515625e-06, "model_forward_time": 0.024500131607055664, "step": 1193 }, { "epoch": 1.82037353515625e-06, "step": 1193, "training_step_time": 0.10904979705810547 }, { "epoch": 1.8218994140625e-06, "model_forward_time": 0.02503180503845215, "step": 1194 }, { "epoch": 1.8218994140625e-06, "step": 1194, "training_step_time": 0.11116695404052734 }, { "epoch": 1.82342529296875e-06, "model_forward_time": 0.02527928352355957, "step": 1195 }, { "epoch": 1.82342529296875e-06, "step": 1195, "training_step_time": 0.11079216003417969 }, { "epoch": 1.824951171875e-06, "model_forward_time": 0.025384187698364258, "step": 1196 }, { "epoch": 1.824951171875e-06, "step": 1196, "training_step_time": 0.10915899276733398 }, { "epoch": 1.82647705078125e-06, "model_forward_time": 0.025018692016601562, "step": 1197 }, { "epoch": 1.82647705078125e-06, "step": 1197, "training_step_time": 0.10354018211364746 }, { "epoch": 1.8280029296875e-06, "model_forward_time": 0.024402141571044922, "step": 1198 }, { "epoch": 1.8280029296875e-06, "step": 1198, "training_step_time": 0.10785222053527832 }, { "epoch": 1.82952880859375e-06, "model_forward_time": 0.025052547454833984, "step": 1199 }, { "epoch": 1.82952880859375e-06, "step": 1199, "training_step_time": 0.11100172996520996 }, { "epoch": 1.8310546875e-06, "grad_norm": 1.7151702642440796, "learning_rate": 8e-05, "loss": 0.1892, "step": 1200 }, { "epoch": 1.8310546875e-06, "model_forward_time": 0.025517702102661133, "step": 1200 }, { "epoch": 1.8310546875e-06, "step": 1200, "training_step_time": 0.11256003379821777 }, { "epoch": 1.83258056640625e-06, "model_forward_time": 0.02570486068725586, "step": 1201 }, { "epoch": 1.83258056640625e-06, "step": 1201, "training_step_time": 0.10862898826599121 }, { "epoch": 1.8341064453125e-06, "model_forward_time": 0.02572011947631836, "step": 1202 }, { "epoch": 1.8341064453125e-06, "step": 1202, "training_step_time": 0.10820460319519043 }, { "epoch": 1.83563232421875e-06, "model_forward_time": 0.025420427322387695, "step": 1203 }, { "epoch": 1.83563232421875e-06, "step": 1203, "training_step_time": 0.11374711990356445 }, { "epoch": 1.837158203125e-06, "model_forward_time": 0.02543163299560547, "step": 1204 }, { "epoch": 1.837158203125e-06, "step": 1204, "training_step_time": 0.1141047477722168 }, { "epoch": 1.83868408203125e-06, "model_forward_time": 0.025302648544311523, "step": 1205 }, { "epoch": 1.83868408203125e-06, "step": 1205, "training_step_time": 0.10389876365661621 }, { "epoch": 1.8402099609375e-06, "model_forward_time": 0.025333881378173828, "step": 1206 }, { "epoch": 1.8402099609375e-06, "step": 1206, "training_step_time": 0.10593366622924805 }, { "epoch": 1.84173583984375e-06, "model_forward_time": 0.02511429786682129, "step": 1207 }, { "epoch": 1.84173583984375e-06, "step": 1207, "training_step_time": 0.10630536079406738 }, { "epoch": 1.84326171875e-06, "model_forward_time": 0.024990558624267578, "step": 1208 }, { "epoch": 1.84326171875e-06, "step": 1208, "training_step_time": 0.10520148277282715 }, { "epoch": 1.84478759765625e-06, "model_forward_time": 0.025490522384643555, "step": 1209 }, { "epoch": 1.84478759765625e-06, "step": 1209, "training_step_time": 0.10633516311645508 }, { "epoch": 1.8463134765625e-06, "grad_norm": 1.1224215030670166, "learning_rate": 8.066666666666667e-05, "loss": 0.2051, "step": 1210 }, { "epoch": 1.8463134765625e-06, "model_forward_time": 0.025072097778320312, "step": 1210 }, { "epoch": 1.8463134765625e-06, "step": 1210, "training_step_time": 0.1083838939666748 }, { "epoch": 1.84783935546875e-06, "model_forward_time": 0.02538776397705078, "step": 1211 }, { "epoch": 1.84783935546875e-06, "step": 1211, "training_step_time": 0.10822677612304688 }, { "epoch": 1.849365234375e-06, "model_forward_time": 0.025068998336791992, "step": 1212 }, { "epoch": 1.849365234375e-06, "step": 1212, "training_step_time": 0.10438132286071777 }, { "epoch": 1.85089111328125e-06, "model_forward_time": 0.02514934539794922, "step": 1213 }, { "epoch": 1.85089111328125e-06, "step": 1213, "training_step_time": 0.10590362548828125 }, { "epoch": 1.8524169921875e-06, "model_forward_time": 0.025738954544067383, "step": 1214 }, { "epoch": 1.8524169921875e-06, "step": 1214, "training_step_time": 0.10514092445373535 }, { "epoch": 1.85394287109375e-06, "model_forward_time": 0.02535247802734375, "step": 1215 }, { "epoch": 1.85394287109375e-06, "step": 1215, "training_step_time": 0.10616397857666016 }, { "epoch": 1.85546875e-06, "model_forward_time": 0.024689197540283203, "step": 1216 }, { "epoch": 1.85546875e-06, "step": 1216, "training_step_time": 0.1058807373046875 }, { "epoch": 1.85699462890625e-06, "model_forward_time": 0.026338815689086914, "step": 1217 }, { "epoch": 1.85699462890625e-06, "step": 1217, "training_step_time": 0.10562491416931152 }, { "epoch": 1.8585205078125e-06, "model_forward_time": 0.025639057159423828, "step": 1218 }, { "epoch": 1.8585205078125e-06, "step": 1218, "training_step_time": 0.1046438217163086 }, { "epoch": 1.86004638671875e-06, "model_forward_time": 0.02837967872619629, "step": 1219 }, { "epoch": 1.86004638671875e-06, "step": 1219, "training_step_time": 0.10834789276123047 }, { "epoch": 1.861572265625e-06, "grad_norm": 1.3597825765609741, "learning_rate": 8.133333333333334e-05, "loss": 0.178, "step": 1220 }, { "epoch": 1.861572265625e-06, "model_forward_time": 0.02520275115966797, "step": 1220 }, { "epoch": 1.861572265625e-06, "step": 1220, "training_step_time": 0.10679030418395996 }, { "epoch": 1.86309814453125e-06, "model_forward_time": 0.02521991729736328, "step": 1221 }, { "epoch": 1.86309814453125e-06, "step": 1221, "training_step_time": 0.1117544174194336 }, { "epoch": 1.8646240234375e-06, "model_forward_time": 0.025551557540893555, "step": 1222 }, { "epoch": 1.8646240234375e-06, "step": 1222, "training_step_time": 0.11016273498535156 }, { "epoch": 1.86614990234375e-06, "model_forward_time": 0.025426149368286133, "step": 1223 }, { "epoch": 1.86614990234375e-06, "step": 1223, "training_step_time": 0.15128302574157715 }, { "epoch": 1.86767578125e-06, "model_forward_time": 0.02533125877380371, "step": 1224 }, { "epoch": 1.86767578125e-06, "step": 1224, "training_step_time": 0.11080312728881836 }, { "epoch": 1.86920166015625e-06, "model_forward_time": 0.024776697158813477, "step": 1225 }, { "epoch": 1.86920166015625e-06, "step": 1225, "training_step_time": 0.10645294189453125 }, { "epoch": 1.8707275390625e-06, "model_forward_time": 0.025487661361694336, "step": 1226 }, { "epoch": 1.8707275390625e-06, "step": 1226, "training_step_time": 0.17316389083862305 }, { "epoch": 1.87225341796875e-06, "model_forward_time": 0.025194406509399414, "step": 1227 }, { "epoch": 1.87225341796875e-06, "step": 1227, "training_step_time": 0.19153761863708496 }, { "epoch": 1.873779296875e-06, "model_forward_time": 0.02446889877319336, "step": 1228 }, { "epoch": 1.873779296875e-06, "step": 1228, "training_step_time": 0.15164971351623535 }, { "epoch": 1.87530517578125e-06, "model_forward_time": 0.02520012855529785, "step": 1229 }, { "epoch": 1.87530517578125e-06, "step": 1229, "training_step_time": 0.15472984313964844 }, { "epoch": 1.8768310546875e-06, "grad_norm": 1.1429742574691772, "learning_rate": 8.2e-05, "loss": 0.2217, "step": 1230 }, { "epoch": 1.8768310546875e-06, "model_forward_time": 0.024310588836669922, "step": 1230 }, { "epoch": 1.8768310546875e-06, "step": 1230, "training_step_time": 0.12451791763305664 }, { "epoch": 1.87835693359375e-06, "model_forward_time": 0.024514198303222656, "step": 1231 }, { "epoch": 1.87835693359375e-06, "step": 1231, "training_step_time": 0.11500954627990723 }, { "epoch": 1.8798828125e-06, "model_forward_time": 0.025276899337768555, "step": 1232 }, { "epoch": 1.8798828125e-06, "step": 1232, "training_step_time": 0.10410523414611816 }, { "epoch": 1.88140869140625e-06, "model_forward_time": 0.025158166885375977, "step": 1233 }, { "epoch": 1.88140869140625e-06, "step": 1233, "training_step_time": 0.1769578456878662 }, { "epoch": 1.8829345703125e-06, "model_forward_time": 0.024852752685546875, "step": 1234 }, { "epoch": 1.8829345703125e-06, "step": 1234, "training_step_time": 0.13149356842041016 }, { "epoch": 1.88446044921875e-06, "model_forward_time": 0.025168657302856445, "step": 1235 }, { "epoch": 1.88446044921875e-06, "step": 1235, "training_step_time": 0.1051630973815918 }, { "epoch": 1.885986328125e-06, "model_forward_time": 0.02644062042236328, "step": 1236 }, { "epoch": 1.885986328125e-06, "step": 1236, "training_step_time": 0.1124112606048584 }, { "epoch": 1.88751220703125e-06, "model_forward_time": 0.025076866149902344, "step": 1237 }, { "epoch": 1.88751220703125e-06, "step": 1237, "training_step_time": 0.11532330513000488 }, { "epoch": 1.8890380859375e-06, "model_forward_time": 0.025855302810668945, "step": 1238 }, { "epoch": 1.8890380859375e-06, "step": 1238, "training_step_time": 0.10875129699707031 }, { "epoch": 1.89056396484375e-06, "model_forward_time": 0.025469303131103516, "step": 1239 }, { "epoch": 1.89056396484375e-06, "step": 1239, "training_step_time": 0.16292619705200195 }, { "epoch": 1.89208984375e-06, "grad_norm": 1.0527982711791992, "learning_rate": 8.266666666666667e-05, "loss": 0.1652, "step": 1240 }, { "epoch": 1.89208984375e-06, "model_forward_time": 0.025038480758666992, "step": 1240 }, { "epoch": 1.89208984375e-06, "step": 1240, "training_step_time": 0.10667014122009277 }, { "epoch": 1.89361572265625e-06, "model_forward_time": 0.025140762329101562, "step": 1241 }, { "epoch": 1.89361572265625e-06, "step": 1241, "training_step_time": 0.1059577465057373 }, { "epoch": 1.8951416015625e-06, "model_forward_time": 0.025479555130004883, "step": 1242 }, { "epoch": 1.8951416015625e-06, "step": 1242, "training_step_time": 0.10683703422546387 }, { "epoch": 1.89666748046875e-06, "model_forward_time": 0.02544546127319336, "step": 1243 }, { "epoch": 1.89666748046875e-06, "step": 1243, "training_step_time": 0.1068580150604248 }, { "epoch": 1.898193359375e-06, "model_forward_time": 0.02506089210510254, "step": 1244 }, { "epoch": 1.898193359375e-06, "step": 1244, "training_step_time": 0.10724449157714844 }, { "epoch": 1.89971923828125e-06, "model_forward_time": 0.025569677352905273, "step": 1245 }, { "epoch": 1.89971923828125e-06, "step": 1245, "training_step_time": 0.10748434066772461 }, { "epoch": 1.9012451171875e-06, "model_forward_time": 0.02441263198852539, "step": 1246 }, { "epoch": 1.9012451171875e-06, "step": 1246, "training_step_time": 0.10440969467163086 }, { "epoch": 1.90277099609375e-06, "model_forward_time": 0.02512192726135254, "step": 1247 }, { "epoch": 1.90277099609375e-06, "step": 1247, "training_step_time": 0.11462926864624023 }, { "epoch": 1.904296875e-06, "model_forward_time": 0.025743961334228516, "step": 1248 }, { "epoch": 1.904296875e-06, "step": 1248, "training_step_time": 0.11517047882080078 }, { "epoch": 1.90582275390625e-06, "model_forward_time": 0.025432348251342773, "step": 1249 }, { "epoch": 1.90582275390625e-06, "step": 1249, "training_step_time": 0.11037802696228027 }, { "epoch": 1.9073486328125e-06, "grad_norm": 1.1967273950576782, "learning_rate": 8.333333333333334e-05, "loss": 0.1523, "step": 1250 }, { "epoch": 1.9073486328125e-06, "model_forward_time": 0.025904178619384766, "step": 1250 }, { "epoch": 1.9073486328125e-06, "step": 1250, "training_step_time": 0.21413612365722656 }, { "epoch": 1.90887451171875e-06, "model_forward_time": 0.024668216705322266, "step": 1251 }, { "epoch": 1.90887451171875e-06, "step": 1251, "training_step_time": 0.12158465385437012 }, { "epoch": 1.910400390625e-06, "model_forward_time": 0.024970531463623047, "step": 1252 }, { "epoch": 1.910400390625e-06, "step": 1252, "training_step_time": 0.11132287979125977 }, { "epoch": 1.91192626953125e-06, "model_forward_time": 0.02595043182373047, "step": 1253 }, { "epoch": 1.91192626953125e-06, "step": 1253, "training_step_time": 0.11145377159118652 }, { "epoch": 1.9134521484375e-06, "model_forward_time": 0.025386810302734375, "step": 1254 }, { "epoch": 1.9134521484375e-06, "step": 1254, "training_step_time": 0.10927700996398926 }, { "epoch": 1.91497802734375e-06, "model_forward_time": 0.025403499603271484, "step": 1255 }, { "epoch": 1.91497802734375e-06, "step": 1255, "training_step_time": 0.10837388038635254 }, { "epoch": 1.91650390625e-06, "model_forward_time": 0.026601552963256836, "step": 1256 }, { "epoch": 1.91650390625e-06, "step": 1256, "training_step_time": 0.11478757858276367 }, { "epoch": 1.91802978515625e-06, "model_forward_time": 0.025215625762939453, "step": 1257 }, { "epoch": 1.91802978515625e-06, "step": 1257, "training_step_time": 0.11050009727478027 }, { "epoch": 1.9195556640625e-06, "model_forward_time": 0.025013208389282227, "step": 1258 }, { "epoch": 1.9195556640625e-06, "step": 1258, "training_step_time": 0.11108922958374023 }, { "epoch": 1.92108154296875e-06, "model_forward_time": 0.02761077880859375, "step": 1259 }, { "epoch": 1.92108154296875e-06, "step": 1259, "training_step_time": 0.11068534851074219 }, { "epoch": 1.922607421875e-06, "grad_norm": 0.8787569999694824, "learning_rate": 8.4e-05, "loss": 0.1607, "step": 1260 }, { "epoch": 1.922607421875e-06, "model_forward_time": 0.026442289352416992, "step": 1260 }, { "epoch": 1.922607421875e-06, "step": 1260, "training_step_time": 0.11026668548583984 }, { "epoch": 1.92413330078125e-06, "model_forward_time": 0.02527475357055664, "step": 1261 }, { "epoch": 1.92413330078125e-06, "step": 1261, "training_step_time": 0.1059408187866211 }, { "epoch": 1.9256591796875e-06, "model_forward_time": 0.025359630584716797, "step": 1262 }, { "epoch": 1.9256591796875e-06, "step": 1262, "training_step_time": 0.11127209663391113 }, { "epoch": 1.92718505859375e-06, "model_forward_time": 0.025199174880981445, "step": 1263 }, { "epoch": 1.92718505859375e-06, "step": 1263, "training_step_time": 0.1074681282043457 }, { "epoch": 1.9287109375e-06, "model_forward_time": 0.025114059448242188, "step": 1264 }, { "epoch": 1.9287109375e-06, "step": 1264, "training_step_time": 0.11225652694702148 }, { "epoch": 1.93023681640625e-06, "model_forward_time": 0.025652408599853516, "step": 1265 }, { "epoch": 1.93023681640625e-06, "step": 1265, "training_step_time": 0.10862898826599121 }, { "epoch": 1.9317626953125e-06, "model_forward_time": 0.02547907829284668, "step": 1266 }, { "epoch": 1.9317626953125e-06, "step": 1266, "training_step_time": 0.11748576164245605 }, { "epoch": 1.93328857421875e-06, "model_forward_time": 0.025145769119262695, "step": 1267 }, { "epoch": 1.93328857421875e-06, "step": 1267, "training_step_time": 0.10688948631286621 }, { "epoch": 1.934814453125e-06, "model_forward_time": 0.025434494018554688, "step": 1268 }, { "epoch": 1.934814453125e-06, "step": 1268, "training_step_time": 0.11642241477966309 }, { "epoch": 1.93634033203125e-06, "model_forward_time": 0.02573680877685547, "step": 1269 }, { "epoch": 1.93634033203125e-06, "step": 1269, "training_step_time": 0.19725751876831055 }, { "epoch": 1.9378662109375e-06, "grad_norm": 0.8215675354003906, "learning_rate": 8.466666666666667e-05, "loss": 0.1354, "step": 1270 }, { "epoch": 1.9378662109375e-06, "model_forward_time": 0.024491548538208008, "step": 1270 }, { "epoch": 1.9378662109375e-06, "step": 1270, "training_step_time": 0.10608100891113281 }, { "epoch": 1.93939208984375e-06, "model_forward_time": 0.024990320205688477, "step": 1271 }, { "epoch": 1.93939208984375e-06, "step": 1271, "training_step_time": 0.11183047294616699 }, { "epoch": 1.94091796875e-06, "model_forward_time": 0.025394916534423828, "step": 1272 }, { "epoch": 1.94091796875e-06, "step": 1272, "training_step_time": 0.12755179405212402 }, { "epoch": 1.94244384765625e-06, "model_forward_time": 0.02653336524963379, "step": 1273 }, { "epoch": 1.94244384765625e-06, "step": 1273, "training_step_time": 0.11612796783447266 }, { "epoch": 1.9439697265625e-06, "model_forward_time": 0.025595426559448242, "step": 1274 }, { "epoch": 1.9439697265625e-06, "step": 1274, "training_step_time": 0.13203763961791992 }, { "epoch": 1.94549560546875e-06, "model_forward_time": 0.02534627914428711, "step": 1275 }, { "epoch": 1.94549560546875e-06, "step": 1275, "training_step_time": 0.15285325050354004 }, { "epoch": 1.947021484375e-06, "model_forward_time": 0.02494192123413086, "step": 1276 }, { "epoch": 1.947021484375e-06, "step": 1276, "training_step_time": 0.17490196228027344 }, { "epoch": 1.94854736328125e-06, "model_forward_time": 0.025095224380493164, "step": 1277 }, { "epoch": 1.94854736328125e-06, "step": 1277, "training_step_time": 0.16460013389587402 }, { "epoch": 1.9500732421875e-06, "model_forward_time": 0.02494978904724121, "step": 1278 }, { "epoch": 1.9500732421875e-06, "step": 1278, "training_step_time": 0.10694408416748047 }, { "epoch": 1.95159912109375e-06, "model_forward_time": 0.024760723114013672, "step": 1279 }, { "epoch": 1.95159912109375e-06, "step": 1279, "training_step_time": 0.17813968658447266 }, { "epoch": 1.953125e-06, "grad_norm": 1.017802357673645, "learning_rate": 8.533333333333334e-05, "loss": 0.1764, "step": 1280 }, { "epoch": 1.953125e-06, "model_forward_time": 0.02478933334350586, "step": 1280 }, { "epoch": 1.953125e-06, "step": 1280, "training_step_time": 0.12899208068847656 }, { "epoch": 1.95465087890625e-06, "model_forward_time": 0.024315595626831055, "step": 1281 }, { "epoch": 1.95465087890625e-06, "step": 1281, "training_step_time": 0.11281275749206543 }, { "epoch": 1.9561767578125e-06, "model_forward_time": 0.025557994842529297, "step": 1282 }, { "epoch": 1.9561767578125e-06, "step": 1282, "training_step_time": 0.11891531944274902 }, { "epoch": 1.95770263671875e-06, "model_forward_time": 0.025480985641479492, "step": 1283 }, { "epoch": 1.95770263671875e-06, "step": 1283, "training_step_time": 0.11455106735229492 }, { "epoch": 1.959228515625e-06, "model_forward_time": 0.02533268928527832, "step": 1284 }, { "epoch": 1.959228515625e-06, "step": 1284, "training_step_time": 0.1552126407623291 }, { "epoch": 1.96075439453125e-06, "model_forward_time": 0.024867534637451172, "step": 1285 }, { "epoch": 1.96075439453125e-06, "step": 1285, "training_step_time": 0.1475541591644287 }, { "epoch": 1.9622802734375e-06, "model_forward_time": 0.024742603302001953, "step": 1286 }, { "epoch": 1.9622802734375e-06, "step": 1286, "training_step_time": 0.10786318778991699 }, { "epoch": 1.96380615234375e-06, "model_forward_time": 0.025065183639526367, "step": 1287 }, { "epoch": 1.96380615234375e-06, "step": 1287, "training_step_time": 0.10768795013427734 }, { "epoch": 1.96533203125e-06, "model_forward_time": 0.025559663772583008, "step": 1288 }, { "epoch": 1.96533203125e-06, "step": 1288, "training_step_time": 0.1079401969909668 }, { "epoch": 1.96685791015625e-06, "model_forward_time": 0.025629520416259766, "step": 1289 }, { "epoch": 1.96685791015625e-06, "step": 1289, "training_step_time": 0.10952162742614746 }, { "epoch": 1.9683837890625e-06, "grad_norm": 0.8259382843971252, "learning_rate": 8.6e-05, "loss": 0.1595, "step": 1290 }, { "epoch": 1.9683837890625e-06, "model_forward_time": 0.02487325668334961, "step": 1290 }, { "epoch": 1.9683837890625e-06, "step": 1290, "training_step_time": 0.11021041870117188 }, { "epoch": 1.96990966796875e-06, "model_forward_time": 0.025946378707885742, "step": 1291 }, { "epoch": 1.96990966796875e-06, "step": 1291, "training_step_time": 0.10572671890258789 }, { "epoch": 1.971435546875e-06, "model_forward_time": 0.0256500244140625, "step": 1292 }, { "epoch": 1.971435546875e-06, "step": 1292, "training_step_time": 0.18448877334594727 }, { "epoch": 1.97296142578125e-06, "model_forward_time": 0.025085926055908203, "step": 1293 }, { "epoch": 1.97296142578125e-06, "step": 1293, "training_step_time": 0.15895318984985352 }, { "epoch": 1.9744873046875e-06, "model_forward_time": 0.024848461151123047, "step": 1294 }, { "epoch": 1.9744873046875e-06, "step": 1294, "training_step_time": 0.11060714721679688 }, { "epoch": 1.97601318359375e-06, "model_forward_time": 0.025487661361694336, "step": 1295 }, { "epoch": 1.97601318359375e-06, "step": 1295, "training_step_time": 0.10629844665527344 }, { "epoch": 1.9775390625e-06, "model_forward_time": 0.025498628616333008, "step": 1296 }, { "epoch": 1.9775390625e-06, "step": 1296, "training_step_time": 0.1059727668762207 }, { "epoch": 1.97906494140625e-06, "model_forward_time": 0.0265653133392334, "step": 1297 }, { "epoch": 1.97906494140625e-06, "step": 1297, "training_step_time": 0.11496949195861816 }, { "epoch": 1.9805908203125e-06, "model_forward_time": 0.02538919448852539, "step": 1298 }, { "epoch": 1.9805908203125e-06, "step": 1298, "training_step_time": 0.10827040672302246 }, { "epoch": 1.98211669921875e-06, "model_forward_time": 0.02447652816772461, "step": 1299 }, { "epoch": 1.98211669921875e-06, "step": 1299, "training_step_time": 0.10711097717285156 }, { "epoch": 1.983642578125e-06, "grad_norm": 1.377303123474121, "learning_rate": 8.666666666666667e-05, "loss": 0.1958, "step": 1300 }, { "epoch": 1.983642578125e-06, "model_forward_time": 0.025193214416503906, "step": 1300 }, { "epoch": 1.983642578125e-06, "step": 1300, "training_step_time": 0.10614585876464844 }, { "epoch": 1.98516845703125e-06, "model_forward_time": 0.028905391693115234, "step": 1301 }, { "epoch": 1.98516845703125e-06, "step": 1301, "training_step_time": 0.11077356338500977 }, { "epoch": 1.9866943359375e-06, "model_forward_time": 0.025732040405273438, "step": 1302 }, { "epoch": 1.9866943359375e-06, "step": 1302, "training_step_time": 0.10677981376647949 }, { "epoch": 1.98822021484375e-06, "model_forward_time": 0.02523493766784668, "step": 1303 }, { "epoch": 1.98822021484375e-06, "step": 1303, "training_step_time": 0.11117339134216309 }, { "epoch": 1.98974609375e-06, "model_forward_time": 0.025220870971679688, "step": 1304 }, { "epoch": 1.98974609375e-06, "step": 1304, "training_step_time": 0.11197733879089355 }, { "epoch": 1.99127197265625e-06, "model_forward_time": 0.024124622344970703, "step": 1305 }, { "epoch": 1.99127197265625e-06, "step": 1305, "training_step_time": 0.10925102233886719 }, { "epoch": 1.9927978515625e-06, "model_forward_time": 0.025442123413085938, "step": 1306 }, { "epoch": 1.9927978515625e-06, "step": 1306, "training_step_time": 0.10828447341918945 }, { "epoch": 1.99432373046875e-06, "model_forward_time": 0.02536916732788086, "step": 1307 }, { "epoch": 1.99432373046875e-06, "step": 1307, "training_step_time": 0.10908031463623047 }, { "epoch": 1.995849609375e-06, "model_forward_time": 0.02540278434753418, "step": 1308 }, { "epoch": 1.995849609375e-06, "step": 1308, "training_step_time": 0.10738182067871094 }, { "epoch": 1.99737548828125e-06, "model_forward_time": 0.025277376174926758, "step": 1309 }, { "epoch": 1.99737548828125e-06, "step": 1309, "training_step_time": 0.11009049415588379 }, { "epoch": 1.9989013671875e-06, "grad_norm": 2.1278889179229736, "learning_rate": 8.733333333333333e-05, "loss": 0.1806, "step": 1310 }, { "epoch": 1.9989013671875e-06, "model_forward_time": 0.02548074722290039, "step": 1310 }, { "epoch": 1.9989013671875e-06, "step": 1310, "training_step_time": 0.10800623893737793 }, { "epoch": 2.00042724609375e-06, "model_forward_time": 0.02530670166015625, "step": 1311 }, { "epoch": 2.00042724609375e-06, "step": 1311, "training_step_time": 0.10456681251525879 }, { "epoch": 2.001953125e-06, "model_forward_time": 0.025532960891723633, "step": 1312 }, { "epoch": 2.001953125e-06, "step": 1312, "training_step_time": 0.10766410827636719 }, { "epoch": 2.00347900390625e-06, "model_forward_time": 0.0254819393157959, "step": 1313 }, { "epoch": 2.00347900390625e-06, "step": 1313, "training_step_time": 0.1063995361328125 }, { "epoch": 2.0050048828125e-06, "model_forward_time": 0.025454282760620117, "step": 1314 }, { "epoch": 2.0050048828125e-06, "step": 1314, "training_step_time": 0.10877442359924316 }, { "epoch": 2.00653076171875e-06, "model_forward_time": 0.02510380744934082, "step": 1315 }, { "epoch": 2.00653076171875e-06, "step": 1315, "training_step_time": 0.10476922988891602 }, { "epoch": 2.008056640625e-06, "model_forward_time": 0.026585817337036133, "step": 1316 }, { "epoch": 2.008056640625e-06, "step": 1316, "training_step_time": 0.15471768379211426 }, { "epoch": 2.00958251953125e-06, "model_forward_time": 0.025205135345458984, "step": 1317 }, { "epoch": 2.00958251953125e-06, "step": 1317, "training_step_time": 0.11152386665344238 }, { "epoch": 2.0111083984375e-06, "model_forward_time": 0.02523517608642578, "step": 1318 }, { "epoch": 2.0111083984375e-06, "step": 1318, "training_step_time": 0.21431779861450195 }, { "epoch": 2.01263427734375e-06, "model_forward_time": 0.024465084075927734, "step": 1319 }, { "epoch": 2.01263427734375e-06, "step": 1319, "training_step_time": 0.14450311660766602 }, { "epoch": 2.01416015625e-06, "grad_norm": 0.6938613653182983, "learning_rate": 8.800000000000001e-05, "loss": 0.1772, "step": 1320 }, { "epoch": 2.01416015625e-06, "model_forward_time": 0.024492979049682617, "step": 1320 }, { "epoch": 2.01416015625e-06, "step": 1320, "training_step_time": 0.10764026641845703 }, { "epoch": 2.01568603515625e-06, "model_forward_time": 0.024759292602539062, "step": 1321 }, { "epoch": 2.01568603515625e-06, "step": 1321, "training_step_time": 0.15001654624938965 }, { "epoch": 2.0172119140625e-06, "model_forward_time": 0.024561166763305664, "step": 1322 }, { "epoch": 2.0172119140625e-06, "step": 1322, "training_step_time": 0.15971136093139648 }, { "epoch": 2.01873779296875e-06, "model_forward_time": 0.024271488189697266, "step": 1323 }, { "epoch": 2.01873779296875e-06, "step": 1323, "training_step_time": 0.1512136459350586 }, { "epoch": 2.020263671875e-06, "model_forward_time": 0.026529550552368164, "step": 1324 }, { "epoch": 2.020263671875e-06, "step": 1324, "training_step_time": 0.11220192909240723 }, { "epoch": 2.02178955078125e-06, "model_forward_time": 0.024919986724853516, "step": 1325 }, { "epoch": 2.02178955078125e-06, "step": 1325, "training_step_time": 0.10515546798706055 }, { "epoch": 2.0233154296875e-06, "model_forward_time": 0.025232791900634766, "step": 1326 }, { "epoch": 2.0233154296875e-06, "step": 1326, "training_step_time": 0.12074518203735352 }, { "epoch": 2.02484130859375e-06, "model_forward_time": 0.025687456130981445, "step": 1327 }, { "epoch": 2.02484130859375e-06, "step": 1327, "training_step_time": 0.13652658462524414 }, { "epoch": 2.0263671875e-06, "model_forward_time": 0.0252377986907959, "step": 1328 }, { "epoch": 2.0263671875e-06, "step": 1328, "training_step_time": 0.11081981658935547 }, { "epoch": 2.02789306640625e-06, "model_forward_time": 0.025160789489746094, "step": 1329 }, { "epoch": 2.02789306640625e-06, "step": 1329, "training_step_time": 0.11311936378479004 }, { "epoch": 2.0294189453125e-06, "grad_norm": 0.8620762825012207, "learning_rate": 8.866666666666668e-05, "loss": 0.1365, "step": 1330 }, { "epoch": 2.0294189453125e-06, "model_forward_time": 0.025264263153076172, "step": 1330 }, { "epoch": 2.0294189453125e-06, "step": 1330, "training_step_time": 0.11312174797058105 }, { "epoch": 2.03094482421875e-06, "model_forward_time": 0.024471282958984375, "step": 1331 }, { "epoch": 2.03094482421875e-06, "step": 1331, "training_step_time": 0.1130518913269043 }, { "epoch": 2.032470703125e-06, "model_forward_time": 0.025809049606323242, "step": 1332 }, { "epoch": 2.032470703125e-06, "step": 1332, "training_step_time": 0.20093965530395508 }, { "epoch": 2.03399658203125e-06, "model_forward_time": 0.024343252182006836, "step": 1333 }, { "epoch": 2.03399658203125e-06, "step": 1333, "training_step_time": 0.10858726501464844 }, { "epoch": 2.0355224609375e-06, "model_forward_time": 0.023871660232543945, "step": 1334 }, { "epoch": 2.0355224609375e-06, "step": 1334, "training_step_time": 0.10741734504699707 }, { "epoch": 2.03704833984375e-06, "model_forward_time": 0.02486586570739746, "step": 1335 }, { "epoch": 2.03704833984375e-06, "step": 1335, "training_step_time": 0.1115102767944336 }, { "epoch": 2.03857421875e-06, "model_forward_time": 0.0257568359375, "step": 1336 }, { "epoch": 2.03857421875e-06, "step": 1336, "training_step_time": 0.10562515258789062 }, { "epoch": 2.04010009765625e-06, "model_forward_time": 0.025190114974975586, "step": 1337 }, { "epoch": 2.04010009765625e-06, "step": 1337, "training_step_time": 0.188643217086792 }, { "epoch": 2.0416259765625e-06, "model_forward_time": 0.026105165481567383, "step": 1338 }, { "epoch": 2.0416259765625e-06, "step": 1338, "training_step_time": 0.1392374038696289 }, { "epoch": 2.04315185546875e-06, "model_forward_time": 0.024634361267089844, "step": 1339 }, { "epoch": 2.04315185546875e-06, "step": 1339, "training_step_time": 0.12996745109558105 }, { "epoch": 2.044677734375e-06, "grad_norm": 1.3869773149490356, "learning_rate": 8.933333333333334e-05, "loss": 0.147, "step": 1340 }, { "epoch": 2.044677734375e-06, "model_forward_time": 0.024804115295410156, "step": 1340 }, { "epoch": 2.044677734375e-06, "step": 1340, "training_step_time": 0.10552811622619629 }, { "epoch": 2.04620361328125e-06, "model_forward_time": 0.02529740333557129, "step": 1341 }, { "epoch": 2.04620361328125e-06, "step": 1341, "training_step_time": 0.10736823081970215 }, { "epoch": 2.0477294921875e-06, "model_forward_time": 0.025519609451293945, "step": 1342 }, { "epoch": 2.0477294921875e-06, "step": 1342, "training_step_time": 0.1083841323852539 }, { "epoch": 2.04925537109375e-06, "model_forward_time": 0.025340795516967773, "step": 1343 }, { "epoch": 2.04925537109375e-06, "step": 1343, "training_step_time": 0.10663962364196777 }, { "epoch": 2.05078125e-06, "model_forward_time": 0.025208473205566406, "step": 1344 }, { "epoch": 2.05078125e-06, "step": 1344, "training_step_time": 0.1079409122467041 }, { "epoch": 2.05230712890625e-06, "model_forward_time": 0.02545452117919922, "step": 1345 }, { "epoch": 2.05230712890625e-06, "step": 1345, "training_step_time": 0.1059122085571289 }, { "epoch": 2.0538330078125e-06, "model_forward_time": 0.024996280670166016, "step": 1346 }, { "epoch": 2.0538330078125e-06, "step": 1346, "training_step_time": 0.11000514030456543 }, { "epoch": 2.05535888671875e-06, "model_forward_time": 0.025363683700561523, "step": 1347 }, { "epoch": 2.05535888671875e-06, "step": 1347, "training_step_time": 0.10774588584899902 }, { "epoch": 2.056884765625e-06, "model_forward_time": 0.025640487670898438, "step": 1348 }, { "epoch": 2.056884765625e-06, "step": 1348, "training_step_time": 0.10753393173217773 }, { "epoch": 2.05841064453125e-06, "model_forward_time": 0.025701522827148438, "step": 1349 }, { "epoch": 2.05841064453125e-06, "step": 1349, "training_step_time": 0.1148684024810791 }, { "epoch": 2.0599365234375e-06, "grad_norm": 0.7594265341758728, "learning_rate": 9e-05, "loss": 0.1954, "step": 1350 }, { "epoch": 2.0599365234375e-06, "model_forward_time": 0.025305747985839844, "step": 1350 }, { "epoch": 2.0599365234375e-06, "step": 1350, "training_step_time": 0.10439133644104004 }, { "epoch": 2.06146240234375e-06, "model_forward_time": 0.025230884552001953, "step": 1351 }, { "epoch": 2.06146240234375e-06, "step": 1351, "training_step_time": 0.11135077476501465 }, { "epoch": 2.06298828125e-06, "model_forward_time": 0.025582075119018555, "step": 1352 }, { "epoch": 2.06298828125e-06, "step": 1352, "training_step_time": 0.10800790786743164 }, { "epoch": 2.06451416015625e-06, "model_forward_time": 0.025220632553100586, "step": 1353 }, { "epoch": 2.06451416015625e-06, "step": 1353, "training_step_time": 0.1056063175201416 }, { "epoch": 2.0660400390625e-06, "model_forward_time": 0.026401042938232422, "step": 1354 }, { "epoch": 2.0660400390625e-06, "step": 1354, "training_step_time": 0.10689640045166016 }, { "epoch": 2.06756591796875e-06, "model_forward_time": 0.02664041519165039, "step": 1355 }, { "epoch": 2.06756591796875e-06, "step": 1355, "training_step_time": 0.10986995697021484 }, { "epoch": 2.069091796875e-06, "model_forward_time": 0.025368452072143555, "step": 1356 }, { "epoch": 2.069091796875e-06, "step": 1356, "training_step_time": 0.10695433616638184 }, { "epoch": 2.07061767578125e-06, "model_forward_time": 0.025503158569335938, "step": 1357 }, { "epoch": 2.07061767578125e-06, "step": 1357, "training_step_time": 0.10642409324645996 }, { "epoch": 2.0721435546875e-06, "model_forward_time": 0.025232791900634766, "step": 1358 }, { "epoch": 2.0721435546875e-06, "step": 1358, "training_step_time": 0.10681033134460449 }, { "epoch": 2.07366943359375e-06, "model_forward_time": 0.02518606185913086, "step": 1359 }, { "epoch": 2.07366943359375e-06, "step": 1359, "training_step_time": 0.1127328872680664 }, { "epoch": 2.0751953125e-06, "grad_norm": 0.98515784740448, "learning_rate": 9.066666666666667e-05, "loss": 0.1698, "step": 1360 }, { "epoch": 2.0751953125e-06, "model_forward_time": 0.025780677795410156, "step": 1360 }, { "epoch": 2.0751953125e-06, "step": 1360, "training_step_time": 0.10627460479736328 }, { "epoch": 2.07672119140625e-06, "model_forward_time": 0.02504897117614746, "step": 1361 }, { "epoch": 2.07672119140625e-06, "step": 1361, "training_step_time": 0.10714435577392578 }, { "epoch": 2.0782470703125e-06, "model_forward_time": 0.024992942810058594, "step": 1362 }, { "epoch": 2.0782470703125e-06, "step": 1362, "training_step_time": 0.20529413223266602 }, { "epoch": 2.07977294921875e-06, "model_forward_time": 0.02433156967163086, "step": 1363 }, { "epoch": 2.07977294921875e-06, "step": 1363, "training_step_time": 0.10849475860595703 }, { "epoch": 2.081298828125e-06, "model_forward_time": 0.025172948837280273, "step": 1364 }, { "epoch": 2.081298828125e-06, "step": 1364, "training_step_time": 0.18041586875915527 }, { "epoch": 2.08282470703125e-06, "model_forward_time": 0.024529695510864258, "step": 1365 }, { "epoch": 2.08282470703125e-06, "step": 1365, "training_step_time": 0.18352413177490234 }, { "epoch": 2.0843505859375e-06, "model_forward_time": 0.02430438995361328, "step": 1366 }, { "epoch": 2.0843505859375e-06, "step": 1366, "training_step_time": 0.1677396297454834 }, { "epoch": 2.08587646484375e-06, "model_forward_time": 0.025100231170654297, "step": 1367 }, { "epoch": 2.08587646484375e-06, "step": 1367, "training_step_time": 0.15979933738708496 }, { "epoch": 2.08740234375e-06, "model_forward_time": 0.024810791015625, "step": 1368 }, { "epoch": 2.08740234375e-06, "step": 1368, "training_step_time": 0.12097287178039551 }, { "epoch": 2.08892822265625e-06, "model_forward_time": 0.02483081817626953, "step": 1369 }, { "epoch": 2.08892822265625e-06, "step": 1369, "training_step_time": 0.12729477882385254 }, { "epoch": 2.0904541015625e-06, "grad_norm": 1.365875482559204, "learning_rate": 9.133333333333334e-05, "loss": 0.1436, "step": 1370 }, { "epoch": 2.0904541015625e-06, "model_forward_time": 0.024931669235229492, "step": 1370 }, { "epoch": 2.0904541015625e-06, "step": 1370, "training_step_time": 0.10365152359008789 }, { "epoch": 2.09197998046875e-06, "model_forward_time": 0.025618314743041992, "step": 1371 }, { "epoch": 2.09197998046875e-06, "step": 1371, "training_step_time": 0.10644173622131348 }, { "epoch": 2.093505859375e-06, "model_forward_time": 0.025599956512451172, "step": 1372 }, { "epoch": 2.093505859375e-06, "step": 1372, "training_step_time": 0.166151762008667 }, { "epoch": 2.09503173828125e-06, "model_forward_time": 0.025098085403442383, "step": 1373 }, { "epoch": 2.09503173828125e-06, "step": 1373, "training_step_time": 0.1393415927886963 }, { "epoch": 2.0965576171875e-06, "model_forward_time": 0.024698734283447266, "step": 1374 }, { "epoch": 2.0965576171875e-06, "step": 1374, "training_step_time": 0.10863733291625977 }, { "epoch": 2.09808349609375e-06, "model_forward_time": 0.024762868881225586, "step": 1375 }, { "epoch": 2.09808349609375e-06, "step": 1375, "training_step_time": 0.11086225509643555 }, { "epoch": 2.099609375e-06, "model_forward_time": 0.025424480438232422, "step": 1376 }, { "epoch": 2.099609375e-06, "step": 1376, "training_step_time": 0.11547994613647461 }, { "epoch": 2.10113525390625e-06, "model_forward_time": 0.02519989013671875, "step": 1377 }, { "epoch": 2.10113525390625e-06, "step": 1377, "training_step_time": 0.11206674575805664 }, { "epoch": 2.1026611328125e-06, "model_forward_time": 0.025083303451538086, "step": 1378 }, { "epoch": 2.1026611328125e-06, "step": 1378, "training_step_time": 0.11881184577941895 }, { "epoch": 2.10418701171875e-06, "model_forward_time": 0.025065898895263672, "step": 1379 }, { "epoch": 2.10418701171875e-06, "step": 1379, "training_step_time": 0.11109232902526855 }, { "epoch": 2.105712890625e-06, "grad_norm": 1.4226160049438477, "learning_rate": 9.200000000000001e-05, "loss": 0.1428, "step": 1380 }, { "epoch": 2.105712890625e-06, "model_forward_time": 0.025165319442749023, "step": 1380 }, { "epoch": 2.105712890625e-06, "step": 1380, "training_step_time": 0.1115870475769043 }, { "epoch": 2.10723876953125e-06, "model_forward_time": 0.025751113891601562, "step": 1381 }, { "epoch": 2.10723876953125e-06, "step": 1381, "training_step_time": 0.10475730895996094 }, { "epoch": 2.1087646484375e-06, "model_forward_time": 0.02521824836730957, "step": 1382 }, { "epoch": 2.1087646484375e-06, "step": 1382, "training_step_time": 0.11392545700073242 }, { "epoch": 2.11029052734375e-06, "model_forward_time": 0.024941682815551758, "step": 1383 }, { "epoch": 2.11029052734375e-06, "step": 1383, "training_step_time": 0.11006975173950195 }, { "epoch": 2.11181640625e-06, "model_forward_time": 0.025444746017456055, "step": 1384 }, { "epoch": 2.11181640625e-06, "step": 1384, "training_step_time": 0.10741901397705078 }, { "epoch": 2.11334228515625e-06, "model_forward_time": 0.025771141052246094, "step": 1385 }, { "epoch": 2.11334228515625e-06, "step": 1385, "training_step_time": 0.10753822326660156 }, { "epoch": 2.1148681640625e-06, "model_forward_time": 0.025191545486450195, "step": 1386 }, { "epoch": 2.1148681640625e-06, "step": 1386, "training_step_time": 0.10698175430297852 }, { "epoch": 2.11639404296875e-06, "model_forward_time": 0.025516510009765625, "step": 1387 }, { "epoch": 2.11639404296875e-06, "step": 1387, "training_step_time": 0.12093162536621094 }, { "epoch": 2.117919921875e-06, "model_forward_time": 0.025435686111450195, "step": 1388 }, { "epoch": 2.117919921875e-06, "step": 1388, "training_step_time": 0.11258983612060547 }, { "epoch": 2.11944580078125e-06, "model_forward_time": 0.025190114974975586, "step": 1389 }, { "epoch": 2.11944580078125e-06, "step": 1389, "training_step_time": 0.10873651504516602 }, { "epoch": 2.1209716796875e-06, "grad_norm": 1.1633135080337524, "learning_rate": 9.266666666666666e-05, "loss": 0.1679, "step": 1390 }, { "epoch": 2.1209716796875e-06, "model_forward_time": 0.025387048721313477, "step": 1390 }, { "epoch": 2.1209716796875e-06, "step": 1390, "training_step_time": 0.10318899154663086 }, { "epoch": 2.12249755859375e-06, "model_forward_time": 0.025238513946533203, "step": 1391 }, { "epoch": 2.12249755859375e-06, "step": 1391, "training_step_time": 0.10442447662353516 }, { "epoch": 2.1240234375e-06, "model_forward_time": 0.025487422943115234, "step": 1392 }, { "epoch": 2.1240234375e-06, "step": 1392, "training_step_time": 0.10919785499572754 }, { "epoch": 2.12554931640625e-06, "model_forward_time": 0.025844335556030273, "step": 1393 }, { "epoch": 2.12554931640625e-06, "step": 1393, "training_step_time": 0.10811805725097656 }, { "epoch": 2.1270751953125e-06, "model_forward_time": 0.025135040283203125, "step": 1394 }, { "epoch": 2.1270751953125e-06, "step": 1394, "training_step_time": 0.1067664623260498 }, { "epoch": 2.12860107421875e-06, "model_forward_time": 0.024470806121826172, "step": 1395 }, { "epoch": 2.12860107421875e-06, "step": 1395, "training_step_time": 0.1043238639831543 }, { "epoch": 2.130126953125e-06, "model_forward_time": 0.025542259216308594, "step": 1396 }, { "epoch": 2.130126953125e-06, "step": 1396, "training_step_time": 0.10828661918640137 }, { "epoch": 2.13165283203125e-06, "model_forward_time": 0.0255587100982666, "step": 1397 }, { "epoch": 2.13165283203125e-06, "step": 1397, "training_step_time": 0.11431670188903809 }, { "epoch": 2.1331787109375e-06, "model_forward_time": 0.02551746368408203, "step": 1398 }, { "epoch": 2.1331787109375e-06, "step": 1398, "training_step_time": 0.13465547561645508 }, { "epoch": 2.13470458984375e-06, "model_forward_time": 0.0254824161529541, "step": 1399 }, { "epoch": 2.13470458984375e-06, "step": 1399, "training_step_time": 0.1233057975769043 }, { "epoch": 2.13623046875e-06, "grad_norm": 0.8213808536529541, "learning_rate": 9.333333333333334e-05, "loss": 0.1646, "step": 1400 }, { "epoch": 2.13623046875e-06, "model_forward_time": 0.02496790885925293, "step": 1400 }, { "epoch": 2.13623046875e-06, "step": 1400, "training_step_time": 0.12115097045898438 }, { "epoch": 2.13775634765625e-06, "model_forward_time": 0.025421142578125, "step": 1401 }, { "epoch": 2.13775634765625e-06, "step": 1401, "training_step_time": 0.11547350883483887 }, { "epoch": 2.1392822265625e-06, "model_forward_time": 0.025584697723388672, "step": 1402 }, { "epoch": 2.1392822265625e-06, "step": 1402, "training_step_time": 0.11190557479858398 }, { "epoch": 2.14080810546875e-06, "model_forward_time": 0.02526092529296875, "step": 1403 }, { "epoch": 2.14080810546875e-06, "step": 1403, "training_step_time": 0.11491799354553223 }, { "epoch": 2.142333984375e-06, "model_forward_time": 0.025345325469970703, "step": 1404 }, { "epoch": 2.142333984375e-06, "step": 1404, "training_step_time": 0.1106102466583252 }, { "epoch": 2.14385986328125e-06, "model_forward_time": 0.025770187377929688, "step": 1405 }, { "epoch": 2.14385986328125e-06, "step": 1405, "training_step_time": 0.1104581356048584 }, { "epoch": 2.1453857421875e-06, "model_forward_time": 0.025198936462402344, "step": 1406 }, { "epoch": 2.1453857421875e-06, "step": 1406, "training_step_time": 0.10603857040405273 }, { "epoch": 2.14691162109375e-06, "model_forward_time": 0.02532482147216797, "step": 1407 }, { "epoch": 2.14691162109375e-06, "step": 1407, "training_step_time": 0.11012148857116699 }, { "epoch": 2.1484375e-06, "model_forward_time": 0.025563716888427734, "step": 1408 }, { "epoch": 2.1484375e-06, "step": 1408, "training_step_time": 0.10848116874694824 }, { "epoch": 2.14996337890625e-06, "model_forward_time": 0.025565147399902344, "step": 1409 }, { "epoch": 2.14996337890625e-06, "step": 1409, "training_step_time": 0.18502593040466309 }, { "epoch": 2.1514892578125e-06, "grad_norm": 0.6870678067207336, "learning_rate": 9.4e-05, "loss": 0.1654, "step": 1410 }, { "epoch": 2.1514892578125e-06, "model_forward_time": 0.024663448333740234, "step": 1410 }, { "epoch": 2.1514892578125e-06, "step": 1410, "training_step_time": 0.10868406295776367 }, { "epoch": 2.15301513671875e-06, "model_forward_time": 0.02468585968017578, "step": 1411 }, { "epoch": 2.15301513671875e-06, "step": 1411, "training_step_time": 0.1329805850982666 }, { "epoch": 2.154541015625e-06, "model_forward_time": 0.025498151779174805, "step": 1412 }, { "epoch": 2.154541015625e-06, "step": 1412, "training_step_time": 0.11503171920776367 }, { "epoch": 2.15606689453125e-06, "model_forward_time": 0.02512049674987793, "step": 1413 }, { "epoch": 2.15606689453125e-06, "step": 1413, "training_step_time": 0.12928462028503418 }, { "epoch": 2.1575927734375e-06, "model_forward_time": 0.025092363357543945, "step": 1414 }, { "epoch": 2.1575927734375e-06, "step": 1414, "training_step_time": 0.15896105766296387 }, { "epoch": 2.15911865234375e-06, "model_forward_time": 0.02454686164855957, "step": 1415 }, { "epoch": 2.15911865234375e-06, "step": 1415, "training_step_time": 0.18423247337341309 }, { "epoch": 2.16064453125e-06, "model_forward_time": 0.02487802505493164, "step": 1416 }, { "epoch": 2.16064453125e-06, "step": 1416, "training_step_time": 0.15381836891174316 }, { "epoch": 2.16217041015625e-06, "model_forward_time": 0.02419567108154297, "step": 1417 }, { "epoch": 2.16217041015625e-06, "step": 1417, "training_step_time": 0.1028435230255127 }, { "epoch": 2.1636962890625e-06, "model_forward_time": 0.024589061737060547, "step": 1418 }, { "epoch": 2.1636962890625e-06, "step": 1418, "training_step_time": 0.10290408134460449 }, { "epoch": 2.16522216796875e-06, "model_forward_time": 0.025154590606689453, "step": 1419 }, { "epoch": 2.16522216796875e-06, "step": 1419, "training_step_time": 0.10628390312194824 }, { "epoch": 2.166748046875e-06, "grad_norm": 1.0266139507293701, "learning_rate": 9.466666666666667e-05, "loss": 0.1573, "step": 1420 }, { "epoch": 2.166748046875e-06, "model_forward_time": 0.02559185028076172, "step": 1420 }, { "epoch": 2.166748046875e-06, "step": 1420, "training_step_time": 0.11061215400695801 }, { "epoch": 2.16827392578125e-06, "model_forward_time": 0.02534008026123047, "step": 1421 }, { "epoch": 2.16827392578125e-06, "step": 1421, "training_step_time": 0.13797211647033691 }, { "epoch": 2.1697998046875e-06, "model_forward_time": 0.025868892669677734, "step": 1422 }, { "epoch": 2.1697998046875e-06, "step": 1422, "training_step_time": 0.11163997650146484 }, { "epoch": 2.17132568359375e-06, "model_forward_time": 0.028405189514160156, "step": 1423 }, { "epoch": 2.17132568359375e-06, "step": 1423, "training_step_time": 0.11517572402954102 }, { "epoch": 2.1728515625e-06, "model_forward_time": 0.025037050247192383, "step": 1424 }, { "epoch": 2.1728515625e-06, "step": 1424, "training_step_time": 0.11580061912536621 }, { "epoch": 2.17437744140625e-06, "model_forward_time": 0.025238513946533203, "step": 1425 }, { "epoch": 2.17437744140625e-06, "step": 1425, "training_step_time": 0.19036149978637695 }, { "epoch": 2.1759033203125e-06, "model_forward_time": 0.02507495880126953, "step": 1426 }, { "epoch": 2.1759033203125e-06, "step": 1426, "training_step_time": 0.11758232116699219 }, { "epoch": 2.17742919921875e-06, "model_forward_time": 0.0245513916015625, "step": 1427 }, { "epoch": 2.17742919921875e-06, "step": 1427, "training_step_time": 0.11407113075256348 }, { "epoch": 2.178955078125e-06, "model_forward_time": 0.025414228439331055, "step": 1428 }, { "epoch": 2.178955078125e-06, "step": 1428, "training_step_time": 0.10669732093811035 }, { "epoch": 2.18048095703125e-06, "model_forward_time": 0.02521038055419922, "step": 1429 }, { "epoch": 2.18048095703125e-06, "step": 1429, "training_step_time": 0.10910439491271973 }, { "epoch": 2.1820068359375e-06, "grad_norm": 0.6546294689178467, "learning_rate": 9.533333333333334e-05, "loss": 0.1192, "step": 1430 }, { "epoch": 2.1820068359375e-06, "model_forward_time": 0.028010845184326172, "step": 1430 }, { "epoch": 2.1820068359375e-06, "step": 1430, "training_step_time": 0.110015869140625 }, { "epoch": 2.18353271484375e-06, "model_forward_time": 0.026095867156982422, "step": 1431 }, { "epoch": 2.18353271484375e-06, "step": 1431, "training_step_time": 0.11234498023986816 }, { "epoch": 2.18505859375e-06, "model_forward_time": 0.025310754776000977, "step": 1432 }, { "epoch": 2.18505859375e-06, "step": 1432, "training_step_time": 0.10950970649719238 }, { "epoch": 2.18658447265625e-06, "model_forward_time": 0.025457143783569336, "step": 1433 }, { "epoch": 2.18658447265625e-06, "step": 1433, "training_step_time": 0.1683039665222168 }, { "epoch": 2.1881103515625e-06, "model_forward_time": 0.025000572204589844, "step": 1434 }, { "epoch": 2.1881103515625e-06, "step": 1434, "training_step_time": 0.16678833961486816 }, { "epoch": 2.18963623046875e-06, "model_forward_time": 0.0244295597076416, "step": 1435 }, { "epoch": 2.18963623046875e-06, "step": 1435, "training_step_time": 0.1041109561920166 }, { "epoch": 2.191162109375e-06, "model_forward_time": 0.02525925636291504, "step": 1436 }, { "epoch": 2.191162109375e-06, "step": 1436, "training_step_time": 0.10423755645751953 }, { "epoch": 2.19268798828125e-06, "model_forward_time": 0.025871753692626953, "step": 1437 }, { "epoch": 2.19268798828125e-06, "step": 1437, "training_step_time": 0.1102755069732666 }, { "epoch": 2.1942138671875e-06, "model_forward_time": 0.025707006454467773, "step": 1438 }, { "epoch": 2.1942138671875e-06, "step": 1438, "training_step_time": 0.10861945152282715 }, { "epoch": 2.19573974609375e-06, "model_forward_time": 0.025672197341918945, "step": 1439 }, { "epoch": 2.19573974609375e-06, "step": 1439, "training_step_time": 0.10539579391479492 }, { "epoch": 2.197265625e-06, "grad_norm": 0.6670544743537903, "learning_rate": 9.6e-05, "loss": 0.16, "step": 1440 }, { "epoch": 2.197265625e-06, "model_forward_time": 0.02554178237915039, "step": 1440 }, { "epoch": 2.197265625e-06, "step": 1440, "training_step_time": 0.10809659957885742 }, { "epoch": 2.19879150390625e-06, "model_forward_time": 0.02510976791381836, "step": 1441 }, { "epoch": 2.19879150390625e-06, "step": 1441, "training_step_time": 0.1068410873413086 }, { "epoch": 2.2003173828125e-06, "model_forward_time": 0.02552652359008789, "step": 1442 }, { "epoch": 2.2003173828125e-06, "step": 1442, "training_step_time": 0.11005473136901855 }, { "epoch": 2.20184326171875e-06, "model_forward_time": 0.025165557861328125, "step": 1443 }, { "epoch": 2.20184326171875e-06, "step": 1443, "training_step_time": 0.10723471641540527 }, { "epoch": 2.203369140625e-06, "model_forward_time": 0.025207042694091797, "step": 1444 }, { "epoch": 2.203369140625e-06, "step": 1444, "training_step_time": 0.10516786575317383 }, { "epoch": 2.20489501953125e-06, "model_forward_time": 0.025761127471923828, "step": 1445 }, { "epoch": 2.20489501953125e-06, "step": 1445, "training_step_time": 0.10593652725219727 }, { "epoch": 2.2064208984375e-06, "model_forward_time": 0.025539398193359375, "step": 1446 }, { "epoch": 2.2064208984375e-06, "step": 1446, "training_step_time": 0.10727167129516602 }, { "epoch": 2.20794677734375e-06, "model_forward_time": 0.02572011947631836, "step": 1447 }, { "epoch": 2.20794677734375e-06, "step": 1447, "training_step_time": 0.10550975799560547 }, { "epoch": 2.20947265625e-06, "model_forward_time": 0.025275707244873047, "step": 1448 }, { "epoch": 2.20947265625e-06, "step": 1448, "training_step_time": 0.1038062572479248 }, { "epoch": 2.21099853515625e-06, "model_forward_time": 0.025598526000976562, "step": 1449 }, { "epoch": 2.21099853515625e-06, "step": 1449, "training_step_time": 0.10940861701965332 }, { "epoch": 2.2125244140625e-06, "grad_norm": 1.329904317855835, "learning_rate": 9.666666666666667e-05, "loss": 0.1751, "step": 1450 }, { "epoch": 2.2125244140625e-06, "model_forward_time": 0.02562093734741211, "step": 1450 }, { "epoch": 2.2125244140625e-06, "step": 1450, "training_step_time": 0.10740423202514648 }, { "epoch": 2.21405029296875e-06, "model_forward_time": 0.025353670120239258, "step": 1451 }, { "epoch": 2.21405029296875e-06, "step": 1451, "training_step_time": 0.10501313209533691 }, { "epoch": 2.215576171875e-06, "model_forward_time": 0.025502681732177734, "step": 1452 }, { "epoch": 2.215576171875e-06, "step": 1452, "training_step_time": 0.1130983829498291 }, { "epoch": 2.21710205078125e-06, "model_forward_time": 0.025914907455444336, "step": 1453 }, { "epoch": 2.21710205078125e-06, "step": 1453, "training_step_time": 0.10426592826843262 }, { "epoch": 2.2186279296875e-06, "model_forward_time": 0.02516913414001465, "step": 1454 }, { "epoch": 2.2186279296875e-06, "step": 1454, "training_step_time": 0.10678982734680176 }, { "epoch": 2.22015380859375e-06, "model_forward_time": 0.025825023651123047, "step": 1455 }, { "epoch": 2.22015380859375e-06, "step": 1455, "training_step_time": 0.11348795890808105 }, { "epoch": 2.2216796875e-06, "model_forward_time": 0.02558612823486328, "step": 1456 }, { "epoch": 2.2216796875e-06, "step": 1456, "training_step_time": 0.20345592498779297 }, { "epoch": 2.22320556640625e-06, "model_forward_time": 0.02460479736328125, "step": 1457 }, { "epoch": 2.22320556640625e-06, "step": 1457, "training_step_time": 0.1753673553466797 }, { "epoch": 2.2247314453125e-06, "model_forward_time": 0.024578332901000977, "step": 1458 }, { "epoch": 2.2247314453125e-06, "step": 1458, "training_step_time": 0.1734919548034668 }, { "epoch": 2.22625732421875e-06, "model_forward_time": 0.024336814880371094, "step": 1459 }, { "epoch": 2.22625732421875e-06, "step": 1459, "training_step_time": 0.16818761825561523 }, { "epoch": 2.227783203125e-06, "grad_norm": 1.1440796852111816, "learning_rate": 9.733333333333335e-05, "loss": 0.1812, "step": 1460 }, { "epoch": 2.227783203125e-06, "model_forward_time": 0.02463054656982422, "step": 1460 }, { "epoch": 2.227783203125e-06, "step": 1460, "training_step_time": 0.18352651596069336 }, { "epoch": 2.22930908203125e-06, "model_forward_time": 0.026389598846435547, "step": 1461 }, { "epoch": 2.22930908203125e-06, "step": 1461, "training_step_time": 0.1149282455444336 }, { "epoch": 2.2308349609375e-06, "model_forward_time": 0.024538278579711914, "step": 1462 }, { "epoch": 2.2308349609375e-06, "step": 1462, "training_step_time": 0.105133056640625 }, { "epoch": 2.23236083984375e-06, "model_forward_time": 0.02522754669189453, "step": 1463 }, { "epoch": 2.23236083984375e-06, "step": 1463, "training_step_time": 0.10737967491149902 }, { "epoch": 2.23388671875e-06, "model_forward_time": 0.025313377380371094, "step": 1464 }, { "epoch": 2.23388671875e-06, "step": 1464, "training_step_time": 0.10556316375732422 }, { "epoch": 2.23541259765625e-06, "model_forward_time": 0.025303363800048828, "step": 1465 }, { "epoch": 2.23541259765625e-06, "step": 1465, "training_step_time": 0.10871553421020508 }, { "epoch": 2.2369384765625e-06, "model_forward_time": 0.02571272850036621, "step": 1466 }, { "epoch": 2.2369384765625e-06, "step": 1466, "training_step_time": 0.13390660285949707 }, { "epoch": 2.23846435546875e-06, "model_forward_time": 0.025513410568237305, "step": 1467 }, { "epoch": 2.23846435546875e-06, "step": 1467, "training_step_time": 0.1381216049194336 }, { "epoch": 2.239990234375e-06, "model_forward_time": 0.026959657669067383, "step": 1468 }, { "epoch": 2.239990234375e-06, "step": 1468, "training_step_time": 0.10939860343933105 }, { "epoch": 2.24151611328125e-06, "model_forward_time": 0.025315523147583008, "step": 1469 }, { "epoch": 2.24151611328125e-06, "step": 1469, "training_step_time": 0.11547088623046875 }, { "epoch": 2.2430419921875e-06, "grad_norm": 0.6092724800109863, "learning_rate": 9.8e-05, "loss": 0.166, "step": 1470 }, { "epoch": 2.2430419921875e-06, "model_forward_time": 0.025532245635986328, "step": 1470 }, { "epoch": 2.2430419921875e-06, "step": 1470, "training_step_time": 0.10926604270935059 }, { "epoch": 2.24456787109375e-06, "model_forward_time": 0.025789976119995117, "step": 1471 }, { "epoch": 2.24456787109375e-06, "step": 1471, "training_step_time": 0.1045832633972168 }, { "epoch": 2.24609375e-06, "model_forward_time": 0.024941682815551758, "step": 1472 }, { "epoch": 2.24609375e-06, "step": 1472, "training_step_time": 0.1971895694732666 }, { "epoch": 2.24761962890625e-06, "model_forward_time": 0.024367094039916992, "step": 1473 }, { "epoch": 2.24761962890625e-06, "step": 1473, "training_step_time": 0.10470128059387207 }, { "epoch": 2.2491455078125e-06, "model_forward_time": 0.02476334571838379, "step": 1474 }, { "epoch": 2.2491455078125e-06, "step": 1474, "training_step_time": 0.10367774963378906 }, { "epoch": 2.25067138671875e-06, "model_forward_time": 0.025284290313720703, "step": 1475 }, { "epoch": 2.25067138671875e-06, "step": 1475, "training_step_time": 0.14227652549743652 }, { "epoch": 2.252197265625e-06, "model_forward_time": 0.025363922119140625, "step": 1476 }, { "epoch": 2.252197265625e-06, "step": 1476, "training_step_time": 0.12005734443664551 }, { "epoch": 2.25372314453125e-06, "model_forward_time": 0.025188207626342773, "step": 1477 }, { "epoch": 2.25372314453125e-06, "step": 1477, "training_step_time": 0.10769152641296387 }, { "epoch": 2.2552490234375e-06, "model_forward_time": 0.02599024772644043, "step": 1478 }, { "epoch": 2.2552490234375e-06, "step": 1478, "training_step_time": 0.11042118072509766 }, { "epoch": 2.25677490234375e-06, "model_forward_time": 0.026059389114379883, "step": 1479 }, { "epoch": 2.25677490234375e-06, "step": 1479, "training_step_time": 0.10733914375305176 }, { "epoch": 2.25830078125e-06, "grad_norm": 0.7142199873924255, "learning_rate": 9.866666666666668e-05, "loss": 0.1575, "step": 1480 }, { "epoch": 2.25830078125e-06, "model_forward_time": 0.025785446166992188, "step": 1480 }, { "epoch": 2.25830078125e-06, "step": 1480, "training_step_time": 0.1099233627319336 }, { "epoch": 2.25982666015625e-06, "model_forward_time": 0.0255887508392334, "step": 1481 }, { "epoch": 2.25982666015625e-06, "step": 1481, "training_step_time": 0.11271071434020996 }, { "epoch": 2.2613525390625e-06, "model_forward_time": 0.026090621948242188, "step": 1482 }, { "epoch": 2.2613525390625e-06, "step": 1482, "training_step_time": 0.10665321350097656 }, { "epoch": 2.26287841796875e-06, "model_forward_time": 0.02603745460510254, "step": 1483 }, { "epoch": 2.26287841796875e-06, "step": 1483, "training_step_time": 0.10934114456176758 }, { "epoch": 2.264404296875e-06, "model_forward_time": 0.025475025177001953, "step": 1484 }, { "epoch": 2.264404296875e-06, "step": 1484, "training_step_time": 0.1168220043182373 }, { "epoch": 2.26593017578125e-06, "model_forward_time": 0.02499532699584961, "step": 1485 }, { "epoch": 2.26593017578125e-06, "step": 1485, "training_step_time": 0.10541892051696777 }, { "epoch": 2.2674560546875e-06, "model_forward_time": 0.025171995162963867, "step": 1486 }, { "epoch": 2.2674560546875e-06, "step": 1486, "training_step_time": 0.10609817504882812 }, { "epoch": 2.26898193359375e-06, "model_forward_time": 0.025940895080566406, "step": 1487 }, { "epoch": 2.26898193359375e-06, "step": 1487, "training_step_time": 0.10712933540344238 }, { "epoch": 2.2705078125e-06, "model_forward_time": 0.025603771209716797, "step": 1488 }, { "epoch": 2.2705078125e-06, "step": 1488, "training_step_time": 0.10471200942993164 }, { "epoch": 2.27203369140625e-06, "model_forward_time": 0.02543330192565918, "step": 1489 }, { "epoch": 2.27203369140625e-06, "step": 1489, "training_step_time": 0.10407114028930664 }, { "epoch": 2.2735595703125e-06, "grad_norm": 0.9511436820030212, "learning_rate": 9.933333333333334e-05, "loss": 0.1787, "step": 1490 }, { "epoch": 2.2735595703125e-06, "model_forward_time": 0.02528858184814453, "step": 1490 }, { "epoch": 2.2735595703125e-06, "step": 1490, "training_step_time": 0.11156201362609863 }, { "epoch": 2.27508544921875e-06, "model_forward_time": 0.025616884231567383, "step": 1491 }, { "epoch": 2.27508544921875e-06, "step": 1491, "training_step_time": 0.12158775329589844 }, { "epoch": 2.276611328125e-06, "model_forward_time": 0.024897098541259766, "step": 1492 }, { "epoch": 2.276611328125e-06, "step": 1492, "training_step_time": 0.1252896785736084 }, { "epoch": 2.27813720703125e-06, "model_forward_time": 0.024675607681274414, "step": 1493 }, { "epoch": 2.27813720703125e-06, "step": 1493, "training_step_time": 0.12450051307678223 }, { "epoch": 2.2796630859375e-06, "model_forward_time": 0.024403095245361328, "step": 1494 }, { "epoch": 2.2796630859375e-06, "step": 1494, "training_step_time": 0.11598849296569824 }, { "epoch": 2.28118896484375e-06, "model_forward_time": 0.0244290828704834, "step": 1495 }, { "epoch": 2.28118896484375e-06, "step": 1495, "training_step_time": 0.12032890319824219 }, { "epoch": 2.28271484375e-06, "model_forward_time": 0.025087356567382812, "step": 1496 }, { "epoch": 2.28271484375e-06, "step": 1496, "training_step_time": 0.11457419395446777 }, { "epoch": 2.28424072265625e-06, "model_forward_time": 0.02712726593017578, "step": 1497 }, { "epoch": 2.28424072265625e-06, "step": 1497, "training_step_time": 0.11216497421264648 }, { "epoch": 2.2857666015625e-06, "model_forward_time": 0.02434396743774414, "step": 1498 }, { "epoch": 2.2857666015625e-06, "step": 1498, "training_step_time": 0.11299896240234375 }, { "epoch": 2.28729248046875e-06, "model_forward_time": 0.025021076202392578, "step": 1499 }, { "epoch": 2.28729248046875e-06, "step": 1499, "training_step_time": 0.11070585250854492 }, { "epoch": 2.288818359375e-06, "grad_norm": 0.6693169474601746, "learning_rate": 0.0001, "loss": 0.1696, "step": 1500 }, { "epoch": 2.288818359375e-06, "model_forward_time": 0.02524566650390625, "step": 1500 }, { "epoch": 2.288818359375e-06, "step": 1500, "training_step_time": 0.11181497573852539 }, { "epoch": 2.29034423828125e-06, "model_forward_time": 0.025269031524658203, "step": 1501 }, { "epoch": 2.29034423828125e-06, "step": 1501, "training_step_time": 0.10571575164794922 }, { "epoch": 2.2918701171875e-06, "model_forward_time": 0.02554488182067871, "step": 1502 }, { "epoch": 2.2918701171875e-06, "step": 1502, "training_step_time": 0.1989579200744629 }, { "epoch": 2.29339599609375e-06, "model_forward_time": 0.024329423904418945, "step": 1503 }, { "epoch": 2.29339599609375e-06, "step": 1503, "training_step_time": 0.17641091346740723 }, { "epoch": 2.294921875e-06, "model_forward_time": 0.024661779403686523, "step": 1504 }, { "epoch": 2.294921875e-06, "step": 1504, "training_step_time": 0.17810988426208496 }, { "epoch": 2.29644775390625e-06, "model_forward_time": 0.024297475814819336, "step": 1505 }, { "epoch": 2.29644775390625e-06, "step": 1505, "training_step_time": 0.15257525444030762 }, { "epoch": 2.2979736328125e-06, "model_forward_time": 0.025177001953125, "step": 1506 }, { "epoch": 2.2979736328125e-06, "step": 1506, "training_step_time": 0.10593819618225098 }, { "epoch": 2.29949951171875e-06, "model_forward_time": 0.024662494659423828, "step": 1507 }, { "epoch": 2.29949951171875e-06, "step": 1507, "training_step_time": 0.1587827205657959 }, { "epoch": 2.301025390625e-06, "model_forward_time": 0.02459263801574707, "step": 1508 }, { "epoch": 2.301025390625e-06, "step": 1508, "training_step_time": 0.12143325805664062 }, { "epoch": 2.30255126953125e-06, "model_forward_time": 0.024259090423583984, "step": 1509 }, { "epoch": 2.30255126953125e-06, "step": 1509, "training_step_time": 0.11380934715270996 }, { "epoch": 2.3040771484375e-06, "grad_norm": 0.8862974643707275, "learning_rate": 9.999996962264266e-05, "loss": 0.1563, "step": 1510 }, { "epoch": 2.3040771484375e-06, "model_forward_time": 0.025377988815307617, "step": 1510 }, { "epoch": 2.3040771484375e-06, "step": 1510, "training_step_time": 0.10877585411071777 }, { "epoch": 2.30560302734375e-06, "model_forward_time": 0.025152206420898438, "step": 1511 }, { "epoch": 2.30560302734375e-06, "step": 1511, "training_step_time": 0.10483407974243164 }, { "epoch": 2.30712890625e-06, "model_forward_time": 0.025164365768432617, "step": 1512 }, { "epoch": 2.30712890625e-06, "step": 1512, "training_step_time": 0.1571347713470459 }, { "epoch": 2.30865478515625e-06, "model_forward_time": 0.024854660034179688, "step": 1513 }, { "epoch": 2.30865478515625e-06, "step": 1513, "training_step_time": 0.14067697525024414 }, { "epoch": 2.3101806640625e-06, "model_forward_time": 0.02483510971069336, "step": 1514 }, { "epoch": 2.3101806640625e-06, "step": 1514, "training_step_time": 0.10798215866088867 }, { "epoch": 2.31170654296875e-06, "model_forward_time": 0.025162696838378906, "step": 1515 }, { "epoch": 2.31170654296875e-06, "step": 1515, "training_step_time": 0.11213088035583496 }, { "epoch": 2.313232421875e-06, "model_forward_time": 0.025634050369262695, "step": 1516 }, { "epoch": 2.313232421875e-06, "step": 1516, "training_step_time": 0.11103343963623047 }, { "epoch": 2.31475830078125e-06, "model_forward_time": 0.025055646896362305, "step": 1517 }, { "epoch": 2.31475830078125e-06, "step": 1517, "training_step_time": 0.11058259010314941 }, { "epoch": 2.3162841796875e-06, "model_forward_time": 0.02537822723388672, "step": 1518 }, { "epoch": 2.3162841796875e-06, "step": 1518, "training_step_time": 0.1532423496246338 }, { "epoch": 2.31781005859375e-06, "model_forward_time": 0.024830102920532227, "step": 1519 }, { "epoch": 2.31781005859375e-06, "step": 1519, "training_step_time": 0.10341310501098633 }, { "epoch": 2.3193359375e-06, "grad_norm": 1.0126616954803467, "learning_rate": 9.999987849060753e-05, "loss": 0.1561, "step": 1520 }, { "epoch": 2.3193359375e-06, "model_forward_time": 0.025298595428466797, "step": 1520 }, { "epoch": 2.3193359375e-06, "step": 1520, "training_step_time": 0.10521435737609863 }, { "epoch": 2.32086181640625e-06, "model_forward_time": 0.029204368591308594, "step": 1521 }, { "epoch": 2.32086181640625e-06, "step": 1521, "training_step_time": 0.11081242561340332 }, { "epoch": 2.3223876953125e-06, "model_forward_time": 0.02555561065673828, "step": 1522 }, { "epoch": 2.3223876953125e-06, "step": 1522, "training_step_time": 0.10884857177734375 }, { "epoch": 2.32391357421875e-06, "model_forward_time": 0.025867700576782227, "step": 1523 }, { "epoch": 2.32391357421875e-06, "step": 1523, "training_step_time": 0.129561185836792 }, { "epoch": 2.325439453125e-06, "model_forward_time": 0.02549004554748535, "step": 1524 }, { "epoch": 2.325439453125e-06, "step": 1524, "training_step_time": 0.1743464469909668 }, { "epoch": 2.32696533203125e-06, "model_forward_time": 0.024520158767700195, "step": 1525 }, { "epoch": 2.32696533203125e-06, "step": 1525, "training_step_time": 0.13298964500427246 }, { "epoch": 2.3284912109375e-06, "model_forward_time": 0.02425527572631836, "step": 1526 }, { "epoch": 2.3284912109375e-06, "step": 1526, "training_step_time": 0.12737154960632324 }, { "epoch": 2.33001708984375e-06, "model_forward_time": 0.025834321975708008, "step": 1527 }, { "epoch": 2.33001708984375e-06, "step": 1527, "training_step_time": 0.11346840858459473 }, { "epoch": 2.33154296875e-06, "model_forward_time": 0.025515317916870117, "step": 1528 }, { "epoch": 2.33154296875e-06, "step": 1528, "training_step_time": 0.11336469650268555 }, { "epoch": 2.33306884765625e-06, "model_forward_time": 0.025148868560791016, "step": 1529 }, { "epoch": 2.33306884765625e-06, "step": 1529, "training_step_time": 0.11333703994750977 }, { "epoch": 2.3345947265625e-06, "grad_norm": 0.6787976622581482, "learning_rate": 9.999972660400536e-05, "loss": 0.1554, "step": 1530 }, { "epoch": 2.3345947265625e-06, "model_forward_time": 0.025073528289794922, "step": 1530 }, { "epoch": 2.3345947265625e-06, "step": 1530, "training_step_time": 0.1073770523071289 }, { "epoch": 2.33612060546875e-06, "model_forward_time": 0.02554774284362793, "step": 1531 }, { "epoch": 2.33612060546875e-06, "step": 1531, "training_step_time": 0.10764288902282715 }, { "epoch": 2.337646484375e-06, "model_forward_time": 0.025145530700683594, "step": 1532 }, { "epoch": 2.337646484375e-06, "step": 1532, "training_step_time": 0.10599684715270996 }, { "epoch": 2.33917236328125e-06, "model_forward_time": 0.02475285530090332, "step": 1533 }, { "epoch": 2.33917236328125e-06, "step": 1533, "training_step_time": 0.108551025390625 }, { "epoch": 2.3406982421875e-06, "model_forward_time": 0.024851322174072266, "step": 1534 }, { "epoch": 2.3406982421875e-06, "step": 1534, "training_step_time": 0.10662627220153809 }, { "epoch": 2.34222412109375e-06, "model_forward_time": 0.025217294692993164, "step": 1535 }, { "epoch": 2.34222412109375e-06, "step": 1535, "training_step_time": 0.10664987564086914 }, { "epoch": 2.34375e-06, "model_forward_time": 0.025756359100341797, "step": 1536 }, { "epoch": 2.34375e-06, "step": 1536, "training_step_time": 0.10522127151489258 }, { "epoch": 2.34527587890625e-06, "model_forward_time": 0.029313087463378906, "step": 1537 }, { "epoch": 2.34527587890625e-06, "step": 1537, "training_step_time": 0.11299586296081543 }, { "epoch": 2.3468017578125e-06, "model_forward_time": 0.02495861053466797, "step": 1538 }, { "epoch": 2.3468017578125e-06, "step": 1538, "training_step_time": 0.10603451728820801 }, { "epoch": 2.34832763671875e-06, "model_forward_time": 0.02507781982421875, "step": 1539 }, { "epoch": 2.34832763671875e-06, "step": 1539, "training_step_time": 0.10508990287780762 }, { "epoch": 2.349853515625e-06, "grad_norm": 1.0427889823913574, "learning_rate": 9.999951396302069e-05, "loss": 0.1667, "step": 1540 }, { "epoch": 2.349853515625e-06, "model_forward_time": 0.024873971939086914, "step": 1540 }, { "epoch": 2.349853515625e-06, "step": 1540, "training_step_time": 0.10645270347595215 }, { "epoch": 2.35137939453125e-06, "model_forward_time": 0.025048255920410156, "step": 1541 }, { "epoch": 2.35137939453125e-06, "step": 1541, "training_step_time": 0.10559558868408203 }, { "epoch": 2.3529052734375e-06, "model_forward_time": 0.02483963966369629, "step": 1542 }, { "epoch": 2.3529052734375e-06, "step": 1542, "training_step_time": 0.10787081718444824 }, { "epoch": 2.35443115234375e-06, "model_forward_time": 0.025114774703979492, "step": 1543 }, { "epoch": 2.35443115234375e-06, "step": 1543, "training_step_time": 0.10422015190124512 }, { "epoch": 2.35595703125e-06, "model_forward_time": 0.02503800392150879, "step": 1544 }, { "epoch": 2.35595703125e-06, "step": 1544, "training_step_time": 0.10575270652770996 }, { "epoch": 2.35748291015625e-06, "model_forward_time": 0.024692058563232422, "step": 1545 }, { "epoch": 2.35748291015625e-06, "step": 1545, "training_step_time": 0.11096620559692383 }, { "epoch": 2.3590087890625e-06, "model_forward_time": 0.02535390853881836, "step": 1546 }, { "epoch": 2.3590087890625e-06, "step": 1546, "training_step_time": 0.11087369918823242 }, { "epoch": 2.36053466796875e-06, "model_forward_time": 0.02507495880126953, "step": 1547 }, { "epoch": 2.36053466796875e-06, "step": 1547, "training_step_time": 0.1046602725982666 }, { "epoch": 2.362060546875e-06, "model_forward_time": 0.0252227783203125, "step": 1548 }, { "epoch": 2.362060546875e-06, "step": 1548, "training_step_time": 0.10464715957641602 }, { "epoch": 2.36358642578125e-06, "model_forward_time": 0.02490830421447754, "step": 1549 }, { "epoch": 2.36358642578125e-06, "step": 1549, "training_step_time": 0.2046661376953125 }, { "epoch": 2.3651123046875e-06, "grad_norm": 0.6926366686820984, "learning_rate": 9.999924056791192e-05, "loss": 0.187, "step": 1550 }, { "epoch": 2.3651123046875e-06, "model_forward_time": 0.02468729019165039, "step": 1550 }, { "epoch": 2.3651123046875e-06, "step": 1550, "training_step_time": 0.21181702613830566 }, { "epoch": 2.36663818359375e-06, "model_forward_time": 0.02469921112060547, "step": 1551 }, { "epoch": 2.36663818359375e-06, "step": 1551, "training_step_time": 0.12556934356689453 }, { "epoch": 2.3681640625e-06, "model_forward_time": 0.024120330810546875, "step": 1552 }, { "epoch": 2.3681640625e-06, "step": 1552, "training_step_time": 0.1341536045074463 }, { "epoch": 2.36968994140625e-06, "model_forward_time": 0.025313615798950195, "step": 1553 }, { "epoch": 2.36968994140625e-06, "step": 1553, "training_step_time": 0.14391827583312988 }, { "epoch": 2.3712158203125e-06, "model_forward_time": 0.025265216827392578, "step": 1554 }, { "epoch": 2.3712158203125e-06, "step": 1554, "training_step_time": 0.17690753936767578 }, { "epoch": 2.37274169921875e-06, "model_forward_time": 0.02501368522644043, "step": 1555 }, { "epoch": 2.37274169921875e-06, "step": 1555, "training_step_time": 0.1683824062347412 }, { "epoch": 2.374267578125e-06, "model_forward_time": 0.024725914001464844, "step": 1556 }, { "epoch": 2.374267578125e-06, "step": 1556, "training_step_time": 0.10369706153869629 }, { "epoch": 2.37579345703125e-06, "model_forward_time": 0.024688005447387695, "step": 1557 }, { "epoch": 2.37579345703125e-06, "step": 1557, "training_step_time": 0.10619783401489258 }, { "epoch": 2.3773193359375e-06, "model_forward_time": 0.02528667449951172, "step": 1558 }, { "epoch": 2.3773193359375e-06, "step": 1558, "training_step_time": 0.16920804977416992 }, { "epoch": 2.37884521484375e-06, "model_forward_time": 0.02508234977722168, "step": 1559 }, { "epoch": 2.37884521484375e-06, "step": 1559, "training_step_time": 0.17266845703125 }, { "epoch": 2.38037109375e-06, "grad_norm": 0.8135605454444885, "learning_rate": 9.999890641901125e-05, "loss": 0.1778, "step": 1560 }, { "epoch": 2.38037109375e-06, "model_forward_time": 0.024245738983154297, "step": 1560 }, { "epoch": 2.38037109375e-06, "step": 1560, "training_step_time": 0.10927152633666992 }, { "epoch": 2.38189697265625e-06, "model_forward_time": 0.02481222152709961, "step": 1561 }, { "epoch": 2.38189697265625e-06, "step": 1561, "training_step_time": 0.10656404495239258 }, { "epoch": 2.3834228515625e-06, "model_forward_time": 0.025086641311645508, "step": 1562 }, { "epoch": 2.3834228515625e-06, "step": 1562, "training_step_time": 0.11495614051818848 }, { "epoch": 2.38494873046875e-06, "model_forward_time": 0.025652647018432617, "step": 1563 }, { "epoch": 2.38494873046875e-06, "step": 1563, "training_step_time": 0.10726046562194824 }, { "epoch": 2.386474609375e-06, "model_forward_time": 0.025349140167236328, "step": 1564 }, { "epoch": 2.386474609375e-06, "step": 1564, "training_step_time": 0.1928558349609375 }, { "epoch": 2.38800048828125e-06, "model_forward_time": 0.024384498596191406, "step": 1565 }, { "epoch": 2.38800048828125e-06, "step": 1565, "training_step_time": 0.10361766815185547 }, { "epoch": 2.3895263671875e-06, "model_forward_time": 0.02527022361755371, "step": 1566 }, { "epoch": 2.3895263671875e-06, "step": 1566, "training_step_time": 0.10757112503051758 }, { "epoch": 2.39105224609375e-06, "model_forward_time": 0.02475118637084961, "step": 1567 }, { "epoch": 2.39105224609375e-06, "step": 1567, "training_step_time": 0.10572028160095215 }, { "epoch": 2.392578125e-06, "model_forward_time": 0.024987220764160156, "step": 1568 }, { "epoch": 2.392578125e-06, "step": 1568, "training_step_time": 0.11092543601989746 }, { "epoch": 2.39410400390625e-06, "model_forward_time": 0.025340557098388672, "step": 1569 }, { "epoch": 2.39410400390625e-06, "step": 1569, "training_step_time": 0.11502385139465332 }, { "epoch": 2.3956298828125e-06, "grad_norm": 0.7793666124343872, "learning_rate": 9.999851151672466e-05, "loss": 0.1475, "step": 1570 }, { "epoch": 2.3956298828125e-06, "model_forward_time": 0.025092363357543945, "step": 1570 }, { "epoch": 2.3956298828125e-06, "step": 1570, "training_step_time": 0.11787033081054688 }, { "epoch": 2.39715576171875e-06, "model_forward_time": 0.025498151779174805, "step": 1571 }, { "epoch": 2.39715576171875e-06, "step": 1571, "training_step_time": 0.21085405349731445 }, { "epoch": 2.398681640625e-06, "model_forward_time": 0.024493932723999023, "step": 1572 }, { "epoch": 2.398681640625e-06, "step": 1572, "training_step_time": 0.1144406795501709 }, { "epoch": 2.40020751953125e-06, "model_forward_time": 0.0244443416595459, "step": 1573 }, { "epoch": 2.40020751953125e-06, "step": 1573, "training_step_time": 0.11126399040222168 }, { "epoch": 2.4017333984375e-06, "model_forward_time": 0.025219440460205078, "step": 1574 }, { "epoch": 2.4017333984375e-06, "step": 1574, "training_step_time": 0.11485075950622559 }, { "epoch": 2.40325927734375e-06, "model_forward_time": 0.025014638900756836, "step": 1575 }, { "epoch": 2.40325927734375e-06, "step": 1575, "training_step_time": 0.11290979385375977 }, { "epoch": 2.40478515625e-06, "model_forward_time": 0.025056123733520508, "step": 1576 }, { "epoch": 2.40478515625e-06, "step": 1576, "training_step_time": 0.11079597473144531 }, { "epoch": 2.40631103515625e-06, "model_forward_time": 0.0247952938079834, "step": 1577 }, { "epoch": 2.40631103515625e-06, "step": 1577, "training_step_time": 0.11685681343078613 }, { "epoch": 2.4078369140625e-06, "model_forward_time": 0.02493453025817871, "step": 1578 }, { "epoch": 2.4078369140625e-06, "step": 1578, "training_step_time": 0.11461615562438965 }, { "epoch": 2.40936279296875e-06, "model_forward_time": 0.02508831024169922, "step": 1579 }, { "epoch": 2.40936279296875e-06, "step": 1579, "training_step_time": 0.11315345764160156 }, { "epoch": 2.410888671875e-06, "grad_norm": 1.0081157684326172, "learning_rate": 9.999805586153205e-05, "loss": 0.1533, "step": 1580 }, { "epoch": 2.410888671875e-06, "model_forward_time": 0.024403810501098633, "step": 1580 }, { "epoch": 2.410888671875e-06, "step": 1580, "training_step_time": 0.10820603370666504 }, { "epoch": 2.41241455078125e-06, "model_forward_time": 0.028634071350097656, "step": 1581 }, { "epoch": 2.41241455078125e-06, "step": 1581, "training_step_time": 0.11458706855773926 }, { "epoch": 2.4139404296875e-06, "model_forward_time": 0.024892330169677734, "step": 1582 }, { "epoch": 2.4139404296875e-06, "step": 1582, "training_step_time": 0.10680365562438965 }, { "epoch": 2.41546630859375e-06, "model_forward_time": 0.025922536849975586, "step": 1583 }, { "epoch": 2.41546630859375e-06, "step": 1583, "training_step_time": 0.10855865478515625 }, { "epoch": 2.4169921875e-06, "model_forward_time": 0.025766849517822266, "step": 1584 }, { "epoch": 2.4169921875e-06, "step": 1584, "training_step_time": 0.10608816146850586 }, { "epoch": 2.41851806640625e-06, "model_forward_time": 0.025815248489379883, "step": 1585 }, { "epoch": 2.41851806640625e-06, "step": 1585, "training_step_time": 0.11077761650085449 }, { "epoch": 2.4200439453125e-06, "model_forward_time": 0.025668859481811523, "step": 1586 }, { "epoch": 2.4200439453125e-06, "step": 1586, "training_step_time": 0.10840249061584473 }, { "epoch": 2.42156982421875e-06, "model_forward_time": 0.025699853897094727, "step": 1587 }, { "epoch": 2.42156982421875e-06, "step": 1587, "training_step_time": 0.10577964782714844 }, { "epoch": 2.423095703125e-06, "model_forward_time": 0.025195598602294922, "step": 1588 }, { "epoch": 2.423095703125e-06, "step": 1588, "training_step_time": 0.10590195655822754 }, { "epoch": 2.42462158203125e-06, "model_forward_time": 0.026769161224365234, "step": 1589 }, { "epoch": 2.42462158203125e-06, "step": 1589, "training_step_time": 0.11101388931274414 }, { "epoch": 2.4261474609375e-06, "grad_norm": 0.8160147070884705, "learning_rate": 9.999753945398704e-05, "loss": 0.1827, "step": 1590 }, { "epoch": 2.4261474609375e-06, "model_forward_time": 0.024907827377319336, "step": 1590 }, { "epoch": 2.4261474609375e-06, "step": 1590, "training_step_time": 0.10895228385925293 }, { "epoch": 2.42767333984375e-06, "model_forward_time": 0.025563955307006836, "step": 1591 }, { "epoch": 2.42767333984375e-06, "step": 1591, "training_step_time": 0.10527420043945312 }, { "epoch": 2.42919921875e-06, "model_forward_time": 0.029664039611816406, "step": 1592 }, { "epoch": 2.42919921875e-06, "step": 1592, "training_step_time": 0.1105034351348877 }, { "epoch": 2.43072509765625e-06, "model_forward_time": 0.02521681785583496, "step": 1593 }, { "epoch": 2.43072509765625e-06, "step": 1593, "training_step_time": 0.1050713062286377 }, { "epoch": 2.4322509765625e-06, "model_forward_time": 0.02588486671447754, "step": 1594 }, { "epoch": 2.4322509765625e-06, "step": 1594, "training_step_time": 0.1811234951019287 }, { "epoch": 2.43377685546875e-06, "model_forward_time": 0.024675607681274414, "step": 1595 }, { "epoch": 2.43377685546875e-06, "step": 1595, "training_step_time": 0.18916702270507812 }, { "epoch": 2.435302734375e-06, "model_forward_time": 0.024202585220336914, "step": 1596 }, { "epoch": 2.435302734375e-06, "step": 1596, "training_step_time": 0.16530394554138184 }, { "epoch": 2.43682861328125e-06, "model_forward_time": 0.024588823318481445, "step": 1597 }, { "epoch": 2.43682861328125e-06, "step": 1597, "training_step_time": 0.1716609001159668 }, { "epoch": 2.4383544921875e-06, "model_forward_time": 0.02463054656982422, "step": 1598 }, { "epoch": 2.4383544921875e-06, "step": 1598, "training_step_time": 0.18728399276733398 }, { "epoch": 2.43988037109375e-06, "model_forward_time": 0.025175809860229492, "step": 1599 }, { "epoch": 2.43988037109375e-06, "step": 1599, "training_step_time": 0.10735058784484863 }, { "epoch": 2.44140625e-06, "grad_norm": 1.0092593431472778, "learning_rate": 9.999696229471716e-05, "loss": 0.1709, "step": 1600 }, { "epoch": 2.44140625e-06, "model_forward_time": 0.024734020233154297, "step": 1600 }, { "epoch": 2.44140625e-06, "step": 1600, "training_step_time": 0.1192929744720459 }, { "epoch": 2.44293212890625e-06, "model_forward_time": 0.02531719207763672, "step": 1601 }, { "epoch": 2.44293212890625e-06, "step": 1601, "training_step_time": 0.10948729515075684 }, { "epoch": 2.4444580078125e-06, "model_forward_time": 0.025701284408569336, "step": 1602 }, { "epoch": 2.4444580078125e-06, "step": 1602, "training_step_time": 0.11162304878234863 }, { "epoch": 2.44598388671875e-06, "model_forward_time": 0.026262283325195312, "step": 1603 }, { "epoch": 2.44598388671875e-06, "step": 1603, "training_step_time": 0.10985779762268066 }, { "epoch": 2.447509765625e-06, "model_forward_time": 0.025617361068725586, "step": 1604 }, { "epoch": 2.447509765625e-06, "step": 1604, "training_step_time": 0.11783075332641602 }, { "epoch": 2.44903564453125e-06, "model_forward_time": 0.025620698928833008, "step": 1605 }, { "epoch": 2.44903564453125e-06, "step": 1605, "training_step_time": 0.14901280403137207 }, { "epoch": 2.4505615234375e-06, "model_forward_time": 0.02506113052368164, "step": 1606 }, { "epoch": 2.4505615234375e-06, "step": 1606, "training_step_time": 0.11181807518005371 }, { "epoch": 2.45208740234375e-06, "model_forward_time": 0.025460481643676758, "step": 1607 }, { "epoch": 2.45208740234375e-06, "step": 1607, "training_step_time": 0.11501336097717285 }, { "epoch": 2.45361328125e-06, "model_forward_time": 0.025843143463134766, "step": 1608 }, { "epoch": 2.45361328125e-06, "step": 1608, "training_step_time": 0.11146688461303711 }, { "epoch": 2.45513916015625e-06, "model_forward_time": 0.024762868881225586, "step": 1609 }, { "epoch": 2.45513916015625e-06, "step": 1609, "training_step_time": 0.18772244453430176 }, { "epoch": 2.4566650390625e-06, "grad_norm": 0.6679033637046814, "learning_rate": 9.999632438442367e-05, "loss": 0.1554, "step": 1610 }, { "epoch": 2.4566650390625e-06, "model_forward_time": 0.024842500686645508, "step": 1610 }, { "epoch": 2.4566650390625e-06, "step": 1610, "training_step_time": 0.11209440231323242 }, { "epoch": 2.45819091796875e-06, "model_forward_time": 0.024820327758789062, "step": 1611 }, { "epoch": 2.45819091796875e-06, "step": 1611, "training_step_time": 0.11181807518005371 }, { "epoch": 2.459716796875e-06, "model_forward_time": 0.0253753662109375, "step": 1612 }, { "epoch": 2.459716796875e-06, "step": 1612, "training_step_time": 0.10635232925415039 }, { "epoch": 2.46124267578125e-06, "model_forward_time": 0.024628162384033203, "step": 1613 }, { "epoch": 2.46124267578125e-06, "step": 1613, "training_step_time": 0.10764741897583008 }, { "epoch": 2.4627685546875e-06, "model_forward_time": 0.025855302810668945, "step": 1614 }, { "epoch": 2.4627685546875e-06, "step": 1614, "training_step_time": 0.11245179176330566 }, { "epoch": 2.46429443359375e-06, "model_forward_time": 0.025545835494995117, "step": 1615 }, { "epoch": 2.46429443359375e-06, "step": 1615, "training_step_time": 0.10765624046325684 }, { "epoch": 2.4658203125e-06, "model_forward_time": 0.025587797164916992, "step": 1616 }, { "epoch": 2.4658203125e-06, "step": 1616, "training_step_time": 0.10793781280517578 }, { "epoch": 2.46734619140625e-06, "model_forward_time": 0.025378942489624023, "step": 1617 }, { "epoch": 2.46734619140625e-06, "step": 1617, "training_step_time": 0.10886073112487793 }, { "epoch": 2.4688720703125e-06, "model_forward_time": 0.025366783142089844, "step": 1618 }, { "epoch": 2.4688720703125e-06, "step": 1618, "training_step_time": 0.11404919624328613 }, { "epoch": 2.47039794921875e-06, "model_forward_time": 0.025451183319091797, "step": 1619 }, { "epoch": 2.47039794921875e-06, "step": 1619, "training_step_time": 0.11955642700195312 }, { "epoch": 2.471923828125e-06, "grad_norm": 0.9833411574363708, "learning_rate": 9.99956257238817e-05, "loss": 0.1512, "step": 1620 }, { "epoch": 2.471923828125e-06, "model_forward_time": 0.025299549102783203, "step": 1620 }, { "epoch": 2.471923828125e-06, "step": 1620, "training_step_time": 0.10600805282592773 }, { "epoch": 2.47344970703125e-06, "model_forward_time": 0.02526545524597168, "step": 1621 }, { "epoch": 2.47344970703125e-06, "step": 1621, "training_step_time": 0.10697603225708008 }, { "epoch": 2.4749755859375e-06, "model_forward_time": 0.025009870529174805, "step": 1622 }, { "epoch": 2.4749755859375e-06, "step": 1622, "training_step_time": 0.10750985145568848 }, { "epoch": 2.47650146484375e-06, "model_forward_time": 0.02521038055419922, "step": 1623 }, { "epoch": 2.47650146484375e-06, "step": 1623, "training_step_time": 0.11015105247497559 }, { "epoch": 2.47802734375e-06, "model_forward_time": 0.02494192123413086, "step": 1624 }, { "epoch": 2.47802734375e-06, "step": 1624, "training_step_time": 0.10486960411071777 }, { "epoch": 2.47955322265625e-06, "model_forward_time": 0.02512359619140625, "step": 1625 }, { "epoch": 2.47955322265625e-06, "step": 1625, "training_step_time": 0.10471177101135254 }, { "epoch": 2.4810791015625e-06, "model_forward_time": 0.025434017181396484, "step": 1626 }, { "epoch": 2.4810791015625e-06, "step": 1626, "training_step_time": 0.10604977607727051 }, { "epoch": 2.48260498046875e-06, "model_forward_time": 0.0268862247467041, "step": 1627 }, { "epoch": 2.48260498046875e-06, "step": 1627, "training_step_time": 0.11164212226867676 }, { "epoch": 2.484130859375e-06, "model_forward_time": 0.024907827377319336, "step": 1628 }, { "epoch": 2.484130859375e-06, "step": 1628, "training_step_time": 0.10750126838684082 }, { "epoch": 2.48565673828125e-06, "model_forward_time": 0.025626659393310547, "step": 1629 }, { "epoch": 2.48565673828125e-06, "step": 1629, "training_step_time": 0.10658764839172363 }, { "epoch": 2.4871826171875e-06, "grad_norm": 0.9036684632301331, "learning_rate": 9.999486631394021e-05, "loss": 0.1753, "step": 1630 }, { "epoch": 2.4871826171875e-06, "model_forward_time": 0.02560281753540039, "step": 1630 }, { "epoch": 2.4871826171875e-06, "step": 1630, "training_step_time": 0.10422348976135254 }, { "epoch": 2.48870849609375e-06, "model_forward_time": 0.025482177734375, "step": 1631 }, { "epoch": 2.48870849609375e-06, "step": 1631, "training_step_time": 0.1080784797668457 }, { "epoch": 2.490234375e-06, "model_forward_time": 0.025289535522460938, "step": 1632 }, { "epoch": 2.490234375e-06, "step": 1632, "training_step_time": 0.10364174842834473 }, { "epoch": 2.49176025390625e-06, "model_forward_time": 0.025511980056762695, "step": 1633 }, { "epoch": 2.49176025390625e-06, "step": 1633, "training_step_time": 0.10306596755981445 }, { "epoch": 2.4932861328125e-06, "model_forward_time": 0.025533199310302734, "step": 1634 }, { "epoch": 2.4932861328125e-06, "step": 1634, "training_step_time": 0.10653114318847656 }, { "epoch": 2.49481201171875e-06, "model_forward_time": 0.02448439598083496, "step": 1635 }, { "epoch": 2.49481201171875e-06, "step": 1635, "training_step_time": 0.10642290115356445 }, { "epoch": 2.496337890625e-06, "model_forward_time": 0.025453567504882812, "step": 1636 }, { "epoch": 2.496337890625e-06, "step": 1636, "training_step_time": 0.10813236236572266 }, { "epoch": 2.49786376953125e-06, "model_forward_time": 0.02580857276916504, "step": 1637 }, { "epoch": 2.49786376953125e-06, "step": 1637, "training_step_time": 0.1059873104095459 }, { "epoch": 2.4993896484375e-06, "model_forward_time": 0.02550220489501953, "step": 1638 }, { "epoch": 2.4993896484375e-06, "step": 1638, "training_step_time": 0.10642313957214355 }, { "epoch": 2.50091552734375e-06, "model_forward_time": 0.025683164596557617, "step": 1639 }, { "epoch": 2.50091552734375e-06, "step": 1639, "training_step_time": 0.10556793212890625 }, { "epoch": 2.50244140625e-06, "grad_norm": 1.1588786840438843, "learning_rate": 9.999404615552194e-05, "loss": 0.1688, "step": 1640 }, { "epoch": 2.50244140625e-06, "model_forward_time": 0.025720834732055664, "step": 1640 }, { "epoch": 2.50244140625e-06, "step": 1640, "training_step_time": 0.10625624656677246 }, { "epoch": 2.50396728515625e-06, "model_forward_time": 0.02566981315612793, "step": 1641 }, { "epoch": 2.50396728515625e-06, "step": 1641, "training_step_time": 0.10621213912963867 }, { "epoch": 2.5054931640625e-06, "model_forward_time": 0.025335311889648438, "step": 1642 }, { "epoch": 2.5054931640625e-06, "step": 1642, "training_step_time": 0.14310026168823242 }, { "epoch": 2.50701904296875e-06, "model_forward_time": 0.025811195373535156, "step": 1643 }, { "epoch": 2.50701904296875e-06, "step": 1643, "training_step_time": 0.14880943298339844 }, { "epoch": 2.508544921875e-06, "model_forward_time": 0.025105714797973633, "step": 1644 }, { "epoch": 2.508544921875e-06, "step": 1644, "training_step_time": 0.19982671737670898 }, { "epoch": 2.51007080078125e-06, "model_forward_time": 0.024760723114013672, "step": 1645 }, { "epoch": 2.51007080078125e-06, "step": 1645, "training_step_time": 0.14131951332092285 }, { "epoch": 2.5115966796875e-06, "model_forward_time": 0.024747848510742188, "step": 1646 }, { "epoch": 2.5115966796875e-06, "step": 1646, "training_step_time": 0.15313315391540527 }, { "epoch": 2.51312255859375e-06, "model_forward_time": 0.02459120750427246, "step": 1647 }, { "epoch": 2.51312255859375e-06, "step": 1647, "training_step_time": 0.1614854335784912 }, { "epoch": 2.5146484375e-06, "model_forward_time": 0.025159597396850586, "step": 1648 }, { "epoch": 2.5146484375e-06, "step": 1648, "training_step_time": 0.11337447166442871 }, { "epoch": 2.51617431640625e-06, "model_forward_time": 0.024147748947143555, "step": 1649 }, { "epoch": 2.51617431640625e-06, "step": 1649, "training_step_time": 0.11206936836242676 }, { "epoch": 2.5177001953125e-06, "grad_norm": 0.9198539853096008, "learning_rate": 9.999316524962345e-05, "loss": 0.1555, "step": 1650 }, { "epoch": 2.5177001953125e-06, "model_forward_time": 0.029583215713500977, "step": 1650 }, { "epoch": 2.5177001953125e-06, "step": 1650, "training_step_time": 0.11736631393432617 }, { "epoch": 2.51922607421875e-06, "model_forward_time": 0.025638341903686523, "step": 1651 }, { "epoch": 2.51922607421875e-06, "step": 1651, "training_step_time": 0.17767643928527832 }, { "epoch": 2.520751953125e-06, "model_forward_time": 0.024290084838867188, "step": 1652 }, { "epoch": 2.520751953125e-06, "step": 1652, "training_step_time": 0.13448023796081543 }, { "epoch": 2.52227783203125e-06, "model_forward_time": 0.024469614028930664, "step": 1653 }, { "epoch": 2.52227783203125e-06, "step": 1653, "training_step_time": 0.10694003105163574 }, { "epoch": 2.5238037109375e-06, "model_forward_time": 0.025369882583618164, "step": 1654 }, { "epoch": 2.5238037109375e-06, "step": 1654, "training_step_time": 0.11945462226867676 }, { "epoch": 2.52532958984375e-06, "model_forward_time": 0.025226593017578125, "step": 1655 }, { "epoch": 2.52532958984375e-06, "step": 1655, "training_step_time": 0.11599254608154297 }, { "epoch": 2.52685546875e-06, "model_forward_time": 0.02557682991027832, "step": 1656 }, { "epoch": 2.52685546875e-06, "step": 1656, "training_step_time": 0.11301517486572266 }, { "epoch": 2.52838134765625e-06, "model_forward_time": 0.025043010711669922, "step": 1657 }, { "epoch": 2.52838134765625e-06, "step": 1657, "training_step_time": 0.1908574104309082 }, { "epoch": 2.5299072265625e-06, "model_forward_time": 0.026215553283691406, "step": 1658 }, { "epoch": 2.5299072265625e-06, "step": 1658, "training_step_time": 0.10634136199951172 }, { "epoch": 2.53143310546875e-06, "model_forward_time": 0.02524113655090332, "step": 1659 }, { "epoch": 2.53143310546875e-06, "step": 1659, "training_step_time": 0.10634851455688477 }, { "epoch": 2.532958984375e-06, "grad_norm": 1.0522770881652832, "learning_rate": 9.999222359731514e-05, "loss": 0.1707, "step": 1660 }, { "epoch": 2.532958984375e-06, "model_forward_time": 0.024770021438598633, "step": 1660 }, { "epoch": 2.532958984375e-06, "step": 1660, "training_step_time": 0.10791730880737305 }, { "epoch": 2.53448486328125e-06, "model_forward_time": 0.024999618530273438, "step": 1661 }, { "epoch": 2.53448486328125e-06, "step": 1661, "training_step_time": 0.11050176620483398 }, { "epoch": 2.5360107421875e-06, "model_forward_time": 0.025586605072021484, "step": 1662 }, { "epoch": 2.5360107421875e-06, "step": 1662, "training_step_time": 0.11785173416137695 }, { "epoch": 2.53753662109375e-06, "model_forward_time": 0.0258634090423584, "step": 1663 }, { "epoch": 2.53753662109375e-06, "step": 1663, "training_step_time": 0.11034440994262695 }, { "epoch": 2.5390625e-06, "model_forward_time": 0.025838851928710938, "step": 1664 }, { "epoch": 2.5390625e-06, "step": 1664, "training_step_time": 0.21900415420532227 }, { "epoch": 2.54058837890625e-06, "model_forward_time": 0.024809837341308594, "step": 1665 }, { "epoch": 2.54058837890625e-06, "step": 1665, "training_step_time": 0.11528658866882324 }, { "epoch": 2.5421142578125e-06, "model_forward_time": 0.024857282638549805, "step": 1666 }, { "epoch": 2.5421142578125e-06, "step": 1666, "training_step_time": 0.10450148582458496 }, { "epoch": 2.54364013671875e-06, "model_forward_time": 0.025818347930908203, "step": 1667 }, { "epoch": 2.54364013671875e-06, "step": 1667, "training_step_time": 0.10629725456237793 }, { "epoch": 2.545166015625e-06, "model_forward_time": 0.025442838668823242, "step": 1668 }, { "epoch": 2.545166015625e-06, "step": 1668, "training_step_time": 0.11000633239746094 }, { "epoch": 2.54669189453125e-06, "model_forward_time": 0.025569677352905273, "step": 1669 }, { "epoch": 2.54669189453125e-06, "step": 1669, "training_step_time": 0.1071164608001709 }, { "epoch": 2.5482177734375e-06, "grad_norm": 0.6459003686904907, "learning_rate": 9.999122119974121e-05, "loss": 0.1302, "step": 1670 }, { "epoch": 2.5482177734375e-06, "model_forward_time": 0.025876283645629883, "step": 1670 }, { "epoch": 2.5482177734375e-06, "step": 1670, "training_step_time": 0.10610389709472656 }, { "epoch": 2.54974365234375e-06, "model_forward_time": 0.025436878204345703, "step": 1671 }, { "epoch": 2.54974365234375e-06, "step": 1671, "training_step_time": 0.10762882232666016 }, { "epoch": 2.55126953125e-06, "model_forward_time": 0.024594545364379883, "step": 1672 }, { "epoch": 2.55126953125e-06, "step": 1672, "training_step_time": 0.11058759689331055 }, { "epoch": 2.55279541015625e-06, "model_forward_time": 0.02453923225402832, "step": 1673 }, { "epoch": 2.55279541015625e-06, "step": 1673, "training_step_time": 0.10865974426269531 }, { "epoch": 2.5543212890625e-06, "model_forward_time": 0.024367332458496094, "step": 1674 }, { "epoch": 2.5543212890625e-06, "step": 1674, "training_step_time": 0.10678243637084961 }, { "epoch": 2.55584716796875e-06, "model_forward_time": 0.024334192276000977, "step": 1675 }, { "epoch": 2.55584716796875e-06, "step": 1675, "training_step_time": 0.10779595375061035 }, { "epoch": 2.557373046875e-06, "model_forward_time": 0.0243072509765625, "step": 1676 }, { "epoch": 2.557373046875e-06, "step": 1676, "training_step_time": 0.1083981990814209 }, { "epoch": 2.55889892578125e-06, "model_forward_time": 0.024738788604736328, "step": 1677 }, { "epoch": 2.55889892578125e-06, "step": 1677, "training_step_time": 0.10526275634765625 }, { "epoch": 2.5604248046875e-06, "model_forward_time": 0.025640010833740234, "step": 1678 }, { "epoch": 2.5604248046875e-06, "step": 1678, "training_step_time": 0.10669565200805664 }, { "epoch": 2.56195068359375e-06, "model_forward_time": 0.02536463737487793, "step": 1679 }, { "epoch": 2.56195068359375e-06, "step": 1679, "training_step_time": 0.10544252395629883 }, { "epoch": 2.5634765625e-06, "grad_norm": 0.7667890787124634, "learning_rate": 9.999015805811965e-05, "loss": 0.1444, "step": 1680 }, { "epoch": 2.5634765625e-06, "model_forward_time": 0.025038719177246094, "step": 1680 }, { "epoch": 2.5634765625e-06, "step": 1680, "training_step_time": 0.10881948471069336 }, { "epoch": 2.56500244140625e-06, "model_forward_time": 0.02547621726989746, "step": 1681 }, { "epoch": 2.56500244140625e-06, "step": 1681, "training_step_time": 0.10595035552978516 }, { "epoch": 2.5665283203125e-06, "model_forward_time": 0.02556467056274414, "step": 1682 }, { "epoch": 2.5665283203125e-06, "step": 1682, "training_step_time": 0.10466957092285156 }, { "epoch": 2.56805419921875e-06, "model_forward_time": 0.025458335876464844, "step": 1683 }, { "epoch": 2.56805419921875e-06, "step": 1683, "training_step_time": 0.10703253746032715 }, { "epoch": 2.569580078125e-06, "model_forward_time": 0.025152206420898438, "step": 1684 }, { "epoch": 2.569580078125e-06, "step": 1684, "training_step_time": 0.10369753837585449 }, { "epoch": 2.57110595703125e-06, "model_forward_time": 0.025420665740966797, "step": 1685 }, { "epoch": 2.57110595703125e-06, "step": 1685, "training_step_time": 0.1065969467163086 }, { "epoch": 2.5726318359375e-06, "model_forward_time": 0.025539875030517578, "step": 1686 }, { "epoch": 2.5726318359375e-06, "step": 1686, "training_step_time": 0.10527348518371582 }, { "epoch": 2.57415771484375e-06, "model_forward_time": 0.025393962860107422, "step": 1687 }, { "epoch": 2.57415771484375e-06, "step": 1687, "training_step_time": 0.10501480102539062 }, { "epoch": 2.57568359375e-06, "model_forward_time": 0.025498628616333008, "step": 1688 }, { "epoch": 2.57568359375e-06, "step": 1688, "training_step_time": 0.1864030361175537 }, { "epoch": 2.57720947265625e-06, "model_forward_time": 0.024951934814453125, "step": 1689 }, { "epoch": 2.57720947265625e-06, "step": 1689, "training_step_time": 0.17250490188598633 }, { "epoch": 2.5787353515625e-06, "grad_norm": 1.1121721267700195, "learning_rate": 9.998903417374228e-05, "loss": 0.1606, "step": 1690 }, { "epoch": 2.5787353515625e-06, "model_forward_time": 0.024769067764282227, "step": 1690 }, { "epoch": 2.5787353515625e-06, "step": 1690, "training_step_time": 0.17179083824157715 }, { "epoch": 2.58026123046875e-06, "model_forward_time": 0.024854660034179688, "step": 1691 }, { "epoch": 2.58026123046875e-06, "step": 1691, "training_step_time": 0.14887189865112305 }, { "epoch": 2.581787109375e-06, "model_forward_time": 0.024777889251708984, "step": 1692 }, { "epoch": 2.581787109375e-06, "step": 1692, "training_step_time": 0.2230379581451416 }, { "epoch": 2.58331298828125e-06, "model_forward_time": 0.025450468063354492, "step": 1693 }, { "epoch": 2.58331298828125e-06, "step": 1693, "training_step_time": 0.11564517021179199 }, { "epoch": 2.5848388671875e-06, "model_forward_time": 0.02463817596435547, "step": 1694 }, { "epoch": 2.5848388671875e-06, "step": 1694, "training_step_time": 0.1060943603515625 }, { "epoch": 2.58636474609375e-06, "model_forward_time": 0.025239944458007812, "step": 1695 }, { "epoch": 2.58636474609375e-06, "step": 1695, "training_step_time": 0.1125338077545166 }, { "epoch": 2.587890625e-06, "model_forward_time": 0.025891780853271484, "step": 1696 }, { "epoch": 2.587890625e-06, "step": 1696, "training_step_time": 0.10701942443847656 }, { "epoch": 2.58941650390625e-06, "model_forward_time": 0.025365114212036133, "step": 1697 }, { "epoch": 2.58941650390625e-06, "step": 1697, "training_step_time": 0.1244502067565918 }, { "epoch": 2.5909423828125e-06, "model_forward_time": 0.025916337966918945, "step": 1698 }, { "epoch": 2.5909423828125e-06, "step": 1698, "training_step_time": 0.1482686996459961 }, { "epoch": 2.59246826171875e-06, "model_forward_time": 0.02519822120666504, "step": 1699 }, { "epoch": 2.59246826171875e-06, "step": 1699, "training_step_time": 0.10748696327209473 }, { "epoch": 2.593994140625e-06, "grad_norm": 0.8197618126869202, "learning_rate": 9.998784954797474e-05, "loss": 0.1401, "step": 1700 }, { "epoch": 2.593994140625e-06, "model_forward_time": 0.02612471580505371, "step": 1700 }, { "epoch": 2.593994140625e-06, "step": 1700, "training_step_time": 0.10956525802612305 }, { "epoch": 2.59552001953125e-06, "model_forward_time": 0.025414705276489258, "step": 1701 }, { "epoch": 2.59552001953125e-06, "step": 1701, "training_step_time": 0.10976600646972656 }, { "epoch": 2.5970458984375e-06, "model_forward_time": 0.025226354598999023, "step": 1702 }, { "epoch": 2.5970458984375e-06, "step": 1702, "training_step_time": 0.16132068634033203 }, { "epoch": 2.59857177734375e-06, "model_forward_time": 0.024600505828857422, "step": 1703 }, { "epoch": 2.59857177734375e-06, "step": 1703, "training_step_time": 0.1523456573486328 }, { "epoch": 2.60009765625e-06, "model_forward_time": 0.024468660354614258, "step": 1704 }, { "epoch": 2.60009765625e-06, "step": 1704, "training_step_time": 0.1074533462524414 }, { "epoch": 2.60162353515625e-06, "model_forward_time": 0.024718284606933594, "step": 1705 }, { "epoch": 2.60162353515625e-06, "step": 1705, "training_step_time": 0.1078338623046875 }, { "epoch": 2.6031494140625e-06, "model_forward_time": 0.02620387077331543, "step": 1706 }, { "epoch": 2.6031494140625e-06, "step": 1706, "training_step_time": 0.11070632934570312 }, { "epoch": 2.60467529296875e-06, "model_forward_time": 0.02525043487548828, "step": 1707 }, { "epoch": 2.60467529296875e-06, "step": 1707, "training_step_time": 0.21219873428344727 }, { "epoch": 2.606201171875e-06, "model_forward_time": 0.025011301040649414, "step": 1708 }, { "epoch": 2.606201171875e-06, "step": 1708, "training_step_time": 0.12181925773620605 }, { "epoch": 2.60772705078125e-06, "model_forward_time": 0.02830028533935547, "step": 1709 }, { "epoch": 2.60772705078125e-06, "step": 1709, "training_step_time": 0.1104726791381836 }, { "epoch": 2.6092529296875e-06, "grad_norm": 1.292397379875183, "learning_rate": 9.998660418225645e-05, "loss": 0.1681, "step": 1710 }, { "epoch": 2.6092529296875e-06, "model_forward_time": 0.02522897720336914, "step": 1710 }, { "epoch": 2.6092529296875e-06, "step": 1710, "training_step_time": 0.22046256065368652 }, { "epoch": 2.61077880859375e-06, "model_forward_time": 0.0238802433013916, "step": 1711 }, { "epoch": 2.61077880859375e-06, "step": 1711, "training_step_time": 0.11639142036437988 }, { "epoch": 2.6123046875e-06, "model_forward_time": 0.023783206939697266, "step": 1712 }, { "epoch": 2.6123046875e-06, "step": 1712, "training_step_time": 0.10813117027282715 }, { "epoch": 2.61383056640625e-06, "model_forward_time": 0.0244901180267334, "step": 1713 }, { "epoch": 2.61383056640625e-06, "step": 1713, "training_step_time": 0.10531377792358398 }, { "epoch": 2.6153564453125e-06, "model_forward_time": 0.0253448486328125, "step": 1714 }, { "epoch": 2.6153564453125e-06, "step": 1714, "training_step_time": 0.11465668678283691 }, { "epoch": 2.61688232421875e-06, "model_forward_time": 0.025406360626220703, "step": 1715 }, { "epoch": 2.61688232421875e-06, "step": 1715, "training_step_time": 0.12316679954528809 }, { "epoch": 2.618408203125e-06, "model_forward_time": 0.02521204948425293, "step": 1716 }, { "epoch": 2.618408203125e-06, "step": 1716, "training_step_time": 0.10997319221496582 }, { "epoch": 2.61993408203125e-06, "model_forward_time": 0.025532245635986328, "step": 1717 }, { "epoch": 2.61993408203125e-06, "step": 1717, "training_step_time": 0.14649748802185059 }, { "epoch": 2.6214599609375e-06, "model_forward_time": 0.02529001235961914, "step": 1718 }, { "epoch": 2.6214599609375e-06, "step": 1718, "training_step_time": 0.20202302932739258 }, { "epoch": 2.62298583984375e-06, "model_forward_time": 0.02382373809814453, "step": 1719 }, { "epoch": 2.62298583984375e-06, "step": 1719, "training_step_time": 0.2074873447418213 }, { "epoch": 2.62451171875e-06, "grad_norm": 0.7815642356872559, "learning_rate": 9.998529807810064e-05, "loss": 0.1668, "step": 1720 }, { "epoch": 2.62451171875e-06, "model_forward_time": 0.02386641502380371, "step": 1720 }, { "epoch": 2.62451171875e-06, "step": 1720, "training_step_time": 0.19618439674377441 }, { "epoch": 2.62603759765625e-06, "model_forward_time": 0.02385091781616211, "step": 1721 }, { "epoch": 2.62603759765625e-06, "step": 1721, "training_step_time": 0.1876358985900879 }, { "epoch": 2.6275634765625e-06, "model_forward_time": 0.023627519607543945, "step": 1722 }, { "epoch": 2.6275634765625e-06, "step": 1722, "training_step_time": 0.17208218574523926 }, { "epoch": 2.62908935546875e-06, "model_forward_time": 0.024857282638549805, "step": 1723 }, { "epoch": 2.62908935546875e-06, "step": 1723, "training_step_time": 0.15814995765686035 }, { "epoch": 2.630615234375e-06, "model_forward_time": 0.024690866470336914, "step": 1724 }, { "epoch": 2.630615234375e-06, "step": 1724, "training_step_time": 0.1476595401763916 }, { "epoch": 2.63214111328125e-06, "model_forward_time": 0.024652481079101562, "step": 1725 }, { "epoch": 2.63214111328125e-06, "step": 1725, "training_step_time": 0.10276484489440918 }, { "epoch": 2.6336669921875e-06, "model_forward_time": 0.025558948516845703, "step": 1726 }, { "epoch": 2.6336669921875e-06, "step": 1726, "training_step_time": 0.10359597206115723 }, { "epoch": 2.63519287109375e-06, "model_forward_time": 0.02512812614440918, "step": 1727 }, { "epoch": 2.63519287109375e-06, "step": 1727, "training_step_time": 0.10404753684997559 }, { "epoch": 2.63671875e-06, "model_forward_time": 0.025196075439453125, "step": 1728 }, { "epoch": 2.63671875e-06, "step": 1728, "training_step_time": 0.10520076751708984 }, { "epoch": 2.63824462890625e-06, "model_forward_time": 0.02586674690246582, "step": 1729 }, { "epoch": 2.63824462890625e-06, "step": 1729, "training_step_time": 0.20693659782409668 }, { "epoch": 2.6397705078125e-06, "grad_norm": 0.9279288649559021, "learning_rate": 9.998393123709438e-05, "loss": 0.1554, "step": 1730 }, { "epoch": 2.6397705078125e-06, "model_forward_time": 0.024558067321777344, "step": 1730 }, { "epoch": 2.6397705078125e-06, "step": 1730, "training_step_time": 0.11133003234863281 }, { "epoch": 2.64129638671875e-06, "model_forward_time": 0.024407386779785156, "step": 1731 }, { "epoch": 2.64129638671875e-06, "step": 1731, "training_step_time": 0.22078156471252441 }, { "epoch": 2.642822265625e-06, "model_forward_time": 0.02470850944519043, "step": 1732 }, { "epoch": 2.642822265625e-06, "step": 1732, "training_step_time": 0.17116665840148926 }, { "epoch": 2.64434814453125e-06, "model_forward_time": 0.024406909942626953, "step": 1733 }, { "epoch": 2.64434814453125e-06, "step": 1733, "training_step_time": 0.18570280075073242 }, { "epoch": 2.6458740234375e-06, "model_forward_time": 0.02422165870666504, "step": 1734 }, { "epoch": 2.6458740234375e-06, "step": 1734, "training_step_time": 0.12007713317871094 }, { "epoch": 2.64739990234375e-06, "model_forward_time": 0.023708343505859375, "step": 1735 }, { "epoch": 2.64739990234375e-06, "step": 1735, "training_step_time": 0.1344454288482666 }, { "epoch": 2.64892578125e-06, "model_forward_time": 0.024355173110961914, "step": 1736 }, { "epoch": 2.64892578125e-06, "step": 1736, "training_step_time": 0.1295299530029297 }, { "epoch": 2.65045166015625e-06, "model_forward_time": 0.023968219757080078, "step": 1737 }, { "epoch": 2.65045166015625e-06, "step": 1737, "training_step_time": 0.18445229530334473 }, { "epoch": 2.6519775390625e-06, "model_forward_time": 0.024669408798217773, "step": 1738 }, { "epoch": 2.6519775390625e-06, "step": 1738, "training_step_time": 0.13410305976867676 }, { "epoch": 2.65350341796875e-06, "model_forward_time": 0.024066448211669922, "step": 1739 }, { "epoch": 2.65350341796875e-06, "step": 1739, "training_step_time": 0.11945533752441406 }, { "epoch": 2.655029296875e-06, "grad_norm": 1.3572587966918945, "learning_rate": 9.998250366089848e-05, "loss": 0.1891, "step": 1740 }, { "epoch": 2.655029296875e-06, "model_forward_time": 0.02459716796875, "step": 1740 }, { "epoch": 2.655029296875e-06, "step": 1740, "training_step_time": 0.20980310440063477 }, { "epoch": 2.65655517578125e-06, "model_forward_time": 0.024874448776245117, "step": 1741 }, { "epoch": 2.65655517578125e-06, "step": 1741, "training_step_time": 0.11301040649414062 }, { "epoch": 2.6580810546875e-06, "model_forward_time": 0.024437665939331055, "step": 1742 }, { "epoch": 2.6580810546875e-06, "step": 1742, "training_step_time": 0.19518685340881348 }, { "epoch": 2.65960693359375e-06, "model_forward_time": 0.02416062355041504, "step": 1743 }, { "epoch": 2.65960693359375e-06, "step": 1743, "training_step_time": 0.1085824966430664 }, { "epoch": 2.6611328125e-06, "model_forward_time": 0.024532794952392578, "step": 1744 }, { "epoch": 2.6611328125e-06, "step": 1744, "training_step_time": 0.10963058471679688 }, { "epoch": 2.66265869140625e-06, "model_forward_time": 0.025799036026000977, "step": 1745 }, { "epoch": 2.66265869140625e-06, "step": 1745, "training_step_time": 0.10655498504638672 }, { "epoch": 2.6641845703125e-06, "model_forward_time": 0.025012731552124023, "step": 1746 }, { "epoch": 2.6641845703125e-06, "step": 1746, "training_step_time": 0.10536885261535645 }, { "epoch": 2.66571044921875e-06, "model_forward_time": 0.02554631233215332, "step": 1747 }, { "epoch": 2.66571044921875e-06, "step": 1747, "training_step_time": 0.11204719543457031 }, { "epoch": 2.667236328125e-06, "model_forward_time": 0.027681350708007812, "step": 1748 }, { "epoch": 2.667236328125e-06, "step": 1748, "training_step_time": 0.11464190483093262 }, { "epoch": 2.66876220703125e-06, "model_forward_time": 0.025501012802124023, "step": 1749 }, { "epoch": 2.66876220703125e-06, "step": 1749, "training_step_time": 0.10691475868225098 }, { "epoch": 2.6702880859375e-06, "grad_norm": 1.452040195465088, "learning_rate": 9.998101535124758e-05, "loss": 0.1468, "step": 1750 }, { "epoch": 2.6702880859375e-06, "model_forward_time": 0.025337696075439453, "step": 1750 }, { "epoch": 2.6702880859375e-06, "step": 1750, "training_step_time": 0.16977453231811523 }, { "epoch": 2.67181396484375e-06, "model_forward_time": 0.02416253089904785, "step": 1751 }, { "epoch": 2.67181396484375e-06, "step": 1751, "training_step_time": 0.16734576225280762 }, { "epoch": 2.67333984375e-06, "model_forward_time": 0.0244753360748291, "step": 1752 }, { "epoch": 2.67333984375e-06, "step": 1752, "training_step_time": 0.10255169868469238 }, { "epoch": 2.67486572265625e-06, "model_forward_time": 0.02461838722229004, "step": 1753 }, { "epoch": 2.67486572265625e-06, "step": 1753, "training_step_time": 0.10480976104736328 }, { "epoch": 2.6763916015625e-06, "model_forward_time": 0.025255680084228516, "step": 1754 }, { "epoch": 2.6763916015625e-06, "step": 1754, "training_step_time": 0.10711407661437988 }, { "epoch": 2.67791748046875e-06, "model_forward_time": 0.024811983108520508, "step": 1755 }, { "epoch": 2.67791748046875e-06, "step": 1755, "training_step_time": 0.10537958145141602 }, { "epoch": 2.679443359375e-06, "model_forward_time": 0.025687456130981445, "step": 1756 }, { "epoch": 2.679443359375e-06, "step": 1756, "training_step_time": 0.11127805709838867 }, { "epoch": 2.68096923828125e-06, "model_forward_time": 0.025592803955078125, "step": 1757 }, { "epoch": 2.68096923828125e-06, "step": 1757, "training_step_time": 0.10577225685119629 }, { "epoch": 2.6824951171875e-06, "model_forward_time": 0.02527141571044922, "step": 1758 }, { "epoch": 2.6824951171875e-06, "step": 1758, "training_step_time": 0.10488677024841309 }, { "epoch": 2.68402099609375e-06, "model_forward_time": 0.026350021362304688, "step": 1759 }, { "epoch": 2.68402099609375e-06, "step": 1759, "training_step_time": 0.10598134994506836 }, { "epoch": 2.685546875e-06, "grad_norm": 0.9804360270500183, "learning_rate": 9.997946630995013e-05, "loss": 0.1408, "step": 1760 }, { "epoch": 2.685546875e-06, "model_forward_time": 0.02541637420654297, "step": 1760 }, { "epoch": 2.685546875e-06, "step": 1760, "training_step_time": 0.10829663276672363 }, { "epoch": 2.68707275390625e-06, "model_forward_time": 0.0254213809967041, "step": 1761 }, { "epoch": 2.68707275390625e-06, "step": 1761, "training_step_time": 0.10474324226379395 }, { "epoch": 2.6885986328125e-06, "model_forward_time": 0.0254061222076416, "step": 1762 }, { "epoch": 2.6885986328125e-06, "step": 1762, "training_step_time": 0.10452938079833984 }, { "epoch": 2.69012451171875e-06, "model_forward_time": 0.026445388793945312, "step": 1763 }, { "epoch": 2.69012451171875e-06, "step": 1763, "training_step_time": 0.1069033145904541 }, { "epoch": 2.691650390625e-06, "model_forward_time": 0.02569437026977539, "step": 1764 }, { "epoch": 2.691650390625e-06, "step": 1764, "training_step_time": 0.11055326461791992 }, { "epoch": 2.69317626953125e-06, "model_forward_time": 0.025249481201171875, "step": 1765 }, { "epoch": 2.69317626953125e-06, "step": 1765, "training_step_time": 0.1125478744506836 }, { "epoch": 2.6947021484375e-06, "model_forward_time": 0.024700164794921875, "step": 1766 }, { "epoch": 2.6947021484375e-06, "step": 1766, "training_step_time": 0.10641694068908691 }, { "epoch": 2.69622802734375e-06, "model_forward_time": 0.025064945220947266, "step": 1767 }, { "epoch": 2.69622802734375e-06, "step": 1767, "training_step_time": 0.10965824127197266 }, { "epoch": 2.69775390625e-06, "model_forward_time": 0.026335477828979492, "step": 1768 }, { "epoch": 2.69775390625e-06, "step": 1768, "training_step_time": 0.1074528694152832 }, { "epoch": 2.69927978515625e-06, "model_forward_time": 0.025423765182495117, "step": 1769 }, { "epoch": 2.69927978515625e-06, "step": 1769, "training_step_time": 0.10599374771118164 }, { "epoch": 2.7008056640625e-06, "grad_norm": 1.212485909461975, "learning_rate": 9.997785653888835e-05, "loss": 0.1296, "step": 1770 }, { "epoch": 2.7008056640625e-06, "model_forward_time": 0.024813175201416016, "step": 1770 }, { "epoch": 2.7008056640625e-06, "step": 1770, "training_step_time": 0.10941004753112793 }, { "epoch": 2.70233154296875e-06, "model_forward_time": 0.02537989616394043, "step": 1771 }, { "epoch": 2.70233154296875e-06, "step": 1771, "training_step_time": 0.10610604286193848 }, { "epoch": 2.703857421875e-06, "model_forward_time": 0.025090694427490234, "step": 1772 }, { "epoch": 2.703857421875e-06, "step": 1772, "training_step_time": 0.11089229583740234 }, { "epoch": 2.70538330078125e-06, "model_forward_time": 0.025800704956054688, "step": 1773 }, { "epoch": 2.70538330078125e-06, "step": 1773, "training_step_time": 0.11060070991516113 }, { "epoch": 2.7069091796875e-06, "model_forward_time": 0.025324344635009766, "step": 1774 }, { "epoch": 2.7069091796875e-06, "step": 1774, "training_step_time": 0.20423603057861328 }, { "epoch": 2.70843505859375e-06, "model_forward_time": 0.024809837341308594, "step": 1775 }, { "epoch": 2.70843505859375e-06, "step": 1775, "training_step_time": 0.20985841751098633 }, { "epoch": 2.7099609375e-06, "model_forward_time": 0.02463531494140625, "step": 1776 }, { "epoch": 2.7099609375e-06, "step": 1776, "training_step_time": 0.1501476764678955 }, { "epoch": 2.71148681640625e-06, "model_forward_time": 0.024366378784179688, "step": 1777 }, { "epoch": 2.71148681640625e-06, "step": 1777, "training_step_time": 0.21163129806518555 }, { "epoch": 2.7130126953125e-06, "model_forward_time": 0.024646997451782227, "step": 1778 }, { "epoch": 2.7130126953125e-06, "step": 1778, "training_step_time": 0.14583420753479004 }, { "epoch": 2.71453857421875e-06, "model_forward_time": 0.024041175842285156, "step": 1779 }, { "epoch": 2.71453857421875e-06, "step": 1779, "training_step_time": 0.10576987266540527 }, { "epoch": 2.716064453125e-06, "grad_norm": 0.6639909148216248, "learning_rate": 9.997618604001829e-05, "loss": 0.1776, "step": 1780 }, { "epoch": 2.716064453125e-06, "model_forward_time": 0.025102853775024414, "step": 1780 }, { "epoch": 2.716064453125e-06, "step": 1780, "training_step_time": 0.11065435409545898 }, { "epoch": 2.71759033203125e-06, "model_forward_time": 0.025342226028442383, "step": 1781 }, { "epoch": 2.71759033203125e-06, "step": 1781, "training_step_time": 0.10585570335388184 }, { "epoch": 2.7191162109375e-06, "model_forward_time": 0.025157928466796875, "step": 1782 }, { "epoch": 2.7191162109375e-06, "step": 1782, "training_step_time": 0.2035233974456787 }, { "epoch": 2.72064208984375e-06, "model_forward_time": 0.02389669418334961, "step": 1783 }, { "epoch": 2.72064208984375e-06, "step": 1783, "training_step_time": 0.13607263565063477 }, { "epoch": 2.72216796875e-06, "model_forward_time": 0.02414226531982422, "step": 1784 }, { "epoch": 2.72216796875e-06, "step": 1784, "training_step_time": 0.11014604568481445 }, { "epoch": 2.72369384765625e-06, "model_forward_time": 0.025170564651489258, "step": 1785 }, { "epoch": 2.72369384765625e-06, "step": 1785, "training_step_time": 0.11640071868896484 }, { "epoch": 2.7252197265625e-06, "model_forward_time": 0.02529454231262207, "step": 1786 }, { "epoch": 2.7252197265625e-06, "step": 1786, "training_step_time": 0.1109473705291748 }, { "epoch": 2.72674560546875e-06, "model_forward_time": 0.02518916130065918, "step": 1787 }, { "epoch": 2.72674560546875e-06, "step": 1787, "training_step_time": 0.11364459991455078 }, { "epoch": 2.728271484375e-06, "model_forward_time": 0.026539087295532227, "step": 1788 }, { "epoch": 2.728271484375e-06, "step": 1788, "training_step_time": 0.2055072784423828 }, { "epoch": 2.72979736328125e-06, "model_forward_time": 0.024203062057495117, "step": 1789 }, { "epoch": 2.72979736328125e-06, "step": 1789, "training_step_time": 0.1112067699432373 }, { "epoch": 2.7313232421875e-06, "grad_norm": 1.1101709604263306, "learning_rate": 9.997445481536973e-05, "loss": 0.1352, "step": 1790 }, { "epoch": 2.7313232421875e-06, "model_forward_time": 0.024616479873657227, "step": 1790 }, { "epoch": 2.7313232421875e-06, "step": 1790, "training_step_time": 0.10577082633972168 }, { "epoch": 2.73284912109375e-06, "model_forward_time": 0.025639057159423828, "step": 1791 }, { "epoch": 2.73284912109375e-06, "step": 1791, "training_step_time": 0.10722017288208008 }, { "epoch": 2.734375e-06, "model_forward_time": 0.025412321090698242, "step": 1792 }, { "epoch": 2.734375e-06, "step": 1792, "training_step_time": 0.106903076171875 }, { "epoch": 2.73590087890625e-06, "model_forward_time": 0.029206514358520508, "step": 1793 }, { "epoch": 2.73590087890625e-06, "step": 1793, "training_step_time": 0.12263107299804688 }, { "epoch": 2.7374267578125e-06, "model_forward_time": 0.025545597076416016, "step": 1794 }, { "epoch": 2.7374267578125e-06, "step": 1794, "training_step_time": 0.1107485294342041 }, { "epoch": 2.73895263671875e-06, "model_forward_time": 0.025102615356445312, "step": 1795 }, { "epoch": 2.73895263671875e-06, "step": 1795, "training_step_time": 0.1124734878540039 }, { "epoch": 2.740478515625e-06, "model_forward_time": 0.025334835052490234, "step": 1796 }, { "epoch": 2.740478515625e-06, "step": 1796, "training_step_time": 0.21331572532653809 }, { "epoch": 2.74200439453125e-06, "model_forward_time": 0.024575233459472656, "step": 1797 }, { "epoch": 2.74200439453125e-06, "step": 1797, "training_step_time": 0.11715364456176758 }, { "epoch": 2.7435302734375e-06, "model_forward_time": 0.02477884292602539, "step": 1798 }, { "epoch": 2.7435302734375e-06, "step": 1798, "training_step_time": 0.1045994758605957 }, { "epoch": 2.74505615234375e-06, "model_forward_time": 0.025434017181396484, "step": 1799 }, { "epoch": 2.74505615234375e-06, "step": 1799, "training_step_time": 0.10998272895812988 }, { "epoch": 2.74658203125e-06, "grad_norm": 0.6286399364471436, "learning_rate": 9.997266286704631e-05, "loss": 0.1496, "step": 1800 }, { "epoch": 2.74658203125e-06, "model_forward_time": 0.02579498291015625, "step": 1800 }, { "epoch": 2.74658203125e-06, "step": 1800, "training_step_time": 0.12121963500976562 }, { "epoch": 2.74810791015625e-06, "model_forward_time": 0.02513718605041504, "step": 1801 }, { "epoch": 2.74810791015625e-06, "step": 1801, "training_step_time": 0.12234854698181152 }, { "epoch": 2.7496337890625e-06, "model_forward_time": 0.025505781173706055, "step": 1802 }, { "epoch": 2.7496337890625e-06, "step": 1802, "training_step_time": 0.12576866149902344 }, { "epoch": 2.75115966796875e-06, "model_forward_time": 0.025467872619628906, "step": 1803 }, { "epoch": 2.75115966796875e-06, "step": 1803, "training_step_time": 0.11745119094848633 }, { "epoch": 2.752685546875e-06, "model_forward_time": 0.024660110473632812, "step": 1804 }, { "epoch": 2.752685546875e-06, "step": 1804, "training_step_time": 0.11979126930236816 }, { "epoch": 2.75421142578125e-06, "model_forward_time": 0.02440190315246582, "step": 1805 }, { "epoch": 2.75421142578125e-06, "step": 1805, "training_step_time": 0.11834263801574707 }, { "epoch": 2.7557373046875e-06, "model_forward_time": 0.02427959442138672, "step": 1806 }, { "epoch": 2.7557373046875e-06, "step": 1806, "training_step_time": 0.11388421058654785 }, { "epoch": 2.75726318359375e-06, "model_forward_time": 0.02536487579345703, "step": 1807 }, { "epoch": 2.75726318359375e-06, "step": 1807, "training_step_time": 0.11600446701049805 }, { "epoch": 2.7587890625e-06, "model_forward_time": 0.025497913360595703, "step": 1808 }, { "epoch": 2.7587890625e-06, "step": 1808, "training_step_time": 0.11174941062927246 }, { "epoch": 2.76031494140625e-06, "model_forward_time": 0.024935007095336914, "step": 1809 }, { "epoch": 2.76031494140625e-06, "step": 1809, "training_step_time": 0.11052107810974121 }, { "epoch": 2.7618408203125e-06, "grad_norm": 1.036386489868164, "learning_rate": 9.997081019722537e-05, "loss": 0.1256, "step": 1810 }, { "epoch": 2.7618408203125e-06, "model_forward_time": 0.02544116973876953, "step": 1810 }, { "epoch": 2.7618408203125e-06, "step": 1810, "training_step_time": 0.1088249683380127 }, { "epoch": 2.76336669921875e-06, "model_forward_time": 0.025402545928955078, "step": 1811 }, { "epoch": 2.76336669921875e-06, "step": 1811, "training_step_time": 0.10657811164855957 }, { "epoch": 2.764892578125e-06, "model_forward_time": 0.0251772403717041, "step": 1812 }, { "epoch": 2.764892578125e-06, "step": 1812, "training_step_time": 0.10655951499938965 }, { "epoch": 2.76641845703125e-06, "model_forward_time": 0.02485942840576172, "step": 1813 }, { "epoch": 2.76641845703125e-06, "step": 1813, "training_step_time": 0.10702252388000488 }, { "epoch": 2.7679443359375e-06, "model_forward_time": 0.02496790885925293, "step": 1814 }, { "epoch": 2.7679443359375e-06, "step": 1814, "training_step_time": 0.10612940788269043 }, { "epoch": 2.76947021484375e-06, "model_forward_time": 0.025246620178222656, "step": 1815 }, { "epoch": 2.76947021484375e-06, "step": 1815, "training_step_time": 0.10505461692810059 }, { "epoch": 2.77099609375e-06, "model_forward_time": 0.027647733688354492, "step": 1816 }, { "epoch": 2.77099609375e-06, "step": 1816, "training_step_time": 0.1110830307006836 }, { "epoch": 2.77252197265625e-06, "model_forward_time": 0.02538442611694336, "step": 1817 }, { "epoch": 2.77252197265625e-06, "step": 1817, "training_step_time": 0.11000871658325195 }, { "epoch": 2.7740478515625e-06, "model_forward_time": 0.025326013565063477, "step": 1818 }, { "epoch": 2.7740478515625e-06, "step": 1818, "training_step_time": 0.22469353675842285 }, { "epoch": 2.77557373046875e-06, "model_forward_time": 0.024481534957885742, "step": 1819 }, { "epoch": 2.77557373046875e-06, "step": 1819, "training_step_time": 0.11764669418334961 }, { "epoch": 2.777099609375e-06, "grad_norm": 0.8333061933517456, "learning_rate": 9.99688968081581e-05, "loss": 0.1471, "step": 1820 }, { "epoch": 2.777099609375e-06, "model_forward_time": 0.024678945541381836, "step": 1820 }, { "epoch": 2.777099609375e-06, "step": 1820, "training_step_time": 0.12758731842041016 }, { "epoch": 2.77862548828125e-06, "model_forward_time": 0.025342464447021484, "step": 1821 }, { "epoch": 2.77862548828125e-06, "step": 1821, "training_step_time": 0.21818852424621582 }, { "epoch": 2.7801513671875e-06, "model_forward_time": 0.024321794509887695, "step": 1822 }, { "epoch": 2.7801513671875e-06, "step": 1822, "training_step_time": 0.16818833351135254 }, { "epoch": 2.78167724609375e-06, "model_forward_time": 0.024546384811401367, "step": 1823 }, { "epoch": 2.78167724609375e-06, "step": 1823, "training_step_time": 0.18592286109924316 }, { "epoch": 2.783203125e-06, "model_forward_time": 0.02414107322692871, "step": 1824 }, { "epoch": 2.783203125e-06, "step": 1824, "training_step_time": 0.10913395881652832 }, { "epoch": 2.78472900390625e-06, "model_forward_time": 0.024141788482666016, "step": 1825 }, { "epoch": 2.78472900390625e-06, "step": 1825, "training_step_time": 0.1048421859741211 }, { "epoch": 2.7862548828125e-06, "model_forward_time": 0.0249021053314209, "step": 1826 }, { "epoch": 2.7862548828125e-06, "step": 1826, "training_step_time": 0.11409425735473633 }, { "epoch": 2.78778076171875e-06, "model_forward_time": 0.025697708129882812, "step": 1827 }, { "epoch": 2.78778076171875e-06, "step": 1827, "training_step_time": 0.15686392784118652 }, { "epoch": 2.789306640625e-06, "model_forward_time": 0.02518486976623535, "step": 1828 }, { "epoch": 2.789306640625e-06, "step": 1828, "training_step_time": 0.13190722465515137 }, { "epoch": 2.79083251953125e-06, "model_forward_time": 0.024442434310913086, "step": 1829 }, { "epoch": 2.79083251953125e-06, "step": 1829, "training_step_time": 0.11804938316345215 }, { "epoch": 2.7923583984375e-06, "grad_norm": 1.2781480550765991, "learning_rate": 9.996692270216947e-05, "loss": 0.1507, "step": 1830 }, { "epoch": 2.7923583984375e-06, "model_forward_time": 0.02542591094970703, "step": 1830 }, { "epoch": 2.7923583984375e-06, "step": 1830, "training_step_time": 0.11500358581542969 }, { "epoch": 2.79388427734375e-06, "model_forward_time": 0.025402307510375977, "step": 1831 }, { "epoch": 2.79388427734375e-06, "step": 1831, "training_step_time": 0.10614705085754395 }, { "epoch": 2.79541015625e-06, "model_forward_time": 0.025272130966186523, "step": 1832 }, { "epoch": 2.79541015625e-06, "step": 1832, "training_step_time": 0.10848045349121094 }, { "epoch": 2.79693603515625e-06, "model_forward_time": 0.02506709098815918, "step": 1833 }, { "epoch": 2.79693603515625e-06, "step": 1833, "training_step_time": 0.19932842254638672 }, { "epoch": 2.7984619140625e-06, "model_forward_time": 0.024361610412597656, "step": 1834 }, { "epoch": 2.7984619140625e-06, "step": 1834, "training_step_time": 0.10292339324951172 }, { "epoch": 2.79998779296875e-06, "model_forward_time": 0.024723529815673828, "step": 1835 }, { "epoch": 2.79998779296875e-06, "step": 1835, "training_step_time": 0.10399508476257324 }, { "epoch": 2.801513671875e-06, "model_forward_time": 0.026547908782958984, "step": 1836 }, { "epoch": 2.801513671875e-06, "step": 1836, "training_step_time": 0.11370658874511719 }, { "epoch": 2.80303955078125e-06, "model_forward_time": 0.02829742431640625, "step": 1837 }, { "epoch": 2.80303955078125e-06, "step": 1837, "training_step_time": 0.21724414825439453 }, { "epoch": 2.8045654296875e-06, "model_forward_time": 0.024499177932739258, "step": 1838 }, { "epoch": 2.8045654296875e-06, "step": 1838, "training_step_time": 0.11639022827148438 }, { "epoch": 2.80609130859375e-06, "model_forward_time": 0.025053739547729492, "step": 1839 }, { "epoch": 2.80609130859375e-06, "step": 1839, "training_step_time": 0.1108248233795166 }, { "epoch": 2.8076171875e-06, "grad_norm": 1.1419516801834106, "learning_rate": 9.996488788165816e-05, "loss": 0.1685, "step": 1840 }, { "epoch": 2.8076171875e-06, "model_forward_time": 0.025682449340820312, "step": 1840 }, { "epoch": 2.8076171875e-06, "step": 1840, "training_step_time": 0.21270537376403809 }, { "epoch": 2.80914306640625e-06, "model_forward_time": 0.024951696395874023, "step": 1841 }, { "epoch": 2.80914306640625e-06, "step": 1841, "training_step_time": 0.1273365020751953 }, { "epoch": 2.8106689453125e-06, "model_forward_time": 0.024540424346923828, "step": 1842 }, { "epoch": 2.8106689453125e-06, "step": 1842, "training_step_time": 0.10443735122680664 }, { "epoch": 2.81219482421875e-06, "model_forward_time": 0.025720596313476562, "step": 1843 }, { "epoch": 2.81219482421875e-06, "step": 1843, "training_step_time": 0.11735987663269043 }, { "epoch": 2.813720703125e-06, "model_forward_time": 0.02510237693786621, "step": 1844 }, { "epoch": 2.813720703125e-06, "step": 1844, "training_step_time": 0.10677528381347656 }, { "epoch": 2.81524658203125e-06, "model_forward_time": 0.025218725204467773, "step": 1845 }, { "epoch": 2.81524658203125e-06, "step": 1845, "training_step_time": 0.11403369903564453 }, { "epoch": 2.8167724609375e-06, "model_forward_time": 0.024869918823242188, "step": 1846 }, { "epoch": 2.8167724609375e-06, "step": 1846, "training_step_time": 0.10435366630554199 }, { "epoch": 2.81829833984375e-06, "model_forward_time": 0.025243043899536133, "step": 1847 }, { "epoch": 2.81829833984375e-06, "step": 1847, "training_step_time": 0.10698270797729492 }, { "epoch": 2.81982421875e-06, "model_forward_time": 0.025437116622924805, "step": 1848 }, { "epoch": 2.81982421875e-06, "step": 1848, "training_step_time": 0.10965323448181152 }, { "epoch": 2.82135009765625e-06, "model_forward_time": 0.02542877197265625, "step": 1849 }, { "epoch": 2.82135009765625e-06, "step": 1849, "training_step_time": 0.10606908798217773 }, { "epoch": 2.8228759765625e-06, "grad_norm": 1.2174485921859741, "learning_rate": 9.996279234909671e-05, "loss": 0.1748, "step": 1850 }, { "epoch": 2.8228759765625e-06, "model_forward_time": 0.025156021118164062, "step": 1850 }, { "epoch": 2.8228759765625e-06, "step": 1850, "training_step_time": 0.11067533493041992 }, { "epoch": 2.82440185546875e-06, "model_forward_time": 0.02543497085571289, "step": 1851 }, { "epoch": 2.82440185546875e-06, "step": 1851, "training_step_time": 0.10841655731201172 }, { "epoch": 2.825927734375e-06, "model_forward_time": 0.02602386474609375, "step": 1852 }, { "epoch": 2.825927734375e-06, "step": 1852, "training_step_time": 0.14484000205993652 }, { "epoch": 2.82745361328125e-06, "model_forward_time": 0.02520275115966797, "step": 1853 }, { "epoch": 2.82745361328125e-06, "step": 1853, "training_step_time": 0.1279306411743164 }, { "epoch": 2.8289794921875e-06, "model_forward_time": 0.02490520477294922, "step": 1854 }, { "epoch": 2.8289794921875e-06, "step": 1854, "training_step_time": 0.1275637149810791 }, { "epoch": 2.83050537109375e-06, "model_forward_time": 0.02480936050415039, "step": 1855 }, { "epoch": 2.83050537109375e-06, "step": 1855, "training_step_time": 0.12048530578613281 }, { "epoch": 2.83203125e-06, "model_forward_time": 0.024746179580688477, "step": 1856 }, { "epoch": 2.83203125e-06, "step": 1856, "training_step_time": 0.11751151084899902 }, { "epoch": 2.83355712890625e-06, "model_forward_time": 0.024985074996948242, "step": 1857 }, { "epoch": 2.83355712890625e-06, "step": 1857, "training_step_time": 0.11470794677734375 }, { "epoch": 2.8350830078125e-06, "model_forward_time": 0.025122404098510742, "step": 1858 }, { "epoch": 2.8350830078125e-06, "step": 1858, "training_step_time": 0.11133146286010742 }, { "epoch": 2.83660888671875e-06, "model_forward_time": 0.025282859802246094, "step": 1859 }, { "epoch": 2.83660888671875e-06, "step": 1859, "training_step_time": 0.11348962783813477 }, { "epoch": 2.838134765625e-06, "grad_norm": 0.8413699269294739, "learning_rate": 9.996063610703137e-05, "loss": 0.166, "step": 1860 }, { "epoch": 2.838134765625e-06, "model_forward_time": 0.02525639533996582, "step": 1860 }, { "epoch": 2.838134765625e-06, "step": 1860, "training_step_time": 0.11232233047485352 }, { "epoch": 2.83966064453125e-06, "model_forward_time": 0.025519609451293945, "step": 1861 }, { "epoch": 2.83966064453125e-06, "step": 1861, "training_step_time": 0.10790729522705078 }, { "epoch": 2.8411865234375e-06, "model_forward_time": 0.025049209594726562, "step": 1862 }, { "epoch": 2.8411865234375e-06, "step": 1862, "training_step_time": 0.15474772453308105 }, { "epoch": 2.84271240234375e-06, "model_forward_time": 0.025496244430541992, "step": 1863 }, { "epoch": 2.84271240234375e-06, "step": 1863, "training_step_time": 0.1703324317932129 }, { "epoch": 2.84423828125e-06, "model_forward_time": 0.023806095123291016, "step": 1864 }, { "epoch": 2.84423828125e-06, "step": 1864, "training_step_time": 0.17456531524658203 }, { "epoch": 2.84576416015625e-06, "model_forward_time": 0.024547576904296875, "step": 1865 }, { "epoch": 2.84576416015625e-06, "step": 1865, "training_step_time": 0.16968345642089844 }, { "epoch": 2.8472900390625e-06, "model_forward_time": 0.025000810623168945, "step": 1866 }, { "epoch": 2.8472900390625e-06, "step": 1866, "training_step_time": 0.15189409255981445 }, { "epoch": 2.84881591796875e-06, "model_forward_time": 0.024269580841064453, "step": 1867 }, { "epoch": 2.84881591796875e-06, "step": 1867, "training_step_time": 0.11424612998962402 }, { "epoch": 2.850341796875e-06, "model_forward_time": 0.024725675582885742, "step": 1868 }, { "epoch": 2.850341796875e-06, "step": 1868, "training_step_time": 0.1562044620513916 }, { "epoch": 2.85186767578125e-06, "model_forward_time": 0.024988412857055664, "step": 1869 }, { "epoch": 2.85186767578125e-06, "step": 1869, "training_step_time": 0.10584068298339844 }, { "epoch": 2.8533935546875e-06, "grad_norm": 1.2796015739440918, "learning_rate": 9.995841915808218e-05, "loss": 0.1571, "step": 1870 }, { "epoch": 2.8533935546875e-06, "model_forward_time": 0.024498939514160156, "step": 1870 }, { "epoch": 2.8533935546875e-06, "step": 1870, "training_step_time": 0.10613131523132324 }, { "epoch": 2.85491943359375e-06, "model_forward_time": 0.025120258331298828, "step": 1871 }, { "epoch": 2.85491943359375e-06, "step": 1871, "training_step_time": 0.1054072380065918 }, { "epoch": 2.8564453125e-06, "model_forward_time": 0.025531530380249023, "step": 1872 }, { "epoch": 2.8564453125e-06, "step": 1872, "training_step_time": 0.10857462882995605 }, { "epoch": 2.85797119140625e-06, "model_forward_time": 0.02545785903930664, "step": 1873 }, { "epoch": 2.85797119140625e-06, "step": 1873, "training_step_time": 0.13704848289489746 }, { "epoch": 2.8594970703125e-06, "model_forward_time": 0.025249481201171875, "step": 1874 }, { "epoch": 2.8594970703125e-06, "step": 1874, "training_step_time": 0.11295819282531738 }, { "epoch": 2.86102294921875e-06, "model_forward_time": 0.02603602409362793, "step": 1875 }, { "epoch": 2.86102294921875e-06, "step": 1875, "training_step_time": 0.1112053394317627 }, { "epoch": 2.862548828125e-06, "model_forward_time": 0.025376319885253906, "step": 1876 }, { "epoch": 2.862548828125e-06, "step": 1876, "training_step_time": 0.11547994613647461 }, { "epoch": 2.86407470703125e-06, "model_forward_time": 0.02539801597595215, "step": 1877 }, { "epoch": 2.86407470703125e-06, "step": 1877, "training_step_time": 0.1099696159362793 }, { "epoch": 2.8656005859375e-06, "model_forward_time": 0.025517702102661133, "step": 1878 }, { "epoch": 2.8656005859375e-06, "step": 1878, "training_step_time": 0.1985776424407959 }, { "epoch": 2.86712646484375e-06, "model_forward_time": 0.024854421615600586, "step": 1879 }, { "epoch": 2.86712646484375e-06, "step": 1879, "training_step_time": 0.10831761360168457 }, { "epoch": 2.86865234375e-06, "grad_norm": 1.016643762588501, "learning_rate": 9.995614150494293e-05, "loss": 0.1497, "step": 1880 }, { "epoch": 2.86865234375e-06, "model_forward_time": 0.02571725845336914, "step": 1880 }, { "epoch": 2.86865234375e-06, "step": 1880, "training_step_time": 0.10736393928527832 }, { "epoch": 2.87017822265625e-06, "model_forward_time": 0.02530360221862793, "step": 1881 }, { "epoch": 2.87017822265625e-06, "step": 1881, "training_step_time": 0.11052322387695312 }, { "epoch": 2.8717041015625e-06, "model_forward_time": 0.02534008026123047, "step": 1882 }, { "epoch": 2.8717041015625e-06, "step": 1882, "training_step_time": 0.12479472160339355 }, { "epoch": 2.87322998046875e-06, "model_forward_time": 0.025632143020629883, "step": 1883 }, { "epoch": 2.87322998046875e-06, "step": 1883, "training_step_time": 0.1278977394104004 }, { "epoch": 2.874755859375e-06, "model_forward_time": 0.02551126480102539, "step": 1884 }, { "epoch": 2.874755859375e-06, "step": 1884, "training_step_time": 0.18056058883666992 }, { "epoch": 2.87628173828125e-06, "model_forward_time": 0.025063514709472656, "step": 1885 }, { "epoch": 2.87628173828125e-06, "step": 1885, "training_step_time": 0.13353562355041504 }, { "epoch": 2.8778076171875e-06, "model_forward_time": 0.025174856185913086, "step": 1886 }, { "epoch": 2.8778076171875e-06, "step": 1886, "training_step_time": 0.11635804176330566 }, { "epoch": 2.87933349609375e-06, "model_forward_time": 0.025496959686279297, "step": 1887 }, { "epoch": 2.87933349609375e-06, "step": 1887, "training_step_time": 0.11378908157348633 }, { "epoch": 2.880859375e-06, "model_forward_time": 0.025817394256591797, "step": 1888 }, { "epoch": 2.880859375e-06, "step": 1888, "training_step_time": 0.11269330978393555 }, { "epoch": 2.88238525390625e-06, "model_forward_time": 0.025176525115966797, "step": 1889 }, { "epoch": 2.88238525390625e-06, "step": 1889, "training_step_time": 0.10948562622070312 }, { "epoch": 2.8839111328125e-06, "grad_norm": 0.7047964930534363, "learning_rate": 9.995380315038119e-05, "loss": 0.1435, "step": 1890 }, { "epoch": 2.8839111328125e-06, "model_forward_time": 0.024848222732543945, "step": 1890 }, { "epoch": 2.8839111328125e-06, "step": 1890, "training_step_time": 0.10723495483398438 }, { "epoch": 2.88543701171875e-06, "model_forward_time": 0.02545952796936035, "step": 1891 }, { "epoch": 2.88543701171875e-06, "step": 1891, "training_step_time": 0.10792994499206543 }, { "epoch": 2.886962890625e-06, "model_forward_time": 0.025435686111450195, "step": 1892 }, { "epoch": 2.886962890625e-06, "step": 1892, "training_step_time": 0.10947227478027344 }, { "epoch": 2.88848876953125e-06, "model_forward_time": 0.025304079055786133, "step": 1893 }, { "epoch": 2.88848876953125e-06, "step": 1893, "training_step_time": 0.10916423797607422 }, { "epoch": 2.8900146484375e-06, "model_forward_time": 0.02515244483947754, "step": 1894 }, { "epoch": 2.8900146484375e-06, "step": 1894, "training_step_time": 0.1080162525177002 }, { "epoch": 2.89154052734375e-06, "model_forward_time": 0.02533698081970215, "step": 1895 }, { "epoch": 2.89154052734375e-06, "step": 1895, "training_step_time": 0.10825538635253906 }, { "epoch": 2.89306640625e-06, "model_forward_time": 0.025634050369262695, "step": 1896 }, { "epoch": 2.89306640625e-06, "step": 1896, "training_step_time": 0.10918927192687988 }, { "epoch": 2.89459228515625e-06, "model_forward_time": 0.02533268928527832, "step": 1897 }, { "epoch": 2.89459228515625e-06, "step": 1897, "training_step_time": 0.10771608352661133 }, { "epoch": 2.8961181640625e-06, "model_forward_time": 0.02513289451599121, "step": 1898 }, { "epoch": 2.8961181640625e-06, "step": 1898, "training_step_time": 0.1048574447631836 }, { "epoch": 2.89764404296875e-06, "model_forward_time": 0.025398731231689453, "step": 1899 }, { "epoch": 2.89764404296875e-06, "step": 1899, "training_step_time": 0.10567426681518555 }, { "epoch": 2.899169921875e-06, "grad_norm": 0.7637397646903992, "learning_rate": 9.99514040972383e-05, "loss": 0.1593, "step": 1900 }, { "epoch": 2.899169921875e-06, "model_forward_time": 0.025198698043823242, "step": 1900 }, { "epoch": 2.899169921875e-06, "step": 1900, "training_step_time": 0.10682225227355957 }, { "epoch": 2.90069580078125e-06, "model_forward_time": 0.02555227279663086, "step": 1901 }, { "epoch": 2.90069580078125e-06, "step": 1901, "training_step_time": 0.10581541061401367 }, { "epoch": 2.9022216796875e-06, "model_forward_time": 0.02541065216064453, "step": 1902 }, { "epoch": 2.9022216796875e-06, "step": 1902, "training_step_time": 0.10606122016906738 }, { "epoch": 2.90374755859375e-06, "model_forward_time": 0.025368928909301758, "step": 1903 }, { "epoch": 2.90374755859375e-06, "step": 1903, "training_step_time": 0.1044607162475586 }, { "epoch": 2.9052734375e-06, "model_forward_time": 0.026404142379760742, "step": 1904 }, { "epoch": 2.9052734375e-06, "step": 1904, "training_step_time": 0.10947704315185547 }, { "epoch": 2.90679931640625e-06, "model_forward_time": 0.0252230167388916, "step": 1905 }, { "epoch": 2.90679931640625e-06, "step": 1905, "training_step_time": 0.11258840560913086 }, { "epoch": 2.9083251953125e-06, "model_forward_time": 0.0250396728515625, "step": 1906 }, { "epoch": 2.9083251953125e-06, "step": 1906, "training_step_time": 0.10487484931945801 }, { "epoch": 2.90985107421875e-06, "model_forward_time": 0.025368690490722656, "step": 1907 }, { "epoch": 2.90985107421875e-06, "step": 1907, "training_step_time": 0.10548019409179688 }, { "epoch": 2.911376953125e-06, "model_forward_time": 0.02552175521850586, "step": 1908 }, { "epoch": 2.911376953125e-06, "step": 1908, "training_step_time": 0.18558001518249512 }, { "epoch": 2.91290283203125e-06, "model_forward_time": 0.024863243103027344, "step": 1909 }, { "epoch": 2.91290283203125e-06, "step": 1909, "training_step_time": 0.16714763641357422 }, { "epoch": 2.9144287109375e-06, "grad_norm": 1.1091893911361694, "learning_rate": 9.994894434842932e-05, "loss": 0.1622, "step": 1910 }, { "epoch": 2.9144287109375e-06, "model_forward_time": 0.024555683135986328, "step": 1910 }, { "epoch": 2.9144287109375e-06, "step": 1910, "training_step_time": 0.16541171073913574 }, { "epoch": 2.91595458984375e-06, "model_forward_time": 0.024396896362304688, "step": 1911 }, { "epoch": 2.91595458984375e-06, "step": 1911, "training_step_time": 0.1871027946472168 }, { "epoch": 2.91748046875e-06, "model_forward_time": 0.024492979049682617, "step": 1912 }, { "epoch": 2.91748046875e-06, "step": 1912, "training_step_time": 0.12774276733398438 }, { "epoch": 2.91900634765625e-06, "model_forward_time": 0.024938344955444336, "step": 1913 }, { "epoch": 2.91900634765625e-06, "step": 1913, "training_step_time": 0.10907268524169922 }, { "epoch": 2.9205322265625e-06, "model_forward_time": 0.025458097457885742, "step": 1914 }, { "epoch": 2.9205322265625e-06, "step": 1914, "training_step_time": 0.12136673927307129 }, { "epoch": 2.92205810546875e-06, "model_forward_time": 0.026766538619995117, "step": 1915 }, { "epoch": 2.92205810546875e-06, "step": 1915, "training_step_time": 0.10941743850708008 }, { "epoch": 2.923583984375e-06, "model_forward_time": 0.02539205551147461, "step": 1916 }, { "epoch": 2.923583984375e-06, "step": 1916, "training_step_time": 0.10824155807495117 }, { "epoch": 2.92510986328125e-06, "model_forward_time": 0.025044918060302734, "step": 1917 }, { "epoch": 2.92510986328125e-06, "step": 1917, "training_step_time": 0.10641813278198242 }, { "epoch": 2.9266357421875e-06, "model_forward_time": 0.026980161666870117, "step": 1918 }, { "epoch": 2.9266357421875e-06, "step": 1918, "training_step_time": 0.1630873680114746 }, { "epoch": 2.92816162109375e-06, "model_forward_time": 0.02554178237915039, "step": 1919 }, { "epoch": 2.92816162109375e-06, "step": 1919, "training_step_time": 0.13785362243652344 }, { "epoch": 2.9296875e-06, "grad_norm": 1.265679955482483, "learning_rate": 9.994642390694308e-05, "loss": 0.1643, "step": 1920 }, { "epoch": 2.9296875e-06, "model_forward_time": 0.02440476417541504, "step": 1920 }, { "epoch": 2.9296875e-06, "step": 1920, "training_step_time": 0.11036562919616699 }, { "epoch": 2.93121337890625e-06, "model_forward_time": 0.025008440017700195, "step": 1921 }, { "epoch": 2.93121337890625e-06, "step": 1921, "training_step_time": 0.11149191856384277 }, { "epoch": 2.9327392578125e-06, "model_forward_time": 0.026048898696899414, "step": 1922 }, { "epoch": 2.9327392578125e-06, "step": 1922, "training_step_time": 0.11435818672180176 }, { "epoch": 2.93426513671875e-06, "model_forward_time": 0.025460243225097656, "step": 1923 }, { "epoch": 2.93426513671875e-06, "step": 1923, "training_step_time": 0.11545991897583008 }, { "epoch": 2.935791015625e-06, "model_forward_time": 0.02558279037475586, "step": 1924 }, { "epoch": 2.935791015625e-06, "step": 1924, "training_step_time": 0.19411754608154297 }, { "epoch": 2.93731689453125e-06, "model_forward_time": 0.02492499351501465, "step": 1925 }, { "epoch": 2.93731689453125e-06, "step": 1925, "training_step_time": 0.11108016967773438 }, { "epoch": 2.9388427734375e-06, "model_forward_time": 0.025048255920410156, "step": 1926 }, { "epoch": 2.9388427734375e-06, "step": 1926, "training_step_time": 0.10763287544250488 }, { "epoch": 2.94036865234375e-06, "model_forward_time": 0.02613210678100586, "step": 1927 }, { "epoch": 2.94036865234375e-06, "step": 1927, "training_step_time": 0.11142778396606445 }, { "epoch": 2.94189453125e-06, "model_forward_time": 0.025040626525878906, "step": 1928 }, { "epoch": 2.94189453125e-06, "step": 1928, "training_step_time": 0.10770320892333984 }, { "epoch": 2.94342041015625e-06, "model_forward_time": 0.025653839111328125, "step": 1929 }, { "epoch": 2.94342041015625e-06, "step": 1929, "training_step_time": 0.10874056816101074 }, { "epoch": 2.9449462890625e-06, "grad_norm": 0.7586075663566589, "learning_rate": 9.994384277584214e-05, "loss": 0.1409, "step": 1930 }, { "epoch": 2.9449462890625e-06, "model_forward_time": 0.02581930160522461, "step": 1930 }, { "epoch": 2.9449462890625e-06, "step": 1930, "training_step_time": 0.11140680313110352 }, { "epoch": 2.94647216796875e-06, "model_forward_time": 0.02597332000732422, "step": 1931 }, { "epoch": 2.94647216796875e-06, "step": 1931, "training_step_time": 0.1695399284362793 }, { "epoch": 2.947998046875e-06, "model_forward_time": 0.025102853775024414, "step": 1932 }, { "epoch": 2.947998046875e-06, "step": 1932, "training_step_time": 0.17125248908996582 }, { "epoch": 2.94952392578125e-06, "model_forward_time": 0.024445056915283203, "step": 1933 }, { "epoch": 2.94952392578125e-06, "step": 1933, "training_step_time": 0.10594320297241211 }, { "epoch": 2.9510498046875e-06, "model_forward_time": 0.024946928024291992, "step": 1934 }, { "epoch": 2.9510498046875e-06, "step": 1934, "training_step_time": 0.10984253883361816 }, { "epoch": 2.95257568359375e-06, "model_forward_time": 0.02544093132019043, "step": 1935 }, { "epoch": 2.95257568359375e-06, "step": 1935, "training_step_time": 0.10640692710876465 }, { "epoch": 2.9541015625e-06, "model_forward_time": 0.025753021240234375, "step": 1936 }, { "epoch": 2.9541015625e-06, "step": 1936, "training_step_time": 0.10672688484191895 }, { "epoch": 2.95562744140625e-06, "model_forward_time": 0.025441884994506836, "step": 1937 }, { "epoch": 2.95562744140625e-06, "step": 1937, "training_step_time": 0.10877275466918945 }, { "epoch": 2.9571533203125e-06, "model_forward_time": 0.025537729263305664, "step": 1938 }, { "epoch": 2.9571533203125e-06, "step": 1938, "training_step_time": 0.11223125457763672 }, { "epoch": 2.95867919921875e-06, "model_forward_time": 0.025737762451171875, "step": 1939 }, { "epoch": 2.95867919921875e-06, "step": 1939, "training_step_time": 0.10566854476928711 }, { "epoch": 2.960205078125e-06, "grad_norm": 1.0724173784255981, "learning_rate": 9.994120095826285e-05, "loss": 0.1453, "step": 1940 }, { "epoch": 2.960205078125e-06, "model_forward_time": 0.02599310874938965, "step": 1940 }, { "epoch": 2.960205078125e-06, "step": 1940, "training_step_time": 0.10955691337585449 }, { "epoch": 2.96173095703125e-06, "model_forward_time": 0.025438308715820312, "step": 1941 }, { "epoch": 2.96173095703125e-06, "step": 1941, "training_step_time": 0.11724257469177246 }, { "epoch": 2.9632568359375e-06, "model_forward_time": 0.02529740333557129, "step": 1942 }, { "epoch": 2.9632568359375e-06, "step": 1942, "training_step_time": 0.1090691089630127 }, { "epoch": 2.96478271484375e-06, "model_forward_time": 0.025452136993408203, "step": 1943 }, { "epoch": 2.96478271484375e-06, "step": 1943, "training_step_time": 0.11310744285583496 }, { "epoch": 2.96630859375e-06, "model_forward_time": 0.02564859390258789, "step": 1944 }, { "epoch": 2.96630859375e-06, "step": 1944, "training_step_time": 0.11342811584472656 }, { "epoch": 2.96783447265625e-06, "model_forward_time": 0.025187015533447266, "step": 1945 }, { "epoch": 2.96783447265625e-06, "step": 1945, "training_step_time": 0.11098694801330566 }, { "epoch": 2.9693603515625e-06, "model_forward_time": 0.02458786964416504, "step": 1946 }, { "epoch": 2.9693603515625e-06, "step": 1946, "training_step_time": 0.11322331428527832 }, { "epoch": 2.97088623046875e-06, "model_forward_time": 0.025740861892700195, "step": 1947 }, { "epoch": 2.97088623046875e-06, "step": 1947, "training_step_time": 0.11025476455688477 }, { "epoch": 2.972412109375e-06, "model_forward_time": 0.026183128356933594, "step": 1948 }, { "epoch": 2.972412109375e-06, "step": 1948, "training_step_time": 0.10956573486328125 }, { "epoch": 2.97393798828125e-06, "model_forward_time": 0.029927492141723633, "step": 1949 }, { "epoch": 2.97393798828125e-06, "step": 1949, "training_step_time": 0.11331057548522949 }, { "epoch": 2.9754638671875e-06, "grad_norm": 0.4724397659301758, "learning_rate": 9.993849845741524e-05, "loss": 0.1095, "step": 1950 }, { "epoch": 2.9754638671875e-06, "model_forward_time": 0.02519536018371582, "step": 1950 }, { "epoch": 2.9754638671875e-06, "step": 1950, "training_step_time": 0.1093137264251709 }, { "epoch": 2.97698974609375e-06, "model_forward_time": 0.025475025177001953, "step": 1951 }, { "epoch": 2.97698974609375e-06, "step": 1951, "training_step_time": 0.10733890533447266 }, { "epoch": 2.978515625e-06, "model_forward_time": 0.025781869888305664, "step": 1952 }, { "epoch": 2.978515625e-06, "step": 1952, "training_step_time": 0.11156868934631348 }, { "epoch": 2.98004150390625e-06, "model_forward_time": 0.025430679321289062, "step": 1953 }, { "epoch": 2.98004150390625e-06, "step": 1953, "training_step_time": 0.10851716995239258 }, { "epoch": 2.9815673828125e-06, "model_forward_time": 0.025382518768310547, "step": 1954 }, { "epoch": 2.9815673828125e-06, "step": 1954, "training_step_time": 0.11156392097473145 }, { "epoch": 2.98309326171875e-06, "model_forward_time": 0.025736570358276367, "step": 1955 }, { "epoch": 2.98309326171875e-06, "step": 1955, "training_step_time": 0.11125874519348145 }, { "epoch": 2.984619140625e-06, "model_forward_time": 0.028539419174194336, "step": 1956 }, { "epoch": 2.984619140625e-06, "step": 1956, "training_step_time": 0.12648630142211914 }, { "epoch": 2.98614501953125e-06, "model_forward_time": 0.02822256088256836, "step": 1957 }, { "epoch": 2.98614501953125e-06, "step": 1957, "training_step_time": 0.14502239227294922 }, { "epoch": 2.9876708984375e-06, "model_forward_time": 0.025331735610961914, "step": 1958 }, { "epoch": 2.9876708984375e-06, "step": 1958, "training_step_time": 0.18198585510253906 }, { "epoch": 2.98919677734375e-06, "model_forward_time": 0.024846315383911133, "step": 1959 }, { "epoch": 2.98919677734375e-06, "step": 1959, "training_step_time": 0.1720259189605713 }, { "epoch": 2.99072265625e-06, "grad_norm": 0.6855494379997253, "learning_rate": 9.99357352765831e-05, "loss": 0.1564, "step": 1960 }, { "epoch": 2.99072265625e-06, "model_forward_time": 0.02470874786376953, "step": 1960 }, { "epoch": 2.99072265625e-06, "step": 1960, "training_step_time": 0.18732357025146484 }, { "epoch": 2.99224853515625e-06, "model_forward_time": 0.024956226348876953, "step": 1961 }, { "epoch": 2.99224853515625e-06, "step": 1961, "training_step_time": 0.1044917106628418 }, { "epoch": 2.9937744140625e-06, "model_forward_time": 0.025068283081054688, "step": 1962 }, { "epoch": 2.9937744140625e-06, "step": 1962, "training_step_time": 0.10672950744628906 }, { "epoch": 2.99530029296875e-06, "model_forward_time": 0.026046276092529297, "step": 1963 }, { "epoch": 2.99530029296875e-06, "step": 1963, "training_step_time": 0.10952329635620117 }, { "epoch": 2.996826171875e-06, "model_forward_time": 0.025975704193115234, "step": 1964 }, { "epoch": 2.996826171875e-06, "step": 1964, "training_step_time": 0.16299128532409668 }, { "epoch": 2.99835205078125e-06, "model_forward_time": 0.025108814239501953, "step": 1965 }, { "epoch": 2.99835205078125e-06, "step": 1965, "training_step_time": 0.13865208625793457 }, { "epoch": 2.9998779296875e-06, "model_forward_time": 0.02524089813232422, "step": 1966 }, { "epoch": 2.9998779296875e-06, "step": 1966, "training_step_time": 0.10971307754516602 }, { "epoch": 3.00140380859375e-06, "model_forward_time": 0.02578449249267578, "step": 1967 }, { "epoch": 3.00140380859375e-06, "step": 1967, "training_step_time": 0.11868000030517578 }, { "epoch": 3.0029296875e-06, "model_forward_time": 0.02603626251220703, "step": 1968 }, { "epoch": 3.0029296875e-06, "step": 1968, "training_step_time": 0.11072802543640137 }, { "epoch": 3.00445556640625e-06, "model_forward_time": 0.02520751953125, "step": 1969 }, { "epoch": 3.00445556640625e-06, "step": 1969, "training_step_time": 0.10694336891174316 }, { "epoch": 3.0059814453125e-06, "grad_norm": 0.6952374577522278, "learning_rate": 9.9932911419124e-05, "loss": 0.1304, "step": 1970 }, { "epoch": 3.0059814453125e-06, "model_forward_time": 0.02581501007080078, "step": 1970 }, { "epoch": 3.0059814453125e-06, "step": 1970, "training_step_time": 0.20280909538269043 }, { "epoch": 3.00750732421875e-06, "model_forward_time": 0.024640560150146484, "step": 1971 }, { "epoch": 3.00750732421875e-06, "step": 1971, "training_step_time": 0.10941433906555176 }, { "epoch": 3.009033203125e-06, "model_forward_time": 0.02518606185913086, "step": 1972 }, { "epoch": 3.009033203125e-06, "step": 1972, "training_step_time": 0.10547113418579102 }, { "epoch": 3.01055908203125e-06, "model_forward_time": 0.02469325065612793, "step": 1973 }, { "epoch": 3.01055908203125e-06, "step": 1973, "training_step_time": 0.10996365547180176 }, { "epoch": 3.0120849609375e-06, "model_forward_time": 0.025483369827270508, "step": 1974 }, { "epoch": 3.0120849609375e-06, "step": 1974, "training_step_time": 0.11454057693481445 }, { "epoch": 3.01361083984375e-06, "model_forward_time": 0.025638580322265625, "step": 1975 }, { "epoch": 3.01361083984375e-06, "step": 1975, "training_step_time": 0.11404204368591309 }, { "epoch": 3.01513671875e-06, "model_forward_time": 0.025010108947753906, "step": 1976 }, { "epoch": 3.01513671875e-06, "step": 1976, "training_step_time": 0.17455482482910156 }, { "epoch": 3.01666259765625e-06, "model_forward_time": 0.024666547775268555, "step": 1977 }, { "epoch": 3.01666259765625e-06, "step": 1977, "training_step_time": 0.20657992362976074 }, { "epoch": 3.0181884765625e-06, "model_forward_time": 0.024562597274780273, "step": 1978 }, { "epoch": 3.0181884765625e-06, "step": 1978, "training_step_time": 0.19892644882202148 }, { "epoch": 3.01971435546875e-06, "model_forward_time": 0.02467060089111328, "step": 1979 }, { "epoch": 3.01971435546875e-06, "step": 1979, "training_step_time": 0.19018864631652832 }, { "epoch": 3.021240234375e-06, "grad_norm": 0.709253191947937, "learning_rate": 9.993002688846913e-05, "loss": 0.1683, "step": 1980 }, { "epoch": 3.021240234375e-06, "model_forward_time": 0.024123430252075195, "step": 1980 }, { "epoch": 3.021240234375e-06, "step": 1980, "training_step_time": 0.17840576171875 }, { "epoch": 3.02276611328125e-06, "model_forward_time": 0.024484872817993164, "step": 1981 }, { "epoch": 3.02276611328125e-06, "step": 1981, "training_step_time": 0.16625404357910156 }, { "epoch": 3.0242919921875e-06, "model_forward_time": 0.02425074577331543, "step": 1982 }, { "epoch": 3.0242919921875e-06, "step": 1982, "training_step_time": 0.11740279197692871 }, { "epoch": 3.02581787109375e-06, "model_forward_time": 0.02485346794128418, "step": 1983 }, { "epoch": 3.02581787109375e-06, "step": 1983, "training_step_time": 0.10535669326782227 }, { "epoch": 3.02734375e-06, "model_forward_time": 0.02521491050720215, "step": 1984 }, { "epoch": 3.02734375e-06, "step": 1984, "training_step_time": 0.10390210151672363 }, { "epoch": 3.02886962890625e-06, "model_forward_time": 0.025643348693847656, "step": 1985 }, { "epoch": 3.02886962890625e-06, "step": 1985, "training_step_time": 0.10459208488464355 }, { "epoch": 3.0303955078125e-06, "model_forward_time": 0.025364398956298828, "step": 1986 }, { "epoch": 3.0303955078125e-06, "step": 1986, "training_step_time": 0.10532951354980469 }, { "epoch": 3.03192138671875e-06, "model_forward_time": 0.025266170501708984, "step": 1987 }, { "epoch": 3.03192138671875e-06, "step": 1987, "training_step_time": 0.10615348815917969 }, { "epoch": 3.033447265625e-06, "model_forward_time": 0.02510976791381836, "step": 1988 }, { "epoch": 3.033447265625e-06, "step": 1988, "training_step_time": 0.11060190200805664 }, { "epoch": 3.03497314453125e-06, "model_forward_time": 0.024952411651611328, "step": 1989 }, { "epoch": 3.03497314453125e-06, "step": 1989, "training_step_time": 0.10471582412719727 }, { "epoch": 3.0364990234375e-06, "grad_norm": 0.5270886421203613, "learning_rate": 9.99270816881235e-05, "loss": 0.1446, "step": 1990 }, { "epoch": 3.0364990234375e-06, "model_forward_time": 0.02549910545349121, "step": 1990 }, { "epoch": 3.0364990234375e-06, "step": 1990, "training_step_time": 0.10418057441711426 }, { "epoch": 3.03802490234375e-06, "model_forward_time": 0.025516748428344727, "step": 1991 }, { "epoch": 3.03802490234375e-06, "step": 1991, "training_step_time": 0.10926461219787598 }, { "epoch": 3.03955078125e-06, "model_forward_time": 0.025645732879638672, "step": 1992 }, { "epoch": 3.03955078125e-06, "step": 1992, "training_step_time": 0.11027145385742188 }, { "epoch": 3.04107666015625e-06, "model_forward_time": 0.025409936904907227, "step": 1993 }, { "epoch": 3.04107666015625e-06, "step": 1993, "training_step_time": 0.10542654991149902 }, { "epoch": 3.0426025390625e-06, "model_forward_time": 0.025529146194458008, "step": 1994 }, { "epoch": 3.0426025390625e-06, "step": 1994, "training_step_time": 0.11611413955688477 }, { "epoch": 3.04412841796875e-06, "model_forward_time": 0.025817394256591797, "step": 1995 }, { "epoch": 3.04412841796875e-06, "step": 1995, "training_step_time": 0.1058652400970459 }, { "epoch": 3.045654296875e-06, "model_forward_time": 0.02559947967529297, "step": 1996 }, { "epoch": 3.045654296875e-06, "step": 1996, "training_step_time": 0.10748600959777832 }, { "epoch": 3.04718017578125e-06, "model_forward_time": 0.025455951690673828, "step": 1997 }, { "epoch": 3.04718017578125e-06, "step": 1997, "training_step_time": 0.12153196334838867 }, { "epoch": 3.0487060546875e-06, "model_forward_time": 0.02539348602294922, "step": 1998 }, { "epoch": 3.0487060546875e-06, "step": 1998, "training_step_time": 0.10736274719238281 }, { "epoch": 3.05023193359375e-06, "model_forward_time": 0.025676488876342773, "step": 1999 }, { "epoch": 3.05023193359375e-06, "step": 1999, "training_step_time": 0.1779797077178955 }, { "epoch": 3.0517578125e-06, "grad_norm": 1.4205913543701172, "learning_rate": 9.992407582166581e-05, "loss": 0.1625, "step": 2000 }, { "epoch": 3.0517578125e-06, "model_forward_time": 0.025179386138916016, "step": 2000 }, { "epoch": 3.0517578125e-06, "step": 2000, "training_step_time": 0.09915947914123535 }, { "epoch": 3.05328369140625e-06, "model_forward_time": 0.02448248863220215, "step": 2001 }, { "epoch": 3.05328369140625e-06, "step": 2001, "training_step_time": 0.09954714775085449 }, { "epoch": 3.0548095703125e-06, "model_forward_time": 0.025098800659179688, "step": 2002 }, { "epoch": 3.0548095703125e-06, "step": 2002, "training_step_time": 0.10394859313964844 }, { "epoch": 3.05633544921875e-06, "model_forward_time": 0.02563309669494629, "step": 2003 }, { "epoch": 3.05633544921875e-06, "step": 2003, "training_step_time": 0.10478830337524414 }, { "epoch": 3.057861328125e-06, "model_forward_time": 0.02525186538696289, "step": 2004 }, { "epoch": 3.057861328125e-06, "step": 2004, "training_step_time": 0.1038506031036377 }, { "epoch": 3.05938720703125e-06, "model_forward_time": 0.02580428123474121, "step": 2005 }, { "epoch": 3.05938720703125e-06, "step": 2005, "training_step_time": 0.10744881629943848 }, { "epoch": 3.0609130859375e-06, "model_forward_time": 0.025240659713745117, "step": 2006 }, { "epoch": 3.0609130859375e-06, "step": 2006, "training_step_time": 0.1900327205657959 }, { "epoch": 3.06243896484375e-06, "model_forward_time": 0.024365901947021484, "step": 2007 }, { "epoch": 3.06243896484375e-06, "step": 2007, "training_step_time": 0.12936973571777344 }, { "epoch": 3.06396484375e-06, "model_forward_time": 0.024620532989501953, "step": 2008 }, { "epoch": 3.06396484375e-06, "step": 2008, "training_step_time": 0.2175137996673584 }, { "epoch": 3.06549072265625e-06, "model_forward_time": 0.023994922637939453, "step": 2009 }, { "epoch": 3.06549072265625e-06, "step": 2009, "training_step_time": 0.14101886749267578 }, { "epoch": 3.0670166015625e-06, "grad_norm": 0.6931583881378174, "learning_rate": 9.992100929274846e-05, "loss": 0.1831, "step": 2010 }, { "epoch": 3.0670166015625e-06, "model_forward_time": 0.02390742301940918, "step": 2010 }, { "epoch": 3.0670166015625e-06, "step": 2010, "training_step_time": 0.1059420108795166 }, { "epoch": 3.06854248046875e-06, "model_forward_time": 0.025151968002319336, "step": 2011 }, { "epoch": 3.06854248046875e-06, "step": 2011, "training_step_time": 0.11233735084533691 }, { "epoch": 3.070068359375e-06, "model_forward_time": 0.025288105010986328, "step": 2012 }, { "epoch": 3.070068359375e-06, "step": 2012, "training_step_time": 0.10818290710449219 }, { "epoch": 3.07159423828125e-06, "model_forward_time": 0.02550530433654785, "step": 2013 }, { "epoch": 3.07159423828125e-06, "step": 2013, "training_step_time": 0.10864925384521484 }, { "epoch": 3.0731201171875e-06, "model_forward_time": 0.025258541107177734, "step": 2014 }, { "epoch": 3.0731201171875e-06, "step": 2014, "training_step_time": 0.10622262954711914 }, { "epoch": 3.07464599609375e-06, "model_forward_time": 0.025121450424194336, "step": 2015 }, { "epoch": 3.07464599609375e-06, "step": 2015, "training_step_time": 0.1245272159576416 }, { "epoch": 3.076171875e-06, "model_forward_time": 0.02507925033569336, "step": 2016 }, { "epoch": 3.076171875e-06, "step": 2016, "training_step_time": 0.14467501640319824 }, { "epoch": 3.07769775390625e-06, "model_forward_time": 0.02483224868774414, "step": 2017 }, { "epoch": 3.07769775390625e-06, "step": 2017, "training_step_time": 0.11435580253601074 }, { "epoch": 3.0792236328125e-06, "model_forward_time": 0.024974822998046875, "step": 2018 }, { "epoch": 3.0792236328125e-06, "step": 2018, "training_step_time": 0.11614990234375 }, { "epoch": 3.08074951171875e-06, "model_forward_time": 0.025118589401245117, "step": 2019 }, { "epoch": 3.08074951171875e-06, "step": 2019, "training_step_time": 0.11707258224487305 }, { "epoch": 3.082275390625e-06, "grad_norm": 0.949308454990387, "learning_rate": 9.991788210509758e-05, "loss": 0.1566, "step": 2020 }, { "epoch": 3.082275390625e-06, "model_forward_time": 0.025453567504882812, "step": 2020 }, { "epoch": 3.082275390625e-06, "step": 2020, "training_step_time": 0.18665218353271484 }, { "epoch": 3.08380126953125e-06, "model_forward_time": 0.0244290828704834, "step": 2021 }, { "epoch": 3.08380126953125e-06, "step": 2021, "training_step_time": 0.11759328842163086 }, { "epoch": 3.0853271484375e-06, "model_forward_time": 0.024521589279174805, "step": 2022 }, { "epoch": 3.0853271484375e-06, "step": 2022, "training_step_time": 0.10910296440124512 }, { "epoch": 3.08685302734375e-06, "model_forward_time": 0.02557659149169922, "step": 2023 }, { "epoch": 3.08685302734375e-06, "step": 2023, "training_step_time": 0.10752582550048828 }, { "epoch": 3.08837890625e-06, "model_forward_time": 0.026337146759033203, "step": 2024 }, { "epoch": 3.08837890625e-06, "step": 2024, "training_step_time": 0.14465785026550293 }, { "epoch": 3.08990478515625e-06, "model_forward_time": 0.02496790885925293, "step": 2025 }, { "epoch": 3.08990478515625e-06, "step": 2025, "training_step_time": 0.10874366760253906 }, { "epoch": 3.0914306640625e-06, "model_forward_time": 0.025317668914794922, "step": 2026 }, { "epoch": 3.0914306640625e-06, "step": 2026, "training_step_time": 0.21014761924743652 }, { "epoch": 3.09295654296875e-06, "model_forward_time": 0.024985790252685547, "step": 2027 }, { "epoch": 3.09295654296875e-06, "step": 2027, "training_step_time": 0.10713934898376465 }, { "epoch": 3.094482421875e-06, "model_forward_time": 0.025104522705078125, "step": 2028 }, { "epoch": 3.094482421875e-06, "step": 2028, "training_step_time": 0.12088894844055176 }, { "epoch": 3.09600830078125e-06, "model_forward_time": 0.02487349510192871, "step": 2029 }, { "epoch": 3.09600830078125e-06, "step": 2029, "training_step_time": 0.11110949516296387 }, { "epoch": 3.0975341796875e-06, "grad_norm": 1.022132396697998, "learning_rate": 9.9914694262513e-05, "loss": 0.121, "step": 2030 }, { "epoch": 3.0975341796875e-06, "model_forward_time": 0.024765491485595703, "step": 2030 }, { "epoch": 3.0975341796875e-06, "step": 2030, "training_step_time": 0.11166810989379883 }, { "epoch": 3.09906005859375e-06, "model_forward_time": 0.02648782730102539, "step": 2031 }, { "epoch": 3.09906005859375e-06, "step": 2031, "training_step_time": 0.11631202697753906 }, { "epoch": 3.1005859375e-06, "model_forward_time": 0.025310754776000977, "step": 2032 }, { "epoch": 3.1005859375e-06, "step": 2032, "training_step_time": 0.11087274551391602 }, { "epoch": 3.10211181640625e-06, "model_forward_time": 0.02505946159362793, "step": 2033 }, { "epoch": 3.10211181640625e-06, "step": 2033, "training_step_time": 0.10766887664794922 }, { "epoch": 3.1036376953125e-06, "model_forward_time": 0.02511882781982422, "step": 2034 }, { "epoch": 3.1036376953125e-06, "step": 2034, "training_step_time": 0.10961437225341797 }, { "epoch": 3.10516357421875e-06, "model_forward_time": 0.025962114334106445, "step": 2035 }, { "epoch": 3.10516357421875e-06, "step": 2035, "training_step_time": 0.1130836009979248 }, { "epoch": 3.106689453125e-06, "model_forward_time": 0.02511763572692871, "step": 2036 }, { "epoch": 3.106689453125e-06, "step": 2036, "training_step_time": 0.11104178428649902 }, { "epoch": 3.10821533203125e-06, "model_forward_time": 0.02573680877685547, "step": 2037 }, { "epoch": 3.10821533203125e-06, "step": 2037, "training_step_time": 0.11015129089355469 }, { "epoch": 3.1097412109375e-06, "model_forward_time": 0.025629520416259766, "step": 2038 }, { "epoch": 3.1097412109375e-06, "step": 2038, "training_step_time": 0.10697746276855469 }, { "epoch": 3.11126708984375e-06, "model_forward_time": 0.025533199310302734, "step": 2039 }, { "epoch": 3.11126708984375e-06, "step": 2039, "training_step_time": 0.11368727684020996 }, { "epoch": 3.11279296875e-06, "grad_norm": 0.8981501460075378, "learning_rate": 9.991144576886823e-05, "loss": 0.15, "step": 2040 }, { "epoch": 3.11279296875e-06, "model_forward_time": 0.024079561233520508, "step": 2040 }, { "epoch": 3.11279296875e-06, "step": 2040, "training_step_time": 0.10770583152770996 }, { "epoch": 3.11431884765625e-06, "model_forward_time": 0.02429342269897461, "step": 2041 }, { "epoch": 3.11431884765625e-06, "step": 2041, "training_step_time": 0.11056280136108398 }, { "epoch": 3.1158447265625e-06, "model_forward_time": 0.025574445724487305, "step": 2042 }, { "epoch": 3.1158447265625e-06, "step": 2042, "training_step_time": 0.10752487182617188 }, { "epoch": 3.11737060546875e-06, "model_forward_time": 0.025130033493041992, "step": 2043 }, { "epoch": 3.11737060546875e-06, "step": 2043, "training_step_time": 0.11660265922546387 }, { "epoch": 3.118896484375e-06, "model_forward_time": 0.025330781936645508, "step": 2044 }, { "epoch": 3.118896484375e-06, "step": 2044, "training_step_time": 0.11204743385314941 }, { "epoch": 3.12042236328125e-06, "model_forward_time": 0.025309324264526367, "step": 2045 }, { "epoch": 3.12042236328125e-06, "step": 2045, "training_step_time": 0.10686254501342773 }, { "epoch": 3.1219482421875e-06, "model_forward_time": 0.025354862213134766, "step": 2046 }, { "epoch": 3.1219482421875e-06, "step": 2046, "training_step_time": 0.11196517944335938 }, { "epoch": 3.12347412109375e-06, "model_forward_time": 0.025183916091918945, "step": 2047 }, { "epoch": 3.12347412109375e-06, "step": 2047, "training_step_time": 0.1076822280883789 }, { "epoch": 3.125e-06, "model_forward_time": 0.024935245513916016, "step": 2048 }, { "epoch": 3.125e-06, "step": 2048, "training_step_time": 0.10955548286437988 }, { "epoch": 3.12652587890625e-06, "model_forward_time": 0.025295019149780273, "step": 2049 }, { "epoch": 3.12652587890625e-06, "step": 2049, "training_step_time": 0.11477971076965332 }, { "epoch": 3.1280517578125e-06, "grad_norm": 0.6280609965324402, "learning_rate": 9.990813662811051e-05, "loss": 0.1311, "step": 2050 }, { "epoch": 3.1280517578125e-06, "model_forward_time": 0.025088787078857422, "step": 2050 }, { "epoch": 3.1280517578125e-06, "step": 2050, "training_step_time": 0.18810796737670898 }, { "epoch": 3.12957763671875e-06, "model_forward_time": 0.025364160537719727, "step": 2051 }, { "epoch": 3.12957763671875e-06, "step": 2051, "training_step_time": 0.1219489574432373 }, { "epoch": 3.131103515625e-06, "model_forward_time": 0.02609705924987793, "step": 2052 }, { "epoch": 3.131103515625e-06, "step": 2052, "training_step_time": 0.17927050590515137 }, { "epoch": 3.13262939453125e-06, "model_forward_time": 0.024739742279052734, "step": 2053 }, { "epoch": 3.13262939453125e-06, "step": 2053, "training_step_time": 0.11631250381469727 }, { "epoch": 3.1341552734375e-06, "model_forward_time": 0.025247812271118164, "step": 2054 }, { "epoch": 3.1341552734375e-06, "step": 2054, "training_step_time": 0.21597838401794434 }, { "epoch": 3.13568115234375e-06, "model_forward_time": 0.0245513916015625, "step": 2055 }, { "epoch": 3.13568115234375e-06, "step": 2055, "training_step_time": 0.1456131935119629 }, { "epoch": 3.13720703125e-06, "model_forward_time": 0.025038719177246094, "step": 2056 }, { "epoch": 3.13720703125e-06, "step": 2056, "training_step_time": 0.10952234268188477 }, { "epoch": 3.13873291015625e-06, "model_forward_time": 0.025162458419799805, "step": 2057 }, { "epoch": 3.13873291015625e-06, "step": 2057, "training_step_time": 0.11988949775695801 }, { "epoch": 3.1402587890625e-06, "model_forward_time": 0.02513861656188965, "step": 2058 }, { "epoch": 3.1402587890625e-06, "step": 2058, "training_step_time": 0.10820245742797852 }, { "epoch": 3.14178466796875e-06, "model_forward_time": 0.025768280029296875, "step": 2059 }, { "epoch": 3.14178466796875e-06, "step": 2059, "training_step_time": 0.10930800437927246 }, { "epoch": 3.143310546875e-06, "grad_norm": 0.7524927258491516, "learning_rate": 9.990476684426075e-05, "loss": 0.1324, "step": 2060 }, { "epoch": 3.143310546875e-06, "model_forward_time": 0.0254976749420166, "step": 2060 }, { "epoch": 3.143310546875e-06, "step": 2060, "training_step_time": 0.1744976043701172 }, { "epoch": 3.14483642578125e-06, "model_forward_time": 0.02460646629333496, "step": 2061 }, { "epoch": 3.14483642578125e-06, "step": 2061, "training_step_time": 0.14745163917541504 }, { "epoch": 3.1463623046875e-06, "model_forward_time": 0.024044275283813477, "step": 2062 }, { "epoch": 3.1463623046875e-06, "step": 2062, "training_step_time": 0.10992836952209473 }, { "epoch": 3.14788818359375e-06, "model_forward_time": 0.025064945220947266, "step": 2063 }, { "epoch": 3.14788818359375e-06, "step": 2063, "training_step_time": 0.10988306999206543 }, { "epoch": 3.1494140625e-06, "model_forward_time": 0.025089502334594727, "step": 2064 }, { "epoch": 3.1494140625e-06, "step": 2064, "training_step_time": 0.11190271377563477 }, { "epoch": 3.15093994140625e-06, "model_forward_time": 0.0263669490814209, "step": 2065 }, { "epoch": 3.15093994140625e-06, "step": 2065, "training_step_time": 0.160369873046875 }, { "epoch": 3.1524658203125e-06, "model_forward_time": 0.024605512619018555, "step": 2066 }, { "epoch": 3.1524658203125e-06, "step": 2066, "training_step_time": 0.15439772605895996 }, { "epoch": 3.15399169921875e-06, "model_forward_time": 0.02458500862121582, "step": 2067 }, { "epoch": 3.15399169921875e-06, "step": 2067, "training_step_time": 0.10489153861999512 }, { "epoch": 3.155517578125e-06, "model_forward_time": 0.02501201629638672, "step": 2068 }, { "epoch": 3.155517578125e-06, "step": 2068, "training_step_time": 0.10581231117248535 }, { "epoch": 3.15704345703125e-06, "model_forward_time": 0.02628636360168457, "step": 2069 }, { "epoch": 3.15704345703125e-06, "step": 2069, "training_step_time": 0.10709309577941895 }, { "epoch": 3.1585693359375e-06, "grad_norm": 0.6197975277900696, "learning_rate": 9.990133642141359e-05, "loss": 0.1219, "step": 2070 }, { "epoch": 3.1585693359375e-06, "model_forward_time": 0.025173664093017578, "step": 2070 }, { "epoch": 3.1585693359375e-06, "step": 2070, "training_step_time": 0.11158990859985352 }, { "epoch": 3.16009521484375e-06, "model_forward_time": 0.02557826042175293, "step": 2071 }, { "epoch": 3.16009521484375e-06, "step": 2071, "training_step_time": 0.12154388427734375 }, { "epoch": 3.16162109375e-06, "model_forward_time": 0.028257131576538086, "step": 2072 }, { "epoch": 3.16162109375e-06, "step": 2072, "training_step_time": 0.11698007583618164 }, { "epoch": 3.16314697265625e-06, "model_forward_time": 0.02584528923034668, "step": 2073 }, { "epoch": 3.16314697265625e-06, "step": 2073, "training_step_time": 0.11852598190307617 }, { "epoch": 3.1646728515625e-06, "model_forward_time": 0.025632619857788086, "step": 2074 }, { "epoch": 3.1646728515625e-06, "step": 2074, "training_step_time": 0.10630321502685547 }, { "epoch": 3.16619873046875e-06, "model_forward_time": 0.02563166618347168, "step": 2075 }, { "epoch": 3.16619873046875e-06, "step": 2075, "training_step_time": 0.11893653869628906 }, { "epoch": 3.167724609375e-06, "model_forward_time": 0.02534317970275879, "step": 2076 }, { "epoch": 3.167724609375e-06, "step": 2076, "training_step_time": 0.14342761039733887 }, { "epoch": 3.16925048828125e-06, "model_forward_time": 0.024918079376220703, "step": 2077 }, { "epoch": 3.16925048828125e-06, "step": 2077, "training_step_time": 0.1947317123413086 }, { "epoch": 3.1707763671875e-06, "model_forward_time": 0.02443838119506836, "step": 2078 }, { "epoch": 3.1707763671875e-06, "step": 2078, "training_step_time": 0.1858081817626953 }, { "epoch": 3.17230224609375e-06, "model_forward_time": 0.02475762367248535, "step": 2079 }, { "epoch": 3.17230224609375e-06, "step": 2079, "training_step_time": 0.1803886890411377 }, { "epoch": 3.173828125e-06, "grad_norm": 0.713837742805481, "learning_rate": 9.989784536373726e-05, "loss": 0.1199, "step": 2080 }, { "epoch": 3.173828125e-06, "model_forward_time": 0.02427363395690918, "step": 2080 }, { "epoch": 3.173828125e-06, "step": 2080, "training_step_time": 0.16886472702026367 }, { "epoch": 3.17535400390625e-06, "model_forward_time": 0.02570343017578125, "step": 2081 }, { "epoch": 3.17535400390625e-06, "step": 2081, "training_step_time": 0.14660906791687012 }, { "epoch": 3.1768798828125e-06, "model_forward_time": 0.024727344512939453, "step": 2082 }, { "epoch": 3.1768798828125e-06, "step": 2082, "training_step_time": 0.13730072975158691 }, { "epoch": 3.17840576171875e-06, "model_forward_time": 0.024831056594848633, "step": 2083 }, { "epoch": 3.17840576171875e-06, "step": 2083, "training_step_time": 0.13072848320007324 }, { "epoch": 3.179931640625e-06, "model_forward_time": 0.024785757064819336, "step": 2084 }, { "epoch": 3.179931640625e-06, "step": 2084, "training_step_time": 0.12457966804504395 }, { "epoch": 3.18145751953125e-06, "model_forward_time": 0.030757427215576172, "step": 2085 }, { "epoch": 3.18145751953125e-06, "step": 2085, "training_step_time": 0.12244343757629395 }, { "epoch": 3.1829833984375e-06, "model_forward_time": 0.025048494338989258, "step": 2086 }, { "epoch": 3.1829833984375e-06, "step": 2086, "training_step_time": 0.11066579818725586 }, { "epoch": 3.18450927734375e-06, "model_forward_time": 0.025244951248168945, "step": 2087 }, { "epoch": 3.18450927734375e-06, "step": 2087, "training_step_time": 0.10466861724853516 }, { "epoch": 3.18603515625e-06, "model_forward_time": 0.025649547576904297, "step": 2088 }, { "epoch": 3.18603515625e-06, "step": 2088, "training_step_time": 0.10782051086425781 }, { "epoch": 3.18756103515625e-06, "model_forward_time": 0.025396108627319336, "step": 2089 }, { "epoch": 3.18756103515625e-06, "step": 2089, "training_step_time": 0.10519099235534668 }, { "epoch": 3.1890869140625e-06, "grad_norm": 0.7144132256507874, "learning_rate": 9.989429367547377e-05, "loss": 0.1204, "step": 2090 }, { "epoch": 3.1890869140625e-06, "model_forward_time": 0.025180578231811523, "step": 2090 }, { "epoch": 3.1890869140625e-06, "step": 2090, "training_step_time": 0.10965895652770996 }, { "epoch": 3.19061279296875e-06, "model_forward_time": 0.025431394577026367, "step": 2091 }, { "epoch": 3.19061279296875e-06, "step": 2091, "training_step_time": 0.10565376281738281 }, { "epoch": 3.192138671875e-06, "model_forward_time": 0.025173664093017578, "step": 2092 }, { "epoch": 3.192138671875e-06, "step": 2092, "training_step_time": 0.10548543930053711 }, { "epoch": 3.19366455078125e-06, "model_forward_time": 0.025699138641357422, "step": 2093 }, { "epoch": 3.19366455078125e-06, "step": 2093, "training_step_time": 0.17055177688598633 }, { "epoch": 3.1951904296875e-06, "model_forward_time": 0.025228261947631836, "step": 2094 }, { "epoch": 3.1951904296875e-06, "step": 2094, "training_step_time": 0.12229347229003906 }, { "epoch": 3.19671630859375e-06, "model_forward_time": 0.0248258113861084, "step": 2095 }, { "epoch": 3.19671630859375e-06, "step": 2095, "training_step_time": 0.12253737449645996 }, { "epoch": 3.1982421875e-06, "model_forward_time": 0.0254518985748291, "step": 2096 }, { "epoch": 3.1982421875e-06, "step": 2096, "training_step_time": 0.11786389350891113 }, { "epoch": 3.19976806640625e-06, "model_forward_time": 0.025361061096191406, "step": 2097 }, { "epoch": 3.19976806640625e-06, "step": 2097, "training_step_time": 0.11223268508911133 }, { "epoch": 3.2012939453125e-06, "model_forward_time": 0.025323152542114258, "step": 2098 }, { "epoch": 3.2012939453125e-06, "step": 2098, "training_step_time": 0.10807538032531738 }, { "epoch": 3.20281982421875e-06, "model_forward_time": 0.02576756477355957, "step": 2099 }, { "epoch": 3.20281982421875e-06, "step": 2099, "training_step_time": 0.12779521942138672 }, { "epoch": 3.204345703125e-06, "grad_norm": 0.6506755352020264, "learning_rate": 9.989068136093873e-05, "loss": 0.1189, "step": 2100 }, { "epoch": 3.204345703125e-06, "model_forward_time": 0.02525496482849121, "step": 2100 }, { "epoch": 3.204345703125e-06, "step": 2100, "training_step_time": 0.12012076377868652 }, { "epoch": 3.20587158203125e-06, "model_forward_time": 0.026320219039916992, "step": 2101 }, { "epoch": 3.20587158203125e-06, "step": 2101, "training_step_time": 0.11673259735107422 }, { "epoch": 3.2073974609375e-06, "model_forward_time": 0.02611851692199707, "step": 2102 }, { "epoch": 3.2073974609375e-06, "step": 2102, "training_step_time": 0.10762786865234375 }, { "epoch": 3.20892333984375e-06, "model_forward_time": 0.02554607391357422, "step": 2103 }, { "epoch": 3.20892333984375e-06, "step": 2103, "training_step_time": 0.10594296455383301 }, { "epoch": 3.21044921875e-06, "model_forward_time": 0.02533411979675293, "step": 2104 }, { "epoch": 3.21044921875e-06, "step": 2104, "training_step_time": 0.10741949081420898 }, { "epoch": 3.21197509765625e-06, "model_forward_time": 0.02526402473449707, "step": 2105 }, { "epoch": 3.21197509765625e-06, "step": 2105, "training_step_time": 0.14597535133361816 }, { "epoch": 3.2135009765625e-06, "model_forward_time": 0.026140213012695312, "step": 2106 }, { "epoch": 3.2135009765625e-06, "step": 2106, "training_step_time": 0.14003515243530273 }, { "epoch": 3.21502685546875e-06, "model_forward_time": 0.02495431900024414, "step": 2107 }, { "epoch": 3.21502685546875e-06, "step": 2107, "training_step_time": 0.10941839218139648 }, { "epoch": 3.216552734375e-06, "model_forward_time": 0.02492833137512207, "step": 2108 }, { "epoch": 3.216552734375e-06, "step": 2108, "training_step_time": 0.10828232765197754 }, { "epoch": 3.21807861328125e-06, "model_forward_time": 0.02807450294494629, "step": 2109 }, { "epoch": 3.21807861328125e-06, "step": 2109, "training_step_time": 0.11664819717407227 }, { "epoch": 3.2196044921875e-06, "grad_norm": 0.6079724431037903, "learning_rate": 9.988700842452146e-05, "loss": 0.1273, "step": 2110 }, { "epoch": 3.2196044921875e-06, "model_forward_time": 0.025449514389038086, "step": 2110 }, { "epoch": 3.2196044921875e-06, "step": 2110, "training_step_time": 0.10712432861328125 }, { "epoch": 3.22113037109375e-06, "model_forward_time": 0.025594711303710938, "step": 2111 }, { "epoch": 3.22113037109375e-06, "step": 2111, "training_step_time": 0.20910263061523438 }, { "epoch": 3.22265625e-06, "model_forward_time": 0.028296947479248047, "step": 2112 }, { "epoch": 3.22265625e-06, "step": 2112, "training_step_time": 0.10878992080688477 }, { "epoch": 3.22418212890625e-06, "model_forward_time": 0.024677515029907227, "step": 2113 }, { "epoch": 3.22418212890625e-06, "step": 2113, "training_step_time": 0.10692906379699707 }, { "epoch": 3.2257080078125e-06, "model_forward_time": 0.025416851043701172, "step": 2114 }, { "epoch": 3.2257080078125e-06, "step": 2114, "training_step_time": 0.10677242279052734 }, { "epoch": 3.22723388671875e-06, "model_forward_time": 0.025374174118041992, "step": 2115 }, { "epoch": 3.22723388671875e-06, "step": 2115, "training_step_time": 0.10704207420349121 }, { "epoch": 3.228759765625e-06, "model_forward_time": 0.02539515495300293, "step": 2116 }, { "epoch": 3.228759765625e-06, "step": 2116, "training_step_time": 0.11344528198242188 }, { "epoch": 3.23028564453125e-06, "model_forward_time": 0.026009559631347656, "step": 2117 }, { "epoch": 3.23028564453125e-06, "step": 2117, "training_step_time": 0.11017346382141113 }, { "epoch": 3.2318115234375e-06, "model_forward_time": 0.02544403076171875, "step": 2118 }, { "epoch": 3.2318115234375e-06, "step": 2118, "training_step_time": 0.1189274787902832 }, { "epoch": 3.23333740234375e-06, "model_forward_time": 0.025483369827270508, "step": 2119 }, { "epoch": 3.23333740234375e-06, "step": 2119, "training_step_time": 0.21869158744812012 }, { "epoch": 3.23486328125e-06, "grad_norm": 0.6594318151473999, "learning_rate": 9.988327487068492e-05, "loss": 0.1457, "step": 2120 }, { "epoch": 3.23486328125e-06, "model_forward_time": 0.024870634078979492, "step": 2120 }, { "epoch": 3.23486328125e-06, "step": 2120, "training_step_time": 0.11153578758239746 }, { "epoch": 3.23638916015625e-06, "model_forward_time": 0.02471184730529785, "step": 2121 }, { "epoch": 3.23638916015625e-06, "step": 2121, "training_step_time": 0.11233401298522949 }, { "epoch": 3.2379150390625e-06, "model_forward_time": 0.025632143020629883, "step": 2122 }, { "epoch": 3.2379150390625e-06, "step": 2122, "training_step_time": 0.1072697639465332 }, { "epoch": 3.23944091796875e-06, "model_forward_time": 0.025048255920410156, "step": 2123 }, { "epoch": 3.23944091796875e-06, "step": 2123, "training_step_time": 0.10773730278015137 }, { "epoch": 3.240966796875e-06, "model_forward_time": 0.025753021240234375, "step": 2124 }, { "epoch": 3.240966796875e-06, "step": 2124, "training_step_time": 0.10922646522521973 }, { "epoch": 3.24249267578125e-06, "model_forward_time": 0.025677919387817383, "step": 2125 }, { "epoch": 3.24249267578125e-06, "step": 2125, "training_step_time": 0.10572957992553711 }, { "epoch": 3.2440185546875e-06, "model_forward_time": 0.025423526763916016, "step": 2126 }, { "epoch": 3.2440185546875e-06, "step": 2126, "training_step_time": 0.10510468482971191 }, { "epoch": 3.24554443359375e-06, "model_forward_time": 0.026118040084838867, "step": 2127 }, { "epoch": 3.24554443359375e-06, "step": 2127, "training_step_time": 0.11398768424987793 }, { "epoch": 3.2470703125e-06, "model_forward_time": 0.02547907829284668, "step": 2128 }, { "epoch": 3.2470703125e-06, "step": 2128, "training_step_time": 0.18561720848083496 }, { "epoch": 3.24859619140625e-06, "model_forward_time": 0.0248110294342041, "step": 2129 }, { "epoch": 3.24859619140625e-06, "step": 2129, "training_step_time": 0.20724749565124512 }, { "epoch": 3.2501220703125e-06, "grad_norm": 0.624142050743103, "learning_rate": 9.987948070396571e-05, "loss": 0.1262, "step": 2130 }, { "epoch": 3.2501220703125e-06, "model_forward_time": 0.024610519409179688, "step": 2130 }, { "epoch": 3.2501220703125e-06, "step": 2130, "training_step_time": 0.2136697769165039 }, { "epoch": 3.25164794921875e-06, "model_forward_time": 0.024919986724853516, "step": 2131 }, { "epoch": 3.25164794921875e-06, "step": 2131, "training_step_time": 0.21112704277038574 }, { "epoch": 3.253173828125e-06, "model_forward_time": 0.024692058563232422, "step": 2132 }, { "epoch": 3.253173828125e-06, "step": 2132, "training_step_time": 0.2121884822845459 }, { "epoch": 3.25469970703125e-06, "model_forward_time": 0.02683401107788086, "step": 2133 }, { "epoch": 3.25469970703125e-06, "step": 2133, "training_step_time": 0.21092891693115234 }, { "epoch": 3.2562255859375e-06, "model_forward_time": 0.024557113647460938, "step": 2134 }, { "epoch": 3.2562255859375e-06, "step": 2134, "training_step_time": 0.20020246505737305 }, { "epoch": 3.25775146484375e-06, "model_forward_time": 0.024295568466186523, "step": 2135 }, { "epoch": 3.25775146484375e-06, "step": 2135, "training_step_time": 0.16309142112731934 }, { "epoch": 3.25927734375e-06, "model_forward_time": 0.024413585662841797, "step": 2136 }, { "epoch": 3.25927734375e-06, "step": 2136, "training_step_time": 0.19826674461364746 }, { "epoch": 3.26080322265625e-06, "model_forward_time": 0.024466514587402344, "step": 2137 }, { "epoch": 3.26080322265625e-06, "step": 2137, "training_step_time": 0.13530588150024414 }, { "epoch": 3.2623291015625e-06, "model_forward_time": 0.024679183959960938, "step": 2138 }, { "epoch": 3.2623291015625e-06, "step": 2138, "training_step_time": 0.18079113960266113 }, { "epoch": 3.26385498046875e-06, "model_forward_time": 0.024440288543701172, "step": 2139 }, { "epoch": 3.26385498046875e-06, "step": 2139, "training_step_time": 0.11630678176879883 }, { "epoch": 3.265380859375e-06, "grad_norm": 0.6571296453475952, "learning_rate": 9.987562592897413e-05, "loss": 0.1203, "step": 2140 }, { "epoch": 3.265380859375e-06, "model_forward_time": 0.024141550064086914, "step": 2140 }, { "epoch": 3.265380859375e-06, "step": 2140, "training_step_time": 0.11722898483276367 }, { "epoch": 3.26690673828125e-06, "model_forward_time": 0.025452136993408203, "step": 2141 }, { "epoch": 3.26690673828125e-06, "step": 2141, "training_step_time": 0.12760114669799805 }, { "epoch": 3.2684326171875e-06, "model_forward_time": 0.025402069091796875, "step": 2142 }, { "epoch": 3.2684326171875e-06, "step": 2142, "training_step_time": 0.11624526977539062 }, { "epoch": 3.26995849609375e-06, "model_forward_time": 0.025295257568359375, "step": 2143 }, { "epoch": 3.26995849609375e-06, "step": 2143, "training_step_time": 0.12904644012451172 }, { "epoch": 3.271484375e-06, "model_forward_time": 0.024993181228637695, "step": 2144 }, { "epoch": 3.271484375e-06, "step": 2144, "training_step_time": 0.12623310089111328 }, { "epoch": 3.27301025390625e-06, "model_forward_time": 0.024932146072387695, "step": 2145 }, { "epoch": 3.27301025390625e-06, "step": 2145, "training_step_time": 0.20032072067260742 }, { "epoch": 3.2745361328125e-06, "model_forward_time": 0.024401426315307617, "step": 2146 }, { "epoch": 3.2745361328125e-06, "step": 2146, "training_step_time": 0.13790631294250488 }, { "epoch": 3.27606201171875e-06, "model_forward_time": 0.024456501007080078, "step": 2147 }, { "epoch": 3.27606201171875e-06, "step": 2147, "training_step_time": 0.12216615676879883 }, { "epoch": 3.277587890625e-06, "model_forward_time": 0.02521038055419922, "step": 2148 }, { "epoch": 3.277587890625e-06, "step": 2148, "training_step_time": 0.1178901195526123 }, { "epoch": 3.27911376953125e-06, "model_forward_time": 0.02555704116821289, "step": 2149 }, { "epoch": 3.27911376953125e-06, "step": 2149, "training_step_time": 0.11796283721923828 }, { "epoch": 3.2806396484375e-06, "grad_norm": 0.9658941626548767, "learning_rate": 9.987171055039408e-05, "loss": 0.175, "step": 2150 }, { "epoch": 3.2806396484375e-06, "model_forward_time": 0.025292634963989258, "step": 2150 }, { "epoch": 3.2806396484375e-06, "step": 2150, "training_step_time": 0.17586350440979004 }, { "epoch": 3.28216552734375e-06, "model_forward_time": 0.02443861961364746, "step": 2151 }, { "epoch": 3.28216552734375e-06, "step": 2151, "training_step_time": 0.11321043968200684 }, { "epoch": 3.28369140625e-06, "model_forward_time": 0.025038719177246094, "step": 2152 }, { "epoch": 3.28369140625e-06, "step": 2152, "training_step_time": 0.10466170310974121 }, { "epoch": 3.28521728515625e-06, "model_forward_time": 0.025259971618652344, "step": 2153 }, { "epoch": 3.28521728515625e-06, "step": 2153, "training_step_time": 0.10571503639221191 }, { "epoch": 3.2867431640625e-06, "model_forward_time": 0.025088071823120117, "step": 2154 }, { "epoch": 3.2867431640625e-06, "step": 2154, "training_step_time": 0.10432124137878418 }, { "epoch": 3.28826904296875e-06, "model_forward_time": 0.02495741844177246, "step": 2155 }, { "epoch": 3.28826904296875e-06, "step": 2155, "training_step_time": 0.11027741432189941 }, { "epoch": 3.289794921875e-06, "model_forward_time": 0.02582573890686035, "step": 2156 }, { "epoch": 3.289794921875e-06, "step": 2156, "training_step_time": 0.11111211776733398 }, { "epoch": 3.29132080078125e-06, "model_forward_time": 0.025462627410888672, "step": 2157 }, { "epoch": 3.29132080078125e-06, "step": 2157, "training_step_time": 0.11565208435058594 }, { "epoch": 3.2928466796875e-06, "model_forward_time": 0.025603294372558594, "step": 2158 }, { "epoch": 3.2928466796875e-06, "step": 2158, "training_step_time": 0.11037921905517578 }, { "epoch": 3.29437255859375e-06, "model_forward_time": 0.025257587432861328, "step": 2159 }, { "epoch": 3.29437255859375e-06, "step": 2159, "training_step_time": 0.17060470581054688 }, { "epoch": 3.2958984375e-06, "grad_norm": 0.8946394324302673, "learning_rate": 9.986773457298311e-05, "loss": 0.1522, "step": 2160 }, { "epoch": 3.2958984375e-06, "model_forward_time": 0.024634361267089844, "step": 2160 }, { "epoch": 3.2958984375e-06, "step": 2160, "training_step_time": 0.16210556030273438 }, { "epoch": 3.29742431640625e-06, "model_forward_time": 0.025043725967407227, "step": 2161 }, { "epoch": 3.29742431640625e-06, "step": 2161, "training_step_time": 0.10389494895935059 }, { "epoch": 3.2989501953125e-06, "model_forward_time": 0.025413990020751953, "step": 2162 }, { "epoch": 3.2989501953125e-06, "step": 2162, "training_step_time": 0.10532212257385254 }, { "epoch": 3.30047607421875e-06, "model_forward_time": 0.025616168975830078, "step": 2163 }, { "epoch": 3.30047607421875e-06, "step": 2163, "training_step_time": 0.10789036750793457 }, { "epoch": 3.302001953125e-06, "model_forward_time": 0.02506113052368164, "step": 2164 }, { "epoch": 3.302001953125e-06, "step": 2164, "training_step_time": 0.10484933853149414 }, { "epoch": 3.30352783203125e-06, "model_forward_time": 0.025386333465576172, "step": 2165 }, { "epoch": 3.30352783203125e-06, "step": 2165, "training_step_time": 0.10552430152893066 }, { "epoch": 3.3050537109375e-06, "model_forward_time": 0.025143146514892578, "step": 2166 }, { "epoch": 3.3050537109375e-06, "step": 2166, "training_step_time": 0.10491585731506348 }, { "epoch": 3.30657958984375e-06, "model_forward_time": 0.024997711181640625, "step": 2167 }, { "epoch": 3.30657958984375e-06, "step": 2167, "training_step_time": 0.10502815246582031 }, { "epoch": 3.30810546875e-06, "model_forward_time": 0.025150299072265625, "step": 2168 }, { "epoch": 3.30810546875e-06, "step": 2168, "training_step_time": 0.1053466796875 }, { "epoch": 3.30963134765625e-06, "model_forward_time": 0.027341842651367188, "step": 2169 }, { "epoch": 3.30963134765625e-06, "step": 2169, "training_step_time": 0.10728335380554199 }, { "epoch": 3.3111572265625e-06, "grad_norm": 0.7859796285629272, "learning_rate": 9.986369800157242e-05, "loss": 0.1524, "step": 2170 }, { "epoch": 3.3111572265625e-06, "model_forward_time": 0.025215625762939453, "step": 2170 }, { "epoch": 3.3111572265625e-06, "step": 2170, "training_step_time": 0.10550832748413086 }, { "epoch": 3.31268310546875e-06, "model_forward_time": 0.025255680084228516, "step": 2171 }, { "epoch": 3.31268310546875e-06, "step": 2171, "training_step_time": 0.10847997665405273 }, { "epoch": 3.314208984375e-06, "model_forward_time": 0.025992393493652344, "step": 2172 }, { "epoch": 3.314208984375e-06, "step": 2172, "training_step_time": 0.10549521446228027 }, { "epoch": 3.31573486328125e-06, "model_forward_time": 0.025362014770507812, "step": 2173 }, { "epoch": 3.31573486328125e-06, "step": 2173, "training_step_time": 0.10432863235473633 }, { "epoch": 3.3172607421875e-06, "model_forward_time": 0.025313854217529297, "step": 2174 }, { "epoch": 3.3172607421875e-06, "step": 2174, "training_step_time": 0.10533928871154785 }, { "epoch": 3.31878662109375e-06, "model_forward_time": 0.025109052658081055, "step": 2175 }, { "epoch": 3.31878662109375e-06, "step": 2175, "training_step_time": 0.10478687286376953 }, { "epoch": 3.3203125e-06, "model_forward_time": 0.025460243225097656, "step": 2176 }, { "epoch": 3.3203125e-06, "step": 2176, "training_step_time": 0.10328149795532227 }, { "epoch": 3.32183837890625e-06, "model_forward_time": 0.025415420532226562, "step": 2177 }, { "epoch": 3.32183837890625e-06, "step": 2177, "training_step_time": 0.10247683525085449 }, { "epoch": 3.3233642578125e-06, "model_forward_time": 0.02520132064819336, "step": 2178 }, { "epoch": 3.3233642578125e-06, "step": 2178, "training_step_time": 0.10479617118835449 }, { "epoch": 3.32489013671875e-06, "model_forward_time": 0.025570154190063477, "step": 2179 }, { "epoch": 3.32489013671875e-06, "step": 2179, "training_step_time": 0.10556745529174805 }, { "epoch": 3.326416015625e-06, "grad_norm": 0.827404797077179, "learning_rate": 9.985960084106682e-05, "loss": 0.1413, "step": 2180 }, { "epoch": 3.326416015625e-06, "model_forward_time": 0.027471542358398438, "step": 2180 }, { "epoch": 3.326416015625e-06, "step": 2180, "training_step_time": 0.10951590538024902 }, { "epoch": 3.32794189453125e-06, "model_forward_time": 0.026354551315307617, "step": 2181 }, { "epoch": 3.32794189453125e-06, "step": 2181, "training_step_time": 0.10940313339233398 }, { "epoch": 3.3294677734375e-06, "model_forward_time": 0.025063514709472656, "step": 2182 }, { "epoch": 3.3294677734375e-06, "step": 2182, "training_step_time": 0.13629531860351562 }, { "epoch": 3.33099365234375e-06, "model_forward_time": 0.025477170944213867, "step": 2183 }, { "epoch": 3.33099365234375e-06, "step": 2183, "training_step_time": 0.1177668571472168 }, { "epoch": 3.33251953125e-06, "model_forward_time": 0.025032997131347656, "step": 2184 }, { "epoch": 3.33251953125e-06, "step": 2184, "training_step_time": 0.20606446266174316 }, { "epoch": 3.33404541015625e-06, "model_forward_time": 0.02445077896118164, "step": 2185 }, { "epoch": 3.33404541015625e-06, "step": 2185, "training_step_time": 0.13168716430664062 }, { "epoch": 3.3355712890625e-06, "model_forward_time": 0.02429485321044922, "step": 2186 }, { "epoch": 3.3355712890625e-06, "step": 2186, "training_step_time": 0.22959256172180176 }, { "epoch": 3.33709716796875e-06, "model_forward_time": 0.024776935577392578, "step": 2187 }, { "epoch": 3.33709716796875e-06, "step": 2187, "training_step_time": 0.12503457069396973 }, { "epoch": 3.338623046875e-06, "model_forward_time": 0.02451467514038086, "step": 2188 }, { "epoch": 3.338623046875e-06, "step": 2188, "training_step_time": 0.12317419052124023 }, { "epoch": 3.34014892578125e-06, "model_forward_time": 0.02436065673828125, "step": 2189 }, { "epoch": 3.34014892578125e-06, "step": 2189, "training_step_time": 0.1925981044769287 }, { "epoch": 3.3416748046875e-06, "grad_norm": 0.7417447566986084, "learning_rate": 9.985544309644475e-05, "loss": 0.1272, "step": 2190 }, { "epoch": 3.3416748046875e-06, "model_forward_time": 0.024759769439697266, "step": 2190 }, { "epoch": 3.3416748046875e-06, "step": 2190, "training_step_time": 0.10996413230895996 }, { "epoch": 3.34320068359375e-06, "model_forward_time": 0.02462005615234375, "step": 2191 }, { "epoch": 3.34320068359375e-06, "step": 2191, "training_step_time": 0.1260089874267578 }, { "epoch": 3.3447265625e-06, "model_forward_time": 0.025873899459838867, "step": 2192 }, { "epoch": 3.3447265625e-06, "step": 2192, "training_step_time": 0.1339128017425537 }, { "epoch": 3.34625244140625e-06, "model_forward_time": 0.025289535522460938, "step": 2193 }, { "epoch": 3.34625244140625e-06, "step": 2193, "training_step_time": 0.11606144905090332 }, { "epoch": 3.3477783203125e-06, "model_forward_time": 0.02546095848083496, "step": 2194 }, { "epoch": 3.3477783203125e-06, "step": 2194, "training_step_time": 0.11260271072387695 }, { "epoch": 3.34930419921875e-06, "model_forward_time": 0.025516748428344727, "step": 2195 }, { "epoch": 3.34930419921875e-06, "step": 2195, "training_step_time": 0.11277151107788086 }, { "epoch": 3.350830078125e-06, "model_forward_time": 0.025420188903808594, "step": 2196 }, { "epoch": 3.350830078125e-06, "step": 2196, "training_step_time": 0.10816264152526855 }, { "epoch": 3.35235595703125e-06, "model_forward_time": 0.025388717651367188, "step": 2197 }, { "epoch": 3.35235595703125e-06, "step": 2197, "training_step_time": 0.19941139221191406 }, { "epoch": 3.3538818359375e-06, "model_forward_time": 0.024378538131713867, "step": 2198 }, { "epoch": 3.3538818359375e-06, "step": 2198, "training_step_time": 0.10484576225280762 }, { "epoch": 3.35540771484375e-06, "model_forward_time": 0.02456188201904297, "step": 2199 }, { "epoch": 3.35540771484375e-06, "step": 2199, "training_step_time": 0.10401391983032227 }, { "epoch": 3.35693359375e-06, "grad_norm": 0.6290052533149719, "learning_rate": 9.985122477275824e-05, "loss": 0.1247, "step": 2200 }, { "epoch": 3.35693359375e-06, "model_forward_time": 0.025468826293945312, "step": 2200 }, { "epoch": 3.35693359375e-06, "step": 2200, "training_step_time": 0.10952520370483398 }, { "epoch": 3.35845947265625e-06, "model_forward_time": 0.027116775512695312, "step": 2201 }, { "epoch": 3.35845947265625e-06, "step": 2201, "training_step_time": 0.11272716522216797 }, { "epoch": 3.3599853515625e-06, "model_forward_time": 0.025820016860961914, "step": 2202 }, { "epoch": 3.3599853515625e-06, "step": 2202, "training_step_time": 0.21257948875427246 }, { "epoch": 3.36151123046875e-06, "model_forward_time": 0.02475452423095703, "step": 2203 }, { "epoch": 3.36151123046875e-06, "step": 2203, "training_step_time": 0.1158289909362793 }, { "epoch": 3.363037109375e-06, "model_forward_time": 0.02440643310546875, "step": 2204 }, { "epoch": 3.363037109375e-06, "step": 2204, "training_step_time": 0.10750460624694824 }, { "epoch": 3.36456298828125e-06, "model_forward_time": 0.02592754364013672, "step": 2205 }, { "epoch": 3.36456298828125e-06, "step": 2205, "training_step_time": 0.10721445083618164 }, { "epoch": 3.3660888671875e-06, "model_forward_time": 0.02557206153869629, "step": 2206 }, { "epoch": 3.3660888671875e-06, "step": 2206, "training_step_time": 0.11526727676391602 }, { "epoch": 3.36761474609375e-06, "model_forward_time": 0.02530694007873535, "step": 2207 }, { "epoch": 3.36761474609375e-06, "step": 2207, "training_step_time": 0.11032295227050781 }, { "epoch": 3.369140625e-06, "model_forward_time": 0.025232791900634766, "step": 2208 }, { "epoch": 3.369140625e-06, "step": 2208, "training_step_time": 0.11005616188049316 }, { "epoch": 3.37066650390625e-06, "model_forward_time": 0.02534008026123047, "step": 2209 }, { "epoch": 3.37066650390625e-06, "step": 2209, "training_step_time": 0.10846734046936035 }, { "epoch": 3.3721923828125e-06, "grad_norm": 1.0931735038757324, "learning_rate": 9.984694587513298e-05, "loss": 0.1413, "step": 2210 }, { "epoch": 3.3721923828125e-06, "model_forward_time": 0.025806665420532227, "step": 2210 }, { "epoch": 3.3721923828125e-06, "step": 2210, "training_step_time": 0.1068124771118164 }, { "epoch": 3.37371826171875e-06, "model_forward_time": 0.02540755271911621, "step": 2211 }, { "epoch": 3.37371826171875e-06, "step": 2211, "training_step_time": 0.10565328598022461 }, { "epoch": 3.375244140625e-06, "model_forward_time": 0.025311708450317383, "step": 2212 }, { "epoch": 3.375244140625e-06, "step": 2212, "training_step_time": 0.10906720161437988 }, { "epoch": 3.37677001953125e-06, "model_forward_time": 0.025380373001098633, "step": 2213 }, { "epoch": 3.37677001953125e-06, "step": 2213, "training_step_time": 0.1071782112121582 }, { "epoch": 3.3782958984375e-06, "model_forward_time": 0.025197982788085938, "step": 2214 }, { "epoch": 3.3782958984375e-06, "step": 2214, "training_step_time": 0.10754251480102539 }, { "epoch": 3.37982177734375e-06, "model_forward_time": 0.025327205657958984, "step": 2215 }, { "epoch": 3.37982177734375e-06, "step": 2215, "training_step_time": 0.10689949989318848 }, { "epoch": 3.38134765625e-06, "model_forward_time": 0.02474236488342285, "step": 2216 }, { "epoch": 3.38134765625e-06, "step": 2216, "training_step_time": 0.10626339912414551 }, { "epoch": 3.38287353515625e-06, "model_forward_time": 0.025862693786621094, "step": 2217 }, { "epoch": 3.38287353515625e-06, "step": 2217, "training_step_time": 0.11328125 }, { "epoch": 3.3843994140625e-06, "model_forward_time": 0.02625441551208496, "step": 2218 }, { "epoch": 3.3843994140625e-06, "step": 2218, "training_step_time": 0.1129457950592041 }, { "epoch": 3.38592529296875e-06, "model_forward_time": 0.02501845359802246, "step": 2219 }, { "epoch": 3.38592529296875e-06, "step": 2219, "training_step_time": 0.10761117935180664 }, { "epoch": 3.387451171875e-06, "grad_norm": 0.6803141236305237, "learning_rate": 9.984260640876821e-05, "loss": 0.1228, "step": 2220 }, { "epoch": 3.387451171875e-06, "model_forward_time": 0.024636030197143555, "step": 2220 }, { "epoch": 3.387451171875e-06, "step": 2220, "training_step_time": 0.10845708847045898 }, { "epoch": 3.38897705078125e-06, "model_forward_time": 0.024316072463989258, "step": 2221 }, { "epoch": 3.38897705078125e-06, "step": 2221, "training_step_time": 0.11272144317626953 }, { "epoch": 3.3905029296875e-06, "model_forward_time": 0.02524542808532715, "step": 2222 }, { "epoch": 3.3905029296875e-06, "step": 2222, "training_step_time": 0.11137151718139648 }, { "epoch": 3.39202880859375e-06, "model_forward_time": 0.024283170700073242, "step": 2223 }, { "epoch": 3.39202880859375e-06, "step": 2223, "training_step_time": 0.10985541343688965 }, { "epoch": 3.3935546875e-06, "model_forward_time": 0.025202035903930664, "step": 2224 }, { "epoch": 3.3935546875e-06, "step": 2224, "training_step_time": 0.10822153091430664 }, { "epoch": 3.39508056640625e-06, "model_forward_time": 0.024516820907592773, "step": 2225 }, { "epoch": 3.39508056640625e-06, "step": 2225, "training_step_time": 0.1114501953125 }, { "epoch": 3.3966064453125e-06, "model_forward_time": 0.025405168533325195, "step": 2226 }, { "epoch": 3.3966064453125e-06, "step": 2226, "training_step_time": 0.10735940933227539 }, { "epoch": 3.39813232421875e-06, "model_forward_time": 0.026009321212768555, "step": 2227 }, { "epoch": 3.39813232421875e-06, "step": 2227, "training_step_time": 0.10767865180969238 }, { "epoch": 3.399658203125e-06, "model_forward_time": 0.025368690490722656, "step": 2228 }, { "epoch": 3.399658203125e-06, "step": 2228, "training_step_time": 0.18982911109924316 }, { "epoch": 3.40118408203125e-06, "model_forward_time": 0.024901151657104492, "step": 2229 }, { "epoch": 3.40118408203125e-06, "step": 2229, "training_step_time": 0.10494637489318848 }, { "epoch": 3.4027099609375e-06, "grad_norm": 0.6757562160491943, "learning_rate": 9.983820637893681e-05, "loss": 0.144, "step": 2230 }, { "epoch": 3.4027099609375e-06, "model_forward_time": 0.024393558502197266, "step": 2230 }, { "epoch": 3.4027099609375e-06, "step": 2230, "training_step_time": 0.16824698448181152 }, { "epoch": 3.40423583984375e-06, "model_forward_time": 0.024935245513916016, "step": 2231 }, { "epoch": 3.40423583984375e-06, "step": 2231, "training_step_time": 0.1375739574432373 }, { "epoch": 3.40576171875e-06, "model_forward_time": 0.0245206356048584, "step": 2232 }, { "epoch": 3.40576171875e-06, "step": 2232, "training_step_time": 0.2156391143798828 }, { "epoch": 3.40728759765625e-06, "model_forward_time": 0.024553537368774414, "step": 2233 }, { "epoch": 3.40728759765625e-06, "step": 2233, "training_step_time": 0.1231074333190918 }, { "epoch": 3.4088134765625e-06, "model_forward_time": 0.024496793746948242, "step": 2234 }, { "epoch": 3.4088134765625e-06, "step": 2234, "training_step_time": 0.11021804809570312 }, { "epoch": 3.41033935546875e-06, "model_forward_time": 0.025335073471069336, "step": 2235 }, { "epoch": 3.41033935546875e-06, "step": 2235, "training_step_time": 0.12230038642883301 }, { "epoch": 3.411865234375e-06, "model_forward_time": 0.025592565536499023, "step": 2236 }, { "epoch": 3.411865234375e-06, "step": 2236, "training_step_time": 0.10744237899780273 }, { "epoch": 3.41339111328125e-06, "model_forward_time": 0.025238752365112305, "step": 2237 }, { "epoch": 3.41339111328125e-06, "step": 2237, "training_step_time": 0.12774324417114258 }, { "epoch": 3.4149169921875e-06, "model_forward_time": 0.02513885498046875, "step": 2238 }, { "epoch": 3.4149169921875e-06, "step": 2238, "training_step_time": 0.15024566650390625 }, { "epoch": 3.41644287109375e-06, "model_forward_time": 0.02674412727355957, "step": 2239 }, { "epoch": 3.41644287109375e-06, "step": 2239, "training_step_time": 0.13843321800231934 }, { "epoch": 3.41796875e-06, "grad_norm": 0.9230808615684509, "learning_rate": 9.983374579098523e-05, "loss": 0.1256, "step": 2240 }, { "epoch": 3.41796875e-06, "model_forward_time": 0.024523496627807617, "step": 2240 }, { "epoch": 3.41796875e-06, "step": 2240, "training_step_time": 0.20284485816955566 }, { "epoch": 3.41949462890625e-06, "model_forward_time": 0.024039268493652344, "step": 2241 }, { "epoch": 3.41949462890625e-06, "step": 2241, "training_step_time": 0.15082240104675293 }, { "epoch": 3.4210205078125e-06, "model_forward_time": 0.024088621139526367, "step": 2242 }, { "epoch": 3.4210205078125e-06, "step": 2242, "training_step_time": 0.17657041549682617 }, { "epoch": 3.42254638671875e-06, "model_forward_time": 0.02451014518737793, "step": 2243 }, { "epoch": 3.42254638671875e-06, "step": 2243, "training_step_time": 0.1265413761138916 }, { "epoch": 3.424072265625e-06, "model_forward_time": 0.02435922622680664, "step": 2244 }, { "epoch": 3.424072265625e-06, "step": 2244, "training_step_time": 0.11759614944458008 }, { "epoch": 3.42559814453125e-06, "model_forward_time": 0.024877309799194336, "step": 2245 }, { "epoch": 3.42559814453125e-06, "step": 2245, "training_step_time": 0.12009263038635254 }, { "epoch": 3.4271240234375e-06, "model_forward_time": 0.025249719619750977, "step": 2246 }, { "epoch": 3.4271240234375e-06, "step": 2246, "training_step_time": 0.10747218132019043 }, { "epoch": 3.42864990234375e-06, "model_forward_time": 0.025400638580322266, "step": 2247 }, { "epoch": 3.42864990234375e-06, "step": 2247, "training_step_time": 0.10975098609924316 }, { "epoch": 3.43017578125e-06, "model_forward_time": 0.025346994400024414, "step": 2248 }, { "epoch": 3.43017578125e-06, "step": 2248, "training_step_time": 0.12213873863220215 }, { "epoch": 3.43170166015625e-06, "model_forward_time": 0.025269746780395508, "step": 2249 }, { "epoch": 3.43170166015625e-06, "step": 2249, "training_step_time": 0.11492919921875 }, { "epoch": 3.4332275390625e-06, "grad_norm": 1.0198084115982056, "learning_rate": 9.98292246503335e-05, "loss": 0.1454, "step": 2250 }, { "epoch": 3.4332275390625e-06, "model_forward_time": 0.02577352523803711, "step": 2250 }, { "epoch": 3.4332275390625e-06, "step": 2250, "training_step_time": 0.15647268295288086 }, { "epoch": 3.43475341796875e-06, "model_forward_time": 0.025000810623168945, "step": 2251 }, { "epoch": 3.43475341796875e-06, "step": 2251, "training_step_time": 0.1584458351135254 }, { "epoch": 3.436279296875e-06, "model_forward_time": 0.024669170379638672, "step": 2252 }, { "epoch": 3.436279296875e-06, "step": 2252, "training_step_time": 0.12378954887390137 }, { "epoch": 3.43780517578125e-06, "model_forward_time": 0.02539229393005371, "step": 2253 }, { "epoch": 3.43780517578125e-06, "step": 2253, "training_step_time": 0.10535311698913574 }, { "epoch": 3.4393310546875e-06, "model_forward_time": 0.025667428970336914, "step": 2254 }, { "epoch": 3.4393310546875e-06, "step": 2254, "training_step_time": 0.1070563793182373 }, { "epoch": 3.44085693359375e-06, "model_forward_time": 0.024979114532470703, "step": 2255 }, { "epoch": 3.44085693359375e-06, "step": 2255, "training_step_time": 0.10921239852905273 }, { "epoch": 3.4423828125e-06, "model_forward_time": 0.025542259216308594, "step": 2256 }, { "epoch": 3.4423828125e-06, "step": 2256, "training_step_time": 0.10881543159484863 }, { "epoch": 3.44390869140625e-06, "model_forward_time": 0.02544093132019043, "step": 2257 }, { "epoch": 3.44390869140625e-06, "step": 2257, "training_step_time": 0.10526108741760254 }, { "epoch": 3.4454345703125e-06, "model_forward_time": 0.02520608901977539, "step": 2258 }, { "epoch": 3.4454345703125e-06, "step": 2258, "training_step_time": 0.10660457611083984 }, { "epoch": 3.44696044921875e-06, "model_forward_time": 0.025272607803344727, "step": 2259 }, { "epoch": 3.44696044921875e-06, "step": 2259, "training_step_time": 0.10548901557922363 }, { "epoch": 3.448486328125e-06, "grad_norm": 0.7208828330039978, "learning_rate": 9.982464296247522e-05, "loss": 0.1272, "step": 2260 }, { "epoch": 3.448486328125e-06, "model_forward_time": 0.025158405303955078, "step": 2260 }, { "epoch": 3.448486328125e-06, "step": 2260, "training_step_time": 0.10584449768066406 }, { "epoch": 3.45001220703125e-06, "model_forward_time": 0.028775453567504883, "step": 2261 }, { "epoch": 3.45001220703125e-06, "step": 2261, "training_step_time": 0.11305022239685059 }, { "epoch": 3.4515380859375e-06, "model_forward_time": 0.026183366775512695, "step": 2262 }, { "epoch": 3.4515380859375e-06, "step": 2262, "training_step_time": 0.1086130142211914 }, { "epoch": 3.45306396484375e-06, "model_forward_time": 0.0251772403717041, "step": 2263 }, { "epoch": 3.45306396484375e-06, "step": 2263, "training_step_time": 0.10745835304260254 }, { "epoch": 3.45458984375e-06, "model_forward_time": 0.02534031867980957, "step": 2264 }, { "epoch": 3.45458984375e-06, "step": 2264, "training_step_time": 0.10597467422485352 }, { "epoch": 3.45611572265625e-06, "model_forward_time": 0.024297714233398438, "step": 2265 }, { "epoch": 3.45611572265625e-06, "step": 2265, "training_step_time": 0.1075892448425293 }, { "epoch": 3.4576416015625e-06, "model_forward_time": 0.02575373649597168, "step": 2266 }, { "epoch": 3.4576416015625e-06, "step": 2266, "training_step_time": 0.1070547103881836 }, { "epoch": 3.45916748046875e-06, "model_forward_time": 0.025467634201049805, "step": 2267 }, { "epoch": 3.45916748046875e-06, "step": 2267, "training_step_time": 0.10497355461120605 }, { "epoch": 3.460693359375e-06, "model_forward_time": 0.025322675704956055, "step": 2268 }, { "epoch": 3.460693359375e-06, "step": 2268, "training_step_time": 0.10626578330993652 }, { "epoch": 3.46221923828125e-06, "model_forward_time": 0.025413036346435547, "step": 2269 }, { "epoch": 3.46221923828125e-06, "step": 2269, "training_step_time": 0.11072230339050293 }, { "epoch": 3.4637451171875e-06, "grad_norm": 0.763227105140686, "learning_rate": 9.982000073297759e-05, "loss": 0.1287, "step": 2270 }, { "epoch": 3.4637451171875e-06, "model_forward_time": 0.025361299514770508, "step": 2270 }, { "epoch": 3.4637451171875e-06, "step": 2270, "training_step_time": 0.11115837097167969 }, { "epoch": 3.46527099609375e-06, "model_forward_time": 0.02540302276611328, "step": 2271 }, { "epoch": 3.46527099609375e-06, "step": 2271, "training_step_time": 0.10826802253723145 }, { "epoch": 3.466796875e-06, "model_forward_time": 0.025376319885253906, "step": 2272 }, { "epoch": 3.466796875e-06, "step": 2272, "training_step_time": 0.10554218292236328 }, { "epoch": 3.46832275390625e-06, "model_forward_time": 0.02539539337158203, "step": 2273 }, { "epoch": 3.46832275390625e-06, "step": 2273, "training_step_time": 0.19907760620117188 }, { "epoch": 3.4698486328125e-06, "model_forward_time": 0.024342060089111328, "step": 2274 }, { "epoch": 3.4698486328125e-06, "step": 2274, "training_step_time": 0.11154818534851074 }, { "epoch": 3.47137451171875e-06, "model_forward_time": 0.024457931518554688, "step": 2275 }, { "epoch": 3.47137451171875e-06, "step": 2275, "training_step_time": 0.10834741592407227 }, { "epoch": 3.472900390625e-06, "model_forward_time": 0.025557756423950195, "step": 2276 }, { "epoch": 3.472900390625e-06, "step": 2276, "training_step_time": 0.12626862525939941 }, { "epoch": 3.47442626953125e-06, "model_forward_time": 0.0280759334564209, "step": 2277 }, { "epoch": 3.47442626953125e-06, "step": 2277, "training_step_time": 0.1174774169921875 }, { "epoch": 3.4759521484375e-06, "model_forward_time": 0.0254974365234375, "step": 2278 }, { "epoch": 3.4759521484375e-06, "step": 2278, "training_step_time": 0.216264009475708 }, { "epoch": 3.47747802734375e-06, "model_forward_time": 0.02453756332397461, "step": 2279 }, { "epoch": 3.47747802734375e-06, "step": 2279, "training_step_time": 0.15039634704589844 }, { "epoch": 3.47900390625e-06, "grad_norm": 0.7737330794334412, "learning_rate": 9.981529796748134e-05, "loss": 0.1356, "step": 2280 }, { "epoch": 3.47900390625e-06, "model_forward_time": 0.02416515350341797, "step": 2280 }, { "epoch": 3.47900390625e-06, "step": 2280, "training_step_time": 0.10844302177429199 }, { "epoch": 3.48052978515625e-06, "model_forward_time": 0.024841785430908203, "step": 2281 }, { "epoch": 3.48052978515625e-06, "step": 2281, "training_step_time": 0.12086176872253418 }, { "epoch": 3.4820556640625e-06, "model_forward_time": 0.025487184524536133, "step": 2282 }, { "epoch": 3.4820556640625e-06, "step": 2282, "training_step_time": 0.10951948165893555 }, { "epoch": 3.48358154296875e-06, "model_forward_time": 0.025606870651245117, "step": 2283 }, { "epoch": 3.48358154296875e-06, "step": 2283, "training_step_time": 0.17801833152770996 }, { "epoch": 3.485107421875e-06, "model_forward_time": 0.02459120750427246, "step": 2284 }, { "epoch": 3.485107421875e-06, "step": 2284, "training_step_time": 0.1344912052154541 }, { "epoch": 3.48663330078125e-06, "model_forward_time": 0.02458024024963379, "step": 2285 }, { "epoch": 3.48663330078125e-06, "step": 2285, "training_step_time": 0.1105201244354248 }, { "epoch": 3.4881591796875e-06, "model_forward_time": 0.025744915008544922, "step": 2286 }, { "epoch": 3.4881591796875e-06, "step": 2286, "training_step_time": 0.11853241920471191 }, { "epoch": 3.48968505859375e-06, "model_forward_time": 0.02599811553955078, "step": 2287 }, { "epoch": 3.48968505859375e-06, "step": 2287, "training_step_time": 0.11443853378295898 }, { "epoch": 3.4912109375e-06, "model_forward_time": 0.02582263946533203, "step": 2288 }, { "epoch": 3.4912109375e-06, "step": 2288, "training_step_time": 0.15849065780639648 }, { "epoch": 3.49273681640625e-06, "model_forward_time": 0.02512645721435547, "step": 2289 }, { "epoch": 3.49273681640625e-06, "step": 2289, "training_step_time": 0.1483144760131836 }, { "epoch": 3.4942626953125e-06, "grad_norm": 0.7489446401596069, "learning_rate": 9.98105346717008e-05, "loss": 0.1545, "step": 2290 }, { "epoch": 3.4942626953125e-06, "model_forward_time": 0.02517223358154297, "step": 2290 }, { "epoch": 3.4942626953125e-06, "step": 2290, "training_step_time": 0.10528326034545898 }, { "epoch": 3.49578857421875e-06, "model_forward_time": 0.025907278060913086, "step": 2291 }, { "epoch": 3.49578857421875e-06, "step": 2291, "training_step_time": 0.11009526252746582 }, { "epoch": 3.497314453125e-06, "model_forward_time": 0.02588486671447754, "step": 2292 }, { "epoch": 3.497314453125e-06, "step": 2292, "training_step_time": 0.10511541366577148 }, { "epoch": 3.49884033203125e-06, "model_forward_time": 0.02510356903076172, "step": 2293 }, { "epoch": 3.49884033203125e-06, "step": 2293, "training_step_time": 0.10793161392211914 }, { "epoch": 3.5003662109375e-06, "model_forward_time": 0.025310277938842773, "step": 2294 }, { "epoch": 3.5003662109375e-06, "step": 2294, "training_step_time": 0.1234273910522461 }, { "epoch": 3.50189208984375e-06, "model_forward_time": 0.025749921798706055, "step": 2295 }, { "epoch": 3.50189208984375e-06, "step": 2295, "training_step_time": 0.11373019218444824 }, { "epoch": 3.50341796875e-06, "model_forward_time": 0.025767803192138672, "step": 2296 }, { "epoch": 3.50341796875e-06, "step": 2296, "training_step_time": 0.10607266426086426 }, { "epoch": 3.50494384765625e-06, "model_forward_time": 0.025495529174804688, "step": 2297 }, { "epoch": 3.50494384765625e-06, "step": 2297, "training_step_time": 0.2454214096069336 }, { "epoch": 3.5064697265625e-06, "model_forward_time": 0.025356531143188477, "step": 2298 }, { "epoch": 3.5064697265625e-06, "step": 2298, "training_step_time": 0.2084031105041504 }, { "epoch": 3.50799560546875e-06, "model_forward_time": 0.025348424911499023, "step": 2299 }, { "epoch": 3.50799560546875e-06, "step": 2299, "training_step_time": 0.2015683650970459 }, { "epoch": 3.509521484375e-06, "grad_norm": 0.6070877313613892, "learning_rate": 9.980571085142381e-05, "loss": 0.1133, "step": 2300 }, { "epoch": 3.509521484375e-06, "model_forward_time": 0.024828433990478516, "step": 2300 }, { "epoch": 3.509521484375e-06, "step": 2300, "training_step_time": 0.19903993606567383 }, { "epoch": 3.51104736328125e-06, "model_forward_time": 0.024506092071533203, "step": 2301 }, { "epoch": 3.51104736328125e-06, "step": 2301, "training_step_time": 0.1875004768371582 }, { "epoch": 3.5125732421875e-06, "model_forward_time": 0.024552583694458008, "step": 2302 }, { "epoch": 3.5125732421875e-06, "step": 2302, "training_step_time": 0.17716622352600098 }, { "epoch": 3.51409912109375e-06, "model_forward_time": 0.024951457977294922, "step": 2303 }, { "epoch": 3.51409912109375e-06, "step": 2303, "training_step_time": 0.16551423072814941 }, { "epoch": 3.515625e-06, "model_forward_time": 0.024672985076904297, "step": 2304 }, { "epoch": 3.515625e-06, "step": 2304, "training_step_time": 0.10959076881408691 }, { "epoch": 3.51715087890625e-06, "model_forward_time": 0.02474236488342285, "step": 2305 }, { "epoch": 3.51715087890625e-06, "step": 2305, "training_step_time": 0.10127520561218262 }, { "epoch": 3.5186767578125e-06, "model_forward_time": 0.025143146514892578, "step": 2306 }, { "epoch": 3.5186767578125e-06, "step": 2306, "training_step_time": 0.10676217079162598 }, { "epoch": 3.52020263671875e-06, "model_forward_time": 0.025380611419677734, "step": 2307 }, { "epoch": 3.52020263671875e-06, "step": 2307, "training_step_time": 0.10568928718566895 }, { "epoch": 3.521728515625e-06, "model_forward_time": 0.02559947967529297, "step": 2308 }, { "epoch": 3.521728515625e-06, "step": 2308, "training_step_time": 0.10421633720397949 }, { "epoch": 3.52325439453125e-06, "model_forward_time": 0.02534627914428711, "step": 2309 }, { "epoch": 3.52325439453125e-06, "step": 2309, "training_step_time": 0.10481667518615723 }, { "epoch": 3.5247802734375e-06, "grad_norm": 0.7714501023292542, "learning_rate": 9.980082651251175e-05, "loss": 0.1339, "step": 2310 }, { "epoch": 3.5247802734375e-06, "model_forward_time": 0.02578425407409668, "step": 2310 }, { "epoch": 3.5247802734375e-06, "step": 2310, "training_step_time": 0.10922074317932129 }, { "epoch": 3.52630615234375e-06, "model_forward_time": 0.02527308464050293, "step": 2311 }, { "epoch": 3.52630615234375e-06, "step": 2311, "training_step_time": 0.10564994812011719 }, { "epoch": 3.52783203125e-06, "model_forward_time": 0.025030851364135742, "step": 2312 }, { "epoch": 3.52783203125e-06, "step": 2312, "training_step_time": 0.10576820373535156 }, { "epoch": 3.52935791015625e-06, "model_forward_time": 0.025517702102661133, "step": 2313 }, { "epoch": 3.52935791015625e-06, "step": 2313, "training_step_time": 0.10463762283325195 }, { "epoch": 3.5308837890625e-06, "model_forward_time": 0.025667905807495117, "step": 2314 }, { "epoch": 3.5308837890625e-06, "step": 2314, "training_step_time": 0.10563826560974121 }, { "epoch": 3.53240966796875e-06, "model_forward_time": 0.026885509490966797, "step": 2315 }, { "epoch": 3.53240966796875e-06, "step": 2315, "training_step_time": 0.11027240753173828 }, { "epoch": 3.533935546875e-06, "model_forward_time": 0.025712251663208008, "step": 2316 }, { "epoch": 3.533935546875e-06, "step": 2316, "training_step_time": 0.11579322814941406 }, { "epoch": 3.53546142578125e-06, "model_forward_time": 0.0249025821685791, "step": 2317 }, { "epoch": 3.53546142578125e-06, "step": 2317, "training_step_time": 0.11815404891967773 }, { "epoch": 3.5369873046875e-06, "model_forward_time": 0.025420665740966797, "step": 2318 }, { "epoch": 3.5369873046875e-06, "step": 2318, "training_step_time": 0.1580061912536621 }, { "epoch": 3.53851318359375e-06, "model_forward_time": 0.02613091468811035, "step": 2319 }, { "epoch": 3.53851318359375e-06, "step": 2319, "training_step_time": 0.15944457054138184 }, { "epoch": 3.5400390625e-06, "grad_norm": 1.6715880632400513, "learning_rate": 9.979588166089958e-05, "loss": 0.1376, "step": 2320 }, { "epoch": 3.5400390625e-06, "model_forward_time": 0.024211645126342773, "step": 2320 }, { "epoch": 3.5400390625e-06, "step": 2320, "training_step_time": 0.19746780395507812 }, { "epoch": 3.54156494140625e-06, "model_forward_time": 0.02441716194152832, "step": 2321 }, { "epoch": 3.54156494140625e-06, "step": 2321, "training_step_time": 0.11595749855041504 }, { "epoch": 3.5430908203125e-06, "model_forward_time": 0.02441883087158203, "step": 2322 }, { "epoch": 3.5430908203125e-06, "step": 2322, "training_step_time": 0.19123101234436035 }, { "epoch": 3.54461669921875e-06, "model_forward_time": 0.02567887306213379, "step": 2323 }, { "epoch": 3.54461669921875e-06, "step": 2323, "training_step_time": 0.11079692840576172 }, { "epoch": 3.546142578125e-06, "model_forward_time": 0.024657249450683594, "step": 2324 }, { "epoch": 3.546142578125e-06, "step": 2324, "training_step_time": 0.10932445526123047 }, { "epoch": 3.54766845703125e-06, "model_forward_time": 0.025821924209594727, "step": 2325 }, { "epoch": 3.54766845703125e-06, "step": 2325, "training_step_time": 0.15426087379455566 }, { "epoch": 3.5491943359375e-06, "model_forward_time": 0.025013208389282227, "step": 2326 }, { "epoch": 3.5491943359375e-06, "step": 2326, "training_step_time": 0.13788342475891113 }, { "epoch": 3.55072021484375e-06, "model_forward_time": 0.024633169174194336, "step": 2327 }, { "epoch": 3.55072021484375e-06, "step": 2327, "training_step_time": 0.11156797409057617 }, { "epoch": 3.55224609375e-06, "model_forward_time": 0.025289297103881836, "step": 2328 }, { "epoch": 3.55224609375e-06, "step": 2328, "training_step_time": 0.12189078330993652 }, { "epoch": 3.55377197265625e-06, "model_forward_time": 0.02517104148864746, "step": 2329 }, { "epoch": 3.55377197265625e-06, "step": 2329, "training_step_time": 0.11261820793151855 }, { "epoch": 3.5552978515625e-06, "grad_norm": 0.6216801404953003, "learning_rate": 9.979087630259572e-05, "loss": 0.1565, "step": 2330 }, { "epoch": 3.5552978515625e-06, "model_forward_time": 0.02546381950378418, "step": 2330 }, { "epoch": 3.5552978515625e-06, "step": 2330, "training_step_time": 0.18820929527282715 }, { "epoch": 3.55682373046875e-06, "model_forward_time": 0.025282621383666992, "step": 2331 }, { "epoch": 3.55682373046875e-06, "step": 2331, "training_step_time": 0.11551642417907715 }, { "epoch": 3.558349609375e-06, "model_forward_time": 0.025131940841674805, "step": 2332 }, { "epoch": 3.558349609375e-06, "step": 2332, "training_step_time": 0.10753035545349121 }, { "epoch": 3.55987548828125e-06, "model_forward_time": 0.02474665641784668, "step": 2333 }, { "epoch": 3.55987548828125e-06, "step": 2333, "training_step_time": 0.10717606544494629 }, { "epoch": 3.5614013671875e-06, "model_forward_time": 0.025811433792114258, "step": 2334 }, { "epoch": 3.5614013671875e-06, "step": 2334, "training_step_time": 0.10893607139587402 }, { "epoch": 3.56292724609375e-06, "model_forward_time": 0.02475714683532715, "step": 2335 }, { "epoch": 3.56292724609375e-06, "step": 2335, "training_step_time": 0.10801553726196289 }, { "epoch": 3.564453125e-06, "model_forward_time": 0.02489161491394043, "step": 2336 }, { "epoch": 3.564453125e-06, "step": 2336, "training_step_time": 0.11002802848815918 }, { "epoch": 3.56597900390625e-06, "model_forward_time": 0.025223970413208008, "step": 2337 }, { "epoch": 3.56597900390625e-06, "step": 2337, "training_step_time": 0.11621451377868652 }, { "epoch": 3.5675048828125e-06, "model_forward_time": 0.025515079498291016, "step": 2338 }, { "epoch": 3.5675048828125e-06, "step": 2338, "training_step_time": 0.1081857681274414 }, { "epoch": 3.56903076171875e-06, "model_forward_time": 0.025481700897216797, "step": 2339 }, { "epoch": 3.56903076171875e-06, "step": 2339, "training_step_time": 0.17309308052062988 }, { "epoch": 3.570556640625e-06, "grad_norm": 0.6606565117835999, "learning_rate": 9.97858104436822e-05, "loss": 0.131, "step": 2340 }, { "epoch": 3.570556640625e-06, "model_forward_time": 0.025295257568359375, "step": 2340 }, { "epoch": 3.570556640625e-06, "step": 2340, "training_step_time": 0.15503239631652832 }, { "epoch": 3.57208251953125e-06, "model_forward_time": 0.024593353271484375, "step": 2341 }, { "epoch": 3.57208251953125e-06, "step": 2341, "training_step_time": 0.10154461860656738 }, { "epoch": 3.5736083984375e-06, "model_forward_time": 0.025056123733520508, "step": 2342 }, { "epoch": 3.5736083984375e-06, "step": 2342, "training_step_time": 0.10542845726013184 }, { "epoch": 3.57513427734375e-06, "model_forward_time": 0.02513575553894043, "step": 2343 }, { "epoch": 3.57513427734375e-06, "step": 2343, "training_step_time": 0.10832452774047852 }, { "epoch": 3.57666015625e-06, "model_forward_time": 0.025075435638427734, "step": 2344 }, { "epoch": 3.57666015625e-06, "step": 2344, "training_step_time": 0.10765814781188965 }, { "epoch": 3.57818603515625e-06, "model_forward_time": 0.025156259536743164, "step": 2345 }, { "epoch": 3.57818603515625e-06, "step": 2345, "training_step_time": 0.11246943473815918 }, { "epoch": 3.5797119140625e-06, "model_forward_time": 0.02586674690246582, "step": 2346 }, { "epoch": 3.5797119140625e-06, "step": 2346, "training_step_time": 0.10997605323791504 }, { "epoch": 3.58123779296875e-06, "model_forward_time": 0.025565385818481445, "step": 2347 }, { "epoch": 3.58123779296875e-06, "step": 2347, "training_step_time": 0.10850954055786133 }, { "epoch": 3.582763671875e-06, "model_forward_time": 0.02528238296508789, "step": 2348 }, { "epoch": 3.582763671875e-06, "step": 2348, "training_step_time": 0.10843420028686523 }, { "epoch": 3.58428955078125e-06, "model_forward_time": 0.02548694610595703, "step": 2349 }, { "epoch": 3.58428955078125e-06, "step": 2349, "training_step_time": 0.10651826858520508 }, { "epoch": 3.5858154296875e-06, "grad_norm": 0.6426587700843811, "learning_rate": 9.978068409031449e-05, "loss": 0.1354, "step": 2350 }, { "epoch": 3.5858154296875e-06, "model_forward_time": 0.02540755271911621, "step": 2350 }, { "epoch": 3.5858154296875e-06, "step": 2350, "training_step_time": 0.10778188705444336 }, { "epoch": 3.58734130859375e-06, "model_forward_time": 0.025505542755126953, "step": 2351 }, { "epoch": 3.58734130859375e-06, "step": 2351, "training_step_time": 0.10970163345336914 }, { "epoch": 3.5888671875e-06, "model_forward_time": 0.025675296783447266, "step": 2352 }, { "epoch": 3.5888671875e-06, "step": 2352, "training_step_time": 0.11286282539367676 }, { "epoch": 3.59039306640625e-06, "model_forward_time": 0.027707815170288086, "step": 2353 }, { "epoch": 3.59039306640625e-06, "step": 2353, "training_step_time": 0.10751152038574219 }, { "epoch": 3.5919189453125e-06, "model_forward_time": 0.025429725646972656, "step": 2354 }, { "epoch": 3.5919189453125e-06, "step": 2354, "training_step_time": 0.10754513740539551 }, { "epoch": 3.59344482421875e-06, "model_forward_time": 0.025174379348754883, "step": 2355 }, { "epoch": 3.59344482421875e-06, "step": 2355, "training_step_time": 0.11274528503417969 }, { "epoch": 3.594970703125e-06, "model_forward_time": 0.025615215301513672, "step": 2356 }, { "epoch": 3.594970703125e-06, "step": 2356, "training_step_time": 0.11062955856323242 }, { "epoch": 3.59649658203125e-06, "model_forward_time": 0.025330781936645508, "step": 2357 }, { "epoch": 3.59649658203125e-06, "step": 2357, "training_step_time": 0.10631728172302246 }, { "epoch": 3.5980224609375e-06, "model_forward_time": 0.02548384666442871, "step": 2358 }, { "epoch": 3.5980224609375e-06, "step": 2358, "training_step_time": 0.10638141632080078 }, { "epoch": 3.59954833984375e-06, "model_forward_time": 0.0252530574798584, "step": 2359 }, { "epoch": 3.59954833984375e-06, "step": 2359, "training_step_time": 0.10704827308654785 }, { "epoch": 3.60107421875e-06, "grad_norm": 0.6929382681846619, "learning_rate": 9.97754972487216e-05, "loss": 0.1318, "step": 2360 }, { "epoch": 3.60107421875e-06, "model_forward_time": 0.025241613388061523, "step": 2360 }, { "epoch": 3.60107421875e-06, "step": 2360, "training_step_time": 0.10992836952209473 }, { "epoch": 3.60260009765625e-06, "model_forward_time": 0.025150299072265625, "step": 2361 }, { "epoch": 3.60260009765625e-06, "step": 2361, "training_step_time": 0.21767568588256836 }, { "epoch": 3.6041259765625e-06, "model_forward_time": 0.024817228317260742, "step": 2362 }, { "epoch": 3.6041259765625e-06, "step": 2362, "training_step_time": 0.12064838409423828 }, { "epoch": 3.60565185546875e-06, "model_forward_time": 0.025087356567382812, "step": 2363 }, { "epoch": 3.60565185546875e-06, "step": 2363, "training_step_time": 0.1828005313873291 }, { "epoch": 3.607177734375e-06, "model_forward_time": 0.024841785430908203, "step": 2364 }, { "epoch": 3.607177734375e-06, "step": 2364, "training_step_time": 0.11818504333496094 }, { "epoch": 3.60870361328125e-06, "model_forward_time": 0.024193286895751953, "step": 2365 }, { "epoch": 3.60870361328125e-06, "step": 2365, "training_step_time": 0.22084712982177734 }, { "epoch": 3.6102294921875e-06, "model_forward_time": 0.02446603775024414, "step": 2366 }, { "epoch": 3.6102294921875e-06, "step": 2366, "training_step_time": 0.1521778106689453 }, { "epoch": 3.61175537109375e-06, "model_forward_time": 0.02464771270751953, "step": 2367 }, { "epoch": 3.61175537109375e-06, "step": 2367, "training_step_time": 0.10435271263122559 }, { "epoch": 3.61328125e-06, "model_forward_time": 0.025382518768310547, "step": 2368 }, { "epoch": 3.61328125e-06, "step": 2368, "training_step_time": 0.10529160499572754 }, { "epoch": 3.61480712890625e-06, "model_forward_time": 0.025074005126953125, "step": 2369 }, { "epoch": 3.61480712890625e-06, "step": 2369, "training_step_time": 0.11052441596984863 }, { "epoch": 3.6163330078125e-06, "grad_norm": 0.7236571311950684, "learning_rate": 9.977024992520602e-05, "loss": 0.1374, "step": 2370 }, { "epoch": 3.6163330078125e-06, "model_forward_time": 0.026082515716552734, "step": 2370 }, { "epoch": 3.6163330078125e-06, "step": 2370, "training_step_time": 0.17008280754089355 }, { "epoch": 3.61785888671875e-06, "model_forward_time": 0.02504706382751465, "step": 2371 }, { "epoch": 3.61785888671875e-06, "step": 2371, "training_step_time": 0.17674708366394043 }, { "epoch": 3.619384765625e-06, "model_forward_time": 0.024468660354614258, "step": 2372 }, { "epoch": 3.619384765625e-06, "step": 2372, "training_step_time": 0.11211133003234863 }, { "epoch": 3.62091064453125e-06, "model_forward_time": 0.024601221084594727, "step": 2373 }, { "epoch": 3.62091064453125e-06, "step": 2373, "training_step_time": 0.11912894248962402 }, { "epoch": 3.6224365234375e-06, "model_forward_time": 0.025422096252441406, "step": 2374 }, { "epoch": 3.6224365234375e-06, "step": 2374, "training_step_time": 0.10901451110839844 }, { "epoch": 3.62396240234375e-06, "model_forward_time": 0.02536463737487793, "step": 2375 }, { "epoch": 3.62396240234375e-06, "step": 2375, "training_step_time": 0.11543488502502441 }, { "epoch": 3.62548828125e-06, "model_forward_time": 0.025476694107055664, "step": 2376 }, { "epoch": 3.62548828125e-06, "step": 2376, "training_step_time": 0.2022261619567871 }, { "epoch": 3.62701416015625e-06, "model_forward_time": 0.024479150772094727, "step": 2377 }, { "epoch": 3.62701416015625e-06, "step": 2377, "training_step_time": 0.10659217834472656 }, { "epoch": 3.6285400390625e-06, "model_forward_time": 0.02436518669128418, "step": 2378 }, { "epoch": 3.6285400390625e-06, "step": 2378, "training_step_time": 0.10633468627929688 }, { "epoch": 3.63006591796875e-06, "model_forward_time": 0.025379419326782227, "step": 2379 }, { "epoch": 3.63006591796875e-06, "step": 2379, "training_step_time": 0.10657954216003418 }, { "epoch": 3.631591796875e-06, "grad_norm": 0.518153190612793, "learning_rate": 9.976494212614377e-05, "loss": 0.131, "step": 2380 }, { "epoch": 3.631591796875e-06, "model_forward_time": 0.024471759796142578, "step": 2380 }, { "epoch": 3.631591796875e-06, "step": 2380, "training_step_time": 0.10532784461975098 }, { "epoch": 3.63311767578125e-06, "model_forward_time": 0.024922847747802734, "step": 2381 }, { "epoch": 3.63311767578125e-06, "step": 2381, "training_step_time": 0.1154634952545166 }, { "epoch": 3.6346435546875e-06, "model_forward_time": 0.025488615036010742, "step": 2382 }, { "epoch": 3.6346435546875e-06, "step": 2382, "training_step_time": 0.10823822021484375 }, { "epoch": 3.63616943359375e-06, "model_forward_time": 0.025673627853393555, "step": 2383 }, { "epoch": 3.63616943359375e-06, "step": 2383, "training_step_time": 0.10618185997009277 }, { "epoch": 3.6376953125e-06, "model_forward_time": 0.026149511337280273, "step": 2384 }, { "epoch": 3.6376953125e-06, "step": 2384, "training_step_time": 0.11056303977966309 }, { "epoch": 3.63922119140625e-06, "model_forward_time": 0.02579784393310547, "step": 2385 }, { "epoch": 3.63922119140625e-06, "step": 2385, "training_step_time": 0.11134576797485352 }, { "epoch": 3.6407470703125e-06, "model_forward_time": 0.02525782585144043, "step": 2386 }, { "epoch": 3.6407470703125e-06, "step": 2386, "training_step_time": 0.11006999015808105 }, { "epoch": 3.64227294921875e-06, "model_forward_time": 0.02589726448059082, "step": 2387 }, { "epoch": 3.64227294921875e-06, "step": 2387, "training_step_time": 0.11006426811218262 }, { "epoch": 3.643798828125e-06, "model_forward_time": 0.025664329528808594, "step": 2388 }, { "epoch": 3.643798828125e-06, "step": 2388, "training_step_time": 0.10926985740661621 }, { "epoch": 3.64532470703125e-06, "model_forward_time": 0.02555561065673828, "step": 2389 }, { "epoch": 3.64532470703125e-06, "step": 2389, "training_step_time": 0.10498476028442383 }, { "epoch": 3.6468505859375e-06, "grad_norm": 0.5260281562805176, "learning_rate": 9.97595738579843e-05, "loss": 0.1324, "step": 2390 }, { "epoch": 3.6468505859375e-06, "model_forward_time": 0.025089263916015625, "step": 2390 }, { "epoch": 3.6468505859375e-06, "step": 2390, "training_step_time": 0.10754656791687012 }, { "epoch": 3.64837646484375e-06, "model_forward_time": 0.02536487579345703, "step": 2391 }, { "epoch": 3.64837646484375e-06, "step": 2391, "training_step_time": 0.10452055931091309 }, { "epoch": 3.64990234375e-06, "model_forward_time": 0.02567291259765625, "step": 2392 }, { "epoch": 3.64990234375e-06, "step": 2392, "training_step_time": 0.10955476760864258 }, { "epoch": 3.65142822265625e-06, "model_forward_time": 0.025548934936523438, "step": 2393 }, { "epoch": 3.65142822265625e-06, "step": 2393, "training_step_time": 0.10619258880615234 }, { "epoch": 3.6529541015625e-06, "model_forward_time": 0.02559065818786621, "step": 2394 }, { "epoch": 3.6529541015625e-06, "step": 2394, "training_step_time": 0.10727596282958984 }, { "epoch": 3.65447998046875e-06, "model_forward_time": 0.02559638023376465, "step": 2395 }, { "epoch": 3.65447998046875e-06, "step": 2395, "training_step_time": 0.1066293716430664 }, { "epoch": 3.656005859375e-06, "model_forward_time": 0.0254971981048584, "step": 2396 }, { "epoch": 3.656005859375e-06, "step": 2396, "training_step_time": 0.1046757698059082 }, { "epoch": 3.65753173828125e-06, "model_forward_time": 0.025500059127807617, "step": 2397 }, { "epoch": 3.65753173828125e-06, "step": 2397, "training_step_time": 0.11049604415893555 }, { "epoch": 3.6590576171875e-06, "model_forward_time": 0.02557063102722168, "step": 2398 }, { "epoch": 3.6590576171875e-06, "step": 2398, "training_step_time": 0.10599732398986816 }, { "epoch": 3.66058349609375e-06, "model_forward_time": 0.025759220123291016, "step": 2399 }, { "epoch": 3.66058349609375e-06, "step": 2399, "training_step_time": 0.1056973934173584 }, { "epoch": 3.662109375e-06, "grad_norm": 0.8776196241378784, "learning_rate": 9.975414512725057e-05, "loss": 0.1139, "step": 2400 }, { "epoch": 3.662109375e-06, "model_forward_time": 0.025216102600097656, "step": 2400 }, { "epoch": 3.662109375e-06, "step": 2400, "training_step_time": 0.10498809814453125 }, { "epoch": 3.66363525390625e-06, "model_forward_time": 0.025449752807617188, "step": 2401 }, { "epoch": 3.66363525390625e-06, "step": 2401, "training_step_time": 0.10901117324829102 }, { "epoch": 3.6651611328125e-06, "model_forward_time": 0.025267362594604492, "step": 2402 }, { "epoch": 3.6651611328125e-06, "step": 2402, "training_step_time": 0.10559630393981934 }, { "epoch": 3.66668701171875e-06, "model_forward_time": 0.025326967239379883, "step": 2403 }, { "epoch": 3.66668701171875e-06, "step": 2403, "training_step_time": 0.11076951026916504 }, { "epoch": 3.668212890625e-06, "model_forward_time": 0.025516033172607422, "step": 2404 }, { "epoch": 3.668212890625e-06, "step": 2404, "training_step_time": 0.1061394214630127 }, { "epoch": 3.66973876953125e-06, "model_forward_time": 0.02540421485900879, "step": 2405 }, { "epoch": 3.66973876953125e-06, "step": 2405, "training_step_time": 0.11357808113098145 }, { "epoch": 3.6712646484375e-06, "model_forward_time": 0.025434017181396484, "step": 2406 }, { "epoch": 3.6712646484375e-06, "step": 2406, "training_step_time": 0.11350870132446289 }, { "epoch": 3.67279052734375e-06, "model_forward_time": 0.02530956268310547, "step": 2407 }, { "epoch": 3.67279052734375e-06, "step": 2407, "training_step_time": 0.10465884208679199 }, { "epoch": 3.67431640625e-06, "model_forward_time": 0.02541971206665039, "step": 2408 }, { "epoch": 3.67431640625e-06, "step": 2408, "training_step_time": 0.13942766189575195 }, { "epoch": 3.67584228515625e-06, "model_forward_time": 0.02474522590637207, "step": 2409 }, { "epoch": 3.67584228515625e-06, "step": 2409, "training_step_time": 0.11500954627990723 }, { "epoch": 3.6773681640625e-06, "grad_norm": 0.62945955991745, "learning_rate": 9.974865594053902e-05, "loss": 0.1394, "step": 2410 }, { "epoch": 3.6773681640625e-06, "model_forward_time": 0.02553868293762207, "step": 2410 }, { "epoch": 3.6773681640625e-06, "step": 2410, "training_step_time": 0.11709332466125488 }, { "epoch": 3.67889404296875e-06, "model_forward_time": 0.02585434913635254, "step": 2411 }, { "epoch": 3.67889404296875e-06, "step": 2411, "training_step_time": 0.12130308151245117 }, { "epoch": 3.680419921875e-06, "model_forward_time": 0.025525331497192383, "step": 2412 }, { "epoch": 3.680419921875e-06, "step": 2412, "training_step_time": 0.22292232513427734 }, { "epoch": 3.68194580078125e-06, "model_forward_time": 0.02533411979675293, "step": 2413 }, { "epoch": 3.68194580078125e-06, "step": 2413, "training_step_time": 0.12282395362854004 }, { "epoch": 3.6834716796875e-06, "model_forward_time": 0.02399277687072754, "step": 2414 }, { "epoch": 3.6834716796875e-06, "step": 2414, "training_step_time": 0.1124117374420166 }, { "epoch": 3.68499755859375e-06, "model_forward_time": 0.025194883346557617, "step": 2415 }, { "epoch": 3.68499755859375e-06, "step": 2415, "training_step_time": 0.11420488357543945 }, { "epoch": 3.6865234375e-06, "model_forward_time": 0.02571582794189453, "step": 2416 }, { "epoch": 3.6865234375e-06, "step": 2416, "training_step_time": 0.11023402214050293 }, { "epoch": 3.68804931640625e-06, "model_forward_time": 0.025423765182495117, "step": 2417 }, { "epoch": 3.68804931640625e-06, "step": 2417, "training_step_time": 0.10760879516601562 }, { "epoch": 3.6895751953125e-06, "model_forward_time": 0.025397300720214844, "step": 2418 }, { "epoch": 3.6895751953125e-06, "step": 2418, "training_step_time": 0.2127223014831543 }, { "epoch": 3.69110107421875e-06, "model_forward_time": 0.025742053985595703, "step": 2419 }, { "epoch": 3.69110107421875e-06, "step": 2419, "training_step_time": 0.17074179649353027 }, { "epoch": 3.692626953125e-06, "grad_norm": 0.4951641857624054, "learning_rate": 9.974310630451948e-05, "loss": 0.1687, "step": 2420 }, { "epoch": 3.692626953125e-06, "model_forward_time": 0.0235445499420166, "step": 2420 }, { "epoch": 3.692626953125e-06, "step": 2420, "training_step_time": 0.19449067115783691 }, { "epoch": 3.69415283203125e-06, "model_forward_time": 0.024948835372924805, "step": 2421 }, { "epoch": 3.69415283203125e-06, "step": 2421, "training_step_time": 0.1596693992614746 }, { "epoch": 3.6956787109375e-06, "model_forward_time": 0.024823665618896484, "step": 2422 }, { "epoch": 3.6956787109375e-06, "step": 2422, "training_step_time": 0.14661335945129395 }, { "epoch": 3.69720458984375e-06, "model_forward_time": 0.02461695671081543, "step": 2423 }, { "epoch": 3.69720458984375e-06, "step": 2423, "training_step_time": 0.1404561996459961 }, { "epoch": 3.69873046875e-06, "model_forward_time": 0.023802757263183594, "step": 2424 }, { "epoch": 3.69873046875e-06, "step": 2424, "training_step_time": 0.12650322914123535 }, { "epoch": 3.70025634765625e-06, "model_forward_time": 0.023796558380126953, "step": 2425 }, { "epoch": 3.70025634765625e-06, "step": 2425, "training_step_time": 0.11910057067871094 }, { "epoch": 3.7017822265625e-06, "model_forward_time": 0.025799989700317383, "step": 2426 }, { "epoch": 3.7017822265625e-06, "step": 2426, "training_step_time": 0.12034058570861816 }, { "epoch": 3.70330810546875e-06, "model_forward_time": 0.025651931762695312, "step": 2427 }, { "epoch": 3.70330810546875e-06, "step": 2427, "training_step_time": 0.20116853713989258 }, { "epoch": 3.704833984375e-06, "model_forward_time": 0.02464008331298828, "step": 2428 }, { "epoch": 3.704833984375e-06, "step": 2428, "training_step_time": 0.10827040672302246 }, { "epoch": 3.70635986328125e-06, "model_forward_time": 0.02773451805114746, "step": 2429 }, { "epoch": 3.70635986328125e-06, "step": 2429, "training_step_time": 0.11464834213256836 }, { "epoch": 3.7078857421875e-06, "grad_norm": 0.7130682468414307, "learning_rate": 9.973749622593534e-05, "loss": 0.1277, "step": 2430 }, { "epoch": 3.7078857421875e-06, "model_forward_time": 0.02567148208618164, "step": 2430 }, { "epoch": 3.7078857421875e-06, "step": 2430, "training_step_time": 0.20616555213928223 }, { "epoch": 3.70941162109375e-06, "model_forward_time": 0.027072429656982422, "step": 2431 }, { "epoch": 3.70941162109375e-06, "step": 2431, "training_step_time": 0.1265108585357666 }, { "epoch": 3.7109375e-06, "model_forward_time": 0.024636507034301758, "step": 2432 }, { "epoch": 3.7109375e-06, "step": 2432, "training_step_time": 0.10872626304626465 }, { "epoch": 3.71246337890625e-06, "model_forward_time": 0.0260317325592041, "step": 2433 }, { "epoch": 3.71246337890625e-06, "step": 2433, "training_step_time": 0.10725021362304688 }, { "epoch": 3.7139892578125e-06, "model_forward_time": 0.024962425231933594, "step": 2434 }, { "epoch": 3.7139892578125e-06, "step": 2434, "training_step_time": 0.1084890365600586 }, { "epoch": 3.71551513671875e-06, "model_forward_time": 0.025940895080566406, "step": 2435 }, { "epoch": 3.71551513671875e-06, "step": 2435, "training_step_time": 0.10590553283691406 }, { "epoch": 3.717041015625e-06, "model_forward_time": 0.025467395782470703, "step": 2436 }, { "epoch": 3.717041015625e-06, "step": 2436, "training_step_time": 0.11286187171936035 }, { "epoch": 3.71856689453125e-06, "model_forward_time": 0.025593996047973633, "step": 2437 }, { "epoch": 3.71856689453125e-06, "step": 2437, "training_step_time": 0.10830378532409668 }, { "epoch": 3.7200927734375e-06, "model_forward_time": 0.025409221649169922, "step": 2438 }, { "epoch": 3.7200927734375e-06, "step": 2438, "training_step_time": 0.10957717895507812 }, { "epoch": 3.72161865234375e-06, "model_forward_time": 0.025145292282104492, "step": 2439 }, { "epoch": 3.72161865234375e-06, "step": 2439, "training_step_time": 0.11058807373046875 }, { "epoch": 3.72314453125e-06, "grad_norm": 0.6387666463851929, "learning_rate": 9.973182571160332e-05, "loss": 0.143, "step": 2440 }, { "epoch": 3.72314453125e-06, "model_forward_time": 0.025300264358520508, "step": 2440 }, { "epoch": 3.72314453125e-06, "step": 2440, "training_step_time": 0.10539102554321289 }, { "epoch": 3.72467041015625e-06, "model_forward_time": 0.025233983993530273, "step": 2441 }, { "epoch": 3.72467041015625e-06, "step": 2441, "training_step_time": 0.10819363594055176 }, { "epoch": 3.7261962890625e-06, "model_forward_time": 0.02541375160217285, "step": 2442 }, { "epoch": 3.7261962890625e-06, "step": 2442, "training_step_time": 0.11135601997375488 }, { "epoch": 3.72772216796875e-06, "model_forward_time": 0.025618553161621094, "step": 2443 }, { "epoch": 3.72772216796875e-06, "step": 2443, "training_step_time": 0.10932230949401855 }, { "epoch": 3.729248046875e-06, "model_forward_time": 0.025752544403076172, "step": 2444 }, { "epoch": 3.729248046875e-06, "step": 2444, "training_step_time": 0.10847806930541992 }, { "epoch": 3.73077392578125e-06, "model_forward_time": 0.02591991424560547, "step": 2445 }, { "epoch": 3.73077392578125e-06, "step": 2445, "training_step_time": 0.10854244232177734 }, { "epoch": 3.7322998046875e-06, "model_forward_time": 0.025471210479736328, "step": 2446 }, { "epoch": 3.7322998046875e-06, "step": 2446, "training_step_time": 0.10749006271362305 }, { "epoch": 3.73382568359375e-06, "model_forward_time": 0.0258636474609375, "step": 2447 }, { "epoch": 3.73382568359375e-06, "step": 2447, "training_step_time": 0.11118054389953613 }, { "epoch": 3.7353515625e-06, "model_forward_time": 0.025213003158569336, "step": 2448 }, { "epoch": 3.7353515625e-06, "step": 2448, "training_step_time": 0.10736441612243652 }, { "epoch": 3.73687744140625e-06, "model_forward_time": 0.025422334671020508, "step": 2449 }, { "epoch": 3.73687744140625e-06, "step": 2449, "training_step_time": 0.10825872421264648 }, { "epoch": 3.7384033203125e-06, "grad_norm": 0.7156681418418884, "learning_rate": 9.972609476841367e-05, "loss": 0.1222, "step": 2450 }, { "epoch": 3.7384033203125e-06, "model_forward_time": 0.025318145751953125, "step": 2450 }, { "epoch": 3.7384033203125e-06, "step": 2450, "training_step_time": 0.11148333549499512 }, { "epoch": 3.73992919921875e-06, "model_forward_time": 0.025162458419799805, "step": 2451 }, { "epoch": 3.73992919921875e-06, "step": 2451, "training_step_time": 0.1061704158782959 }, { "epoch": 3.741455078125e-06, "model_forward_time": 0.0251162052154541, "step": 2452 }, { "epoch": 3.741455078125e-06, "step": 2452, "training_step_time": 0.14069080352783203 }, { "epoch": 3.74298095703125e-06, "model_forward_time": 0.027409791946411133, "step": 2453 }, { "epoch": 3.74298095703125e-06, "step": 2453, "training_step_time": 0.17675089836120605 }, { "epoch": 3.7445068359375e-06, "model_forward_time": 0.024850130081176758, "step": 2454 }, { "epoch": 3.7445068359375e-06, "step": 2454, "training_step_time": 0.192976713180542 }, { "epoch": 3.74603271484375e-06, "model_forward_time": 0.02440166473388672, "step": 2455 }, { "epoch": 3.74603271484375e-06, "step": 2455, "training_step_time": 0.1324610710144043 }, { "epoch": 3.74755859375e-06, "model_forward_time": 0.024494409561157227, "step": 2456 }, { "epoch": 3.74755859375e-06, "step": 2456, "training_step_time": 0.16155195236206055 }, { "epoch": 3.74908447265625e-06, "model_forward_time": 0.025012493133544922, "step": 2457 }, { "epoch": 3.74908447265625e-06, "step": 2457, "training_step_time": 0.18750762939453125 }, { "epoch": 3.7506103515625e-06, "model_forward_time": 0.02459573745727539, "step": 2458 }, { "epoch": 3.7506103515625e-06, "step": 2458, "training_step_time": 0.15998196601867676 }, { "epoch": 3.75213623046875e-06, "model_forward_time": 0.025149106979370117, "step": 2459 }, { "epoch": 3.75213623046875e-06, "step": 2459, "training_step_time": 0.10747933387756348 }, { "epoch": 3.753662109375e-06, "grad_norm": 0.5127847194671631, "learning_rate": 9.972030340333001e-05, "loss": 0.1259, "step": 2460 }, { "epoch": 3.753662109375e-06, "model_forward_time": 0.025229454040527344, "step": 2460 }, { "epoch": 3.753662109375e-06, "step": 2460, "training_step_time": 0.1051795482635498 }, { "epoch": 3.75518798828125e-06, "model_forward_time": 0.025468826293945312, "step": 2461 }, { "epoch": 3.75518798828125e-06, "step": 2461, "training_step_time": 0.19698452949523926 }, { "epoch": 3.7567138671875e-06, "model_forward_time": 0.024358034133911133, "step": 2462 }, { "epoch": 3.7567138671875e-06, "step": 2462, "training_step_time": 0.15067720413208008 }, { "epoch": 3.75823974609375e-06, "model_forward_time": 0.024712085723876953, "step": 2463 }, { "epoch": 3.75823974609375e-06, "step": 2463, "training_step_time": 0.11064767837524414 }, { "epoch": 3.759765625e-06, "model_forward_time": 0.02512955665588379, "step": 2464 }, { "epoch": 3.759765625e-06, "step": 2464, "training_step_time": 0.10876941680908203 }, { "epoch": 3.76129150390625e-06, "model_forward_time": 0.02569580078125, "step": 2465 }, { "epoch": 3.76129150390625e-06, "step": 2465, "training_step_time": 0.11101913452148438 }, { "epoch": 3.7628173828125e-06, "model_forward_time": 0.026356220245361328, "step": 2466 }, { "epoch": 3.7628173828125e-06, "step": 2466, "training_step_time": 0.11411738395690918 }, { "epoch": 3.76434326171875e-06, "model_forward_time": 0.02544569969177246, "step": 2467 }, { "epoch": 3.76434326171875e-06, "step": 2467, "training_step_time": 0.2089993953704834 }, { "epoch": 3.765869140625e-06, "model_forward_time": 0.024832487106323242, "step": 2468 }, { "epoch": 3.765869140625e-06, "step": 2468, "training_step_time": 0.13469791412353516 }, { "epoch": 3.76739501953125e-06, "model_forward_time": 0.02465224266052246, "step": 2469 }, { "epoch": 3.76739501953125e-06, "step": 2469, "training_step_time": 0.1372072696685791 }, { "epoch": 3.7689208984375e-06, "grad_norm": 0.7402927279472351, "learning_rate": 9.971445162338939e-05, "loss": 0.131, "step": 2470 }, { "epoch": 3.7689208984375e-06, "model_forward_time": 0.02436685562133789, "step": 2470 }, { "epoch": 3.7689208984375e-06, "step": 2470, "training_step_time": 0.18536162376403809 }, { "epoch": 3.77044677734375e-06, "model_forward_time": 0.024528026580810547, "step": 2471 }, { "epoch": 3.77044677734375e-06, "step": 2471, "training_step_time": 0.12128996849060059 }, { "epoch": 3.77197265625e-06, "model_forward_time": 0.024566650390625, "step": 2472 }, { "epoch": 3.77197265625e-06, "step": 2472, "training_step_time": 0.11435961723327637 }, { "epoch": 3.77349853515625e-06, "model_forward_time": 0.025643348693847656, "step": 2473 }, { "epoch": 3.77349853515625e-06, "step": 2473, "training_step_time": 0.10745501518249512 }, { "epoch": 3.7750244140625e-06, "model_forward_time": 0.02560710906982422, "step": 2474 }, { "epoch": 3.7750244140625e-06, "step": 2474, "training_step_time": 0.10751605033874512 }, { "epoch": 3.77655029296875e-06, "model_forward_time": 0.026154756546020508, "step": 2475 }, { "epoch": 3.77655029296875e-06, "step": 2475, "training_step_time": 0.11568593978881836 }, { "epoch": 3.778076171875e-06, "model_forward_time": 0.02592778205871582, "step": 2476 }, { "epoch": 3.778076171875e-06, "step": 2476, "training_step_time": 0.11283731460571289 }, { "epoch": 3.77960205078125e-06, "model_forward_time": 0.0255126953125, "step": 2477 }, { "epoch": 3.77960205078125e-06, "step": 2477, "training_step_time": 0.1073300838470459 }, { "epoch": 3.7811279296875e-06, "model_forward_time": 0.02589106559753418, "step": 2478 }, { "epoch": 3.7811279296875e-06, "step": 2478, "training_step_time": 0.11598682403564453 }, { "epoch": 3.78265380859375e-06, "model_forward_time": 0.025252580642700195, "step": 2479 }, { "epoch": 3.78265380859375e-06, "step": 2479, "training_step_time": 0.12063789367675781 }, { "epoch": 3.7841796875e-06, "grad_norm": 0.9336197972297668, "learning_rate": 9.97085394357023e-05, "loss": 0.1645, "step": 2480 }, { "epoch": 3.7841796875e-06, "model_forward_time": 0.02584981918334961, "step": 2480 }, { "epoch": 3.7841796875e-06, "step": 2480, "training_step_time": 0.11069917678833008 }, { "epoch": 3.78570556640625e-06, "model_forward_time": 0.025162458419799805, "step": 2481 }, { "epoch": 3.78570556640625e-06, "step": 2481, "training_step_time": 0.10551095008850098 }, { "epoch": 3.7872314453125e-06, "model_forward_time": 0.02553534507751465, "step": 2482 }, { "epoch": 3.7872314453125e-06, "step": 2482, "training_step_time": 0.10664749145507812 }, { "epoch": 3.78875732421875e-06, "model_forward_time": 0.02544379234313965, "step": 2483 }, { "epoch": 3.78875732421875e-06, "step": 2483, "training_step_time": 0.10379862785339355 }, { "epoch": 3.790283203125e-06, "model_forward_time": 0.0253446102142334, "step": 2484 }, { "epoch": 3.790283203125e-06, "step": 2484, "training_step_time": 0.10477423667907715 }, { "epoch": 3.79180908203125e-06, "model_forward_time": 0.025556564331054688, "step": 2485 }, { "epoch": 3.79180908203125e-06, "step": 2485, "training_step_time": 0.1066734790802002 }, { "epoch": 3.7933349609375e-06, "model_forward_time": 0.025150299072265625, "step": 2486 }, { "epoch": 3.7933349609375e-06, "step": 2486, "training_step_time": 0.10605001449584961 }, { "epoch": 3.79486083984375e-06, "model_forward_time": 0.025298118591308594, "step": 2487 }, { "epoch": 3.79486083984375e-06, "step": 2487, "training_step_time": 0.10730218887329102 }, { "epoch": 3.79638671875e-06, "model_forward_time": 0.025428295135498047, "step": 2488 }, { "epoch": 3.79638671875e-06, "step": 2488, "training_step_time": 0.10447001457214355 }, { "epoch": 3.79791259765625e-06, "model_forward_time": 0.027969837188720703, "step": 2489 }, { "epoch": 3.79791259765625e-06, "step": 2489, "training_step_time": 0.10995912551879883 }, { "epoch": 3.7994384765625e-06, "grad_norm": 1.0131940841674805, "learning_rate": 9.970256684745258e-05, "loss": 0.1391, "step": 2490 }, { "epoch": 3.7994384765625e-06, "model_forward_time": 0.0251619815826416, "step": 2490 }, { "epoch": 3.7994384765625e-06, "step": 2490, "training_step_time": 0.10502076148986816 }, { "epoch": 3.80096435546875e-06, "model_forward_time": 0.025311946868896484, "step": 2491 }, { "epoch": 3.80096435546875e-06, "step": 2491, "training_step_time": 0.10914158821105957 }, { "epoch": 3.802490234375e-06, "model_forward_time": 0.025356769561767578, "step": 2492 }, { "epoch": 3.802490234375e-06, "step": 2492, "training_step_time": 0.10823988914489746 }, { "epoch": 3.80401611328125e-06, "model_forward_time": 0.025299549102783203, "step": 2493 }, { "epoch": 3.80401611328125e-06, "step": 2493, "training_step_time": 0.11547207832336426 }, { "epoch": 3.8055419921875e-06, "model_forward_time": 0.02527904510498047, "step": 2494 }, { "epoch": 3.8055419921875e-06, "step": 2494, "training_step_time": 0.12424206733703613 }, { "epoch": 3.80706787109375e-06, "model_forward_time": 0.025422334671020508, "step": 2495 }, { "epoch": 3.80706787109375e-06, "step": 2495, "training_step_time": 0.1422436237335205 }, { "epoch": 3.80859375e-06, "model_forward_time": 0.02512383460998535, "step": 2496 }, { "epoch": 3.80859375e-06, "step": 2496, "training_step_time": 0.16315555572509766 }, { "epoch": 3.81011962890625e-06, "model_forward_time": 0.024690628051757812, "step": 2497 }, { "epoch": 3.81011962890625e-06, "step": 2497, "training_step_time": 0.12959909439086914 }, { "epoch": 3.8116455078125e-06, "model_forward_time": 0.024353504180908203, "step": 2498 }, { "epoch": 3.8116455078125e-06, "step": 2498, "training_step_time": 0.1750173568725586 }, { "epoch": 3.81317138671875e-06, "model_forward_time": 0.024386882781982422, "step": 2499 }, { "epoch": 3.81317138671875e-06, "step": 2499, "training_step_time": 0.1945657730102539 }, { "epoch": 3.814697265625e-06, "grad_norm": 0.8203685283660889, "learning_rate": 9.969653386589748e-05, "loss": 0.1197, "step": 2500 }, { "epoch": 3.814697265625e-06, "model_forward_time": 0.02459096908569336, "step": 2500 }, { "epoch": 3.814697265625e-06, "step": 2500, "training_step_time": 0.1203460693359375 }, { "epoch": 3.81622314453125e-06, "model_forward_time": 0.024329423904418945, "step": 2501 }, { "epoch": 3.81622314453125e-06, "step": 2501, "training_step_time": 0.17841076850891113 }, { "epoch": 3.8177490234375e-06, "model_forward_time": 0.02482128143310547, "step": 2502 }, { "epoch": 3.8177490234375e-06, "step": 2502, "training_step_time": 0.12412452697753906 }, { "epoch": 3.81927490234375e-06, "model_forward_time": 0.024544715881347656, "step": 2503 }, { "epoch": 3.81927490234375e-06, "step": 2503, "training_step_time": 0.10745644569396973 }, { "epoch": 3.82080078125e-06, "model_forward_time": 0.027962923049926758, "step": 2504 }, { "epoch": 3.82080078125e-06, "step": 2504, "training_step_time": 0.10976195335388184 }, { "epoch": 3.82232666015625e-06, "model_forward_time": 0.025274038314819336, "step": 2505 }, { "epoch": 3.82232666015625e-06, "step": 2505, "training_step_time": 0.1073157787322998 }, { "epoch": 3.8238525390625e-06, "model_forward_time": 0.0251309871673584, "step": 2506 }, { "epoch": 3.8238525390625e-06, "step": 2506, "training_step_time": 0.11293482780456543 }, { "epoch": 3.82537841796875e-06, "model_forward_time": 0.02541828155517578, "step": 2507 }, { "epoch": 3.82537841796875e-06, "step": 2507, "training_step_time": 0.15117120742797852 }, { "epoch": 3.826904296875e-06, "model_forward_time": 0.024676799774169922, "step": 2508 }, { "epoch": 3.826904296875e-06, "step": 2508, "training_step_time": 0.13915443420410156 }, { "epoch": 3.82843017578125e-06, "model_forward_time": 0.0246429443359375, "step": 2509 }, { "epoch": 3.82843017578125e-06, "step": 2509, "training_step_time": 0.11389350891113281 }, { "epoch": 3.8299560546875e-06, "grad_norm": 0.6547967195510864, "learning_rate": 9.969044049836767e-05, "loss": 0.1276, "step": 2510 }, { "epoch": 3.8299560546875e-06, "model_forward_time": 0.02484130859375, "step": 2510 }, { "epoch": 3.8299560546875e-06, "step": 2510, "training_step_time": 0.11131405830383301 }, { "epoch": 3.83148193359375e-06, "model_forward_time": 0.025444507598876953, "step": 2511 }, { "epoch": 3.83148193359375e-06, "step": 2511, "training_step_time": 0.11545777320861816 }, { "epoch": 3.8330078125e-06, "model_forward_time": 0.024796724319458008, "step": 2512 }, { "epoch": 3.8330078125e-06, "step": 2512, "training_step_time": 0.10818123817443848 }, { "epoch": 3.83453369140625e-06, "model_forward_time": 0.02537226676940918, "step": 2513 }, { "epoch": 3.83453369140625e-06, "step": 2513, "training_step_time": 0.19669389724731445 }, { "epoch": 3.8360595703125e-06, "model_forward_time": 0.024755239486694336, "step": 2514 }, { "epoch": 3.8360595703125e-06, "step": 2514, "training_step_time": 0.10411620140075684 }, { "epoch": 3.83758544921875e-06, "model_forward_time": 0.024598360061645508, "step": 2515 }, { "epoch": 3.83758544921875e-06, "step": 2515, "training_step_time": 0.10577011108398438 }, { "epoch": 3.839111328125e-06, "model_forward_time": 0.025448083877563477, "step": 2516 }, { "epoch": 3.839111328125e-06, "step": 2516, "training_step_time": 0.10699868202209473 }, { "epoch": 3.84063720703125e-06, "model_forward_time": 0.0253756046295166, "step": 2517 }, { "epoch": 3.84063720703125e-06, "step": 2517, "training_step_time": 0.2233750820159912 }, { "epoch": 3.8421630859375e-06, "model_forward_time": 0.024968624114990234, "step": 2518 }, { "epoch": 3.8421630859375e-06, "step": 2518, "training_step_time": 0.11464333534240723 }, { "epoch": 3.84368896484375e-06, "model_forward_time": 0.024672985076904297, "step": 2519 }, { "epoch": 3.84368896484375e-06, "step": 2519, "training_step_time": 0.11810493469238281 }, { "epoch": 3.84521484375e-06, "grad_norm": 0.4905283451080322, "learning_rate": 9.968428675226714e-05, "loss": 0.15, "step": 2520 }, { "epoch": 3.84521484375e-06, "model_forward_time": 0.02505636215209961, "step": 2520 }, { "epoch": 3.84521484375e-06, "step": 2520, "training_step_time": 0.19582796096801758 }, { "epoch": 3.84674072265625e-06, "model_forward_time": 0.02480006217956543, "step": 2521 }, { "epoch": 3.84674072265625e-06, "step": 2521, "training_step_time": 0.11697196960449219 }, { "epoch": 3.8482666015625e-06, "model_forward_time": 0.024858951568603516, "step": 2522 }, { "epoch": 3.8482666015625e-06, "step": 2522, "training_step_time": 0.10675406455993652 }, { "epoch": 3.84979248046875e-06, "model_forward_time": 0.02541184425354004, "step": 2523 }, { "epoch": 3.84979248046875e-06, "step": 2523, "training_step_time": 0.10760903358459473 }, { "epoch": 3.851318359375e-06, "model_forward_time": 0.02526378631591797, "step": 2524 }, { "epoch": 3.851318359375e-06, "step": 2524, "training_step_time": 0.10769486427307129 }, { "epoch": 3.85284423828125e-06, "model_forward_time": 0.02568650245666504, "step": 2525 }, { "epoch": 3.85284423828125e-06, "step": 2525, "training_step_time": 0.1098628044128418 }, { "epoch": 3.8543701171875e-06, "model_forward_time": 0.025000810623168945, "step": 2526 }, { "epoch": 3.8543701171875e-06, "step": 2526, "training_step_time": 0.10809779167175293 }, { "epoch": 3.85589599609375e-06, "model_forward_time": 0.02568507194519043, "step": 2527 }, { "epoch": 3.85589599609375e-06, "step": 2527, "training_step_time": 0.10846757888793945 }, { "epoch": 3.857421875e-06, "model_forward_time": 0.02523660659790039, "step": 2528 }, { "epoch": 3.857421875e-06, "step": 2528, "training_step_time": 0.10638165473937988 }, { "epoch": 3.85894775390625e-06, "model_forward_time": 0.025026559829711914, "step": 2529 }, { "epoch": 3.85894775390625e-06, "step": 2529, "training_step_time": 0.10637211799621582 }, { "epoch": 3.8604736328125e-06, "grad_norm": 0.45186877250671387, "learning_rate": 9.967807263507329e-05, "loss": 0.1006, "step": 2530 }, { "epoch": 3.8604736328125e-06, "model_forward_time": 0.025406599044799805, "step": 2530 }, { "epoch": 3.8604736328125e-06, "step": 2530, "training_step_time": 0.10567331314086914 }, { "epoch": 3.86199951171875e-06, "model_forward_time": 0.02520895004272461, "step": 2531 }, { "epoch": 3.86199951171875e-06, "step": 2531, "training_step_time": 0.10552239418029785 }, { "epoch": 3.863525390625e-06, "model_forward_time": 0.025433778762817383, "step": 2532 }, { "epoch": 3.863525390625e-06, "step": 2532, "training_step_time": 0.10562419891357422 }, { "epoch": 3.86505126953125e-06, "model_forward_time": 0.02487802505493164, "step": 2533 }, { "epoch": 3.86505126953125e-06, "step": 2533, "training_step_time": 0.10971593856811523 }, { "epoch": 3.8665771484375e-06, "model_forward_time": 0.025235652923583984, "step": 2534 }, { "epoch": 3.8665771484375e-06, "step": 2534, "training_step_time": 0.10633349418640137 }, { "epoch": 3.86810302734375e-06, "model_forward_time": 0.025305986404418945, "step": 2535 }, { "epoch": 3.86810302734375e-06, "step": 2535, "training_step_time": 0.10921716690063477 }, { "epoch": 3.86962890625e-06, "model_forward_time": 0.025175094604492188, "step": 2536 }, { "epoch": 3.86962890625e-06, "step": 2536, "training_step_time": 0.10807156562805176 }, { "epoch": 3.87115478515625e-06, "model_forward_time": 0.02562403678894043, "step": 2537 }, { "epoch": 3.87115478515625e-06, "step": 2537, "training_step_time": 0.10750675201416016 }, { "epoch": 3.8726806640625e-06, "model_forward_time": 0.025577545166015625, "step": 2538 }, { "epoch": 3.8726806640625e-06, "step": 2538, "training_step_time": 0.10752511024475098 }, { "epoch": 3.87420654296875e-06, "model_forward_time": 0.026001930236816406, "step": 2539 }, { "epoch": 3.87420654296875e-06, "step": 2539, "training_step_time": 0.11614036560058594 }, { "epoch": 3.875732421875e-06, "grad_norm": 0.6325926184654236, "learning_rate": 9.967179815433685e-05, "loss": 0.1126, "step": 2540 }, { "epoch": 3.875732421875e-06, "model_forward_time": 0.02568960189819336, "step": 2540 }, { "epoch": 3.875732421875e-06, "step": 2540, "training_step_time": 0.11594867706298828 }, { "epoch": 3.87725830078125e-06, "model_forward_time": 0.025379419326782227, "step": 2541 }, { "epoch": 3.87725830078125e-06, "step": 2541, "training_step_time": 0.11919307708740234 }, { "epoch": 3.8787841796875e-06, "model_forward_time": 0.025401830673217773, "step": 2542 }, { "epoch": 3.8787841796875e-06, "step": 2542, "training_step_time": 0.10793447494506836 }, { "epoch": 3.88031005859375e-06, "model_forward_time": 0.02532052993774414, "step": 2543 }, { "epoch": 3.88031005859375e-06, "step": 2543, "training_step_time": 0.19345736503601074 }, { "epoch": 3.8818359375e-06, "model_forward_time": 0.024195432662963867, "step": 2544 }, { "epoch": 3.8818359375e-06, "step": 2544, "training_step_time": 0.17615365982055664 }, { "epoch": 3.88336181640625e-06, "model_forward_time": 0.023886680603027344, "step": 2545 }, { "epoch": 3.88336181640625e-06, "step": 2545, "training_step_time": 0.18914461135864258 }, { "epoch": 3.8848876953125e-06, "model_forward_time": 0.024648189544677734, "step": 2546 }, { "epoch": 3.8848876953125e-06, "step": 2546, "training_step_time": 0.15303659439086914 }, { "epoch": 3.88641357421875e-06, "model_forward_time": 0.024959564208984375, "step": 2547 }, { "epoch": 3.88641357421875e-06, "step": 2547, "training_step_time": 0.17954802513122559 }, { "epoch": 3.887939453125e-06, "model_forward_time": 0.024660587310791016, "step": 2548 }, { "epoch": 3.887939453125e-06, "step": 2548, "training_step_time": 0.16007018089294434 }, { "epoch": 3.88946533203125e-06, "model_forward_time": 0.02429652214050293, "step": 2549 }, { "epoch": 3.88946533203125e-06, "step": 2549, "training_step_time": 0.10378861427307129 }, { "epoch": 3.8909912109375e-06, "grad_norm": 0.6692981719970703, "learning_rate": 9.966546331768191e-05, "loss": 0.1202, "step": 2550 }, { "epoch": 3.8909912109375e-06, "model_forward_time": 0.025659561157226562, "step": 2550 }, { "epoch": 3.8909912109375e-06, "step": 2550, "training_step_time": 0.10602498054504395 }, { "epoch": 3.89251708984375e-06, "model_forward_time": 0.02485799789428711, "step": 2551 }, { "epoch": 3.89251708984375e-06, "step": 2551, "training_step_time": 0.10846996307373047 }, { "epoch": 3.89404296875e-06, "model_forward_time": 0.025962352752685547, "step": 2552 }, { "epoch": 3.89404296875e-06, "step": 2552, "training_step_time": 0.15173935890197754 }, { "epoch": 3.89556884765625e-06, "model_forward_time": 0.025590896606445312, "step": 2553 }, { "epoch": 3.89556884765625e-06, "step": 2553, "training_step_time": 0.15000033378601074 }, { "epoch": 3.8970947265625e-06, "model_forward_time": 0.024731874465942383, "step": 2554 }, { "epoch": 3.8970947265625e-06, "step": 2554, "training_step_time": 0.1374979019165039 }, { "epoch": 3.89862060546875e-06, "model_forward_time": 0.02346944808959961, "step": 2555 }, { "epoch": 3.89862060546875e-06, "step": 2555, "training_step_time": 0.20352649688720703 }, { "epoch": 3.900146484375e-06, "model_forward_time": 0.024442434310913086, "step": 2556 }, { "epoch": 3.900146484375e-06, "step": 2556, "training_step_time": 0.1940760612487793 }, { "epoch": 3.90167236328125e-06, "model_forward_time": 0.024971485137939453, "step": 2557 }, { "epoch": 3.90167236328125e-06, "step": 2557, "training_step_time": 0.16737747192382812 }, { "epoch": 3.9031982421875e-06, "model_forward_time": 0.023710966110229492, "step": 2558 }, { "epoch": 3.9031982421875e-06, "step": 2558, "training_step_time": 0.1463909149169922 }, { "epoch": 3.90472412109375e-06, "model_forward_time": 0.023838043212890625, "step": 2559 }, { "epoch": 3.90472412109375e-06, "step": 2559, "training_step_time": 0.12836956977844238 }, { "epoch": 3.90625e-06, "grad_norm": 1.0405771732330322, "learning_rate": 9.96590681328059e-05, "loss": 0.1484, "step": 2560 }, { "epoch": 3.90625e-06, "model_forward_time": 0.023972034454345703, "step": 2560 }, { "epoch": 3.90625e-06, "step": 2560, "training_step_time": 0.1922767162322998 }, { "epoch": 3.90777587890625e-06, "model_forward_time": 0.025022506713867188, "step": 2561 }, { "epoch": 3.90777587890625e-06, "step": 2561, "training_step_time": 0.12780380249023438 }, { "epoch": 3.9093017578125e-06, "model_forward_time": 0.024634599685668945, "step": 2562 }, { "epoch": 3.9093017578125e-06, "step": 2562, "training_step_time": 0.15634989738464355 }, { "epoch": 3.91082763671875e-06, "model_forward_time": 0.024483680725097656, "step": 2563 }, { "epoch": 3.91082763671875e-06, "step": 2563, "training_step_time": 0.15680599212646484 }, { "epoch": 3.912353515625e-06, "model_forward_time": 0.02456831932067871, "step": 2564 }, { "epoch": 3.912353515625e-06, "step": 2564, "training_step_time": 0.11584043502807617 }, { "epoch": 3.91387939453125e-06, "model_forward_time": 0.025426149368286133, "step": 2565 }, { "epoch": 3.91387939453125e-06, "step": 2565, "training_step_time": 0.11266922950744629 }, { "epoch": 3.9154052734375e-06, "model_forward_time": 0.024250030517578125, "step": 2566 }, { "epoch": 3.9154052734375e-06, "step": 2566, "training_step_time": 0.1071479320526123 }, { "epoch": 3.91693115234375e-06, "model_forward_time": 0.02664327621459961, "step": 2567 }, { "epoch": 3.91693115234375e-06, "step": 2567, "training_step_time": 0.11099553108215332 }, { "epoch": 3.91845703125e-06, "model_forward_time": 0.02579951286315918, "step": 2568 }, { "epoch": 3.91845703125e-06, "step": 2568, "training_step_time": 0.10607290267944336 }, { "epoch": 3.91998291015625e-06, "model_forward_time": 0.0254361629486084, "step": 2569 }, { "epoch": 3.91998291015625e-06, "step": 2569, "training_step_time": 0.10824728012084961 }, { "epoch": 3.9215087890625e-06, "grad_norm": 0.6695353984832764, "learning_rate": 9.965261260747956e-05, "loss": 0.1261, "step": 2570 }, { "epoch": 3.9215087890625e-06, "model_forward_time": 0.025298118591308594, "step": 2570 }, { "epoch": 3.9215087890625e-06, "step": 2570, "training_step_time": 0.10599803924560547 }, { "epoch": 3.92303466796875e-06, "model_forward_time": 0.02756810188293457, "step": 2571 }, { "epoch": 3.92303466796875e-06, "step": 2571, "training_step_time": 0.11224031448364258 }, { "epoch": 3.924560546875e-06, "model_forward_time": 0.025689363479614258, "step": 2572 }, { "epoch": 3.924560546875e-06, "step": 2572, "training_step_time": 0.10567450523376465 }, { "epoch": 3.92608642578125e-06, "model_forward_time": 0.025651216506958008, "step": 2573 }, { "epoch": 3.92608642578125e-06, "step": 2573, "training_step_time": 0.10559201240539551 }, { "epoch": 3.9276123046875e-06, "model_forward_time": 0.025165557861328125, "step": 2574 }, { "epoch": 3.9276123046875e-06, "step": 2574, "training_step_time": 0.11419391632080078 }, { "epoch": 3.92913818359375e-06, "model_forward_time": 0.02482295036315918, "step": 2575 }, { "epoch": 3.92913818359375e-06, "step": 2575, "training_step_time": 0.11034870147705078 }, { "epoch": 3.9306640625e-06, "model_forward_time": 0.025615692138671875, "step": 2576 }, { "epoch": 3.9306640625e-06, "step": 2576, "training_step_time": 0.10513186454772949 }, { "epoch": 3.93218994140625e-06, "model_forward_time": 0.025079965591430664, "step": 2577 }, { "epoch": 3.93218994140625e-06, "step": 2577, "training_step_time": 0.1083517074584961 }, { "epoch": 3.9337158203125e-06, "model_forward_time": 0.02583932876586914, "step": 2578 }, { "epoch": 3.9337158203125e-06, "step": 2578, "training_step_time": 0.10722923278808594 }, { "epoch": 3.93524169921875e-06, "model_forward_time": 0.025200843811035156, "step": 2579 }, { "epoch": 3.93524169921875e-06, "step": 2579, "training_step_time": 0.10635232925415039 }, { "epoch": 3.936767578125e-06, "grad_norm": 0.5957489013671875, "learning_rate": 9.964609674954696e-05, "loss": 0.134, "step": 2580 }, { "epoch": 3.936767578125e-06, "model_forward_time": 0.02541804313659668, "step": 2580 }, { "epoch": 3.936767578125e-06, "step": 2580, "training_step_time": 0.11294436454772949 }, { "epoch": 3.93829345703125e-06, "model_forward_time": 0.025622844696044922, "step": 2581 }, { "epoch": 3.93829345703125e-06, "step": 2581, "training_step_time": 0.10666084289550781 }, { "epoch": 3.9398193359375e-06, "model_forward_time": 0.02549147605895996, "step": 2582 }, { "epoch": 3.9398193359375e-06, "step": 2582, "training_step_time": 0.11539244651794434 }, { "epoch": 3.94134521484375e-06, "model_forward_time": 0.025074243545532227, "step": 2583 }, { "epoch": 3.94134521484375e-06, "step": 2583, "training_step_time": 0.11464595794677734 }, { "epoch": 3.94287109375e-06, "model_forward_time": 0.025991439819335938, "step": 2584 }, { "epoch": 3.94287109375e-06, "step": 2584, "training_step_time": 0.11116671562194824 }, { "epoch": 3.94439697265625e-06, "model_forward_time": 0.025312185287475586, "step": 2585 }, { "epoch": 3.94439697265625e-06, "step": 2585, "training_step_time": 0.1107170581817627 }, { "epoch": 3.9459228515625e-06, "model_forward_time": 0.025470733642578125, "step": 2586 }, { "epoch": 3.9459228515625e-06, "step": 2586, "training_step_time": 0.12117505073547363 }, { "epoch": 3.94744873046875e-06, "model_forward_time": 0.02547478675842285, "step": 2587 }, { "epoch": 3.94744873046875e-06, "step": 2587, "training_step_time": 0.16582131385803223 }, { "epoch": 3.948974609375e-06, "model_forward_time": 0.024440288543701172, "step": 2588 }, { "epoch": 3.948974609375e-06, "step": 2588, "training_step_time": 0.17252588272094727 }, { "epoch": 3.95050048828125e-06, "model_forward_time": 0.02466607093811035, "step": 2589 }, { "epoch": 3.95050048828125e-06, "step": 2589, "training_step_time": 0.178086519241333 }, { "epoch": 3.9520263671875e-06, "grad_norm": 0.7916200757026672, "learning_rate": 9.963952056692549e-05, "loss": 0.1296, "step": 2590 }, { "epoch": 3.9520263671875e-06, "model_forward_time": 0.023917198181152344, "step": 2590 }, { "epoch": 3.9520263671875e-06, "step": 2590, "training_step_time": 0.1822068691253662 }, { "epoch": 3.95355224609375e-06, "model_forward_time": 0.02631402015686035, "step": 2591 }, { "epoch": 3.95355224609375e-06, "step": 2591, "training_step_time": 0.1557767391204834 }, { "epoch": 3.955078125e-06, "model_forward_time": 0.025574445724487305, "step": 2592 }, { "epoch": 3.955078125e-06, "step": 2592, "training_step_time": 0.11522722244262695 }, { "epoch": 3.95660400390625e-06, "model_forward_time": 0.02468705177307129, "step": 2593 }, { "epoch": 3.95660400390625e-06, "step": 2593, "training_step_time": 0.10857152938842773 }, { "epoch": 3.9581298828125e-06, "model_forward_time": 0.025453805923461914, "step": 2594 }, { "epoch": 3.9581298828125e-06, "step": 2594, "training_step_time": 0.10865116119384766 }, { "epoch": 3.95965576171875e-06, "model_forward_time": 0.025238990783691406, "step": 2595 }, { "epoch": 3.95965576171875e-06, "step": 2595, "training_step_time": 0.10948657989501953 }, { "epoch": 3.961181640625e-06, "model_forward_time": 0.025798320770263672, "step": 2596 }, { "epoch": 3.961181640625e-06, "step": 2596, "training_step_time": 0.11607003211975098 }, { "epoch": 3.96270751953125e-06, "model_forward_time": 0.025298595428466797, "step": 2597 }, { "epoch": 3.96270751953125e-06, "step": 2597, "training_step_time": 0.13132214546203613 }, { "epoch": 3.9642333984375e-06, "model_forward_time": 0.02596879005432129, "step": 2598 }, { "epoch": 3.9642333984375e-06, "step": 2598, "training_step_time": 0.11165380477905273 }, { "epoch": 3.96575927734375e-06, "model_forward_time": 0.02558159828186035, "step": 2599 }, { "epoch": 3.96575927734375e-06, "step": 2599, "training_step_time": 0.12325191497802734 }, { "epoch": 3.96728515625e-06, "grad_norm": 0.8327940106391907, "learning_rate": 9.963288406760582e-05, "loss": 0.1503, "step": 2600 }, { "epoch": 3.96728515625e-06, "model_forward_time": 0.026118993759155273, "step": 2600 }, { "epoch": 3.96728515625e-06, "step": 2600, "training_step_time": 0.1679854393005371 }, { "epoch": 3.96881103515625e-06, "model_forward_time": 0.024846315383911133, "step": 2601 }, { "epoch": 3.96881103515625e-06, "step": 2601, "training_step_time": 0.13950634002685547 }, { "epoch": 3.9703369140625e-06, "model_forward_time": 0.024503469467163086, "step": 2602 }, { "epoch": 3.9703369140625e-06, "step": 2602, "training_step_time": 0.10708093643188477 }, { "epoch": 3.97186279296875e-06, "model_forward_time": 0.025322914123535156, "step": 2603 }, { "epoch": 3.97186279296875e-06, "step": 2603, "training_step_time": 0.10945701599121094 }, { "epoch": 3.973388671875e-06, "model_forward_time": 0.02540421485900879, "step": 2604 }, { "epoch": 3.973388671875e-06, "step": 2604, "training_step_time": 0.11253929138183594 }, { "epoch": 3.97491455078125e-06, "model_forward_time": 0.025065898895263672, "step": 2605 }, { "epoch": 3.97491455078125e-06, "step": 2605, "training_step_time": 0.1102752685546875 }, { "epoch": 3.9764404296875e-06, "model_forward_time": 0.02481865882873535, "step": 2606 }, { "epoch": 3.9764404296875e-06, "step": 2606, "training_step_time": 0.11115741729736328 }, { "epoch": 3.97796630859375e-06, "model_forward_time": 0.026096343994140625, "step": 2607 }, { "epoch": 3.97796630859375e-06, "step": 2607, "training_step_time": 0.11132001876831055 }, { "epoch": 3.9794921875e-06, "model_forward_time": 0.02564716339111328, "step": 2608 }, { "epoch": 3.9794921875e-06, "step": 2608, "training_step_time": 0.10923266410827637 }, { "epoch": 3.98101806640625e-06, "model_forward_time": 0.025237560272216797, "step": 2609 }, { "epoch": 3.98101806640625e-06, "step": 2609, "training_step_time": 0.2297360897064209 }, { "epoch": 3.9825439453125e-06, "grad_norm": 0.7479241490364075, "learning_rate": 9.962618725965196e-05, "loss": 0.1371, "step": 2610 }, { "epoch": 3.9825439453125e-06, "model_forward_time": 0.024466276168823242, "step": 2610 }, { "epoch": 3.9825439453125e-06, "step": 2610, "training_step_time": 0.10362887382507324 }, { "epoch": 3.98406982421875e-06, "model_forward_time": 0.02477288246154785, "step": 2611 }, { "epoch": 3.98406982421875e-06, "step": 2611, "training_step_time": 0.10330843925476074 }, { "epoch": 3.985595703125e-06, "model_forward_time": 0.02607131004333496, "step": 2612 }, { "epoch": 3.985595703125e-06, "step": 2612, "training_step_time": 0.11860013008117676 }, { "epoch": 3.98712158203125e-06, "model_forward_time": 0.025256872177124023, "step": 2613 }, { "epoch": 3.98712158203125e-06, "step": 2613, "training_step_time": 0.10472679138183594 }, { "epoch": 3.9886474609375e-06, "model_forward_time": 0.025536060333251953, "step": 2614 }, { "epoch": 3.9886474609375e-06, "step": 2614, "training_step_time": 0.10591983795166016 }, { "epoch": 3.99017333984375e-06, "model_forward_time": 0.025212764739990234, "step": 2615 }, { "epoch": 3.99017333984375e-06, "step": 2615, "training_step_time": 0.10774540901184082 }, { "epoch": 3.99169921875e-06, "model_forward_time": 0.025318145751953125, "step": 2616 }, { "epoch": 3.99169921875e-06, "step": 2616, "training_step_time": 0.11058163642883301 }, { "epoch": 3.99322509765625e-06, "model_forward_time": 0.02550029754638672, "step": 2617 }, { "epoch": 3.99322509765625e-06, "step": 2617, "training_step_time": 0.10853433609008789 }, { "epoch": 3.9947509765625e-06, "model_forward_time": 0.0256955623626709, "step": 2618 }, { "epoch": 3.9947509765625e-06, "step": 2618, "training_step_time": 0.10755658149719238 }, { "epoch": 3.99627685546875e-06, "model_forward_time": 0.024566173553466797, "step": 2619 }, { "epoch": 3.99627685546875e-06, "step": 2619, "training_step_time": 0.11729955673217773 }, { "epoch": 3.997802734375e-06, "grad_norm": 0.7612093687057495, "learning_rate": 9.961943015120113e-05, "loss": 0.1182, "step": 2620 }, { "epoch": 3.997802734375e-06, "model_forward_time": 0.024451255798339844, "step": 2620 }, { "epoch": 3.997802734375e-06, "step": 2620, "training_step_time": 0.14741015434265137 }, { "epoch": 3.99932861328125e-06, "model_forward_time": 0.024184465408325195, "step": 2621 }, { "epoch": 3.99932861328125e-06, "step": 2621, "training_step_time": 0.13308405876159668 }, { "epoch": 4.0008544921875e-06, "model_forward_time": 0.023622989654541016, "step": 2622 }, { "epoch": 4.0008544921875e-06, "step": 2622, "training_step_time": 0.12961959838867188 }, { "epoch": 4.00238037109375e-06, "model_forward_time": 0.024151086807250977, "step": 2623 }, { "epoch": 4.00238037109375e-06, "step": 2623, "training_step_time": 0.12308955192565918 }, { "epoch": 4.00390625e-06, "model_forward_time": 0.024152517318725586, "step": 2624 }, { "epoch": 4.00390625e-06, "step": 2624, "training_step_time": 0.12040305137634277 }, { "epoch": 4.00543212890625e-06, "model_forward_time": 0.024043798446655273, "step": 2625 }, { "epoch": 4.00543212890625e-06, "step": 2625, "training_step_time": 0.11487674713134766 }, { "epoch": 4.0069580078125e-06, "model_forward_time": 0.024619579315185547, "step": 2626 }, { "epoch": 4.0069580078125e-06, "step": 2626, "training_step_time": 0.1153862476348877 }, { "epoch": 4.00848388671875e-06, "model_forward_time": 0.02565312385559082, "step": 2627 }, { "epoch": 4.00848388671875e-06, "step": 2627, "training_step_time": 0.1151423454284668 }, { "epoch": 4.010009765625e-06, "model_forward_time": 0.025574922561645508, "step": 2628 }, { "epoch": 4.010009765625e-06, "step": 2628, "training_step_time": 0.11503458023071289 }, { "epoch": 4.01153564453125e-06, "model_forward_time": 0.025277376174926758, "step": 2629 }, { "epoch": 4.01153564453125e-06, "step": 2629, "training_step_time": 0.11299943923950195 }, { "epoch": 4.0130615234375e-06, "grad_norm": 0.9169083833694458, "learning_rate": 9.961261275046383e-05, "loss": 0.1264, "step": 2630 }, { "epoch": 4.0130615234375e-06, "model_forward_time": 0.025089025497436523, "step": 2630 }, { "epoch": 4.0130615234375e-06, "step": 2630, "training_step_time": 0.11535882949829102 }, { "epoch": 4.01458740234375e-06, "model_forward_time": 0.025220394134521484, "step": 2631 }, { "epoch": 4.01458740234375e-06, "step": 2631, "training_step_time": 0.1254265308380127 }, { "epoch": 4.01611328125e-06, "model_forward_time": 0.024977922439575195, "step": 2632 }, { "epoch": 4.01611328125e-06, "step": 2632, "training_step_time": 0.17168593406677246 }, { "epoch": 4.01763916015625e-06, "model_forward_time": 0.02472996711730957, "step": 2633 }, { "epoch": 4.01763916015625e-06, "step": 2633, "training_step_time": 0.16511201858520508 }, { "epoch": 4.0191650390625e-06, "model_forward_time": 0.024491071701049805, "step": 2634 }, { "epoch": 4.0191650390625e-06, "step": 2634, "training_step_time": 0.16451549530029297 }, { "epoch": 4.02069091796875e-06, "model_forward_time": 0.024625062942504883, "step": 2635 }, { "epoch": 4.02069091796875e-06, "step": 2635, "training_step_time": 0.2046198844909668 }, { "epoch": 4.022216796875e-06, "model_forward_time": 0.02654242515563965, "step": 2636 }, { "epoch": 4.022216796875e-06, "step": 2636, "training_step_time": 0.16288542747497559 }, { "epoch": 4.02374267578125e-06, "model_forward_time": 0.024131298065185547, "step": 2637 }, { "epoch": 4.02374267578125e-06, "step": 2637, "training_step_time": 0.10601449012756348 }, { "epoch": 4.0252685546875e-06, "model_forward_time": 0.024834156036376953, "step": 2638 }, { "epoch": 4.0252685546875e-06, "step": 2638, "training_step_time": 0.10745692253112793 }, { "epoch": 4.02679443359375e-06, "model_forward_time": 0.025490760803222656, "step": 2639 }, { "epoch": 4.02679443359375e-06, "step": 2639, "training_step_time": 0.11195969581604004 }, { "epoch": 4.0283203125e-06, "grad_norm": 0.8895446062088013, "learning_rate": 9.96057350657239e-05, "loss": 0.154, "step": 2640 }, { "epoch": 4.0283203125e-06, "model_forward_time": 0.02508997917175293, "step": 2640 }, { "epoch": 4.0283203125e-06, "step": 2640, "training_step_time": 0.10900235176086426 }, { "epoch": 4.02984619140625e-06, "model_forward_time": 0.025409698486328125, "step": 2641 }, { "epoch": 4.02984619140625e-06, "step": 2641, "training_step_time": 0.11361122131347656 }, { "epoch": 4.0313720703125e-06, "model_forward_time": 0.025673866271972656, "step": 2642 }, { "epoch": 4.0313720703125e-06, "step": 2642, "training_step_time": 0.11690664291381836 }, { "epoch": 4.03289794921875e-06, "model_forward_time": 0.024580955505371094, "step": 2643 }, { "epoch": 4.03289794921875e-06, "step": 2643, "training_step_time": 0.11499595642089844 }, { "epoch": 4.034423828125e-06, "model_forward_time": 0.02550673484802246, "step": 2644 }, { "epoch": 4.034423828125e-06, "step": 2644, "training_step_time": 0.1154639720916748 }, { "epoch": 4.03594970703125e-06, "model_forward_time": 0.025206565856933594, "step": 2645 }, { "epoch": 4.03594970703125e-06, "step": 2645, "training_step_time": 0.11664366722106934 }, { "epoch": 4.0374755859375e-06, "model_forward_time": 0.02520298957824707, "step": 2646 }, { "epoch": 4.0374755859375e-06, "step": 2646, "training_step_time": 0.16458725929260254 }, { "epoch": 4.03900146484375e-06, "model_forward_time": 0.02398538589477539, "step": 2647 }, { "epoch": 4.03900146484375e-06, "step": 2647, "training_step_time": 0.15099358558654785 }, { "epoch": 4.04052734375e-06, "model_forward_time": 0.025594234466552734, "step": 2648 }, { "epoch": 4.04052734375e-06, "step": 2648, "training_step_time": 0.11366987228393555 }, { "epoch": 4.04205322265625e-06, "model_forward_time": 0.025868654251098633, "step": 2649 }, { "epoch": 4.04205322265625e-06, "step": 2649, "training_step_time": 0.10973405838012695 }, { "epoch": 4.0435791015625e-06, "grad_norm": 0.6514967679977417, "learning_rate": 9.959879710533835e-05, "loss": 0.1366, "step": 2650 }, { "epoch": 4.0435791015625e-06, "model_forward_time": 0.02537822723388672, "step": 2650 }, { "epoch": 4.0435791015625e-06, "step": 2650, "training_step_time": 0.17003965377807617 }, { "epoch": 4.04510498046875e-06, "model_forward_time": 0.0243227481842041, "step": 2651 }, { "epoch": 4.04510498046875e-06, "step": 2651, "training_step_time": 0.16069912910461426 }, { "epoch": 4.046630859375e-06, "model_forward_time": 0.024210214614868164, "step": 2652 }, { "epoch": 4.046630859375e-06, "step": 2652, "training_step_time": 0.11087918281555176 }, { "epoch": 4.04815673828125e-06, "model_forward_time": 0.024750947952270508, "step": 2653 }, { "epoch": 4.04815673828125e-06, "step": 2653, "training_step_time": 0.1651015281677246 }, { "epoch": 4.0496826171875e-06, "model_forward_time": 0.024959802627563477, "step": 2654 }, { "epoch": 4.0496826171875e-06, "step": 2654, "training_step_time": 0.1662149429321289 }, { "epoch": 4.05120849609375e-06, "model_forward_time": 0.024851560592651367, "step": 2655 }, { "epoch": 4.05120849609375e-06, "step": 2655, "training_step_time": 0.10683155059814453 }, { "epoch": 4.052734375e-06, "model_forward_time": 0.025258779525756836, "step": 2656 }, { "epoch": 4.052734375e-06, "step": 2656, "training_step_time": 0.10517644882202148 }, { "epoch": 4.05426025390625e-06, "model_forward_time": 0.025101661682128906, "step": 2657 }, { "epoch": 4.05426025390625e-06, "step": 2657, "training_step_time": 0.10609674453735352 }, { "epoch": 4.0557861328125e-06, "model_forward_time": 0.025737762451171875, "step": 2658 }, { "epoch": 4.0557861328125e-06, "step": 2658, "training_step_time": 0.1074228286743164 }, { "epoch": 4.05731201171875e-06, "model_forward_time": 0.025517940521240234, "step": 2659 }, { "epoch": 4.05731201171875e-06, "step": 2659, "training_step_time": 0.10764479637145996 }, { "epoch": 4.058837890625e-06, "grad_norm": 0.6101964116096497, "learning_rate": 9.959179887773744e-05, "loss": 0.1499, "step": 2660 }, { "epoch": 4.058837890625e-06, "model_forward_time": 0.025423288345336914, "step": 2660 }, { "epoch": 4.058837890625e-06, "step": 2660, "training_step_time": 0.11061787605285645 }, { "epoch": 4.06036376953125e-06, "model_forward_time": 0.025127887725830078, "step": 2661 }, { "epoch": 4.06036376953125e-06, "step": 2661, "training_step_time": 0.14537715911865234 }, { "epoch": 4.0618896484375e-06, "model_forward_time": 0.024112701416015625, "step": 2662 }, { "epoch": 4.0618896484375e-06, "step": 2662, "training_step_time": 0.1497800350189209 }, { "epoch": 4.06341552734375e-06, "model_forward_time": 0.024029016494750977, "step": 2663 }, { "epoch": 4.06341552734375e-06, "step": 2663, "training_step_time": 0.14650487899780273 }, { "epoch": 4.06494140625e-06, "model_forward_time": 0.02388453483581543, "step": 2664 }, { "epoch": 4.06494140625e-06, "step": 2664, "training_step_time": 0.14997458457946777 }, { "epoch": 4.06646728515625e-06, "model_forward_time": 0.02441549301147461, "step": 2665 }, { "epoch": 4.06646728515625e-06, "step": 2665, "training_step_time": 0.12883663177490234 }, { "epoch": 4.0679931640625e-06, "model_forward_time": 0.024616479873657227, "step": 2666 }, { "epoch": 4.0679931640625e-06, "step": 2666, "training_step_time": 0.13300681114196777 }, { "epoch": 4.06951904296875e-06, "model_forward_time": 0.02476954460144043, "step": 2667 }, { "epoch": 4.06951904296875e-06, "step": 2667, "training_step_time": 0.12348794937133789 }, { "epoch": 4.071044921875e-06, "model_forward_time": 0.024622201919555664, "step": 2668 }, { "epoch": 4.071044921875e-06, "step": 2668, "training_step_time": 0.11321663856506348 }, { "epoch": 4.07257080078125e-06, "model_forward_time": 0.025504589080810547, "step": 2669 }, { "epoch": 4.07257080078125e-06, "step": 2669, "training_step_time": 0.1139822006225586 }, { "epoch": 4.0740966796875e-06, "grad_norm": 0.6662260890007019, "learning_rate": 9.95847403914247e-05, "loss": 0.1603, "step": 2670 }, { "epoch": 4.0740966796875e-06, "model_forward_time": 0.02512979507446289, "step": 2670 }, { "epoch": 4.0740966796875e-06, "step": 2670, "training_step_time": 0.11485671997070312 }, { "epoch": 4.07562255859375e-06, "model_forward_time": 0.025142908096313477, "step": 2671 }, { "epoch": 4.07562255859375e-06, "step": 2671, "training_step_time": 0.112152099609375 }, { "epoch": 4.0771484375e-06, "model_forward_time": 0.02572941780090332, "step": 2672 }, { "epoch": 4.0771484375e-06, "step": 2672, "training_step_time": 0.10960817337036133 }, { "epoch": 4.07867431640625e-06, "model_forward_time": 0.025151491165161133, "step": 2673 }, { "epoch": 4.07867431640625e-06, "step": 2673, "training_step_time": 0.10865187644958496 }, { "epoch": 4.0802001953125e-06, "model_forward_time": 0.0252840518951416, "step": 2674 }, { "epoch": 4.0802001953125e-06, "step": 2674, "training_step_time": 0.10846114158630371 }, { "epoch": 4.08172607421875e-06, "model_forward_time": 0.025331497192382812, "step": 2675 }, { "epoch": 4.08172607421875e-06, "step": 2675, "training_step_time": 0.17831993103027344 }, { "epoch": 4.083251953125e-06, "model_forward_time": 0.02456355094909668, "step": 2676 }, { "epoch": 4.083251953125e-06, "step": 2676, "training_step_time": 0.18026018142700195 }, { "epoch": 4.08477783203125e-06, "model_forward_time": 0.0242917537689209, "step": 2677 }, { "epoch": 4.08477783203125e-06, "step": 2677, "training_step_time": 0.18127012252807617 }, { "epoch": 4.0863037109375e-06, "model_forward_time": 0.02460026741027832, "step": 2678 }, { "epoch": 4.0863037109375e-06, "step": 2678, "training_step_time": 0.12339186668395996 }, { "epoch": 4.08782958984375e-06, "model_forward_time": 0.024390697479248047, "step": 2679 }, { "epoch": 4.08782958984375e-06, "step": 2679, "training_step_time": 0.11007428169250488 }, { "epoch": 4.08935546875e-06, "grad_norm": 0.8351659178733826, "learning_rate": 9.957762165497686e-05, "loss": 0.1788, "step": 2680 }, { "epoch": 4.08935546875e-06, "model_forward_time": 0.025530099868774414, "step": 2680 }, { "epoch": 4.08935546875e-06, "step": 2680, "training_step_time": 0.11474108695983887 }, { "epoch": 4.09088134765625e-06, "model_forward_time": 0.0251772403717041, "step": 2681 }, { "epoch": 4.09088134765625e-06, "step": 2681, "training_step_time": 0.10891962051391602 }, { "epoch": 4.0924072265625e-06, "model_forward_time": 0.024941444396972656, "step": 2682 }, { "epoch": 4.0924072265625e-06, "step": 2682, "training_step_time": 0.11425948143005371 }, { "epoch": 4.09393310546875e-06, "model_forward_time": 0.025307178497314453, "step": 2683 }, { "epoch": 4.09393310546875e-06, "step": 2683, "training_step_time": 0.11291170120239258 }, { "epoch": 4.095458984375e-06, "model_forward_time": 0.0250089168548584, "step": 2684 }, { "epoch": 4.095458984375e-06, "step": 2684, "training_step_time": 0.10802578926086426 }, { "epoch": 4.09698486328125e-06, "model_forward_time": 0.025241374969482422, "step": 2685 }, { "epoch": 4.09698486328125e-06, "step": 2685, "training_step_time": 0.13444757461547852 }, { "epoch": 4.0985107421875e-06, "model_forward_time": 0.025255680084228516, "step": 2686 }, { "epoch": 4.0985107421875e-06, "step": 2686, "training_step_time": 0.13781356811523438 }, { "epoch": 4.10003662109375e-06, "model_forward_time": 0.025057315826416016, "step": 2687 }, { "epoch": 4.10003662109375e-06, "step": 2687, "training_step_time": 0.11336636543273926 }, { "epoch": 4.1015625e-06, "model_forward_time": 0.02519512176513672, "step": 2688 }, { "epoch": 4.1015625e-06, "step": 2688, "training_step_time": 0.1180574893951416 }, { "epoch": 4.10308837890625e-06, "model_forward_time": 0.025197267532348633, "step": 2689 }, { "epoch": 4.10308837890625e-06, "step": 2689, "training_step_time": 0.11135029792785645 }, { "epoch": 4.1046142578125e-06, "grad_norm": 0.6094166040420532, "learning_rate": 9.957044267704384e-05, "loss": 0.1523, "step": 2690 }, { "epoch": 4.1046142578125e-06, "model_forward_time": 0.0252230167388916, "step": 2690 }, { "epoch": 4.1046142578125e-06, "step": 2690, "training_step_time": 0.1869983673095703 }, { "epoch": 4.10614013671875e-06, "model_forward_time": 0.02469015121459961, "step": 2691 }, { "epoch": 4.10614013671875e-06, "step": 2691, "training_step_time": 0.11540579795837402 }, { "epoch": 4.107666015625e-06, "model_forward_time": 0.025210142135620117, "step": 2692 }, { "epoch": 4.107666015625e-06, "step": 2692, "training_step_time": 0.11225557327270508 }, { "epoch": 4.10919189453125e-06, "model_forward_time": 0.02594780921936035, "step": 2693 }, { "epoch": 4.10919189453125e-06, "step": 2693, "training_step_time": 0.10793089866638184 }, { "epoch": 4.1107177734375e-06, "model_forward_time": 0.025758743286132812, "step": 2694 }, { "epoch": 4.1107177734375e-06, "step": 2694, "training_step_time": 0.10962867736816406 }, { "epoch": 4.11224365234375e-06, "model_forward_time": 0.025530338287353516, "step": 2695 }, { "epoch": 4.11224365234375e-06, "step": 2695, "training_step_time": 0.10907864570617676 }, { "epoch": 4.11376953125e-06, "model_forward_time": 0.02521347999572754, "step": 2696 }, { "epoch": 4.11376953125e-06, "step": 2696, "training_step_time": 0.11299681663513184 }, { "epoch": 4.11529541015625e-06, "model_forward_time": 0.02534770965576172, "step": 2697 }, { "epoch": 4.11529541015625e-06, "step": 2697, "training_step_time": 0.11208987236022949 }, { "epoch": 4.1168212890625e-06, "model_forward_time": 0.02551746368408203, "step": 2698 }, { "epoch": 4.1168212890625e-06, "step": 2698, "training_step_time": 0.22218728065490723 }, { "epoch": 4.11834716796875e-06, "model_forward_time": 0.024779796600341797, "step": 2699 }, { "epoch": 4.11834716796875e-06, "step": 2699, "training_step_time": 0.10980939865112305 }, { "epoch": 4.119873046875e-06, "grad_norm": 0.7234001755714417, "learning_rate": 9.956320346634876e-05, "loss": 0.106, "step": 2700 }, { "epoch": 4.119873046875e-06, "model_forward_time": 0.02491617202758789, "step": 2700 }, { "epoch": 4.119873046875e-06, "step": 2700, "training_step_time": 0.10665249824523926 }, { "epoch": 4.12139892578125e-06, "model_forward_time": 0.02508401870727539, "step": 2701 }, { "epoch": 4.12139892578125e-06, "step": 2701, "training_step_time": 0.10627007484436035 }, { "epoch": 4.1229248046875e-06, "model_forward_time": 0.025051116943359375, "step": 2702 }, { "epoch": 4.1229248046875e-06, "step": 2702, "training_step_time": 0.10767531394958496 }, { "epoch": 4.12445068359375e-06, "model_forward_time": 0.025228500366210938, "step": 2703 }, { "epoch": 4.12445068359375e-06, "step": 2703, "training_step_time": 0.1067044734954834 }, { "epoch": 4.1259765625e-06, "model_forward_time": 0.025156021118164062, "step": 2704 }, { "epoch": 4.1259765625e-06, "step": 2704, "training_step_time": 0.10915851593017578 }, { "epoch": 4.12750244140625e-06, "model_forward_time": 0.02491927146911621, "step": 2705 }, { "epoch": 4.12750244140625e-06, "step": 2705, "training_step_time": 0.11313343048095703 }, { "epoch": 4.1290283203125e-06, "model_forward_time": 0.025448083877563477, "step": 2706 }, { "epoch": 4.1290283203125e-06, "step": 2706, "training_step_time": 0.11539244651794434 }, { "epoch": 4.13055419921875e-06, "model_forward_time": 0.024895668029785156, "step": 2707 }, { "epoch": 4.13055419921875e-06, "step": 2707, "training_step_time": 0.11974263191223145 }, { "epoch": 4.132080078125e-06, "model_forward_time": 0.025686264038085938, "step": 2708 }, { "epoch": 4.132080078125e-06, "step": 2708, "training_step_time": 0.11359095573425293 }, { "epoch": 4.13360595703125e-06, "model_forward_time": 0.024784564971923828, "step": 2709 }, { "epoch": 4.13360595703125e-06, "step": 2709, "training_step_time": 0.11349678039550781 }, { "epoch": 4.1351318359375e-06, "grad_norm": 0.5547717809677124, "learning_rate": 9.955590403168799e-05, "loss": 0.1129, "step": 2710 }, { "epoch": 4.1351318359375e-06, "model_forward_time": 0.02514934539794922, "step": 2710 }, { "epoch": 4.1351318359375e-06, "step": 2710, "training_step_time": 0.10987210273742676 }, { "epoch": 4.13665771484375e-06, "model_forward_time": 0.02508544921875, "step": 2711 }, { "epoch": 4.13665771484375e-06, "step": 2711, "training_step_time": 0.10998225212097168 }, { "epoch": 4.13818359375e-06, "model_forward_time": 0.02506542205810547, "step": 2712 }, { "epoch": 4.13818359375e-06, "step": 2712, "training_step_time": 0.11174988746643066 }, { "epoch": 4.13970947265625e-06, "model_forward_time": 0.024965763092041016, "step": 2713 }, { "epoch": 4.13970947265625e-06, "step": 2713, "training_step_time": 0.10977840423583984 }, { "epoch": 4.1412353515625e-06, "model_forward_time": 0.025157451629638672, "step": 2714 }, { "epoch": 4.1412353515625e-06, "step": 2714, "training_step_time": 0.1067650318145752 }, { "epoch": 4.14276123046875e-06, "model_forward_time": 0.024903535842895508, "step": 2715 }, { "epoch": 4.14276123046875e-06, "step": 2715, "training_step_time": 0.10834574699401855 }, { "epoch": 4.144287109375e-06, "model_forward_time": 0.028226613998413086, "step": 2716 }, { "epoch": 4.144287109375e-06, "step": 2716, "training_step_time": 0.11452031135559082 }, { "epoch": 4.14581298828125e-06, "model_forward_time": 0.025138139724731445, "step": 2717 }, { "epoch": 4.14581298828125e-06, "step": 2717, "training_step_time": 0.10767292976379395 }, { "epoch": 4.1473388671875e-06, "model_forward_time": 0.024976491928100586, "step": 2718 }, { "epoch": 4.1473388671875e-06, "step": 2718, "training_step_time": 0.10459065437316895 }, { "epoch": 4.14886474609375e-06, "model_forward_time": 0.02512335777282715, "step": 2719 }, { "epoch": 4.14886474609375e-06, "step": 2719, "training_step_time": 0.10622358322143555 }, { "epoch": 4.150390625e-06, "grad_norm": 0.9897136688232422, "learning_rate": 9.9548544381931e-05, "loss": 0.1498, "step": 2720 }, { "epoch": 4.150390625e-06, "model_forward_time": 0.025136947631835938, "step": 2720 }, { "epoch": 4.150390625e-06, "step": 2720, "training_step_time": 0.10980892181396484 }, { "epoch": 4.15191650390625e-06, "model_forward_time": 0.02495288848876953, "step": 2721 }, { "epoch": 4.15191650390625e-06, "step": 2721, "training_step_time": 0.10661029815673828 }, { "epoch": 4.1534423828125e-06, "model_forward_time": 0.02490067481994629, "step": 2722 }, { "epoch": 4.1534423828125e-06, "step": 2722, "training_step_time": 0.19107842445373535 }, { "epoch": 4.15496826171875e-06, "model_forward_time": 0.02626347541809082, "step": 2723 }, { "epoch": 4.15496826171875e-06, "step": 2723, "training_step_time": 0.10937261581420898 }, { "epoch": 4.156494140625e-06, "model_forward_time": 0.025488615036010742, "step": 2724 }, { "epoch": 4.156494140625e-06, "step": 2724, "training_step_time": 0.11562538146972656 }, { "epoch": 4.15802001953125e-06, "model_forward_time": 0.02881026268005371, "step": 2725 }, { "epoch": 4.15802001953125e-06, "step": 2725, "training_step_time": 0.12107086181640625 }, { "epoch": 4.1595458984375e-06, "model_forward_time": 0.025046586990356445, "step": 2726 }, { "epoch": 4.1595458984375e-06, "step": 2726, "training_step_time": 0.21125197410583496 }, { "epoch": 4.16107177734375e-06, "model_forward_time": 0.02410292625427246, "step": 2727 }, { "epoch": 4.16107177734375e-06, "step": 2727, "training_step_time": 0.14776849746704102 }, { "epoch": 4.16259765625e-06, "model_forward_time": 0.02440953254699707, "step": 2728 }, { "epoch": 4.16259765625e-06, "step": 2728, "training_step_time": 0.10667824745178223 }, { "epoch": 4.16412353515625e-06, "model_forward_time": 0.025004148483276367, "step": 2729 }, { "epoch": 4.16412353515625e-06, "step": 2729, "training_step_time": 0.10719132423400879 }, { "epoch": 4.1656494140625e-06, "grad_norm": 0.8511749505996704, "learning_rate": 9.954112452602045e-05, "loss": 0.1545, "step": 2730 }, { "epoch": 4.1656494140625e-06, "model_forward_time": 0.02574324607849121, "step": 2730 }, { "epoch": 4.1656494140625e-06, "step": 2730, "training_step_time": 0.10624361038208008 }, { "epoch": 4.16717529296875e-06, "model_forward_time": 0.0293428897857666, "step": 2731 }, { "epoch": 4.16717529296875e-06, "step": 2731, "training_step_time": 0.11302828788757324 }, { "epoch": 4.168701171875e-06, "model_forward_time": 0.024964332580566406, "step": 2732 }, { "epoch": 4.168701171875e-06, "step": 2732, "training_step_time": 0.12818241119384766 }, { "epoch": 4.17022705078125e-06, "model_forward_time": 0.025168180465698242, "step": 2733 }, { "epoch": 4.17022705078125e-06, "step": 2733, "training_step_time": 0.14683961868286133 }, { "epoch": 4.1717529296875e-06, "model_forward_time": 0.02550816535949707, "step": 2734 }, { "epoch": 4.1717529296875e-06, "step": 2734, "training_step_time": 0.10952877998352051 }, { "epoch": 4.17327880859375e-06, "model_forward_time": 0.0255277156829834, "step": 2735 }, { "epoch": 4.17327880859375e-06, "step": 2735, "training_step_time": 0.10734128952026367 }, { "epoch": 4.1748046875e-06, "model_forward_time": 0.025738239288330078, "step": 2736 }, { "epoch": 4.1748046875e-06, "step": 2736, "training_step_time": 0.10856509208679199 }, { "epoch": 4.17633056640625e-06, "model_forward_time": 0.02543020248413086, "step": 2737 }, { "epoch": 4.17633056640625e-06, "step": 2737, "training_step_time": 0.11371254920959473 }, { "epoch": 4.1778564453125e-06, "model_forward_time": 0.02515888214111328, "step": 2738 }, { "epoch": 4.1778564453125e-06, "step": 2738, "training_step_time": 0.2068016529083252 }, { "epoch": 4.17938232421875e-06, "model_forward_time": 0.0245516300201416, "step": 2739 }, { "epoch": 4.17938232421875e-06, "step": 2739, "training_step_time": 0.1056830883026123 }, { "epoch": 4.180908203125e-06, "grad_norm": 0.5626347064971924, "learning_rate": 9.953364447297219e-05, "loss": 0.1306, "step": 2740 }, { "epoch": 4.180908203125e-06, "model_forward_time": 0.024382829666137695, "step": 2740 }, { "epoch": 4.180908203125e-06, "step": 2740, "training_step_time": 0.10436034202575684 }, { "epoch": 4.18243408203125e-06, "model_forward_time": 0.02515578269958496, "step": 2741 }, { "epoch": 4.18243408203125e-06, "step": 2741, "training_step_time": 0.21937108039855957 }, { "epoch": 4.1839599609375e-06, "model_forward_time": 0.024631023406982422, "step": 2742 }, { "epoch": 4.1839599609375e-06, "step": 2742, "training_step_time": 0.10819697380065918 }, { "epoch": 4.18548583984375e-06, "model_forward_time": 0.02543187141418457, "step": 2743 }, { "epoch": 4.18548583984375e-06, "step": 2743, "training_step_time": 0.10727047920227051 }, { "epoch": 4.18701171875e-06, "model_forward_time": 0.025311708450317383, "step": 2744 }, { "epoch": 4.18701171875e-06, "step": 2744, "training_step_time": 0.10822248458862305 }, { "epoch": 4.18853759765625e-06, "model_forward_time": 0.025208234786987305, "step": 2745 }, { "epoch": 4.18853759765625e-06, "step": 2745, "training_step_time": 0.1193997859954834 }, { "epoch": 4.1900634765625e-06, "model_forward_time": 0.02602386474609375, "step": 2746 }, { "epoch": 4.1900634765625e-06, "step": 2746, "training_step_time": 0.11086630821228027 }, { "epoch": 4.19158935546875e-06, "model_forward_time": 0.025446176528930664, "step": 2747 }, { "epoch": 4.19158935546875e-06, "step": 2747, "training_step_time": 0.10746574401855469 }, { "epoch": 4.193115234375e-06, "model_forward_time": 0.025178909301757812, "step": 2748 }, { "epoch": 4.193115234375e-06, "step": 2748, "training_step_time": 0.10785436630249023 }, { "epoch": 4.19464111328125e-06, "model_forward_time": 0.024927139282226562, "step": 2749 }, { "epoch": 4.19464111328125e-06, "step": 2749, "training_step_time": 0.10636711120605469 }, { "epoch": 4.1961669921875e-06, "grad_norm": 0.7132095694541931, "learning_rate": 9.952610423187516e-05, "loss": 0.1481, "step": 2750 }, { "epoch": 4.1961669921875e-06, "model_forward_time": 0.025022506713867188, "step": 2750 }, { "epoch": 4.1961669921875e-06, "step": 2750, "training_step_time": 0.10988736152648926 }, { "epoch": 4.19769287109375e-06, "model_forward_time": 0.02560710906982422, "step": 2751 }, { "epoch": 4.19769287109375e-06, "step": 2751, "training_step_time": 0.1097877025604248 }, { "epoch": 4.19921875e-06, "model_forward_time": 0.025916576385498047, "step": 2752 }, { "epoch": 4.19921875e-06, "step": 2752, "training_step_time": 0.10747647285461426 }, { "epoch": 4.20074462890625e-06, "model_forward_time": 0.02539539337158203, "step": 2753 }, { "epoch": 4.20074462890625e-06, "step": 2753, "training_step_time": 0.10840535163879395 }, { "epoch": 4.2022705078125e-06, "model_forward_time": 0.029796600341796875, "step": 2754 }, { "epoch": 4.2022705078125e-06, "step": 2754, "training_step_time": 0.11266589164733887 }, { "epoch": 4.20379638671875e-06, "model_forward_time": 0.025039196014404297, "step": 2755 }, { "epoch": 4.20379638671875e-06, "step": 2755, "training_step_time": 0.10694360733032227 }, { "epoch": 4.205322265625e-06, "model_forward_time": 0.024927139282226562, "step": 2756 }, { "epoch": 4.205322265625e-06, "step": 2756, "training_step_time": 0.11135268211364746 }, { "epoch": 4.20684814453125e-06, "model_forward_time": 0.025053977966308594, "step": 2757 }, { "epoch": 4.20684814453125e-06, "step": 2757, "training_step_time": 0.1098787784576416 }, { "epoch": 4.2083740234375e-06, "model_forward_time": 0.0254976749420166, "step": 2758 }, { "epoch": 4.2083740234375e-06, "step": 2758, "training_step_time": 0.10670614242553711 }, { "epoch": 4.20989990234375e-06, "model_forward_time": 0.02415633201599121, "step": 2759 }, { "epoch": 4.20989990234375e-06, "step": 2759, "training_step_time": 0.10811328887939453 }, { "epoch": 4.21142578125e-06, "grad_norm": 0.9546775817871094, "learning_rate": 9.95185038118915e-05, "loss": 0.1372, "step": 2760 }, { "epoch": 4.21142578125e-06, "model_forward_time": 0.025157451629638672, "step": 2760 }, { "epoch": 4.21142578125e-06, "step": 2760, "training_step_time": 0.10591316223144531 }, { "epoch": 4.21295166015625e-06, "model_forward_time": 0.025135040283203125, "step": 2761 }, { "epoch": 4.21295166015625e-06, "step": 2761, "training_step_time": 0.1121976375579834 }, { "epoch": 4.2144775390625e-06, "model_forward_time": 0.02525043487548828, "step": 2762 }, { "epoch": 4.2144775390625e-06, "step": 2762, "training_step_time": 0.11163759231567383 }, { "epoch": 4.21600341796875e-06, "model_forward_time": 0.025014877319335938, "step": 2763 }, { "epoch": 4.21600341796875e-06, "step": 2763, "training_step_time": 0.10879802703857422 }, { "epoch": 4.217529296875e-06, "model_forward_time": 0.024827241897583008, "step": 2764 }, { "epoch": 4.217529296875e-06, "step": 2764, "training_step_time": 0.10605001449584961 }, { "epoch": 4.21905517578125e-06, "model_forward_time": 0.025132417678833008, "step": 2765 }, { "epoch": 4.21905517578125e-06, "step": 2765, "training_step_time": 0.10844850540161133 }, { "epoch": 4.2205810546875e-06, "model_forward_time": 0.025289297103881836, "step": 2766 }, { "epoch": 4.2205810546875e-06, "step": 2766, "training_step_time": 0.11169075965881348 }, { "epoch": 4.22210693359375e-06, "model_forward_time": 0.02502131462097168, "step": 2767 }, { "epoch": 4.22210693359375e-06, "step": 2767, "training_step_time": 0.10914254188537598 }, { "epoch": 4.2236328125e-06, "model_forward_time": 0.025182008743286133, "step": 2768 }, { "epoch": 4.2236328125e-06, "step": 2768, "training_step_time": 0.13020896911621094 }, { "epoch": 4.22515869140625e-06, "model_forward_time": 0.024641752243041992, "step": 2769 }, { "epoch": 4.22515869140625e-06, "step": 2769, "training_step_time": 0.14920425415039062 }, { "epoch": 4.2266845703125e-06, "grad_norm": 1.108259677886963, "learning_rate": 9.951084322225641e-05, "loss": 0.1249, "step": 2770 }, { "epoch": 4.2266845703125e-06, "model_forward_time": 0.02445220947265625, "step": 2770 }, { "epoch": 4.2266845703125e-06, "step": 2770, "training_step_time": 0.22476983070373535 }, { "epoch": 4.22821044921875e-06, "model_forward_time": 0.02431488037109375, "step": 2771 }, { "epoch": 4.22821044921875e-06, "step": 2771, "training_step_time": 0.2097334861755371 }, { "epoch": 4.229736328125e-06, "model_forward_time": 0.024579763412475586, "step": 2772 }, { "epoch": 4.229736328125e-06, "step": 2772, "training_step_time": 0.13304352760314941 }, { "epoch": 4.23126220703125e-06, "model_forward_time": 0.024151325225830078, "step": 2773 }, { "epoch": 4.23126220703125e-06, "step": 2773, "training_step_time": 0.2030806541442871 }, { "epoch": 4.2327880859375e-06, "model_forward_time": 0.02460503578186035, "step": 2774 }, { "epoch": 4.2327880859375e-06, "step": 2774, "training_step_time": 0.11671948432922363 }, { "epoch": 4.23431396484375e-06, "model_forward_time": 0.024417638778686523, "step": 2775 }, { "epoch": 4.23431396484375e-06, "step": 2775, "training_step_time": 0.1087496280670166 }, { "epoch": 4.23583984375e-06, "model_forward_time": 0.028589248657226562, "step": 2776 }, { "epoch": 4.23583984375e-06, "step": 2776, "training_step_time": 0.11244964599609375 }, { "epoch": 4.23736572265625e-06, "model_forward_time": 0.025116443634033203, "step": 2777 }, { "epoch": 4.23736572265625e-06, "step": 2777, "training_step_time": 0.10978078842163086 }, { "epoch": 4.2388916015625e-06, "model_forward_time": 0.024893999099731445, "step": 2778 }, { "epoch": 4.2388916015625e-06, "step": 2778, "training_step_time": 0.12011241912841797 }, { "epoch": 4.24041748046875e-06, "model_forward_time": 0.025335073471069336, "step": 2779 }, { "epoch": 4.24041748046875e-06, "step": 2779, "training_step_time": 0.13481616973876953 }, { "epoch": 4.241943359375e-06, "grad_norm": 0.9175239205360413, "learning_rate": 9.950312247227825e-05, "loss": 0.1373, "step": 2780 }, { "epoch": 4.241943359375e-06, "model_forward_time": 0.024530649185180664, "step": 2780 }, { "epoch": 4.241943359375e-06, "step": 2780, "training_step_time": 0.10839080810546875 }, { "epoch": 4.24346923828125e-06, "model_forward_time": 0.025356292724609375, "step": 2781 }, { "epoch": 4.24346923828125e-06, "step": 2781, "training_step_time": 0.11330747604370117 }, { "epoch": 4.2449951171875e-06, "model_forward_time": 0.025420427322387695, "step": 2782 }, { "epoch": 4.2449951171875e-06, "step": 2782, "training_step_time": 0.10999894142150879 }, { "epoch": 4.24652099609375e-06, "model_forward_time": 0.025222301483154297, "step": 2783 }, { "epoch": 4.24652099609375e-06, "step": 2783, "training_step_time": 0.11020755767822266 }, { "epoch": 4.248046875e-06, "model_forward_time": 0.02972698211669922, "step": 2784 }, { "epoch": 4.248046875e-06, "step": 2784, "training_step_time": 0.21287822723388672 }, { "epoch": 4.24957275390625e-06, "model_forward_time": 0.0247189998626709, "step": 2785 }, { "epoch": 4.24957275390625e-06, "step": 2785, "training_step_time": 0.11601758003234863 }, { "epoch": 4.2510986328125e-06, "model_forward_time": 0.024424314498901367, "step": 2786 }, { "epoch": 4.2510986328125e-06, "step": 2786, "training_step_time": 0.18888211250305176 }, { "epoch": 4.25262451171875e-06, "model_forward_time": 0.02725386619567871, "step": 2787 }, { "epoch": 4.25262451171875e-06, "step": 2787, "training_step_time": 0.12241959571838379 }, { "epoch": 4.254150390625e-06, "model_forward_time": 0.02453923225402832, "step": 2788 }, { "epoch": 4.254150390625e-06, "step": 2788, "training_step_time": 0.11156773567199707 }, { "epoch": 4.25567626953125e-06, "model_forward_time": 0.024919509887695312, "step": 2789 }, { "epoch": 4.25567626953125e-06, "step": 2789, "training_step_time": 0.10801911354064941 }, { "epoch": 4.2572021484375e-06, "grad_norm": 0.8682767748832703, "learning_rate": 9.949534157133844e-05, "loss": 0.1544, "step": 2790 }, { "epoch": 4.2572021484375e-06, "model_forward_time": 0.02493762969970703, "step": 2790 }, { "epoch": 4.2572021484375e-06, "step": 2790, "training_step_time": 0.17709136009216309 }, { "epoch": 4.25872802734375e-06, "model_forward_time": 0.024722814559936523, "step": 2791 }, { "epoch": 4.25872802734375e-06, "step": 2791, "training_step_time": 0.16068506240844727 }, { "epoch": 4.26025390625e-06, "model_forward_time": 0.02444624900817871, "step": 2792 }, { "epoch": 4.26025390625e-06, "step": 2792, "training_step_time": 0.10413527488708496 }, { "epoch": 4.26177978515625e-06, "model_forward_time": 0.02468729019165039, "step": 2793 }, { "epoch": 4.26177978515625e-06, "step": 2793, "training_step_time": 0.10332489013671875 }, { "epoch": 4.2633056640625e-06, "model_forward_time": 0.02556324005126953, "step": 2794 }, { "epoch": 4.2633056640625e-06, "step": 2794, "training_step_time": 0.10662341117858887 }, { "epoch": 4.26483154296875e-06, "model_forward_time": 0.02501988410949707, "step": 2795 }, { "epoch": 4.26483154296875e-06, "step": 2795, "training_step_time": 0.10595536231994629 }, { "epoch": 4.266357421875e-06, "model_forward_time": 0.02497553825378418, "step": 2796 }, { "epoch": 4.266357421875e-06, "step": 2796, "training_step_time": 0.10684728622436523 }, { "epoch": 4.26788330078125e-06, "model_forward_time": 0.025267601013183594, "step": 2797 }, { "epoch": 4.26788330078125e-06, "step": 2797, "training_step_time": 0.1061089038848877 }, { "epoch": 4.2694091796875e-06, "model_forward_time": 0.025238990783691406, "step": 2798 }, { "epoch": 4.2694091796875e-06, "step": 2798, "training_step_time": 0.11097550392150879 }, { "epoch": 4.27093505859375e-06, "model_forward_time": 0.025223731994628906, "step": 2799 }, { "epoch": 4.27093505859375e-06, "step": 2799, "training_step_time": 0.11216378211975098 }, { "epoch": 4.2724609375e-06, "grad_norm": 0.5365232825279236, "learning_rate": 9.94875005288915e-05, "loss": 0.132, "step": 2800 }, { "epoch": 4.2724609375e-06, "model_forward_time": 0.025026798248291016, "step": 2800 }, { "epoch": 4.2724609375e-06, "step": 2800, "training_step_time": 0.11704492568969727 }, { "epoch": 4.27398681640625e-06, "model_forward_time": 0.025136470794677734, "step": 2801 }, { "epoch": 4.27398681640625e-06, "step": 2801, "training_step_time": 0.13306212425231934 }, { "epoch": 4.2755126953125e-06, "model_forward_time": 0.02527904510498047, "step": 2802 }, { "epoch": 4.2755126953125e-06, "step": 2802, "training_step_time": 0.12569212913513184 }, { "epoch": 4.27703857421875e-06, "model_forward_time": 0.024560213088989258, "step": 2803 }, { "epoch": 4.27703857421875e-06, "step": 2803, "training_step_time": 0.1293184757232666 }, { "epoch": 4.278564453125e-06, "model_forward_time": 0.024906396865844727, "step": 2804 }, { "epoch": 4.278564453125e-06, "step": 2804, "training_step_time": 0.12253785133361816 }, { "epoch": 4.28009033203125e-06, "model_forward_time": 0.025130033493041992, "step": 2805 }, { "epoch": 4.28009033203125e-06, "step": 2805, "training_step_time": 0.12067270278930664 }, { "epoch": 4.2816162109375e-06, "model_forward_time": 0.02831268310546875, "step": 2806 }, { "epoch": 4.2816162109375e-06, "step": 2806, "training_step_time": 0.11629176139831543 }, { "epoch": 4.28314208984375e-06, "model_forward_time": 0.025098085403442383, "step": 2807 }, { "epoch": 4.28314208984375e-06, "step": 2807, "training_step_time": 0.11623263359069824 }, { "epoch": 4.28466796875e-06, "model_forward_time": 0.025158166885375977, "step": 2808 }, { "epoch": 4.28466796875e-06, "step": 2808, "training_step_time": 0.11282563209533691 }, { "epoch": 4.28619384765625e-06, "model_forward_time": 0.025584936141967773, "step": 2809 }, { "epoch": 4.28619384765625e-06, "step": 2809, "training_step_time": 0.11139798164367676 }, { "epoch": 4.2877197265625e-06, "grad_norm": 0.7726246118545532, "learning_rate": 9.947959935446507e-05, "loss": 0.1589, "step": 2810 }, { "epoch": 4.2877197265625e-06, "model_forward_time": 0.025605201721191406, "step": 2810 }, { "epoch": 4.2877197265625e-06, "step": 2810, "training_step_time": 0.10854744911193848 }, { "epoch": 4.28924560546875e-06, "model_forward_time": 0.025130033493041992, "step": 2811 }, { "epoch": 4.28924560546875e-06, "step": 2811, "training_step_time": 0.10748934745788574 }, { "epoch": 4.290771484375e-06, "model_forward_time": 0.0254209041595459, "step": 2812 }, { "epoch": 4.290771484375e-06, "step": 2812, "training_step_time": 0.10888242721557617 }, { "epoch": 4.29229736328125e-06, "model_forward_time": 0.02546858787536621, "step": 2813 }, { "epoch": 4.29229736328125e-06, "step": 2813, "training_step_time": 0.10860180854797363 }, { "epoch": 4.2938232421875e-06, "model_forward_time": 0.025110721588134766, "step": 2814 }, { "epoch": 4.2938232421875e-06, "step": 2814, "training_step_time": 0.21330666542053223 }, { "epoch": 4.29534912109375e-06, "model_forward_time": 0.02472996711730957, "step": 2815 }, { "epoch": 4.29534912109375e-06, "step": 2815, "training_step_time": 0.13057613372802734 }, { "epoch": 4.296875e-06, "model_forward_time": 0.024506092071533203, "step": 2816 }, { "epoch": 4.296875e-06, "step": 2816, "training_step_time": 0.20092058181762695 }, { "epoch": 4.29840087890625e-06, "model_forward_time": 0.02419424057006836, "step": 2817 }, { "epoch": 4.29840087890625e-06, "step": 2817, "training_step_time": 0.13336777687072754 }, { "epoch": 4.2999267578125e-06, "model_forward_time": 0.024494409561157227, "step": 2818 }, { "epoch": 4.2999267578125e-06, "step": 2818, "training_step_time": 0.12605071067810059 }, { "epoch": 4.30145263671875e-06, "model_forward_time": 0.024712800979614258, "step": 2819 }, { "epoch": 4.30145263671875e-06, "step": 2819, "training_step_time": 0.16102075576782227 }, { "epoch": 4.302978515625e-06, "grad_norm": 0.7662800550460815, "learning_rate": 9.94716380576598e-05, "loss": 0.1394, "step": 2820 }, { "epoch": 4.302978515625e-06, "model_forward_time": 0.027503252029418945, "step": 2820 }, { "epoch": 4.302978515625e-06, "step": 2820, "training_step_time": 0.1103670597076416 }, { "epoch": 4.30450439453125e-06, "model_forward_time": 0.024597644805908203, "step": 2821 }, { "epoch": 4.30450439453125e-06, "step": 2821, "training_step_time": 0.10418820381164551 }, { "epoch": 4.3060302734375e-06, "model_forward_time": 0.024363994598388672, "step": 2822 }, { "epoch": 4.3060302734375e-06, "step": 2822, "training_step_time": 0.1081233024597168 }, { "epoch": 4.30755615234375e-06, "model_forward_time": 0.025439739227294922, "step": 2823 }, { "epoch": 4.30755615234375e-06, "step": 2823, "training_step_time": 0.1555490493774414 }, { "epoch": 4.30908203125e-06, "model_forward_time": 0.024992704391479492, "step": 2824 }, { "epoch": 4.30908203125e-06, "step": 2824, "training_step_time": 0.14147615432739258 }, { "epoch": 4.31060791015625e-06, "model_forward_time": 0.025159597396850586, "step": 2825 }, { "epoch": 4.31060791015625e-06, "step": 2825, "training_step_time": 0.11299872398376465 }, { "epoch": 4.3121337890625e-06, "model_forward_time": 0.025101661682128906, "step": 2826 }, { "epoch": 4.3121337890625e-06, "step": 2826, "training_step_time": 0.11157035827636719 }, { "epoch": 4.31365966796875e-06, "model_forward_time": 0.025141000747680664, "step": 2827 }, { "epoch": 4.31365966796875e-06, "step": 2827, "training_step_time": 0.10704636573791504 }, { "epoch": 4.315185546875e-06, "model_forward_time": 0.0255887508392334, "step": 2828 }, { "epoch": 4.315185546875e-06, "step": 2828, "training_step_time": 0.11251211166381836 }, { "epoch": 4.31671142578125e-06, "model_forward_time": 0.026050090789794922, "step": 2829 }, { "epoch": 4.31671142578125e-06, "step": 2829, "training_step_time": 0.19586658477783203 }, { "epoch": 4.3182373046875e-06, "grad_norm": 0.7930927276611328, "learning_rate": 9.946361664814943e-05, "loss": 0.1426, "step": 2830 }, { "epoch": 4.3182373046875e-06, "model_forward_time": 0.02529621124267578, "step": 2830 }, { "epoch": 4.3182373046875e-06, "step": 2830, "training_step_time": 0.10746026039123535 }, { "epoch": 4.31976318359375e-06, "model_forward_time": 0.024320602416992188, "step": 2831 }, { "epoch": 4.31976318359375e-06, "step": 2831, "training_step_time": 0.13182425498962402 }, { "epoch": 4.3212890625e-06, "model_forward_time": 0.02493882179260254, "step": 2832 }, { "epoch": 4.3212890625e-06, "step": 2832, "training_step_time": 0.11899733543395996 }, { "epoch": 4.32281494140625e-06, "model_forward_time": 0.025506973266601562, "step": 2833 }, { "epoch": 4.32281494140625e-06, "step": 2833, "training_step_time": 0.1136622428894043 }, { "epoch": 4.3243408203125e-06, "model_forward_time": 0.02584385871887207, "step": 2834 }, { "epoch": 4.3243408203125e-06, "step": 2834, "training_step_time": 0.11119794845581055 }, { "epoch": 4.32586669921875e-06, "model_forward_time": 0.0259859561920166, "step": 2835 }, { "epoch": 4.32586669921875e-06, "step": 2835, "training_step_time": 0.21367788314819336 }, { "epoch": 4.327392578125e-06, "model_forward_time": 0.02478790283203125, "step": 2836 }, { "epoch": 4.327392578125e-06, "step": 2836, "training_step_time": 0.12007260322570801 }, { "epoch": 4.32891845703125e-06, "model_forward_time": 0.02495718002319336, "step": 2837 }, { "epoch": 4.32891845703125e-06, "step": 2837, "training_step_time": 0.1038215160369873 }, { "epoch": 4.3304443359375e-06, "model_forward_time": 0.02550482749938965, "step": 2838 }, { "epoch": 4.3304443359375e-06, "step": 2838, "training_step_time": 0.10763120651245117 }, { "epoch": 4.33197021484375e-06, "model_forward_time": 0.025640487670898438, "step": 2839 }, { "epoch": 4.33197021484375e-06, "step": 2839, "training_step_time": 0.10847043991088867 }, { "epoch": 4.33349609375e-06, "grad_norm": 1.069185733795166, "learning_rate": 9.945553513568068e-05, "loss": 0.1457, "step": 2840 }, { "epoch": 4.33349609375e-06, "model_forward_time": 0.0257875919342041, "step": 2840 }, { "epoch": 4.33349609375e-06, "step": 2840, "training_step_time": 0.10794520378112793 }, { "epoch": 4.33502197265625e-06, "model_forward_time": 0.025675058364868164, "step": 2841 }, { "epoch": 4.33502197265625e-06, "step": 2841, "training_step_time": 0.10611391067504883 }, { "epoch": 4.3365478515625e-06, "model_forward_time": 0.02539229393005371, "step": 2842 }, { "epoch": 4.3365478515625e-06, "step": 2842, "training_step_time": 0.11593151092529297 }, { "epoch": 4.33807373046875e-06, "model_forward_time": 0.025228023529052734, "step": 2843 }, { "epoch": 4.33807373046875e-06, "step": 2843, "training_step_time": 0.10912203788757324 }, { "epoch": 4.339599609375e-06, "model_forward_time": 0.02556133270263672, "step": 2844 }, { "epoch": 4.339599609375e-06, "step": 2844, "training_step_time": 0.10664772987365723 }, { "epoch": 4.34112548828125e-06, "model_forward_time": 0.025111675262451172, "step": 2845 }, { "epoch": 4.34112548828125e-06, "step": 2845, "training_step_time": 0.10616612434387207 }, { "epoch": 4.3426513671875e-06, "model_forward_time": 0.02564859390258789, "step": 2846 }, { "epoch": 4.3426513671875e-06, "step": 2846, "training_step_time": 0.1074683666229248 }, { "epoch": 4.34417724609375e-06, "model_forward_time": 0.025493621826171875, "step": 2847 }, { "epoch": 4.34417724609375e-06, "step": 2847, "training_step_time": 0.10701465606689453 }, { "epoch": 4.345703125e-06, "model_forward_time": 0.02537083625793457, "step": 2848 }, { "epoch": 4.345703125e-06, "step": 2848, "training_step_time": 0.10764622688293457 }, { "epoch": 4.34722900390625e-06, "model_forward_time": 0.025753498077392578, "step": 2849 }, { "epoch": 4.34722900390625e-06, "step": 2849, "training_step_time": 0.10744237899780273 }, { "epoch": 4.3487548828125e-06, "grad_norm": 0.5193539261817932, "learning_rate": 9.944739353007344e-05, "loss": 0.1199, "step": 2850 }, { "epoch": 4.3487548828125e-06, "model_forward_time": 0.025250911712646484, "step": 2850 }, { "epoch": 4.3487548828125e-06, "step": 2850, "training_step_time": 0.1063528060913086 }, { "epoch": 4.35028076171875e-06, "model_forward_time": 0.025938749313354492, "step": 2851 }, { "epoch": 4.35028076171875e-06, "step": 2851, "training_step_time": 0.11112833023071289 }, { "epoch": 4.351806640625e-06, "model_forward_time": 0.0253140926361084, "step": 2852 }, { "epoch": 4.351806640625e-06, "step": 2852, "training_step_time": 0.10675048828125 }, { "epoch": 4.35333251953125e-06, "model_forward_time": 0.026303529739379883, "step": 2853 }, { "epoch": 4.35333251953125e-06, "step": 2853, "training_step_time": 0.11050891876220703 }, { "epoch": 4.3548583984375e-06, "model_forward_time": 0.025612592697143555, "step": 2854 }, { "epoch": 4.3548583984375e-06, "step": 2854, "training_step_time": 0.1068418025970459 }, { "epoch": 4.35638427734375e-06, "model_forward_time": 0.025264739990234375, "step": 2855 }, { "epoch": 4.35638427734375e-06, "step": 2855, "training_step_time": 0.10683393478393555 }, { "epoch": 4.35791015625e-06, "model_forward_time": 0.02545928955078125, "step": 2856 }, { "epoch": 4.35791015625e-06, "step": 2856, "training_step_time": 0.10934567451477051 }, { "epoch": 4.35943603515625e-06, "model_forward_time": 0.02525949478149414, "step": 2857 }, { "epoch": 4.35943603515625e-06, "step": 2857, "training_step_time": 0.10873246192932129 }, { "epoch": 4.3609619140625e-06, "model_forward_time": 0.025900602340698242, "step": 2858 }, { "epoch": 4.3609619140625e-06, "step": 2858, "training_step_time": 0.10770463943481445 }, { "epoch": 4.36248779296875e-06, "model_forward_time": 0.026159048080444336, "step": 2859 }, { "epoch": 4.36248779296875e-06, "step": 2859, "training_step_time": 0.11481833457946777 }, { "epoch": 4.364013671875e-06, "grad_norm": 0.9115896224975586, "learning_rate": 9.943919184122043e-05, "loss": 0.1402, "step": 2860 }, { "epoch": 4.364013671875e-06, "model_forward_time": 0.0255584716796875, "step": 2860 }, { "epoch": 4.364013671875e-06, "step": 2860, "training_step_time": 0.1951124668121338 }, { "epoch": 4.36553955078125e-06, "model_forward_time": 0.02464127540588379, "step": 2861 }, { "epoch": 4.36553955078125e-06, "step": 2861, "training_step_time": 0.20286321640014648 }, { "epoch": 4.3670654296875e-06, "model_forward_time": 0.02478170394897461, "step": 2862 }, { "epoch": 4.3670654296875e-06, "step": 2862, "training_step_time": 0.13198113441467285 }, { "epoch": 4.36859130859375e-06, "model_forward_time": 0.024460554122924805, "step": 2863 }, { "epoch": 4.36859130859375e-06, "step": 2863, "training_step_time": 0.1323685646057129 }, { "epoch": 4.3701171875e-06, "model_forward_time": 0.02495861053466797, "step": 2864 }, { "epoch": 4.3701171875e-06, "step": 2864, "training_step_time": 0.16466927528381348 }, { "epoch": 4.37164306640625e-06, "model_forward_time": 0.02469015121459961, "step": 2865 }, { "epoch": 4.37164306640625e-06, "step": 2865, "training_step_time": 0.15828251838684082 }, { "epoch": 4.3731689453125e-06, "model_forward_time": 0.024965763092041016, "step": 2866 }, { "epoch": 4.3731689453125e-06, "step": 2866, "training_step_time": 0.10855245590209961 }, { "epoch": 4.37469482421875e-06, "model_forward_time": 0.02485179901123047, "step": 2867 }, { "epoch": 4.37469482421875e-06, "step": 2867, "training_step_time": 0.13995671272277832 }, { "epoch": 4.376220703125e-06, "model_forward_time": 0.02526378631591797, "step": 2868 }, { "epoch": 4.376220703125e-06, "step": 2868, "training_step_time": 0.19884872436523438 }, { "epoch": 4.37774658203125e-06, "model_forward_time": 0.02438521385192871, "step": 2869 }, { "epoch": 4.37774658203125e-06, "step": 2869, "training_step_time": 0.14481425285339355 }, { "epoch": 4.3792724609375e-06, "grad_norm": 1.1567416191101074, "learning_rate": 9.943093007908755e-05, "loss": 0.1096, "step": 2870 }, { "epoch": 4.3792724609375e-06, "model_forward_time": 0.02489614486694336, "step": 2870 }, { "epoch": 4.3792724609375e-06, "step": 2870, "training_step_time": 0.2074892520904541 }, { "epoch": 4.38079833984375e-06, "model_forward_time": 0.025901317596435547, "step": 2871 }, { "epoch": 4.38079833984375e-06, "step": 2871, "training_step_time": 0.1325676441192627 }, { "epoch": 4.38232421875e-06, "model_forward_time": 0.02446269989013672, "step": 2872 }, { "epoch": 4.38232421875e-06, "step": 2872, "training_step_time": 0.1791667938232422 }, { "epoch": 4.38385009765625e-06, "model_forward_time": 0.025340557098388672, "step": 2873 }, { "epoch": 4.38385009765625e-06, "step": 2873, "training_step_time": 0.1185905933380127 }, { "epoch": 4.3853759765625e-06, "model_forward_time": 0.026240825653076172, "step": 2874 }, { "epoch": 4.3853759765625e-06, "step": 2874, "training_step_time": 0.11226582527160645 }, { "epoch": 4.38690185546875e-06, "model_forward_time": 0.026343345642089844, "step": 2875 }, { "epoch": 4.38690185546875e-06, "step": 2875, "training_step_time": 0.11324834823608398 }, { "epoch": 4.388427734375e-06, "model_forward_time": 0.025667905807495117, "step": 2876 }, { "epoch": 4.388427734375e-06, "step": 2876, "training_step_time": 0.2104356288909912 }, { "epoch": 4.38995361328125e-06, "model_forward_time": 0.026690244674682617, "step": 2877 }, { "epoch": 4.38995361328125e-06, "step": 2877, "training_step_time": 0.11708545684814453 }, { "epoch": 4.3914794921875e-06, "model_forward_time": 0.02524876594543457, "step": 2878 }, { "epoch": 4.3914794921875e-06, "step": 2878, "training_step_time": 0.1107950210571289 }, { "epoch": 4.39300537109375e-06, "model_forward_time": 0.026842117309570312, "step": 2879 }, { "epoch": 4.39300537109375e-06, "step": 2879, "training_step_time": 0.10951638221740723 }, { "epoch": 4.39453125e-06, "grad_norm": 0.8022437691688538, "learning_rate": 9.942260825371358e-05, "loss": 0.0972, "step": 2880 }, { "epoch": 4.39453125e-06, "model_forward_time": 0.025896072387695312, "step": 2880 }, { "epoch": 4.39453125e-06, "step": 2880, "training_step_time": 0.19140625 }, { "epoch": 4.39605712890625e-06, "model_forward_time": 0.025415658950805664, "step": 2881 }, { "epoch": 4.39605712890625e-06, "step": 2881, "training_step_time": 0.11669254302978516 }, { "epoch": 4.3975830078125e-06, "model_forward_time": 0.02515387535095215, "step": 2882 }, { "epoch": 4.3975830078125e-06, "step": 2882, "training_step_time": 0.10691618919372559 }, { "epoch": 4.39910888671875e-06, "model_forward_time": 0.024991512298583984, "step": 2883 }, { "epoch": 4.39910888671875e-06, "step": 2883, "training_step_time": 0.10912060737609863 }, { "epoch": 4.400634765625e-06, "model_forward_time": 0.02570366859436035, "step": 2884 }, { "epoch": 4.400634765625e-06, "step": 2884, "training_step_time": 0.10786867141723633 }, { "epoch": 4.40216064453125e-06, "model_forward_time": 0.025783061981201172, "step": 2885 }, { "epoch": 4.40216064453125e-06, "step": 2885, "training_step_time": 0.11433076858520508 }, { "epoch": 4.4036865234375e-06, "model_forward_time": 0.02613353729248047, "step": 2886 }, { "epoch": 4.4036865234375e-06, "step": 2886, "training_step_time": 0.10788655281066895 }, { "epoch": 4.40521240234375e-06, "model_forward_time": 0.026128530502319336, "step": 2887 }, { "epoch": 4.40521240234375e-06, "step": 2887, "training_step_time": 0.10891532897949219 }, { "epoch": 4.40673828125e-06, "model_forward_time": 0.025747060775756836, "step": 2888 }, { "epoch": 4.40673828125e-06, "step": 2888, "training_step_time": 0.10700201988220215 }, { "epoch": 4.40826416015625e-06, "model_forward_time": 0.025652647018432617, "step": 2889 }, { "epoch": 4.40826416015625e-06, "step": 2889, "training_step_time": 0.10730957984924316 }, { "epoch": 4.4097900390625e-06, "grad_norm": 0.5341487526893616, "learning_rate": 9.941422637521035e-05, "loss": 0.1253, "step": 2890 }, { "epoch": 4.4097900390625e-06, "model_forward_time": 0.02560901641845703, "step": 2890 }, { "epoch": 4.4097900390625e-06, "step": 2890, "training_step_time": 0.1115262508392334 }, { "epoch": 4.41131591796875e-06, "model_forward_time": 0.0257565975189209, "step": 2891 }, { "epoch": 4.41131591796875e-06, "step": 2891, "training_step_time": 0.10729789733886719 }, { "epoch": 4.412841796875e-06, "model_forward_time": 0.02559185028076172, "step": 2892 }, { "epoch": 4.412841796875e-06, "step": 2892, "training_step_time": 0.10714602470397949 }, { "epoch": 4.41436767578125e-06, "model_forward_time": 0.025635957717895508, "step": 2893 }, { "epoch": 4.41436767578125e-06, "step": 2893, "training_step_time": 0.11196184158325195 }, { "epoch": 4.4158935546875e-06, "model_forward_time": 0.026094913482666016, "step": 2894 }, { "epoch": 4.4158935546875e-06, "step": 2894, "training_step_time": 0.10988926887512207 }, { "epoch": 4.41741943359375e-06, "model_forward_time": 0.02536177635192871, "step": 2895 }, { "epoch": 4.41741943359375e-06, "step": 2895, "training_step_time": 0.10808682441711426 }, { "epoch": 4.4189453125e-06, "model_forward_time": 0.025553464889526367, "step": 2896 }, { "epoch": 4.4189453125e-06, "step": 2896, "training_step_time": 0.10755491256713867 }, { "epoch": 4.42047119140625e-06, "model_forward_time": 0.0259091854095459, "step": 2897 }, { "epoch": 4.42047119140625e-06, "step": 2897, "training_step_time": 0.10822916030883789 }, { "epoch": 4.4219970703125e-06, "model_forward_time": 0.025369644165039062, "step": 2898 }, { "epoch": 4.4219970703125e-06, "step": 2898, "training_step_time": 0.10725831985473633 }, { "epoch": 4.42352294921875e-06, "model_forward_time": 0.026180505752563477, "step": 2899 }, { "epoch": 4.42352294921875e-06, "step": 2899, "training_step_time": 0.10856175422668457 }, { "epoch": 4.425048828125e-06, "grad_norm": 0.4163476526737213, "learning_rate": 9.940578445376258e-05, "loss": 0.1239, "step": 2900 }, { "epoch": 4.425048828125e-06, "model_forward_time": 0.02578449249267578, "step": 2900 }, { "epoch": 4.425048828125e-06, "step": 2900, "training_step_time": 0.11043620109558105 }, { "epoch": 4.42657470703125e-06, "model_forward_time": 0.02543330192565918, "step": 2901 }, { "epoch": 4.42657470703125e-06, "step": 2901, "training_step_time": 0.10791707038879395 }, { "epoch": 4.4281005859375e-06, "model_forward_time": 0.02577495574951172, "step": 2902 }, { "epoch": 4.4281005859375e-06, "step": 2902, "training_step_time": 0.11560869216918945 }, { "epoch": 4.42962646484375e-06, "model_forward_time": 0.025716066360473633, "step": 2903 }, { "epoch": 4.42962646484375e-06, "step": 2903, "training_step_time": 0.11022305488586426 }, { "epoch": 4.43115234375e-06, "model_forward_time": 0.025704622268676758, "step": 2904 }, { "epoch": 4.43115234375e-06, "step": 2904, "training_step_time": 0.2121117115020752 }, { "epoch": 4.43267822265625e-06, "model_forward_time": 0.02570343017578125, "step": 2905 }, { "epoch": 4.43267822265625e-06, "step": 2905, "training_step_time": 0.13804292678833008 }, { "epoch": 4.4342041015625e-06, "model_forward_time": 0.02502274513244629, "step": 2906 }, { "epoch": 4.4342041015625e-06, "step": 2906, "training_step_time": 0.20011472702026367 }, { "epoch": 4.43572998046875e-06, "model_forward_time": 0.024092912673950195, "step": 2907 }, { "epoch": 4.43572998046875e-06, "step": 2907, "training_step_time": 0.16405582427978516 }, { "epoch": 4.437255859375e-06, "model_forward_time": 0.025096416473388672, "step": 2908 }, { "epoch": 4.437255859375e-06, "step": 2908, "training_step_time": 0.1948237419128418 }, { "epoch": 4.43878173828125e-06, "model_forward_time": 0.024829864501953125, "step": 2909 }, { "epoch": 4.43878173828125e-06, "step": 2909, "training_step_time": 0.1401219367980957 }, { "epoch": 4.4403076171875e-06, "grad_norm": 0.5024942755699158, "learning_rate": 9.939728249962807e-05, "loss": 0.1176, "step": 2910 }, { "epoch": 4.4403076171875e-06, "model_forward_time": 0.023928403854370117, "step": 2910 }, { "epoch": 4.4403076171875e-06, "step": 2910, "training_step_time": 0.10935497283935547 }, { "epoch": 4.44183349609375e-06, "model_forward_time": 0.0254514217376709, "step": 2911 }, { "epoch": 4.44183349609375e-06, "step": 2911, "training_step_time": 0.10876345634460449 }, { "epoch": 4.443359375e-06, "model_forward_time": 0.025729894638061523, "step": 2912 }, { "epoch": 4.443359375e-06, "step": 2912, "training_step_time": 0.16447782516479492 }, { "epoch": 4.44488525390625e-06, "model_forward_time": 0.02524852752685547, "step": 2913 }, { "epoch": 4.44488525390625e-06, "step": 2913, "training_step_time": 0.1319105625152588 }, { "epoch": 4.4464111328125e-06, "model_forward_time": 0.02468132972717285, "step": 2914 }, { "epoch": 4.4464111328125e-06, "step": 2914, "training_step_time": 0.11304616928100586 }, { "epoch": 4.44793701171875e-06, "model_forward_time": 0.02822089195251465, "step": 2915 }, { "epoch": 4.44793701171875e-06, "step": 2915, "training_step_time": 0.11149215698242188 }, { "epoch": 4.449462890625e-06, "model_forward_time": 0.025212764739990234, "step": 2916 }, { "epoch": 4.449462890625e-06, "step": 2916, "training_step_time": 0.10906720161437988 }, { "epoch": 4.45098876953125e-06, "model_forward_time": 0.02449345588684082, "step": 2917 }, { "epoch": 4.45098876953125e-06, "step": 2917, "training_step_time": 0.11286067962646484 }, { "epoch": 4.4525146484375e-06, "model_forward_time": 0.02461528778076172, "step": 2918 }, { "epoch": 4.4525146484375e-06, "step": 2918, "training_step_time": 0.19971013069152832 }, { "epoch": 4.45404052734375e-06, "model_forward_time": 0.024705886840820312, "step": 2919 }, { "epoch": 4.45404052734375e-06, "step": 2919, "training_step_time": 0.11220097541809082 }, { "epoch": 4.45556640625e-06, "grad_norm": 0.9816491603851318, "learning_rate": 9.938872052313746e-05, "loss": 0.1499, "step": 2920 }, { "epoch": 4.45556640625e-06, "model_forward_time": 0.02576470375061035, "step": 2920 }, { "epoch": 4.45556640625e-06, "step": 2920, "training_step_time": 0.10891389846801758 }, { "epoch": 4.45709228515625e-06, "model_forward_time": 0.024667739868164062, "step": 2921 }, { "epoch": 4.45709228515625e-06, "step": 2921, "training_step_time": 0.10807919502258301 }, { "epoch": 4.4586181640625e-06, "model_forward_time": 0.026676416397094727, "step": 2922 }, { "epoch": 4.4586181640625e-06, "step": 2922, "training_step_time": 0.11333608627319336 }, { "epoch": 4.46014404296875e-06, "model_forward_time": 0.02564835548400879, "step": 2923 }, { "epoch": 4.46014404296875e-06, "step": 2923, "training_step_time": 0.11304354667663574 }, { "epoch": 4.461669921875e-06, "model_forward_time": 0.029223918914794922, "step": 2924 }, { "epoch": 4.461669921875e-06, "step": 2924, "training_step_time": 0.11306500434875488 }, { "epoch": 4.46319580078125e-06, "model_forward_time": 0.02574753761291504, "step": 2925 }, { "epoch": 4.46319580078125e-06, "step": 2925, "training_step_time": 0.21307802200317383 }, { "epoch": 4.4647216796875e-06, "model_forward_time": 0.024732112884521484, "step": 2926 }, { "epoch": 4.4647216796875e-06, "step": 2926, "training_step_time": 0.11550402641296387 }, { "epoch": 4.46624755859375e-06, "model_forward_time": 0.024950742721557617, "step": 2927 }, { "epoch": 4.46624755859375e-06, "step": 2927, "training_step_time": 0.10428953170776367 }, { "epoch": 4.4677734375e-06, "model_forward_time": 0.02560877799987793, "step": 2928 }, { "epoch": 4.4677734375e-06, "step": 2928, "training_step_time": 0.10859870910644531 }, { "epoch": 4.46929931640625e-06, "model_forward_time": 0.025862932205200195, "step": 2929 }, { "epoch": 4.46929931640625e-06, "step": 2929, "training_step_time": 0.11178445816040039 }, { "epoch": 4.4708251953125e-06, "grad_norm": 1.0555541515350342, "learning_rate": 9.938009853469436e-05, "loss": 0.1142, "step": 2930 }, { "epoch": 4.4708251953125e-06, "model_forward_time": 0.02568840980529785, "step": 2930 }, { "epoch": 4.4708251953125e-06, "step": 2930, "training_step_time": 0.11029767990112305 }, { "epoch": 4.47235107421875e-06, "model_forward_time": 0.02713155746459961, "step": 2931 }, { "epoch": 4.47235107421875e-06, "step": 2931, "training_step_time": 0.10959458351135254 }, { "epoch": 4.473876953125e-06, "model_forward_time": 0.024959564208984375, "step": 2932 }, { "epoch": 4.473876953125e-06, "step": 2932, "training_step_time": 0.10962820053100586 }, { "epoch": 4.47540283203125e-06, "model_forward_time": 0.026279211044311523, "step": 2933 }, { "epoch": 4.47540283203125e-06, "step": 2933, "training_step_time": 0.10775542259216309 }, { "epoch": 4.4769287109375e-06, "model_forward_time": 0.0254974365234375, "step": 2934 }, { "epoch": 4.4769287109375e-06, "step": 2934, "training_step_time": 0.10799670219421387 }, { "epoch": 4.47845458984375e-06, "model_forward_time": 0.025084733963012695, "step": 2935 }, { "epoch": 4.47845458984375e-06, "step": 2935, "training_step_time": 0.132537841796875 }, { "epoch": 4.47998046875e-06, "model_forward_time": 0.025426149368286133, "step": 2936 }, { "epoch": 4.47998046875e-06, "step": 2936, "training_step_time": 0.1486375331878662 }, { "epoch": 4.48150634765625e-06, "model_forward_time": 0.024872303009033203, "step": 2937 }, { "epoch": 4.48150634765625e-06, "step": 2937, "training_step_time": 0.1477510929107666 }, { "epoch": 4.4830322265625e-06, "model_forward_time": 0.02434229850769043, "step": 2938 }, { "epoch": 4.4830322265625e-06, "step": 2938, "training_step_time": 0.12763762474060059 }, { "epoch": 4.48455810546875e-06, "model_forward_time": 0.024348974227905273, "step": 2939 }, { "epoch": 4.48455810546875e-06, "step": 2939, "training_step_time": 0.12826800346374512 }, { "epoch": 4.486083984375e-06, "grad_norm": 0.7148603200912476, "learning_rate": 9.937141654477528e-05, "loss": 0.1181, "step": 2940 }, { "epoch": 4.486083984375e-06, "model_forward_time": 0.024813413619995117, "step": 2940 }, { "epoch": 4.486083984375e-06, "step": 2940, "training_step_time": 0.12384796142578125 }, { "epoch": 4.48760986328125e-06, "model_forward_time": 0.024008989334106445, "step": 2941 }, { "epoch": 4.48760986328125e-06, "step": 2941, "training_step_time": 0.11441802978515625 }, { "epoch": 4.4891357421875e-06, "model_forward_time": 0.025722026824951172, "step": 2942 }, { "epoch": 4.4891357421875e-06, "step": 2942, "training_step_time": 0.11750102043151855 }, { "epoch": 4.49066162109375e-06, "model_forward_time": 0.025391817092895508, "step": 2943 }, { "epoch": 4.49066162109375e-06, "step": 2943, "training_step_time": 0.11415600776672363 }, { "epoch": 4.4921875e-06, "model_forward_time": 0.025437593460083008, "step": 2944 }, { "epoch": 4.4921875e-06, "step": 2944, "training_step_time": 0.11010575294494629 }, { "epoch": 4.49371337890625e-06, "model_forward_time": 0.026737689971923828, "step": 2945 }, { "epoch": 4.49371337890625e-06, "step": 2945, "training_step_time": 0.1131284236907959 }, { "epoch": 4.4952392578125e-06, "model_forward_time": 0.02538585662841797, "step": 2946 }, { "epoch": 4.4952392578125e-06, "step": 2946, "training_step_time": 0.1160120964050293 }, { "epoch": 4.49676513671875e-06, "model_forward_time": 0.025252342224121094, "step": 2947 }, { "epoch": 4.49676513671875e-06, "step": 2947, "training_step_time": 0.11012840270996094 }, { "epoch": 4.498291015625e-06, "model_forward_time": 0.025865554809570312, "step": 2948 }, { "epoch": 4.498291015625e-06, "step": 2948, "training_step_time": 0.18613433837890625 }, { "epoch": 4.49981689453125e-06, "model_forward_time": 0.024675607681274414, "step": 2949 }, { "epoch": 4.49981689453125e-06, "step": 2949, "training_step_time": 0.19361066818237305 }, { "epoch": 4.5013427734375e-06, "grad_norm": 0.6078255772590637, "learning_rate": 9.936267456392971e-05, "loss": 0.1419, "step": 2950 }, { "epoch": 4.5013427734375e-06, "model_forward_time": 0.024212121963500977, "step": 2950 }, { "epoch": 4.5013427734375e-06, "step": 2950, "training_step_time": 0.1473231315612793 }, { "epoch": 4.50286865234375e-06, "model_forward_time": 0.02373480796813965, "step": 2951 }, { "epoch": 4.50286865234375e-06, "step": 2951, "training_step_time": 0.1781630516052246 }, { "epoch": 4.50439453125e-06, "model_forward_time": 0.02470850944519043, "step": 2952 }, { "epoch": 4.50439453125e-06, "step": 2952, "training_step_time": 0.1799602508544922 }, { "epoch": 4.50592041015625e-06, "model_forward_time": 0.024767160415649414, "step": 2953 }, { "epoch": 4.50592041015625e-06, "step": 2953, "training_step_time": 0.14460277557373047 }, { "epoch": 4.5074462890625e-06, "model_forward_time": 0.024739503860473633, "step": 2954 }, { "epoch": 4.5074462890625e-06, "step": 2954, "training_step_time": 0.10888934135437012 }, { "epoch": 4.50897216796875e-06, "model_forward_time": 0.02471923828125, "step": 2955 }, { "epoch": 4.50897216796875e-06, "step": 2955, "training_step_time": 0.10982418060302734 }, { "epoch": 4.510498046875e-06, "model_forward_time": 0.025331974029541016, "step": 2956 }, { "epoch": 4.510498046875e-06, "step": 2956, "training_step_time": 0.19500398635864258 }, { "epoch": 4.51202392578125e-06, "model_forward_time": 0.025336742401123047, "step": 2957 }, { "epoch": 4.51202392578125e-06, "step": 2957, "training_step_time": 0.13620853424072266 }, { "epoch": 4.5135498046875e-06, "model_forward_time": 0.025560855865478516, "step": 2958 }, { "epoch": 4.5135498046875e-06, "step": 2958, "training_step_time": 0.11236977577209473 }, { "epoch": 4.51507568359375e-06, "model_forward_time": 0.025601863861083984, "step": 2959 }, { "epoch": 4.51507568359375e-06, "step": 2959, "training_step_time": 0.11033987998962402 }, { "epoch": 4.5166015625e-06, "grad_norm": 0.6266261339187622, "learning_rate": 9.935387260277993e-05, "loss": 0.1329, "step": 2960 }, { "epoch": 4.5166015625e-06, "model_forward_time": 0.025922060012817383, "step": 2960 }, { "epoch": 4.5166015625e-06, "step": 2960, "training_step_time": 0.11202692985534668 }, { "epoch": 4.51812744140625e-06, "model_forward_time": 0.025138139724731445, "step": 2961 }, { "epoch": 4.51812744140625e-06, "step": 2961, "training_step_time": 0.11005687713623047 }, { "epoch": 4.5196533203125e-06, "model_forward_time": 0.02571868896484375, "step": 2962 }, { "epoch": 4.5196533203125e-06, "step": 2962, "training_step_time": 0.19635462760925293 }, { "epoch": 4.52117919921875e-06, "model_forward_time": 0.025515079498291016, "step": 2963 }, { "epoch": 4.52117919921875e-06, "step": 2963, "training_step_time": 0.11399555206298828 }, { "epoch": 4.522705078125e-06, "model_forward_time": 0.025533676147460938, "step": 2964 }, { "epoch": 4.522705078125e-06, "step": 2964, "training_step_time": 0.11181783676147461 }, { "epoch": 4.52423095703125e-06, "model_forward_time": 0.028892040252685547, "step": 2965 }, { "epoch": 4.52423095703125e-06, "step": 2965, "training_step_time": 0.21477127075195312 }, { "epoch": 4.5257568359375e-06, "model_forward_time": 0.025362253189086914, "step": 2966 }, { "epoch": 4.5257568359375e-06, "step": 2966, "training_step_time": 0.11672544479370117 }, { "epoch": 4.52728271484375e-06, "model_forward_time": 0.024881601333618164, "step": 2967 }, { "epoch": 4.52728271484375e-06, "step": 2967, "training_step_time": 0.10939812660217285 }, { "epoch": 4.52880859375e-06, "model_forward_time": 0.02612447738647461, "step": 2968 }, { "epoch": 4.52880859375e-06, "step": 2968, "training_step_time": 0.21409916877746582 }, { "epoch": 4.53033447265625e-06, "model_forward_time": 0.024281024932861328, "step": 2969 }, { "epoch": 4.53033447265625e-06, "step": 2969, "training_step_time": 0.11605477333068848 }, { "epoch": 4.5318603515625e-06, "grad_norm": 0.7585597634315491, "learning_rate": 9.934501067202117e-05, "loss": 0.1163, "step": 2970 }, { "epoch": 4.5318603515625e-06, "model_forward_time": 0.02476954460144043, "step": 2970 }, { "epoch": 4.5318603515625e-06, "step": 2970, "training_step_time": 0.10861420631408691 }, { "epoch": 4.53338623046875e-06, "model_forward_time": 0.025798320770263672, "step": 2971 }, { "epoch": 4.53338623046875e-06, "step": 2971, "training_step_time": 0.11424756050109863 }, { "epoch": 4.534912109375e-06, "model_forward_time": 0.025229215621948242, "step": 2972 }, { "epoch": 4.534912109375e-06, "step": 2972, "training_step_time": 0.11175155639648438 }, { "epoch": 4.53643798828125e-06, "model_forward_time": 0.02484607696533203, "step": 2973 }, { "epoch": 4.53643798828125e-06, "step": 2973, "training_step_time": 0.10991120338439941 }, { "epoch": 4.5379638671875e-06, "model_forward_time": 0.025568723678588867, "step": 2974 }, { "epoch": 4.5379638671875e-06, "step": 2974, "training_step_time": 0.11665129661560059 }, { "epoch": 4.53948974609375e-06, "model_forward_time": 0.025730371475219727, "step": 2975 }, { "epoch": 4.53948974609375e-06, "step": 2975, "training_step_time": 0.11010360717773438 }, { "epoch": 4.541015625e-06, "model_forward_time": 0.025798320770263672, "step": 2976 }, { "epoch": 4.541015625e-06, "step": 2976, "training_step_time": 0.10947537422180176 }, { "epoch": 4.54254150390625e-06, "model_forward_time": 0.025766372680664062, "step": 2977 }, { "epoch": 4.54254150390625e-06, "step": 2977, "training_step_time": 0.10937786102294922 }, { "epoch": 4.5440673828125e-06, "model_forward_time": 0.024997711181640625, "step": 2978 }, { "epoch": 4.5440673828125e-06, "step": 2978, "training_step_time": 0.10871410369873047 }, { "epoch": 4.54559326171875e-06, "model_forward_time": 0.02529430389404297, "step": 2979 }, { "epoch": 4.54559326171875e-06, "step": 2979, "training_step_time": 0.11275863647460938 }, { "epoch": 4.547119140625e-06, "grad_norm": 0.7055568099021912, "learning_rate": 9.933608878242153e-05, "loss": 0.1257, "step": 2980 }, { "epoch": 4.547119140625e-06, "model_forward_time": 0.029764652252197266, "step": 2980 }, { "epoch": 4.547119140625e-06, "step": 2980, "training_step_time": 0.11362099647521973 }, { "epoch": 4.54864501953125e-06, "model_forward_time": 0.02495884895324707, "step": 2981 }, { "epoch": 4.54864501953125e-06, "step": 2981, "training_step_time": 0.10736536979675293 }, { "epoch": 4.5501708984375e-06, "model_forward_time": 0.024997711181640625, "step": 2982 }, { "epoch": 4.5501708984375e-06, "step": 2982, "training_step_time": 0.10762286186218262 }, { "epoch": 4.55169677734375e-06, "model_forward_time": 0.02576589584350586, "step": 2983 }, { "epoch": 4.55169677734375e-06, "step": 2983, "training_step_time": 0.10982728004455566 }, { "epoch": 4.55322265625e-06, "model_forward_time": 0.02516031265258789, "step": 2984 }, { "epoch": 4.55322265625e-06, "step": 2984, "training_step_time": 0.10881328582763672 }, { "epoch": 4.55474853515625e-06, "model_forward_time": 0.02513861656188965, "step": 2985 }, { "epoch": 4.55474853515625e-06, "step": 2985, "training_step_time": 0.10706138610839844 }, { "epoch": 4.5562744140625e-06, "model_forward_time": 0.025171995162963867, "step": 2986 }, { "epoch": 4.5562744140625e-06, "step": 2986, "training_step_time": 0.10719585418701172 }, { "epoch": 4.55780029296875e-06, "model_forward_time": 0.025115489959716797, "step": 2987 }, { "epoch": 4.55780029296875e-06, "step": 2987, "training_step_time": 0.1064448356628418 }, { "epoch": 4.559326171875e-06, "model_forward_time": 0.02533411979675293, "step": 2988 }, { "epoch": 4.559326171875e-06, "step": 2988, "training_step_time": 0.10821652412414551 }, { "epoch": 4.56085205078125e-06, "model_forward_time": 0.025208473205566406, "step": 2989 }, { "epoch": 4.56085205078125e-06, "step": 2989, "training_step_time": 0.10895276069641113 }, { "epoch": 4.5623779296875e-06, "grad_norm": 0.9388747215270996, "learning_rate": 9.932710694482191e-05, "loss": 0.1511, "step": 2990 }, { "epoch": 4.5623779296875e-06, "model_forward_time": 0.025083303451538086, "step": 2990 }, { "epoch": 4.5623779296875e-06, "step": 2990, "training_step_time": 0.1072242259979248 }, { "epoch": 4.56390380859375e-06, "model_forward_time": 0.02484583854675293, "step": 2991 }, { "epoch": 4.56390380859375e-06, "step": 2991, "training_step_time": 0.10586428642272949 }, { "epoch": 4.5654296875e-06, "model_forward_time": 0.025149822235107422, "step": 2992 }, { "epoch": 4.5654296875e-06, "step": 2992, "training_step_time": 0.113616943359375 }, { "epoch": 4.56695556640625e-06, "model_forward_time": 0.02527928352355957, "step": 2993 }, { "epoch": 4.56695556640625e-06, "step": 2993, "training_step_time": 0.16721057891845703 }, { "epoch": 4.5684814453125e-06, "model_forward_time": 0.02418208122253418, "step": 2994 }, { "epoch": 4.5684814453125e-06, "step": 2994, "training_step_time": 0.141829252243042 }, { "epoch": 4.57000732421875e-06, "model_forward_time": 0.024295330047607422, "step": 2995 }, { "epoch": 4.57000732421875e-06, "step": 2995, "training_step_time": 0.21079754829406738 }, { "epoch": 4.571533203125e-06, "model_forward_time": 0.02372145652770996, "step": 2996 }, { "epoch": 4.571533203125e-06, "step": 2996, "training_step_time": 0.15844202041625977 }, { "epoch": 4.57305908203125e-06, "model_forward_time": 0.02421116828918457, "step": 2997 }, { "epoch": 4.57305908203125e-06, "step": 2997, "training_step_time": 0.18604803085327148 }, { "epoch": 4.5745849609375e-06, "model_forward_time": 0.02385234832763672, "step": 2998 }, { "epoch": 4.5745849609375e-06, "step": 2998, "training_step_time": 0.12929105758666992 }, { "epoch": 4.57611083984375e-06, "model_forward_time": 0.02402353286743164, "step": 2999 }, { "epoch": 4.57611083984375e-06, "step": 2999, "training_step_time": 0.11617779731750488 }, { "epoch": 4.57763671875e-06, "grad_norm": 0.7032703757286072, "learning_rate": 9.931806517013612e-05, "loss": 0.1262, "step": 3000 }, { "epoch": 4.57763671875e-06, "model_forward_time": 0.025274276733398438, "step": 3000 }, { "epoch": 4.57763671875e-06, "step": 3000, "training_step_time": 0.10644841194152832 }, { "epoch": 4.57916259765625e-06, "model_forward_time": 0.023895740509033203, "step": 3001 }, { "epoch": 4.57916259765625e-06, "step": 3001, "training_step_time": 0.10050320625305176 }, { "epoch": 4.5806884765625e-06, "model_forward_time": 0.024775981903076172, "step": 3002 }, { "epoch": 4.5806884765625e-06, "step": 3002, "training_step_time": 0.16507720947265625 }, { "epoch": 4.58221435546875e-06, "model_forward_time": 0.02513432502746582, "step": 3003 }, { "epoch": 4.58221435546875e-06, "step": 3003, "training_step_time": 0.13882660865783691 }, { "epoch": 4.583740234375e-06, "model_forward_time": 0.027904987335205078, "step": 3004 }, { "epoch": 4.583740234375e-06, "step": 3004, "training_step_time": 0.21424102783203125 }, { "epoch": 4.58526611328125e-06, "model_forward_time": 0.024539470672607422, "step": 3005 }, { "epoch": 4.58526611328125e-06, "step": 3005, "training_step_time": 0.13618683815002441 }, { "epoch": 4.5867919921875e-06, "model_forward_time": 0.02490067481994629, "step": 3006 }, { "epoch": 4.5867919921875e-06, "step": 3006, "training_step_time": 0.15112853050231934 }, { "epoch": 4.58831787109375e-06, "model_forward_time": 0.02464580535888672, "step": 3007 }, { "epoch": 4.58831787109375e-06, "step": 3007, "training_step_time": 0.1699233055114746 }, { "epoch": 4.58984375e-06, "model_forward_time": 0.025163888931274414, "step": 3008 }, { "epoch": 4.58984375e-06, "step": 3008, "training_step_time": 0.11030864715576172 }, { "epoch": 4.59136962890625e-06, "model_forward_time": 0.02524423599243164, "step": 3009 }, { "epoch": 4.59136962890625e-06, "step": 3009, "training_step_time": 0.16350126266479492 }, { "epoch": 4.5928955078125e-06, "grad_norm": 0.7527583837509155, "learning_rate": 9.930896346935077e-05, "loss": 0.1128, "step": 3010 }, { "epoch": 4.5928955078125e-06, "model_forward_time": 0.027840137481689453, "step": 3010 }, { "epoch": 4.5928955078125e-06, "step": 3010, "training_step_time": 0.1271955966949463 }, { "epoch": 4.59442138671875e-06, "model_forward_time": 0.024216175079345703, "step": 3011 }, { "epoch": 4.59442138671875e-06, "step": 3011, "training_step_time": 0.11459136009216309 }, { "epoch": 4.595947265625e-06, "model_forward_time": 0.025548458099365234, "step": 3012 }, { "epoch": 4.595947265625e-06, "step": 3012, "training_step_time": 0.11770272254943848 }, { "epoch": 4.59747314453125e-06, "model_forward_time": 0.025242090225219727, "step": 3013 }, { "epoch": 4.59747314453125e-06, "step": 3013, "training_step_time": 0.11095738410949707 }, { "epoch": 4.5989990234375e-06, "model_forward_time": 0.02518630027770996, "step": 3014 }, { "epoch": 4.5989990234375e-06, "step": 3014, "training_step_time": 0.10807490348815918 }, { "epoch": 4.60052490234375e-06, "model_forward_time": 0.025982379913330078, "step": 3015 }, { "epoch": 4.60052490234375e-06, "step": 3015, "training_step_time": 0.20018553733825684 }, { "epoch": 4.60205078125e-06, "model_forward_time": 0.024311065673828125, "step": 3016 }, { "epoch": 4.60205078125e-06, "step": 3016, "training_step_time": 0.10424637794494629 }, { "epoch": 4.60357666015625e-06, "model_forward_time": 0.024016618728637695, "step": 3017 }, { "epoch": 4.60357666015625e-06, "step": 3017, "training_step_time": 0.10770297050476074 }, { "epoch": 4.6051025390625e-06, "model_forward_time": 0.025191783905029297, "step": 3018 }, { "epoch": 4.6051025390625e-06, "step": 3018, "training_step_time": 0.11938261985778809 }, { "epoch": 4.60662841796875e-06, "model_forward_time": 0.0260465145111084, "step": 3019 }, { "epoch": 4.60662841796875e-06, "step": 3019, "training_step_time": 0.11019563674926758 }, { "epoch": 4.608154296875e-06, "grad_norm": 0.4577745497226715, "learning_rate": 9.929980185352526e-05, "loss": 0.1118, "step": 3020 }, { "epoch": 4.608154296875e-06, "model_forward_time": 0.02611684799194336, "step": 3020 }, { "epoch": 4.608154296875e-06, "step": 3020, "training_step_time": 0.16607999801635742 }, { "epoch": 4.60968017578125e-06, "model_forward_time": 0.024385929107666016, "step": 3021 }, { "epoch": 4.60968017578125e-06, "step": 3021, "training_step_time": 0.10687613487243652 }, { "epoch": 4.6112060546875e-06, "model_forward_time": 0.024969816207885742, "step": 3022 }, { "epoch": 4.6112060546875e-06, "step": 3022, "training_step_time": 0.10885810852050781 }, { "epoch": 4.61273193359375e-06, "model_forward_time": 0.0255582332611084, "step": 3023 }, { "epoch": 4.61273193359375e-06, "step": 3023, "training_step_time": 0.1081390380859375 }, { "epoch": 4.6142578125e-06, "model_forward_time": 0.02564835548400879, "step": 3024 }, { "epoch": 4.6142578125e-06, "step": 3024, "training_step_time": 0.11097168922424316 }, { "epoch": 4.61578369140625e-06, "model_forward_time": 0.025356769561767578, "step": 3025 }, { "epoch": 4.61578369140625e-06, "step": 3025, "training_step_time": 0.10971260070800781 }, { "epoch": 4.6173095703125e-06, "model_forward_time": 0.02590775489807129, "step": 3026 }, { "epoch": 4.6173095703125e-06, "step": 3026, "training_step_time": 0.10683202743530273 }, { "epoch": 4.61883544921875e-06, "model_forward_time": 0.025111913681030273, "step": 3027 }, { "epoch": 4.61883544921875e-06, "step": 3027, "training_step_time": 0.11165094375610352 }, { "epoch": 4.620361328125e-06, "model_forward_time": 0.0275270938873291, "step": 3028 }, { "epoch": 4.620361328125e-06, "step": 3028, "training_step_time": 0.11361312866210938 }, { "epoch": 4.62188720703125e-06, "model_forward_time": 0.02532029151916504, "step": 3029 }, { "epoch": 4.62188720703125e-06, "step": 3029, "training_step_time": 0.10674166679382324 }, { "epoch": 4.6234130859375e-06, "grad_norm": 0.5803720951080322, "learning_rate": 9.929058033379181e-05, "loss": 0.1292, "step": 3030 }, { "epoch": 4.6234130859375e-06, "model_forward_time": 0.02605438232421875, "step": 3030 }, { "epoch": 4.6234130859375e-06, "step": 3030, "training_step_time": 0.10945558547973633 }, { "epoch": 4.62493896484375e-06, "model_forward_time": 0.025441408157348633, "step": 3031 }, { "epoch": 4.62493896484375e-06, "step": 3031, "training_step_time": 0.11014413833618164 }, { "epoch": 4.62646484375e-06, "model_forward_time": 0.02509140968322754, "step": 3032 }, { "epoch": 4.62646484375e-06, "step": 3032, "training_step_time": 0.11038565635681152 }, { "epoch": 4.62799072265625e-06, "model_forward_time": 0.02730250358581543, "step": 3033 }, { "epoch": 4.62799072265625e-06, "step": 3033, "training_step_time": 0.11754989624023438 }, { "epoch": 4.6295166015625e-06, "model_forward_time": 0.025072574615478516, "step": 3034 }, { "epoch": 4.6295166015625e-06, "step": 3034, "training_step_time": 0.10888481140136719 }, { "epoch": 4.63104248046875e-06, "model_forward_time": 0.025486230850219727, "step": 3035 }, { "epoch": 4.63104248046875e-06, "step": 3035, "training_step_time": 0.10706686973571777 }, { "epoch": 4.632568359375e-06, "model_forward_time": 0.026131391525268555, "step": 3036 }, { "epoch": 4.632568359375e-06, "step": 3036, "training_step_time": 0.1116795539855957 }, { "epoch": 4.63409423828125e-06, "model_forward_time": 0.025681734085083008, "step": 3037 }, { "epoch": 4.63409423828125e-06, "step": 3037, "training_step_time": 0.11033320426940918 }, { "epoch": 4.6356201171875e-06, "model_forward_time": 0.025547504425048828, "step": 3038 }, { "epoch": 4.6356201171875e-06, "step": 3038, "training_step_time": 0.1141977310180664 }, { "epoch": 4.63714599609375e-06, "model_forward_time": 0.025180578231811523, "step": 3039 }, { "epoch": 4.63714599609375e-06, "step": 3039, "training_step_time": 0.10655546188354492 }, { "epoch": 4.638671875e-06, "grad_norm": 0.4894583225250244, "learning_rate": 9.92812989213555e-05, "loss": 0.1352, "step": 3040 }, { "epoch": 4.638671875e-06, "model_forward_time": 0.02577948570251465, "step": 3040 }, { "epoch": 4.638671875e-06, "step": 3040, "training_step_time": 0.10836982727050781 }, { "epoch": 4.64019775390625e-06, "model_forward_time": 0.02597522735595703, "step": 3041 }, { "epoch": 4.64019775390625e-06, "step": 3041, "training_step_time": 0.11708593368530273 }, { "epoch": 4.6417236328125e-06, "model_forward_time": 0.025516986846923828, "step": 3042 }, { "epoch": 4.6417236328125e-06, "step": 3042, "training_step_time": 0.10789799690246582 }, { "epoch": 4.64324951171875e-06, "model_forward_time": 0.025321006774902344, "step": 3043 }, { "epoch": 4.64324951171875e-06, "step": 3043, "training_step_time": 0.10865330696105957 }, { "epoch": 4.644775390625e-06, "model_forward_time": 0.02486586570739746, "step": 3044 }, { "epoch": 4.644775390625e-06, "step": 3044, "training_step_time": 0.10664510726928711 }, { "epoch": 4.64630126953125e-06, "model_forward_time": 0.02589869499206543, "step": 3045 }, { "epoch": 4.64630126953125e-06, "step": 3045, "training_step_time": 0.10899591445922852 }, { "epoch": 4.6478271484375e-06, "model_forward_time": 0.029685020446777344, "step": 3046 }, { "epoch": 4.6478271484375e-06, "step": 3046, "training_step_time": 0.1338672637939453 }, { "epoch": 4.64935302734375e-06, "model_forward_time": 0.025487422943115234, "step": 3047 }, { "epoch": 4.64935302734375e-06, "step": 3047, "training_step_time": 0.18281102180480957 }, { "epoch": 4.65087890625e-06, "model_forward_time": 0.024692773818969727, "step": 3048 }, { "epoch": 4.65087890625e-06, "step": 3048, "training_step_time": 0.21215152740478516 }, { "epoch": 4.65240478515625e-06, "model_forward_time": 0.025002002716064453, "step": 3049 }, { "epoch": 4.65240478515625e-06, "step": 3049, "training_step_time": 0.16957807540893555 }, { "epoch": 4.6539306640625e-06, "grad_norm": 0.9082778096199036, "learning_rate": 9.927195762749405e-05, "loss": 0.1249, "step": 3050 }, { "epoch": 4.6539306640625e-06, "model_forward_time": 0.02416205406188965, "step": 3050 }, { "epoch": 4.6539306640625e-06, "step": 3050, "training_step_time": 0.17082524299621582 }, { "epoch": 4.65545654296875e-06, "model_forward_time": 0.024770259857177734, "step": 3051 }, { "epoch": 4.65545654296875e-06, "step": 3051, "training_step_time": 0.18565702438354492 }, { "epoch": 4.656982421875e-06, "model_forward_time": 0.02504587173461914, "step": 3052 }, { "epoch": 4.656982421875e-06, "step": 3052, "training_step_time": 0.12810707092285156 }, { "epoch": 4.65850830078125e-06, "model_forward_time": 0.025627851486206055, "step": 3053 }, { "epoch": 4.65850830078125e-06, "step": 3053, "training_step_time": 0.1111907958984375 }, { "epoch": 4.6600341796875e-06, "model_forward_time": 0.02577805519104004, "step": 3054 }, { "epoch": 4.6600341796875e-06, "step": 3054, "training_step_time": 0.17374491691589355 }, { "epoch": 4.66156005859375e-06, "model_forward_time": 0.025226831436157227, "step": 3055 }, { "epoch": 4.66156005859375e-06, "step": 3055, "training_step_time": 0.1413719654083252 }, { "epoch": 4.6630859375e-06, "model_forward_time": 0.024529457092285156, "step": 3056 }, { "epoch": 4.6630859375e-06, "step": 3056, "training_step_time": 0.10995221138000488 }, { "epoch": 4.66461181640625e-06, "model_forward_time": 0.025147438049316406, "step": 3057 }, { "epoch": 4.66461181640625e-06, "step": 3057, "training_step_time": 0.11493563652038574 }, { "epoch": 4.6661376953125e-06, "model_forward_time": 0.025557994842529297, "step": 3058 }, { "epoch": 4.6661376953125e-06, "step": 3058, "training_step_time": 0.11695313453674316 }, { "epoch": 4.66766357421875e-06, "model_forward_time": 0.02530503273010254, "step": 3059 }, { "epoch": 4.66766357421875e-06, "step": 3059, "training_step_time": 0.1722719669342041 }, { "epoch": 4.669189453125e-06, "grad_norm": 0.796428382396698, "learning_rate": 9.926255646355804e-05, "loss": 0.1306, "step": 3060 }, { "epoch": 4.669189453125e-06, "model_forward_time": 0.024994611740112305, "step": 3060 }, { "epoch": 4.669189453125e-06, "step": 3060, "training_step_time": 0.19031000137329102 }, { "epoch": 4.67071533203125e-06, "model_forward_time": 0.026267290115356445, "step": 3061 }, { "epoch": 4.67071533203125e-06, "step": 3061, "training_step_time": 0.18209218978881836 }, { "epoch": 4.6722412109375e-06, "model_forward_time": 0.02486419677734375, "step": 3062 }, { "epoch": 4.6722412109375e-06, "step": 3062, "training_step_time": 0.17567920684814453 }, { "epoch": 4.67376708984375e-06, "model_forward_time": 0.025582313537597656, "step": 3063 }, { "epoch": 4.67376708984375e-06, "step": 3063, "training_step_time": 0.13541650772094727 }, { "epoch": 4.67529296875e-06, "model_forward_time": 0.025574684143066406, "step": 3064 }, { "epoch": 4.67529296875e-06, "step": 3064, "training_step_time": 0.10504913330078125 }, { "epoch": 4.67681884765625e-06, "model_forward_time": 0.025575637817382812, "step": 3065 }, { "epoch": 4.67681884765625e-06, "step": 3065, "training_step_time": 0.11639595031738281 }, { "epoch": 4.6783447265625e-06, "model_forward_time": 0.02613687515258789, "step": 3066 }, { "epoch": 4.6783447265625e-06, "step": 3066, "training_step_time": 0.11397767066955566 }, { "epoch": 4.67987060546875e-06, "model_forward_time": 0.025957584381103516, "step": 3067 }, { "epoch": 4.67987060546875e-06, "step": 3067, "training_step_time": 0.1090846061706543 }, { "epoch": 4.681396484375e-06, "model_forward_time": 0.0264284610748291, "step": 3068 }, { "epoch": 4.681396484375e-06, "step": 3068, "training_step_time": 0.11313819885253906 }, { "epoch": 4.68292236328125e-06, "model_forward_time": 0.026006698608398438, "step": 3069 }, { "epoch": 4.68292236328125e-06, "step": 3069, "training_step_time": 0.10953092575073242 }, { "epoch": 4.6844482421875e-06, "grad_norm": 0.6938896179199219, "learning_rate": 9.925309544097078e-05, "loss": 0.1133, "step": 3070 }, { "epoch": 4.6844482421875e-06, "model_forward_time": 0.025972366333007812, "step": 3070 }, { "epoch": 4.6844482421875e-06, "step": 3070, "training_step_time": 0.10843706130981445 }, { "epoch": 4.68597412109375e-06, "model_forward_time": 0.025505542755126953, "step": 3071 }, { "epoch": 4.68597412109375e-06, "step": 3071, "training_step_time": 0.11654138565063477 }, { "epoch": 4.6875e-06, "model_forward_time": 0.025406599044799805, "step": 3072 }, { "epoch": 4.6875e-06, "step": 3072, "training_step_time": 0.10954761505126953 }, { "epoch": 4.68902587890625e-06, "model_forward_time": 0.025370121002197266, "step": 3073 }, { "epoch": 4.68902587890625e-06, "step": 3073, "training_step_time": 0.10794854164123535 }, { "epoch": 4.6905517578125e-06, "model_forward_time": 0.025587081909179688, "step": 3074 }, { "epoch": 4.6905517578125e-06, "step": 3074, "training_step_time": 0.10911726951599121 }, { "epoch": 4.69207763671875e-06, "model_forward_time": 0.02571702003479004, "step": 3075 }, { "epoch": 4.69207763671875e-06, "step": 3075, "training_step_time": 0.12769460678100586 }, { "epoch": 4.693603515625e-06, "model_forward_time": 0.028377532958984375, "step": 3076 }, { "epoch": 4.693603515625e-06, "step": 3076, "training_step_time": 0.1168069839477539 }, { "epoch": 4.69512939453125e-06, "model_forward_time": 0.025876522064208984, "step": 3077 }, { "epoch": 4.69512939453125e-06, "step": 3077, "training_step_time": 0.10981917381286621 }, { "epoch": 4.6966552734375e-06, "model_forward_time": 0.02604365348815918, "step": 3078 }, { "epoch": 4.6966552734375e-06, "step": 3078, "training_step_time": 0.10936856269836426 }, { "epoch": 4.69818115234375e-06, "model_forward_time": 0.02559041976928711, "step": 3079 }, { "epoch": 4.69818115234375e-06, "step": 3079, "training_step_time": 0.11368846893310547 }, { "epoch": 4.69970703125e-06, "grad_norm": 0.7649744153022766, "learning_rate": 9.924357457122828e-05, "loss": 0.1368, "step": 3080 }, { "epoch": 4.69970703125e-06, "model_forward_time": 0.0251619815826416, "step": 3080 }, { "epoch": 4.69970703125e-06, "step": 3080, "training_step_time": 0.10979294776916504 }, { "epoch": 4.70123291015625e-06, "model_forward_time": 0.025305747985839844, "step": 3081 }, { "epoch": 4.70123291015625e-06, "step": 3081, "training_step_time": 0.10976910591125488 }, { "epoch": 4.7027587890625e-06, "model_forward_time": 0.02550363540649414, "step": 3082 }, { "epoch": 4.7027587890625e-06, "step": 3082, "training_step_time": 0.10929536819458008 }, { "epoch": 4.70428466796875e-06, "model_forward_time": 0.025512218475341797, "step": 3083 }, { "epoch": 4.70428466796875e-06, "step": 3083, "training_step_time": 0.11077189445495605 }, { "epoch": 4.705810546875e-06, "model_forward_time": 0.02516913414001465, "step": 3084 }, { "epoch": 4.705810546875e-06, "step": 3084, "training_step_time": 0.11489057540893555 }, { "epoch": 4.70733642578125e-06, "model_forward_time": 0.025218486785888672, "step": 3085 }, { "epoch": 4.70733642578125e-06, "step": 3085, "training_step_time": 0.10700464248657227 }, { "epoch": 4.7088623046875e-06, "model_forward_time": 0.025419950485229492, "step": 3086 }, { "epoch": 4.7088623046875e-06, "step": 3086, "training_step_time": 0.10664486885070801 }, { "epoch": 4.71038818359375e-06, "model_forward_time": 0.02564096450805664, "step": 3087 }, { "epoch": 4.71038818359375e-06, "step": 3087, "training_step_time": 0.10914230346679688 }, { "epoch": 4.7119140625e-06, "model_forward_time": 0.02474236488342285, "step": 3088 }, { "epoch": 4.7119140625e-06, "step": 3088, "training_step_time": 0.10856270790100098 }, { "epoch": 4.71343994140625e-06, "model_forward_time": 0.024720430374145508, "step": 3089 }, { "epoch": 4.71343994140625e-06, "step": 3089, "training_step_time": 0.10740780830383301 }, { "epoch": 4.7149658203125e-06, "grad_norm": 0.8241348266601562, "learning_rate": 9.923399386589933e-05, "loss": 0.1421, "step": 3090 }, { "epoch": 4.7149658203125e-06, "model_forward_time": 0.025288105010986328, "step": 3090 }, { "epoch": 4.7149658203125e-06, "step": 3090, "training_step_time": 0.11110687255859375 }, { "epoch": 4.71649169921875e-06, "model_forward_time": 0.0258939266204834, "step": 3091 }, { "epoch": 4.71649169921875e-06, "step": 3091, "training_step_time": 0.11382651329040527 }, { "epoch": 4.718017578125e-06, "model_forward_time": 0.025494098663330078, "step": 3092 }, { "epoch": 4.718017578125e-06, "step": 3092, "training_step_time": 0.1103208065032959 }, { "epoch": 4.71954345703125e-06, "model_forward_time": 0.025516271591186523, "step": 3093 }, { "epoch": 4.71954345703125e-06, "step": 3093, "training_step_time": 0.20687389373779297 }, { "epoch": 4.7210693359375e-06, "model_forward_time": 0.025048494338989258, "step": 3094 }, { "epoch": 4.7210693359375e-06, "step": 3094, "training_step_time": 0.15758085250854492 }, { "epoch": 4.72259521484375e-06, "model_forward_time": 0.024439573287963867, "step": 3095 }, { "epoch": 4.72259521484375e-06, "step": 3095, "training_step_time": 0.1678307056427002 }, { "epoch": 4.72412109375e-06, "model_forward_time": 0.024590730667114258, "step": 3096 }, { "epoch": 4.72412109375e-06, "step": 3096, "training_step_time": 0.17733073234558105 }, { "epoch": 4.72564697265625e-06, "model_forward_time": 0.024485111236572266, "step": 3097 }, { "epoch": 4.72564697265625e-06, "step": 3097, "training_step_time": 0.18431401252746582 }, { "epoch": 4.7271728515625e-06, "model_forward_time": 0.024735450744628906, "step": 3098 }, { "epoch": 4.7271728515625e-06, "step": 3098, "training_step_time": 0.11357283592224121 }, { "epoch": 4.72869873046875e-06, "model_forward_time": 0.024533748626708984, "step": 3099 }, { "epoch": 4.72869873046875e-06, "step": 3099, "training_step_time": 0.10903763771057129 }, { "epoch": 4.730224609375e-06, "grad_norm": 0.7410247325897217, "learning_rate": 9.922435333662536e-05, "loss": 0.1088, "step": 3100 }, { "epoch": 4.730224609375e-06, "model_forward_time": 0.025169849395751953, "step": 3100 }, { "epoch": 4.730224609375e-06, "step": 3100, "training_step_time": 0.11604928970336914 }, { "epoch": 4.73175048828125e-06, "model_forward_time": 0.025543928146362305, "step": 3101 }, { "epoch": 4.73175048828125e-06, "step": 3101, "training_step_time": 0.11035680770874023 }, { "epoch": 4.7332763671875e-06, "model_forward_time": 0.0251615047454834, "step": 3102 }, { "epoch": 4.7332763671875e-06, "step": 3102, "training_step_time": 0.17905545234680176 }, { "epoch": 4.73480224609375e-06, "model_forward_time": 0.023978471755981445, "step": 3103 }, { "epoch": 4.73480224609375e-06, "step": 3103, "training_step_time": 0.1168661117553711 }, { "epoch": 4.736328125e-06, "model_forward_time": 0.024658679962158203, "step": 3104 }, { "epoch": 4.736328125e-06, "step": 3104, "training_step_time": 0.10789823532104492 }, { "epoch": 4.73785400390625e-06, "model_forward_time": 0.02521228790283203, "step": 3105 }, { "epoch": 4.73785400390625e-06, "step": 3105, "training_step_time": 0.19896316528320312 }, { "epoch": 4.7393798828125e-06, "model_forward_time": 0.02454853057861328, "step": 3106 }, { "epoch": 4.7393798828125e-06, "step": 3106, "training_step_time": 0.16744017601013184 }, { "epoch": 4.74090576171875e-06, "model_forward_time": 0.024479150772094727, "step": 3107 }, { "epoch": 4.74090576171875e-06, "step": 3107, "training_step_time": 0.11704277992248535 }, { "epoch": 4.742431640625e-06, "model_forward_time": 0.025068044662475586, "step": 3108 }, { "epoch": 4.742431640625e-06, "step": 3108, "training_step_time": 0.11910486221313477 }, { "epoch": 4.74395751953125e-06, "model_forward_time": 0.025223970413208008, "step": 3109 }, { "epoch": 4.74395751953125e-06, "step": 3109, "training_step_time": 0.11467719078063965 }, { "epoch": 4.7454833984375e-06, "grad_norm": 0.8477486968040466, "learning_rate": 9.921465299512054e-05, "loss": 0.1006, "step": 3110 }, { "epoch": 4.7454833984375e-06, "model_forward_time": 0.025619029998779297, "step": 3110 }, { "epoch": 4.7454833984375e-06, "step": 3110, "training_step_time": 0.20810341835021973 }, { "epoch": 4.74700927734375e-06, "model_forward_time": 0.024345874786376953, "step": 3111 }, { "epoch": 4.74700927734375e-06, "step": 3111, "training_step_time": 0.11537885665893555 }, { "epoch": 4.74853515625e-06, "model_forward_time": 0.026366710662841797, "step": 3112 }, { "epoch": 4.74853515625e-06, "step": 3112, "training_step_time": 0.11254429817199707 }, { "epoch": 4.75006103515625e-06, "model_forward_time": 0.025374889373779297, "step": 3113 }, { "epoch": 4.75006103515625e-06, "step": 3113, "training_step_time": 0.11008405685424805 }, { "epoch": 4.7515869140625e-06, "model_forward_time": 0.025318622589111328, "step": 3114 }, { "epoch": 4.7515869140625e-06, "step": 3114, "training_step_time": 0.11137890815734863 }, { "epoch": 4.75311279296875e-06, "model_forward_time": 0.024925708770751953, "step": 3115 }, { "epoch": 4.75311279296875e-06, "step": 3115, "training_step_time": 0.10834264755249023 }, { "epoch": 4.754638671875e-06, "model_forward_time": 0.025073766708374023, "step": 3116 }, { "epoch": 4.754638671875e-06, "step": 3116, "training_step_time": 0.10764598846435547 }, { "epoch": 4.75616455078125e-06, "model_forward_time": 0.025084257125854492, "step": 3117 }, { "epoch": 4.75616455078125e-06, "step": 3117, "training_step_time": 0.10880208015441895 }, { "epoch": 4.7576904296875e-06, "model_forward_time": 0.024461984634399414, "step": 3118 }, { "epoch": 4.7576904296875e-06, "step": 3118, "training_step_time": 0.11026763916015625 }, { "epoch": 4.75921630859375e-06, "model_forward_time": 0.024316072463989258, "step": 3119 }, { "epoch": 4.75921630859375e-06, "step": 3119, "training_step_time": 0.10907745361328125 }, { "epoch": 4.7607421875e-06, "grad_norm": 0.5950537323951721, "learning_rate": 9.92048928531717e-05, "loss": 0.1217, "step": 3120 }, { "epoch": 4.7607421875e-06, "model_forward_time": 0.024172544479370117, "step": 3120 }, { "epoch": 4.7607421875e-06, "step": 3120, "training_step_time": 0.112945556640625 }, { "epoch": 4.76226806640625e-06, "model_forward_time": 0.02507925033569336, "step": 3121 }, { "epoch": 4.76226806640625e-06, "step": 3121, "training_step_time": 0.10976481437683105 }, { "epoch": 4.7637939453125e-06, "model_forward_time": 0.02536463737487793, "step": 3122 }, { "epoch": 4.7637939453125e-06, "step": 3122, "training_step_time": 0.10746359825134277 }, { "epoch": 4.76531982421875e-06, "model_forward_time": 0.025304079055786133, "step": 3123 }, { "epoch": 4.76531982421875e-06, "step": 3123, "training_step_time": 0.10760259628295898 }, { "epoch": 4.766845703125e-06, "model_forward_time": 0.02433013916015625, "step": 3124 }, { "epoch": 4.766845703125e-06, "step": 3124, "training_step_time": 0.10927248001098633 }, { "epoch": 4.76837158203125e-06, "model_forward_time": 0.02547144889831543, "step": 3125 }, { "epoch": 4.76837158203125e-06, "step": 3125, "training_step_time": 0.10896539688110352 }, { "epoch": 4.7698974609375e-06, "model_forward_time": 0.02511119842529297, "step": 3126 }, { "epoch": 4.7698974609375e-06, "step": 3126, "training_step_time": 0.10831022262573242 }, { "epoch": 4.77142333984375e-06, "model_forward_time": 0.025262117385864258, "step": 3127 }, { "epoch": 4.77142333984375e-06, "step": 3127, "training_step_time": 0.10940718650817871 }, { "epoch": 4.77294921875e-06, "model_forward_time": 0.02514195442199707, "step": 3128 }, { "epoch": 4.77294921875e-06, "step": 3128, "training_step_time": 0.10848808288574219 }, { "epoch": 4.77447509765625e-06, "model_forward_time": 0.025318384170532227, "step": 3129 }, { "epoch": 4.77447509765625e-06, "step": 3129, "training_step_time": 0.1079854965209961 }, { "epoch": 4.7760009765625e-06, "grad_norm": 0.4268414378166199, "learning_rate": 9.91950729226383e-05, "loss": 0.1098, "step": 3130 }, { "epoch": 4.7760009765625e-06, "model_forward_time": 0.025234699249267578, "step": 3130 }, { "epoch": 4.7760009765625e-06, "step": 3130, "training_step_time": 0.10901474952697754 }, { "epoch": 4.77752685546875e-06, "model_forward_time": 0.02529597282409668, "step": 3131 }, { "epoch": 4.77752685546875e-06, "step": 3131, "training_step_time": 0.11394929885864258 }, { "epoch": 4.779052734375e-06, "model_forward_time": 0.02624988555908203, "step": 3132 }, { "epoch": 4.779052734375e-06, "step": 3132, "training_step_time": 0.10981297492980957 }, { "epoch": 4.78057861328125e-06, "model_forward_time": 0.025560379028320312, "step": 3133 }, { "epoch": 4.78057861328125e-06, "step": 3133, "training_step_time": 0.10924196243286133 }, { "epoch": 4.7821044921875e-06, "model_forward_time": 0.025394439697265625, "step": 3134 }, { "epoch": 4.7821044921875e-06, "step": 3134, "training_step_time": 0.10924434661865234 }, { "epoch": 4.78363037109375e-06, "model_forward_time": 0.0254213809967041, "step": 3135 }, { "epoch": 4.78363037109375e-06, "step": 3135, "training_step_time": 0.11183977127075195 }, { "epoch": 4.78515625e-06, "model_forward_time": 0.025263071060180664, "step": 3136 }, { "epoch": 4.78515625e-06, "step": 3136, "training_step_time": 0.1081991195678711 }, { "epoch": 4.78668212890625e-06, "model_forward_time": 0.025624513626098633, "step": 3137 }, { "epoch": 4.78668212890625e-06, "step": 3137, "training_step_time": 0.11359095573425293 }, { "epoch": 4.7882080078125e-06, "model_forward_time": 0.025541305541992188, "step": 3138 }, { "epoch": 4.7882080078125e-06, "step": 3138, "training_step_time": 0.10698437690734863 }, { "epoch": 4.78973388671875e-06, "model_forward_time": 0.02619338035583496, "step": 3139 }, { "epoch": 4.78973388671875e-06, "step": 3139, "training_step_time": 0.21125388145446777 }, { "epoch": 4.791259765625e-06, "grad_norm": 0.5600676536560059, "learning_rate": 9.918519321545251e-05, "loss": 0.1177, "step": 3140 }, { "epoch": 4.791259765625e-06, "model_forward_time": 0.024468421936035156, "step": 3140 }, { "epoch": 4.791259765625e-06, "step": 3140, "training_step_time": 0.1807880401611328 }, { "epoch": 4.79278564453125e-06, "model_forward_time": 0.024535655975341797, "step": 3141 }, { "epoch": 4.79278564453125e-06, "step": 3141, "training_step_time": 0.1674511432647705 }, { "epoch": 4.7943115234375e-06, "model_forward_time": 0.02409529685974121, "step": 3142 }, { "epoch": 4.7943115234375e-06, "step": 3142, "training_step_time": 0.21997880935668945 }, { "epoch": 4.79583740234375e-06, "model_forward_time": 0.024723052978515625, "step": 3143 }, { "epoch": 4.79583740234375e-06, "step": 3143, "training_step_time": 0.1482689380645752 }, { "epoch": 4.79736328125e-06, "model_forward_time": 0.025604248046875, "step": 3144 }, { "epoch": 4.79736328125e-06, "step": 3144, "training_step_time": 0.17982006072998047 }, { "epoch": 4.79888916015625e-06, "model_forward_time": 0.024657249450683594, "step": 3145 }, { "epoch": 4.79888916015625e-06, "step": 3145, "training_step_time": 0.13733506202697754 }, { "epoch": 4.8004150390625e-06, "model_forward_time": 0.02406144142150879, "step": 3146 }, { "epoch": 4.8004150390625e-06, "step": 3146, "training_step_time": 0.11666083335876465 }, { "epoch": 4.80194091796875e-06, "model_forward_time": 0.025442123413085938, "step": 3147 }, { "epoch": 4.80194091796875e-06, "step": 3147, "training_step_time": 0.10765981674194336 }, { "epoch": 4.803466796875e-06, "model_forward_time": 0.0256044864654541, "step": 3148 }, { "epoch": 4.803466796875e-06, "step": 3148, "training_step_time": 0.1148371696472168 }, { "epoch": 4.80499267578125e-06, "model_forward_time": 0.02514481544494629, "step": 3149 }, { "epoch": 4.80499267578125e-06, "step": 3149, "training_step_time": 0.10923099517822266 }, { "epoch": 4.8065185546875e-06, "grad_norm": 0.7370355129241943, "learning_rate": 9.917525374361912e-05, "loss": 0.1256, "step": 3150 }, { "epoch": 4.8065185546875e-06, "model_forward_time": 0.025751829147338867, "step": 3150 }, { "epoch": 4.8065185546875e-06, "step": 3150, "training_step_time": 0.19551610946655273 }, { "epoch": 4.80804443359375e-06, "model_forward_time": 0.02410435676574707, "step": 3151 }, { "epoch": 4.80804443359375e-06, "step": 3151, "training_step_time": 0.18570613861083984 }, { "epoch": 4.8095703125e-06, "model_forward_time": 0.024379730224609375, "step": 3152 }, { "epoch": 4.8095703125e-06, "step": 3152, "training_step_time": 0.12948274612426758 }, { "epoch": 4.81109619140625e-06, "model_forward_time": 0.02476191520690918, "step": 3153 }, { "epoch": 4.81109619140625e-06, "step": 3153, "training_step_time": 0.11074709892272949 }, { "epoch": 4.8126220703125e-06, "model_forward_time": 0.026234149932861328, "step": 3154 }, { "epoch": 4.8126220703125e-06, "step": 3154, "training_step_time": 0.11652326583862305 }, { "epoch": 4.81414794921875e-06, "model_forward_time": 0.025310516357421875, "step": 3155 }, { "epoch": 4.81414794921875e-06, "step": 3155, "training_step_time": 0.2198777198791504 }, { "epoch": 4.815673828125e-06, "model_forward_time": 0.024760007858276367, "step": 3156 }, { "epoch": 4.815673828125e-06, "step": 3156, "training_step_time": 0.10975384712219238 }, { "epoch": 4.81719970703125e-06, "model_forward_time": 0.025514841079711914, "step": 3157 }, { "epoch": 4.81719970703125e-06, "step": 3157, "training_step_time": 0.10725116729736328 }, { "epoch": 4.8187255859375e-06, "model_forward_time": 0.02529287338256836, "step": 3158 }, { "epoch": 4.8187255859375e-06, "step": 3158, "training_step_time": 0.11344408988952637 }, { "epoch": 4.82025146484375e-06, "model_forward_time": 0.025048494338989258, "step": 3159 }, { "epoch": 4.82025146484375e-06, "step": 3159, "training_step_time": 0.10832333564758301 }, { "epoch": 4.82177734375e-06, "grad_norm": 0.8812365531921387, "learning_rate": 9.91652545192155e-05, "loss": 0.14, "step": 3160 }, { "epoch": 4.82177734375e-06, "model_forward_time": 0.02489018440246582, "step": 3160 }, { "epoch": 4.82177734375e-06, "step": 3160, "training_step_time": 0.11364555358886719 }, { "epoch": 4.82330322265625e-06, "model_forward_time": 0.02547287940979004, "step": 3161 }, { "epoch": 4.82330322265625e-06, "step": 3161, "training_step_time": 0.1086728572845459 }, { "epoch": 4.8248291015625e-06, "model_forward_time": 0.02527642250061035, "step": 3162 }, { "epoch": 4.8248291015625e-06, "step": 3162, "training_step_time": 0.10724067687988281 }, { "epoch": 4.82635498046875e-06, "model_forward_time": 0.025109529495239258, "step": 3163 }, { "epoch": 4.82635498046875e-06, "step": 3163, "training_step_time": 0.10861659049987793 }, { "epoch": 4.827880859375e-06, "model_forward_time": 0.025444746017456055, "step": 3164 }, { "epoch": 4.827880859375e-06, "step": 3164, "training_step_time": 0.1164860725402832 }, { "epoch": 4.82940673828125e-06, "model_forward_time": 0.02514934539794922, "step": 3165 }, { "epoch": 4.82940673828125e-06, "step": 3165, "training_step_time": 0.10828614234924316 }, { "epoch": 4.8309326171875e-06, "model_forward_time": 0.025427579879760742, "step": 3166 }, { "epoch": 4.8309326171875e-06, "step": 3166, "training_step_time": 0.11019372940063477 }, { "epoch": 4.83245849609375e-06, "model_forward_time": 0.025220870971679688, "step": 3167 }, { "epoch": 4.83245849609375e-06, "step": 3167, "training_step_time": 0.10805034637451172 }, { "epoch": 4.833984375e-06, "model_forward_time": 0.025301218032836914, "step": 3168 }, { "epoch": 4.833984375e-06, "step": 3168, "training_step_time": 0.11912965774536133 }, { "epoch": 4.83551025390625e-06, "model_forward_time": 0.025051355361938477, "step": 3169 }, { "epoch": 4.83551025390625e-06, "step": 3169, "training_step_time": 0.11191511154174805 }, { "epoch": 4.8370361328125e-06, "grad_norm": 0.9046509265899658, "learning_rate": 9.915519555439166e-05, "loss": 0.1481, "step": 3170 }, { "epoch": 4.8370361328125e-06, "model_forward_time": 0.025673389434814453, "step": 3170 }, { "epoch": 4.8370361328125e-06, "step": 3170, "training_step_time": 0.11039066314697266 }, { "epoch": 4.83856201171875e-06, "model_forward_time": 0.025052547454833984, "step": 3171 }, { "epoch": 4.83856201171875e-06, "step": 3171, "training_step_time": 0.14138412475585938 }, { "epoch": 4.840087890625e-06, "model_forward_time": 0.025269031524658203, "step": 3172 }, { "epoch": 4.840087890625e-06, "step": 3172, "training_step_time": 0.16205191612243652 }, { "epoch": 4.84161376953125e-06, "model_forward_time": 0.02432107925415039, "step": 3173 }, { "epoch": 4.84161376953125e-06, "step": 3173, "training_step_time": 0.1522657871246338 }, { "epoch": 4.8431396484375e-06, "model_forward_time": 0.024186372756958008, "step": 3174 }, { "epoch": 4.8431396484375e-06, "step": 3174, "training_step_time": 0.13804411888122559 }, { "epoch": 4.84466552734375e-06, "model_forward_time": 0.024758100509643555, "step": 3175 }, { "epoch": 4.84466552734375e-06, "step": 3175, "training_step_time": 0.13001513481140137 }, { "epoch": 4.84619140625e-06, "model_forward_time": 0.024761199951171875, "step": 3176 }, { "epoch": 4.84619140625e-06, "step": 3176, "training_step_time": 0.12532424926757812 }, { "epoch": 4.84771728515625e-06, "model_forward_time": 0.02473616600036621, "step": 3177 }, { "epoch": 4.84771728515625e-06, "step": 3177, "training_step_time": 0.12359476089477539 }, { "epoch": 4.8492431640625e-06, "model_forward_time": 0.025469303131103516, "step": 3178 }, { "epoch": 4.8492431640625e-06, "step": 3178, "training_step_time": 0.12058568000793457 }, { "epoch": 4.85076904296875e-06, "model_forward_time": 0.025388002395629883, "step": 3179 }, { "epoch": 4.85076904296875e-06, "step": 3179, "training_step_time": 0.11426448822021484 }, { "epoch": 4.852294921875e-06, "grad_norm": 0.9116901755332947, "learning_rate": 9.914507686137019e-05, "loss": 0.1576, "step": 3180 }, { "epoch": 4.852294921875e-06, "model_forward_time": 0.025117874145507812, "step": 3180 }, { "epoch": 4.852294921875e-06, "step": 3180, "training_step_time": 0.11200404167175293 }, { "epoch": 4.85382080078125e-06, "model_forward_time": 0.025031566619873047, "step": 3181 }, { "epoch": 4.85382080078125e-06, "step": 3181, "training_step_time": 0.2213914394378662 }, { "epoch": 4.8553466796875e-06, "model_forward_time": 0.024785995483398438, "step": 3182 }, { "epoch": 4.8553466796875e-06, "step": 3182, "training_step_time": 0.11844182014465332 }, { "epoch": 4.85687255859375e-06, "model_forward_time": 0.024105548858642578, "step": 3183 }, { "epoch": 4.85687255859375e-06, "step": 3183, "training_step_time": 0.18578124046325684 }, { "epoch": 4.8583984375e-06, "model_forward_time": 0.02423095703125, "step": 3184 }, { "epoch": 4.8583984375e-06, "step": 3184, "training_step_time": 0.13434386253356934 }, { "epoch": 4.85992431640625e-06, "model_forward_time": 0.024040937423706055, "step": 3185 }, { "epoch": 4.85992431640625e-06, "step": 3185, "training_step_time": 0.19899797439575195 }, { "epoch": 4.8614501953125e-06, "model_forward_time": 0.02435588836669922, "step": 3186 }, { "epoch": 4.8614501953125e-06, "step": 3186, "training_step_time": 0.16465091705322266 }, { "epoch": 4.86297607421875e-06, "model_forward_time": 0.024432659149169922, "step": 3187 }, { "epoch": 4.86297607421875e-06, "step": 3187, "training_step_time": 0.1498579978942871 }, { "epoch": 4.864501953125e-06, "model_forward_time": 0.02463388442993164, "step": 3188 }, { "epoch": 4.864501953125e-06, "step": 3188, "training_step_time": 0.1433238983154297 }, { "epoch": 4.86602783203125e-06, "model_forward_time": 0.0248563289642334, "step": 3189 }, { "epoch": 4.86602783203125e-06, "step": 3189, "training_step_time": 0.11121296882629395 }, { "epoch": 4.8675537109375e-06, "grad_norm": 0.5139617323875427, "learning_rate": 9.913489845244626e-05, "loss": 0.1522, "step": 3190 }, { "epoch": 4.8675537109375e-06, "model_forward_time": 0.02484273910522461, "step": 3190 }, { "epoch": 4.8675537109375e-06, "step": 3190, "training_step_time": 0.11066246032714844 }, { "epoch": 4.86907958984375e-06, "model_forward_time": 0.024976730346679688, "step": 3191 }, { "epoch": 4.86907958984375e-06, "step": 3191, "training_step_time": 0.10774540901184082 }, { "epoch": 4.87060546875e-06, "model_forward_time": 0.025418519973754883, "step": 3192 }, { "epoch": 4.87060546875e-06, "step": 3192, "training_step_time": 0.11703896522521973 }, { "epoch": 4.87213134765625e-06, "model_forward_time": 0.025385379791259766, "step": 3193 }, { "epoch": 4.87213134765625e-06, "step": 3193, "training_step_time": 0.1956779956817627 }, { "epoch": 4.8736572265625e-06, "model_forward_time": 0.025589942932128906, "step": 3194 }, { "epoch": 4.8736572265625e-06, "step": 3194, "training_step_time": 0.1103067398071289 }, { "epoch": 4.87518310546875e-06, "model_forward_time": 0.024490833282470703, "step": 3195 }, { "epoch": 4.87518310546875e-06, "step": 3195, "training_step_time": 0.21574139595031738 }, { "epoch": 4.876708984375e-06, "model_forward_time": 0.02475595474243164, "step": 3196 }, { "epoch": 4.876708984375e-06, "step": 3196, "training_step_time": 0.11703276634216309 }, { "epoch": 4.87823486328125e-06, "model_forward_time": 0.025253772735595703, "step": 3197 }, { "epoch": 4.87823486328125e-06, "step": 3197, "training_step_time": 0.11318802833557129 }, { "epoch": 4.8797607421875e-06, "model_forward_time": 0.025367259979248047, "step": 3198 }, { "epoch": 4.8797607421875e-06, "step": 3198, "training_step_time": 0.2274341583251953 }, { "epoch": 4.88128662109375e-06, "model_forward_time": 0.024669170379638672, "step": 3199 }, { "epoch": 4.88128662109375e-06, "step": 3199, "training_step_time": 0.1598987579345703 }, { "epoch": 4.8828125e-06, "grad_norm": 0.4906257390975952, "learning_rate": 9.912466033998757e-05, "loss": 0.122, "step": 3200 }, { "epoch": 4.8828125e-06, "model_forward_time": 0.02478766441345215, "step": 3200 }, { "epoch": 4.8828125e-06, "step": 3200, "training_step_time": 0.10839557647705078 }, { "epoch": 4.88433837890625e-06, "model_forward_time": 0.02462601661682129, "step": 3201 }, { "epoch": 4.88433837890625e-06, "step": 3201, "training_step_time": 0.11238646507263184 }, { "epoch": 4.8858642578125e-06, "model_forward_time": 0.025043487548828125, "step": 3202 }, { "epoch": 4.8858642578125e-06, "step": 3202, "training_step_time": 0.11549711227416992 }, { "epoch": 4.88739013671875e-06, "model_forward_time": 0.02493143081665039, "step": 3203 }, { "epoch": 4.88739013671875e-06, "step": 3203, "training_step_time": 0.1100616455078125 }, { "epoch": 4.888916015625e-06, "model_forward_time": 0.024981021881103516, "step": 3204 }, { "epoch": 4.888916015625e-06, "step": 3204, "training_step_time": 0.10721588134765625 }, { "epoch": 4.89044189453125e-06, "model_forward_time": 0.025205612182617188, "step": 3205 }, { "epoch": 4.89044189453125e-06, "step": 3205, "training_step_time": 0.11126208305358887 }, { "epoch": 4.8919677734375e-06, "model_forward_time": 0.025455474853515625, "step": 3206 }, { "epoch": 4.8919677734375e-06, "step": 3206, "training_step_time": 0.11132097244262695 }, { "epoch": 4.89349365234375e-06, "model_forward_time": 0.024722814559936523, "step": 3207 }, { "epoch": 4.89349365234375e-06, "step": 3207, "training_step_time": 0.10744476318359375 }, { "epoch": 4.89501953125e-06, "model_forward_time": 0.025031566619873047, "step": 3208 }, { "epoch": 4.89501953125e-06, "step": 3208, "training_step_time": 0.11104273796081543 }, { "epoch": 4.89654541015625e-06, "model_forward_time": 0.025067567825317383, "step": 3209 }, { "epoch": 4.89654541015625e-06, "step": 3209, "training_step_time": 0.10792708396911621 }, { "epoch": 4.8980712890625e-06, "grad_norm": 0.6585983037948608, "learning_rate": 9.911436253643445e-05, "loss": 0.1165, "step": 3210 }, { "epoch": 4.8980712890625e-06, "model_forward_time": 0.02523493766784668, "step": 3210 }, { "epoch": 4.8980712890625e-06, "step": 3210, "training_step_time": 0.11326193809509277 }, { "epoch": 4.89959716796875e-06, "model_forward_time": 0.024994373321533203, "step": 3211 }, { "epoch": 4.89959716796875e-06, "step": 3211, "training_step_time": 0.10886335372924805 }, { "epoch": 4.901123046875e-06, "model_forward_time": 0.02552652359008789, "step": 3212 }, { "epoch": 4.901123046875e-06, "step": 3212, "training_step_time": 0.10810375213623047 }, { "epoch": 4.90264892578125e-06, "model_forward_time": 0.0249021053314209, "step": 3213 }, { "epoch": 4.90264892578125e-06, "step": 3213, "training_step_time": 0.10915923118591309 }, { "epoch": 4.9041748046875e-06, "model_forward_time": 0.024564027786254883, "step": 3214 }, { "epoch": 4.9041748046875e-06, "step": 3214, "training_step_time": 0.11595916748046875 }, { "epoch": 4.90570068359375e-06, "model_forward_time": 0.024170637130737305, "step": 3215 }, { "epoch": 4.90570068359375e-06, "step": 3215, "training_step_time": 0.11234235763549805 }, { "epoch": 4.9072265625e-06, "model_forward_time": 0.024499177932739258, "step": 3216 }, { "epoch": 4.9072265625e-06, "step": 3216, "training_step_time": 0.11677289009094238 }, { "epoch": 4.90875244140625e-06, "model_forward_time": 0.02436375617980957, "step": 3217 }, { "epoch": 4.90875244140625e-06, "step": 3217, "training_step_time": 0.10746884346008301 }, { "epoch": 4.9102783203125e-06, "model_forward_time": 0.02528524398803711, "step": 3218 }, { "epoch": 4.9102783203125e-06, "step": 3218, "training_step_time": 0.1084144115447998 }, { "epoch": 4.91180419921875e-06, "model_forward_time": 0.025277137756347656, "step": 3219 }, { "epoch": 4.91180419921875e-06, "step": 3219, "training_step_time": 0.1116180419921875 }, { "epoch": 4.913330078125e-06, "grad_norm": 0.9967507123947144, "learning_rate": 9.910400505429965e-05, "loss": 0.1518, "step": 3220 }, { "epoch": 4.913330078125e-06, "model_forward_time": 0.02497410774230957, "step": 3220 }, { "epoch": 4.913330078125e-06, "step": 3220, "training_step_time": 0.10763144493103027 }, { "epoch": 4.91485595703125e-06, "model_forward_time": 0.025525808334350586, "step": 3221 }, { "epoch": 4.91485595703125e-06, "step": 3221, "training_step_time": 0.10751032829284668 }, { "epoch": 4.9163818359375e-06, "model_forward_time": 0.02528977394104004, "step": 3222 }, { "epoch": 4.9163818359375e-06, "step": 3222, "training_step_time": 0.10893654823303223 }, { "epoch": 4.91790771484375e-06, "model_forward_time": 0.025118112564086914, "step": 3223 }, { "epoch": 4.91790771484375e-06, "step": 3223, "training_step_time": 0.1071171760559082 }, { "epoch": 4.91943359375e-06, "model_forward_time": 0.025223255157470703, "step": 3224 }, { "epoch": 4.91943359375e-06, "step": 3224, "training_step_time": 0.1080465316772461 }, { "epoch": 4.92095947265625e-06, "model_forward_time": 0.02525639533996582, "step": 3225 }, { "epoch": 4.92095947265625e-06, "step": 3225, "training_step_time": 0.2277822494506836 }, { "epoch": 4.9224853515625e-06, "model_forward_time": 0.02449631690979004, "step": 3226 }, { "epoch": 4.9224853515625e-06, "step": 3226, "training_step_time": 0.15121841430664062 }, { "epoch": 4.92401123046875e-06, "model_forward_time": 0.024351119995117188, "step": 3227 }, { "epoch": 4.92401123046875e-06, "step": 3227, "training_step_time": 0.21167445182800293 }, { "epoch": 4.925537109375e-06, "model_forward_time": 0.024397850036621094, "step": 3228 }, { "epoch": 4.925537109375e-06, "step": 3228, "training_step_time": 0.139068603515625 }, { "epoch": 4.92706298828125e-06, "model_forward_time": 0.025906085968017578, "step": 3229 }, { "epoch": 4.92706298828125e-06, "step": 3229, "training_step_time": 0.19667959213256836 }, { "epoch": 4.9285888671875e-06, "grad_norm": 0.8460479378700256, "learning_rate": 9.909358790616849e-05, "loss": 0.1425, "step": 3230 }, { "epoch": 4.9285888671875e-06, "model_forward_time": 0.025046586990356445, "step": 3230 }, { "epoch": 4.9285888671875e-06, "step": 3230, "training_step_time": 0.15529131889343262 }, { "epoch": 4.93011474609375e-06, "model_forward_time": 0.024851560592651367, "step": 3231 }, { "epoch": 4.93011474609375e-06, "step": 3231, "training_step_time": 0.14596843719482422 }, { "epoch": 4.931640625e-06, "model_forward_time": 0.02543354034423828, "step": 3232 }, { "epoch": 4.931640625e-06, "step": 3232, "training_step_time": 0.1572108268737793 }, { "epoch": 4.93316650390625e-06, "model_forward_time": 0.025016069412231445, "step": 3233 }, { "epoch": 4.93316650390625e-06, "step": 3233, "training_step_time": 0.22903752326965332 }, { "epoch": 4.9346923828125e-06, "model_forward_time": 0.025895357131958008, "step": 3234 }, { "epoch": 4.9346923828125e-06, "step": 3234, "training_step_time": 0.20109176635742188 }, { "epoch": 4.93621826171875e-06, "model_forward_time": 0.026651382446289062, "step": 3235 }, { "epoch": 4.93621826171875e-06, "step": 3235, "training_step_time": 0.19794034957885742 }, { "epoch": 4.937744140625e-06, "model_forward_time": 0.025330781936645508, "step": 3236 }, { "epoch": 4.937744140625e-06, "step": 3236, "training_step_time": 0.11130642890930176 }, { "epoch": 4.93927001953125e-06, "model_forward_time": 0.024691104888916016, "step": 3237 }, { "epoch": 4.93927001953125e-06, "step": 3237, "training_step_time": 0.2139301300048828 }, { "epoch": 4.9407958984375e-06, "model_forward_time": 0.024454832077026367, "step": 3238 }, { "epoch": 4.9407958984375e-06, "step": 3238, "training_step_time": 0.12315964698791504 }, { "epoch": 4.94232177734375e-06, "model_forward_time": 0.02460789680480957, "step": 3239 }, { "epoch": 4.94232177734375e-06, "step": 3239, "training_step_time": 0.11834836006164551 }, { "epoch": 4.94384765625e-06, "grad_norm": 0.49196356534957886, "learning_rate": 9.90831111046988e-05, "loss": 0.127, "step": 3240 }, { "epoch": 4.94384765625e-06, "model_forward_time": 0.026029586791992188, "step": 3240 }, { "epoch": 4.94384765625e-06, "step": 3240, "training_step_time": 0.20206093788146973 }, { "epoch": 4.94537353515625e-06, "model_forward_time": 0.025101184844970703, "step": 3241 }, { "epoch": 4.94537353515625e-06, "step": 3241, "training_step_time": 0.11531448364257812 }, { "epoch": 4.9468994140625e-06, "model_forward_time": 0.025177717208862305, "step": 3242 }, { "epoch": 4.9468994140625e-06, "step": 3242, "training_step_time": 0.10837507247924805 }, { "epoch": 4.94842529296875e-06, "model_forward_time": 0.025664091110229492, "step": 3243 }, { "epoch": 4.94842529296875e-06, "step": 3243, "training_step_time": 0.10791540145874023 }, { "epoch": 4.949951171875e-06, "model_forward_time": 0.02532196044921875, "step": 3244 }, { "epoch": 4.949951171875e-06, "step": 3244, "training_step_time": 0.10921359062194824 }, { "epoch": 4.95147705078125e-06, "model_forward_time": 0.02577805519104004, "step": 3245 }, { "epoch": 4.95147705078125e-06, "step": 3245, "training_step_time": 0.11053252220153809 }, { "epoch": 4.9530029296875e-06, "model_forward_time": 0.025481700897216797, "step": 3246 }, { "epoch": 4.9530029296875e-06, "step": 3246, "training_step_time": 0.10992026329040527 }, { "epoch": 4.95452880859375e-06, "model_forward_time": 0.026688337326049805, "step": 3247 }, { "epoch": 4.95452880859375e-06, "step": 3247, "training_step_time": 0.11150455474853516 }, { "epoch": 4.9560546875e-06, "model_forward_time": 0.025789976119995117, "step": 3248 }, { "epoch": 4.9560546875e-06, "step": 3248, "training_step_time": 0.10913610458374023 }, { "epoch": 4.95758056640625e-06, "model_forward_time": 0.025461196899414062, "step": 3249 }, { "epoch": 4.95758056640625e-06, "step": 3249, "training_step_time": 0.10711050033569336 }, { "epoch": 4.9591064453125e-06, "grad_norm": 0.6129723191261292, "learning_rate": 9.90725746626209e-05, "loss": 0.1285, "step": 3250 }, { "epoch": 4.9591064453125e-06, "model_forward_time": 0.025580406188964844, "step": 3250 }, { "epoch": 4.9591064453125e-06, "step": 3250, "training_step_time": 0.1126255989074707 }, { "epoch": 4.96063232421875e-06, "model_forward_time": 0.02577066421508789, "step": 3251 }, { "epoch": 4.96063232421875e-06, "step": 3251, "training_step_time": 0.10759377479553223 }, { "epoch": 4.962158203125e-06, "model_forward_time": 0.02594447135925293, "step": 3252 }, { "epoch": 4.962158203125e-06, "step": 3252, "training_step_time": 0.10802960395812988 }, { "epoch": 4.96368408203125e-06, "model_forward_time": 0.025814294815063477, "step": 3253 }, { "epoch": 4.96368408203125e-06, "step": 3253, "training_step_time": 0.10796785354614258 }, { "epoch": 4.9652099609375e-06, "model_forward_time": 0.025761127471923828, "step": 3254 }, { "epoch": 4.9652099609375e-06, "step": 3254, "training_step_time": 0.10899806022644043 }, { "epoch": 4.96673583984375e-06, "model_forward_time": 0.025426149368286133, "step": 3255 }, { "epoch": 4.96673583984375e-06, "step": 3255, "training_step_time": 0.11186838150024414 }, { "epoch": 4.96826171875e-06, "model_forward_time": 0.025588035583496094, "step": 3256 }, { "epoch": 4.96826171875e-06, "step": 3256, "training_step_time": 0.10788464546203613 }, { "epoch": 4.96978759765625e-06, "model_forward_time": 0.025451183319091797, "step": 3257 }, { "epoch": 4.96978759765625e-06, "step": 3257, "training_step_time": 0.11051535606384277 }, { "epoch": 4.9713134765625e-06, "model_forward_time": 0.026192188262939453, "step": 3258 }, { "epoch": 4.9713134765625e-06, "step": 3258, "training_step_time": 0.10768914222717285 }, { "epoch": 4.97283935546875e-06, "model_forward_time": 0.025746583938598633, "step": 3259 }, { "epoch": 4.97283935546875e-06, "step": 3259, "training_step_time": 0.106719970703125 }, { "epoch": 4.974365234375e-06, "grad_norm": 0.9966940879821777, "learning_rate": 9.906197859273753e-05, "loss": 0.145, "step": 3260 }, { "epoch": 4.974365234375e-06, "model_forward_time": 0.025510787963867188, "step": 3260 }, { "epoch": 4.974365234375e-06, "step": 3260, "training_step_time": 0.10967254638671875 }, { "epoch": 4.97589111328125e-06, "model_forward_time": 0.025699853897094727, "step": 3261 }, { "epoch": 4.97589111328125e-06, "step": 3261, "training_step_time": 0.10957598686218262 }, { "epoch": 4.9774169921875e-06, "model_forward_time": 0.025473833084106445, "step": 3262 }, { "epoch": 4.9774169921875e-06, "step": 3262, "training_step_time": 0.10700035095214844 }, { "epoch": 4.97894287109375e-06, "model_forward_time": 0.02553582191467285, "step": 3263 }, { "epoch": 4.97894287109375e-06, "step": 3263, "training_step_time": 0.10734295845031738 }, { "epoch": 4.98046875e-06, "model_forward_time": 0.02573251724243164, "step": 3264 }, { "epoch": 4.98046875e-06, "step": 3264, "training_step_time": 0.11006784439086914 }, { "epoch": 4.98199462890625e-06, "model_forward_time": 0.02547168731689453, "step": 3265 }, { "epoch": 4.98199462890625e-06, "step": 3265, "training_step_time": 0.10898089408874512 }, { "epoch": 4.9835205078125e-06, "model_forward_time": 0.02541184425354004, "step": 3266 }, { "epoch": 4.9835205078125e-06, "step": 3266, "training_step_time": 0.10830068588256836 }, { "epoch": 4.98504638671875e-06, "model_forward_time": 0.025359630584716797, "step": 3267 }, { "epoch": 4.98504638671875e-06, "step": 3267, "training_step_time": 0.10808444023132324 }, { "epoch": 4.986572265625e-06, "model_forward_time": 0.02550220489501953, "step": 3268 }, { "epoch": 4.986572265625e-06, "step": 3268, "training_step_time": 0.1588141918182373 }, { "epoch": 4.98809814453125e-06, "model_forward_time": 0.025068044662475586, "step": 3269 }, { "epoch": 4.98809814453125e-06, "step": 3269, "training_step_time": 0.17066001892089844 }, { "epoch": 4.9896240234375e-06, "grad_norm": 0.5129582285881042, "learning_rate": 9.905132290792394e-05, "loss": 0.12, "step": 3270 }, { "epoch": 4.9896240234375e-06, "model_forward_time": 0.024705886840820312, "step": 3270 }, { "epoch": 4.9896240234375e-06, "step": 3270, "training_step_time": 0.1843571662902832 }, { "epoch": 4.99114990234375e-06, "model_forward_time": 0.02533411979675293, "step": 3271 }, { "epoch": 4.99114990234375e-06, "step": 3271, "training_step_time": 0.18781065940856934 }, { "epoch": 4.99267578125e-06, "model_forward_time": 0.024753570556640625, "step": 3272 }, { "epoch": 4.99267578125e-06, "step": 3272, "training_step_time": 0.19375324249267578 }, { "epoch": 4.99420166015625e-06, "model_forward_time": 0.024758338928222656, "step": 3273 }, { "epoch": 4.99420166015625e-06, "step": 3273, "training_step_time": 0.13396215438842773 }, { "epoch": 4.9957275390625e-06, "model_forward_time": 0.02509164810180664, "step": 3274 }, { "epoch": 4.9957275390625e-06, "step": 3274, "training_step_time": 0.18749070167541504 }, { "epoch": 4.99725341796875e-06, "model_forward_time": 0.024580001831054688, "step": 3275 }, { "epoch": 4.99725341796875e-06, "step": 3275, "training_step_time": 0.18320822715759277 }, { "epoch": 4.998779296875e-06, "model_forward_time": 0.024519920349121094, "step": 3276 }, { "epoch": 4.998779296875e-06, "step": 3276, "training_step_time": 0.1153876781463623 }, { "epoch": 5.00030517578125e-06, "model_forward_time": 0.02528071403503418, "step": 3277 }, { "epoch": 5.00030517578125e-06, "step": 3277, "training_step_time": 0.1585848331451416 }, { "epoch": 5.0018310546875e-06, "model_forward_time": 0.024559736251831055, "step": 3278 }, { "epoch": 5.0018310546875e-06, "step": 3278, "training_step_time": 0.1093747615814209 }, { "epoch": 5.00335693359375e-06, "model_forward_time": 0.025209903717041016, "step": 3279 }, { "epoch": 5.00335693359375e-06, "step": 3279, "training_step_time": 0.10776019096374512 }, { "epoch": 5.0048828125e-06, "grad_norm": 0.7755047082901001, "learning_rate": 9.904060762112777e-05, "loss": 0.126, "step": 3280 }, { "epoch": 5.0048828125e-06, "model_forward_time": 0.025447368621826172, "step": 3280 }, { "epoch": 5.0048828125e-06, "step": 3280, "training_step_time": 0.11076784133911133 }, { "epoch": 5.00640869140625e-06, "model_forward_time": 0.02446436882019043, "step": 3281 }, { "epoch": 5.00640869140625e-06, "step": 3281, "training_step_time": 0.16920948028564453 }, { "epoch": 5.0079345703125e-06, "model_forward_time": 0.02556920051574707, "step": 3282 }, { "epoch": 5.0079345703125e-06, "step": 3282, "training_step_time": 0.1642313003540039 }, { "epoch": 5.00946044921875e-06, "model_forward_time": 0.02524542808532715, "step": 3283 }, { "epoch": 5.00946044921875e-06, "step": 3283, "training_step_time": 0.13206839561462402 }, { "epoch": 5.010986328125e-06, "model_forward_time": 0.02482295036315918, "step": 3284 }, { "epoch": 5.010986328125e-06, "step": 3284, "training_step_time": 0.19943451881408691 }, { "epoch": 5.01251220703125e-06, "model_forward_time": 0.024399518966674805, "step": 3285 }, { "epoch": 5.01251220703125e-06, "step": 3285, "training_step_time": 0.12189745903015137 }, { "epoch": 5.0140380859375e-06, "model_forward_time": 0.02486872673034668, "step": 3286 }, { "epoch": 5.0140380859375e-06, "step": 3286, "training_step_time": 0.1141045093536377 }, { "epoch": 5.01556396484375e-06, "model_forward_time": 0.024568557739257812, "step": 3287 }, { "epoch": 5.01556396484375e-06, "step": 3287, "training_step_time": 0.11363387107849121 }, { "epoch": 5.01708984375e-06, "model_forward_time": 0.02406787872314453, "step": 3288 }, { "epoch": 5.01708984375e-06, "step": 3288, "training_step_time": 0.11143231391906738 }, { "epoch": 5.01861572265625e-06, "model_forward_time": 0.025574445724487305, "step": 3289 }, { "epoch": 5.01861572265625e-06, "step": 3289, "training_step_time": 0.11050891876220703 }, { "epoch": 5.0201416015625e-06, "grad_norm": 0.47359898686408997, "learning_rate": 9.902983274536912e-05, "loss": 0.1297, "step": 3290 }, { "epoch": 5.0201416015625e-06, "model_forward_time": 0.02555561065673828, "step": 3290 }, { "epoch": 5.0201416015625e-06, "step": 3290, "training_step_time": 0.11321473121643066 }, { "epoch": 5.02166748046875e-06, "model_forward_time": 0.025382041931152344, "step": 3291 }, { "epoch": 5.02166748046875e-06, "step": 3291, "training_step_time": 0.11043715476989746 }, { "epoch": 5.023193359375e-06, "model_forward_time": 0.025308847427368164, "step": 3292 }, { "epoch": 5.023193359375e-06, "step": 3292, "training_step_time": 0.10826277732849121 }, { "epoch": 5.02471923828125e-06, "model_forward_time": 0.025557994842529297, "step": 3293 }, { "epoch": 5.02471923828125e-06, "step": 3293, "training_step_time": 0.11281871795654297 }, { "epoch": 5.0262451171875e-06, "model_forward_time": 0.025214195251464844, "step": 3294 }, { "epoch": 5.0262451171875e-06, "step": 3294, "training_step_time": 0.10975050926208496 }, { "epoch": 5.02777099609375e-06, "model_forward_time": 0.02509164810180664, "step": 3295 }, { "epoch": 5.02777099609375e-06, "step": 3295, "training_step_time": 0.10721588134765625 }, { "epoch": 5.029296875e-06, "model_forward_time": 0.02517104148864746, "step": 3296 }, { "epoch": 5.029296875e-06, "step": 3296, "training_step_time": 0.11140942573547363 }, { "epoch": 5.03082275390625e-06, "model_forward_time": 0.025516271591186523, "step": 3297 }, { "epoch": 5.03082275390625e-06, "step": 3297, "training_step_time": 0.10791349411010742 }, { "epoch": 5.0323486328125e-06, "model_forward_time": 0.02699875831604004, "step": 3298 }, { "epoch": 5.0323486328125e-06, "step": 3298, "training_step_time": 0.11146879196166992 }, { "epoch": 5.03387451171875e-06, "model_forward_time": 0.025566577911376953, "step": 3299 }, { "epoch": 5.03387451171875e-06, "step": 3299, "training_step_time": 0.11018252372741699 }, { "epoch": 5.035400390625e-06, "grad_norm": 0.6142375469207764, "learning_rate": 9.901899829374047e-05, "loss": 0.1135, "step": 3300 }, { "epoch": 5.035400390625e-06, "model_forward_time": 0.025331497192382812, "step": 3300 }, { "epoch": 5.035400390625e-06, "step": 3300, "training_step_time": 0.1110227108001709 }, { "epoch": 5.03692626953125e-06, "model_forward_time": 0.025975465774536133, "step": 3301 }, { "epoch": 5.03692626953125e-06, "step": 3301, "training_step_time": 0.10928583145141602 }, { "epoch": 5.0384521484375e-06, "model_forward_time": 0.025483131408691406, "step": 3302 }, { "epoch": 5.0384521484375e-06, "step": 3302, "training_step_time": 0.1087038516998291 }, { "epoch": 5.03997802734375e-06, "model_forward_time": 0.025272130966186523, "step": 3303 }, { "epoch": 5.03997802734375e-06, "step": 3303, "training_step_time": 0.10715508460998535 }, { "epoch": 5.04150390625e-06, "model_forward_time": 0.025513887405395508, "step": 3304 }, { "epoch": 5.04150390625e-06, "step": 3304, "training_step_time": 0.11074662208557129 }, { "epoch": 5.04302978515625e-06, "model_forward_time": 0.02567005157470703, "step": 3305 }, { "epoch": 5.04302978515625e-06, "step": 3305, "training_step_time": 0.1072838306427002 }, { "epoch": 5.0445556640625e-06, "model_forward_time": 0.025232315063476562, "step": 3306 }, { "epoch": 5.0445556640625e-06, "step": 3306, "training_step_time": 0.10974931716918945 }, { "epoch": 5.04608154296875e-06, "model_forward_time": 0.025429248809814453, "step": 3307 }, { "epoch": 5.04608154296875e-06, "step": 3307, "training_step_time": 0.10987377166748047 }, { "epoch": 5.047607421875e-06, "model_forward_time": 0.025057554244995117, "step": 3308 }, { "epoch": 5.047607421875e-06, "step": 3308, "training_step_time": 0.10850024223327637 }, { "epoch": 5.04913330078125e-06, "model_forward_time": 0.025445222854614258, "step": 3309 }, { "epoch": 5.04913330078125e-06, "step": 3309, "training_step_time": 0.1112375259399414 }, { "epoch": 5.0506591796875e-06, "grad_norm": 0.8731642365455627, "learning_rate": 9.90081042794067e-05, "loss": 0.1424, "step": 3310 }, { "epoch": 5.0506591796875e-06, "model_forward_time": 0.024201631546020508, "step": 3310 }, { "epoch": 5.0506591796875e-06, "step": 3310, "training_step_time": 0.11226630210876465 }, { "epoch": 5.05218505859375e-06, "model_forward_time": 0.025452852249145508, "step": 3311 }, { "epoch": 5.05218505859375e-06, "step": 3311, "training_step_time": 0.1145024299621582 }, { "epoch": 5.0537109375e-06, "model_forward_time": 0.02540302276611328, "step": 3312 }, { "epoch": 5.0537109375e-06, "step": 3312, "training_step_time": 0.12565064430236816 }, { "epoch": 5.05523681640625e-06, "model_forward_time": 0.025209426879882812, "step": 3313 }, { "epoch": 5.05523681640625e-06, "step": 3313, "training_step_time": 0.17058825492858887 }, { "epoch": 5.0567626953125e-06, "model_forward_time": 0.025848388671875, "step": 3314 }, { "epoch": 5.0567626953125e-06, "step": 3314, "training_step_time": 0.1715235710144043 }, { "epoch": 5.05828857421875e-06, "model_forward_time": 0.024121999740600586, "step": 3315 }, { "epoch": 5.05828857421875e-06, "step": 3315, "training_step_time": 0.16973376274108887 }, { "epoch": 5.059814453125e-06, "model_forward_time": 0.024633169174194336, "step": 3316 }, { "epoch": 5.059814453125e-06, "step": 3316, "training_step_time": 0.12722063064575195 }, { "epoch": 5.06134033203125e-06, "model_forward_time": 0.024718046188354492, "step": 3317 }, { "epoch": 5.06134033203125e-06, "step": 3317, "training_step_time": 0.15833187103271484 }, { "epoch": 5.0628662109375e-06, "model_forward_time": 0.024771928787231445, "step": 3318 }, { "epoch": 5.0628662109375e-06, "step": 3318, "training_step_time": 0.1680283546447754 }, { "epoch": 5.06439208984375e-06, "model_forward_time": 0.02459096908569336, "step": 3319 }, { "epoch": 5.06439208984375e-06, "step": 3319, "training_step_time": 0.18820762634277344 }, { "epoch": 5.06591796875e-06, "grad_norm": 0.7321391105651855, "learning_rate": 9.899715071560508e-05, "loss": 0.1516, "step": 3320 }, { "epoch": 5.06591796875e-06, "model_forward_time": 0.025385618209838867, "step": 3320 }, { "epoch": 5.06591796875e-06, "step": 3320, "training_step_time": 0.16673731803894043 }, { "epoch": 5.06744384765625e-06, "model_forward_time": 0.024245023727416992, "step": 3321 }, { "epoch": 5.06744384765625e-06, "step": 3321, "training_step_time": 0.1881856918334961 }, { "epoch": 5.0689697265625e-06, "model_forward_time": 0.02424478530883789, "step": 3322 }, { "epoch": 5.0689697265625e-06, "step": 3322, "training_step_time": 0.17690134048461914 }, { "epoch": 5.07049560546875e-06, "model_forward_time": 0.024748802185058594, "step": 3323 }, { "epoch": 5.07049560546875e-06, "step": 3323, "training_step_time": 0.17329668998718262 }, { "epoch": 5.072021484375e-06, "model_forward_time": 0.02468585968017578, "step": 3324 }, { "epoch": 5.072021484375e-06, "step": 3324, "training_step_time": 0.15823149681091309 }, { "epoch": 5.07354736328125e-06, "model_forward_time": 0.024211883544921875, "step": 3325 }, { "epoch": 5.07354736328125e-06, "step": 3325, "training_step_time": 0.15304350852966309 }, { "epoch": 5.0750732421875e-06, "model_forward_time": 0.02490401268005371, "step": 3326 }, { "epoch": 5.0750732421875e-06, "step": 3326, "training_step_time": 0.18915510177612305 }, { "epoch": 5.07659912109375e-06, "model_forward_time": 0.02474522590637207, "step": 3327 }, { "epoch": 5.07659912109375e-06, "step": 3327, "training_step_time": 0.21829891204833984 }, { "epoch": 5.078125e-06, "model_forward_time": 0.024674415588378906, "step": 3328 }, { "epoch": 5.078125e-06, "step": 3328, "training_step_time": 0.11189699172973633 }, { "epoch": 5.07965087890625e-06, "model_forward_time": 0.024722814559936523, "step": 3329 }, { "epoch": 5.07965087890625e-06, "step": 3329, "training_step_time": 0.10694050788879395 }, { "epoch": 5.0811767578125e-06, "grad_norm": 0.4290498197078705, "learning_rate": 9.89861376156452e-05, "loss": 0.1248, "step": 3330 }, { "epoch": 5.0811767578125e-06, "model_forward_time": 0.02578115463256836, "step": 3330 }, { "epoch": 5.0811767578125e-06, "step": 3330, "training_step_time": 0.10819220542907715 }, { "epoch": 5.08270263671875e-06, "model_forward_time": 0.025634288787841797, "step": 3331 }, { "epoch": 5.08270263671875e-06, "step": 3331, "training_step_time": 0.10854244232177734 }, { "epoch": 5.084228515625e-06, "model_forward_time": 0.025829553604125977, "step": 3332 }, { "epoch": 5.084228515625e-06, "step": 3332, "training_step_time": 0.11339330673217773 }, { "epoch": 5.08575439453125e-06, "model_forward_time": 0.025535106658935547, "step": 3333 }, { "epoch": 5.08575439453125e-06, "step": 3333, "training_step_time": 0.10898137092590332 }, { "epoch": 5.0872802734375e-06, "model_forward_time": 0.024979352951049805, "step": 3334 }, { "epoch": 5.0872802734375e-06, "step": 3334, "training_step_time": 0.10894775390625 }, { "epoch": 5.08880615234375e-06, "model_forward_time": 0.025241851806640625, "step": 3335 }, { "epoch": 5.08880615234375e-06, "step": 3335, "training_step_time": 0.1111454963684082 }, { "epoch": 5.09033203125e-06, "model_forward_time": 0.025610685348510742, "step": 3336 }, { "epoch": 5.09033203125e-06, "step": 3336, "training_step_time": 0.10805106163024902 }, { "epoch": 5.09185791015625e-06, "model_forward_time": 0.025163650512695312, "step": 3337 }, { "epoch": 5.09185791015625e-06, "step": 3337, "training_step_time": 0.10710334777832031 }, { "epoch": 5.0933837890625e-06, "model_forward_time": 0.025674104690551758, "step": 3338 }, { "epoch": 5.0933837890625e-06, "step": 3338, "training_step_time": 0.1103048324584961 }, { "epoch": 5.09490966796875e-06, "model_forward_time": 0.025478124618530273, "step": 3339 }, { "epoch": 5.09490966796875e-06, "step": 3339, "training_step_time": 0.10799813270568848 }, { "epoch": 5.096435546875e-06, "grad_norm": 0.654565691947937, "learning_rate": 9.897506499290902e-05, "loss": 0.1136, "step": 3340 }, { "epoch": 5.096435546875e-06, "model_forward_time": 0.02530694007873535, "step": 3340 }, { "epoch": 5.096435546875e-06, "step": 3340, "training_step_time": 0.11374139785766602 }, { "epoch": 5.09796142578125e-06, "model_forward_time": 0.025504350662231445, "step": 3341 }, { "epoch": 5.09796142578125e-06, "step": 3341, "training_step_time": 0.10834240913391113 }, { "epoch": 5.0994873046875e-06, "model_forward_time": 0.025019407272338867, "step": 3342 }, { "epoch": 5.0994873046875e-06, "step": 3342, "training_step_time": 0.10620450973510742 }, { "epoch": 5.10101318359375e-06, "model_forward_time": 0.02547144889831543, "step": 3343 }, { "epoch": 5.10101318359375e-06, "step": 3343, "training_step_time": 0.10982608795166016 }, { "epoch": 5.1025390625e-06, "model_forward_time": 0.025662660598754883, "step": 3344 }, { "epoch": 5.1025390625e-06, "step": 3344, "training_step_time": 0.10815119743347168 }, { "epoch": 5.10406494140625e-06, "model_forward_time": 0.025660276412963867, "step": 3345 }, { "epoch": 5.10406494140625e-06, "step": 3345, "training_step_time": 0.10927367210388184 }, { "epoch": 5.1055908203125e-06, "model_forward_time": 0.02554917335510254, "step": 3346 }, { "epoch": 5.1055908203125e-06, "step": 3346, "training_step_time": 0.10837912559509277 }, { "epoch": 5.10711669921875e-06, "model_forward_time": 0.025473356246948242, "step": 3347 }, { "epoch": 5.10711669921875e-06, "step": 3347, "training_step_time": 0.10836935043334961 }, { "epoch": 5.108642578125e-06, "model_forward_time": 0.02554011344909668, "step": 3348 }, { "epoch": 5.108642578125e-06, "step": 3348, "training_step_time": 0.10989522933959961 }, { "epoch": 5.11016845703125e-06, "model_forward_time": 0.027683496475219727, "step": 3349 }, { "epoch": 5.11016845703125e-06, "step": 3349, "training_step_time": 0.1146082878112793 }, { "epoch": 5.1116943359375e-06, "grad_norm": 0.5085901618003845, "learning_rate": 9.896393286085084e-05, "loss": 0.1138, "step": 3350 }, { "epoch": 5.1116943359375e-06, "model_forward_time": 0.026171445846557617, "step": 3350 }, { "epoch": 5.1116943359375e-06, "step": 3350, "training_step_time": 0.10760092735290527 }, { "epoch": 5.11322021484375e-06, "model_forward_time": 0.026284456253051758, "step": 3351 }, { "epoch": 5.11322021484375e-06, "step": 3351, "training_step_time": 0.11056876182556152 }, { "epoch": 5.11474609375e-06, "model_forward_time": 0.025292158126831055, "step": 3352 }, { "epoch": 5.11474609375e-06, "step": 3352, "training_step_time": 0.11232972145080566 }, { "epoch": 5.11627197265625e-06, "model_forward_time": 0.025601625442504883, "step": 3353 }, { "epoch": 5.11627197265625e-06, "step": 3353, "training_step_time": 0.10922074317932129 }, { "epoch": 5.1177978515625e-06, "model_forward_time": 0.025664806365966797, "step": 3354 }, { "epoch": 5.1177978515625e-06, "step": 3354, "training_step_time": 0.10700678825378418 }, { "epoch": 5.11932373046875e-06, "model_forward_time": 0.025015830993652344, "step": 3355 }, { "epoch": 5.11932373046875e-06, "step": 3355, "training_step_time": 0.1819911003112793 }, { "epoch": 5.120849609375e-06, "model_forward_time": 0.02496647834777832, "step": 3356 }, { "epoch": 5.120849609375e-06, "step": 3356, "training_step_time": 0.20984292030334473 }, { "epoch": 5.12237548828125e-06, "model_forward_time": 0.024555206298828125, "step": 3357 }, { "epoch": 5.12237548828125e-06, "step": 3357, "training_step_time": 0.14247846603393555 }, { "epoch": 5.1239013671875e-06, "model_forward_time": 0.024637222290039062, "step": 3358 }, { "epoch": 5.1239013671875e-06, "step": 3358, "training_step_time": 0.20355510711669922 }, { "epoch": 5.12542724609375e-06, "model_forward_time": 0.0248262882232666, "step": 3359 }, { "epoch": 5.12542724609375e-06, "step": 3359, "training_step_time": 0.15606474876403809 }, { "epoch": 5.126953125e-06, "grad_norm": 0.6300991773605347, "learning_rate": 9.895274123299723e-05, "loss": 0.1176, "step": 3360 }, { "epoch": 5.126953125e-06, "model_forward_time": 0.024277448654174805, "step": 3360 }, { "epoch": 5.126953125e-06, "step": 3360, "training_step_time": 0.2021167278289795 }, { "epoch": 5.12847900390625e-06, "model_forward_time": 0.02524113655090332, "step": 3361 }, { "epoch": 5.12847900390625e-06, "step": 3361, "training_step_time": 0.11090779304504395 }, { "epoch": 5.1300048828125e-06, "model_forward_time": 0.027451038360595703, "step": 3362 }, { "epoch": 5.1300048828125e-06, "step": 3362, "training_step_time": 0.11035871505737305 }, { "epoch": 5.13153076171875e-06, "model_forward_time": 0.025591373443603516, "step": 3363 }, { "epoch": 5.13153076171875e-06, "step": 3363, "training_step_time": 0.11652016639709473 }, { "epoch": 5.133056640625e-06, "model_forward_time": 0.02537846565246582, "step": 3364 }, { "epoch": 5.133056640625e-06, "step": 3364, "training_step_time": 0.12974095344543457 }, { "epoch": 5.13458251953125e-06, "model_forward_time": 0.02446913719177246, "step": 3365 }, { "epoch": 5.13458251953125e-06, "step": 3365, "training_step_time": 0.18522334098815918 }, { "epoch": 5.1361083984375e-06, "model_forward_time": 0.024740934371948242, "step": 3366 }, { "epoch": 5.1361083984375e-06, "step": 3366, "training_step_time": 0.14810776710510254 }, { "epoch": 5.13763427734375e-06, "model_forward_time": 0.026396989822387695, "step": 3367 }, { "epoch": 5.13763427734375e-06, "step": 3367, "training_step_time": 0.13106060028076172 }, { "epoch": 5.13916015625e-06, "model_forward_time": 0.024644136428833008, "step": 3368 }, { "epoch": 5.13916015625e-06, "step": 3368, "training_step_time": 0.20798301696777344 }, { "epoch": 5.14068603515625e-06, "model_forward_time": 0.02512216567993164, "step": 3369 }, { "epoch": 5.14068603515625e-06, "step": 3369, "training_step_time": 0.13444972038269043 }, { "epoch": 5.1422119140625e-06, "grad_norm": 0.5910798907279968, "learning_rate": 9.894149012294708e-05, "loss": 0.1139, "step": 3370 }, { "epoch": 5.1422119140625e-06, "model_forward_time": 0.024836063385009766, "step": 3370 }, { "epoch": 5.1422119140625e-06, "step": 3370, "training_step_time": 0.1796886920928955 }, { "epoch": 5.14373779296875e-06, "model_forward_time": 0.02502751350402832, "step": 3371 }, { "epoch": 5.14373779296875e-06, "step": 3371, "training_step_time": 0.13038921356201172 }, { "epoch": 5.145263671875e-06, "model_forward_time": 0.024905681610107422, "step": 3372 }, { "epoch": 5.145263671875e-06, "step": 3372, "training_step_time": 0.1158590316772461 }, { "epoch": 5.14678955078125e-06, "model_forward_time": 0.025545597076416016, "step": 3373 }, { "epoch": 5.14678955078125e-06, "step": 3373, "training_step_time": 0.1111290454864502 }, { "epoch": 5.1483154296875e-06, "model_forward_time": 0.025193452835083008, "step": 3374 }, { "epoch": 5.1483154296875e-06, "step": 3374, "training_step_time": 0.11071515083312988 }, { "epoch": 5.14984130859375e-06, "model_forward_time": 0.025051116943359375, "step": 3375 }, { "epoch": 5.14984130859375e-06, "step": 3375, "training_step_time": 0.1115727424621582 }, { "epoch": 5.1513671875e-06, "model_forward_time": 0.024564743041992188, "step": 3376 }, { "epoch": 5.1513671875e-06, "step": 3376, "training_step_time": 0.11530661582946777 }, { "epoch": 5.15289306640625e-06, "model_forward_time": 0.025089263916015625, "step": 3377 }, { "epoch": 5.15289306640625e-06, "step": 3377, "training_step_time": 0.1094825267791748 }, { "epoch": 5.1544189453125e-06, "model_forward_time": 0.026329755783081055, "step": 3378 }, { "epoch": 5.1544189453125e-06, "step": 3378, "training_step_time": 0.11051535606384277 }, { "epoch": 5.15594482421875e-06, "model_forward_time": 0.025543212890625, "step": 3379 }, { "epoch": 5.15594482421875e-06, "step": 3379, "training_step_time": 0.10906720161437988 }, { "epoch": 5.157470703125e-06, "grad_norm": 0.6157618165016174, "learning_rate": 9.893017954437156e-05, "loss": 0.158, "step": 3380 }, { "epoch": 5.157470703125e-06, "model_forward_time": 0.025409936904907227, "step": 3380 }, { "epoch": 5.157470703125e-06, "step": 3380, "training_step_time": 0.11186718940734863 }, { "epoch": 5.15899658203125e-06, "model_forward_time": 0.0250246524810791, "step": 3381 }, { "epoch": 5.15899658203125e-06, "step": 3381, "training_step_time": 0.10830307006835938 }, { "epoch": 5.1605224609375e-06, "model_forward_time": 0.025045156478881836, "step": 3382 }, { "epoch": 5.1605224609375e-06, "step": 3382, "training_step_time": 0.11045622825622559 }, { "epoch": 5.16204833984375e-06, "model_forward_time": 0.025307893753051758, "step": 3383 }, { "epoch": 5.16204833984375e-06, "step": 3383, "training_step_time": 0.11243581771850586 }, { "epoch": 5.16357421875e-06, "model_forward_time": 0.025453567504882812, "step": 3384 }, { "epoch": 5.16357421875e-06, "step": 3384, "training_step_time": 0.11086130142211914 }, { "epoch": 5.16510009765625e-06, "model_forward_time": 0.025103330612182617, "step": 3385 }, { "epoch": 5.16510009765625e-06, "step": 3385, "training_step_time": 0.11155319213867188 }, { "epoch": 5.1666259765625e-06, "model_forward_time": 0.025478124618530273, "step": 3386 }, { "epoch": 5.1666259765625e-06, "step": 3386, "training_step_time": 0.10824084281921387 }, { "epoch": 5.16815185546875e-06, "model_forward_time": 0.025166749954223633, "step": 3387 }, { "epoch": 5.16815185546875e-06, "step": 3387, "training_step_time": 0.10843300819396973 }, { "epoch": 5.169677734375e-06, "model_forward_time": 0.025154590606689453, "step": 3388 }, { "epoch": 5.169677734375e-06, "step": 3388, "training_step_time": 0.1085653305053711 }, { "epoch": 5.17120361328125e-06, "model_forward_time": 0.025290966033935547, "step": 3389 }, { "epoch": 5.17120361328125e-06, "step": 3389, "training_step_time": 0.10765552520751953 }, { "epoch": 5.1727294921875e-06, "grad_norm": 0.667768657207489, "learning_rate": 9.891880951101407e-05, "loss": 0.1124, "step": 3390 }, { "epoch": 5.1727294921875e-06, "model_forward_time": 0.02593064308166504, "step": 3390 }, { "epoch": 5.1727294921875e-06, "step": 3390, "training_step_time": 0.10717606544494629 }, { "epoch": 5.17425537109375e-06, "model_forward_time": 0.025372743606567383, "step": 3391 }, { "epoch": 5.17425537109375e-06, "step": 3391, "training_step_time": 0.10731053352355957 }, { "epoch": 5.17578125e-06, "model_forward_time": 0.027755260467529297, "step": 3392 }, { "epoch": 5.17578125e-06, "step": 3392, "training_step_time": 0.1120603084564209 }, { "epoch": 5.17730712890625e-06, "model_forward_time": 0.025792837142944336, "step": 3393 }, { "epoch": 5.17730712890625e-06, "step": 3393, "training_step_time": 0.10758590698242188 }, { "epoch": 5.1788330078125e-06, "model_forward_time": 0.0253143310546875, "step": 3394 }, { "epoch": 5.1788330078125e-06, "step": 3394, "training_step_time": 0.11064529418945312 }, { "epoch": 5.18035888671875e-06, "model_forward_time": 0.02527761459350586, "step": 3395 }, { "epoch": 5.18035888671875e-06, "step": 3395, "training_step_time": 0.10834145545959473 }, { "epoch": 5.181884765625e-06, "model_forward_time": 0.025114774703979492, "step": 3396 }, { "epoch": 5.181884765625e-06, "step": 3396, "training_step_time": 0.11182904243469238 }, { "epoch": 5.18341064453125e-06, "model_forward_time": 0.025284528732299805, "step": 3397 }, { "epoch": 5.18341064453125e-06, "step": 3397, "training_step_time": 0.10790872573852539 }, { "epoch": 5.1849365234375e-06, "model_forward_time": 0.025470495223999023, "step": 3398 }, { "epoch": 5.1849365234375e-06, "step": 3398, "training_step_time": 0.10769772529602051 }, { "epoch": 5.18646240234375e-06, "model_forward_time": 0.0251772403717041, "step": 3399 }, { "epoch": 5.18646240234375e-06, "step": 3399, "training_step_time": 0.11132264137268066 }, { "epoch": 5.18798828125e-06, "grad_norm": 0.4903802275657654, "learning_rate": 9.890738003669029e-05, "loss": 0.1303, "step": 3400 }, { "epoch": 5.18798828125e-06, "model_forward_time": 0.02441859245300293, "step": 3400 }, { "epoch": 5.18798828125e-06, "step": 3400, "training_step_time": 0.15500235557556152 }, { "epoch": 5.18951416015625e-06, "model_forward_time": 0.02473282814025879, "step": 3401 }, { "epoch": 5.18951416015625e-06, "step": 3401, "training_step_time": 0.14902639389038086 }, { "epoch": 5.1910400390625e-06, "model_forward_time": 0.02466416358947754, "step": 3402 }, { "epoch": 5.1910400390625e-06, "step": 3402, "training_step_time": 0.11003684997558594 }, { "epoch": 5.19256591796875e-06, "model_forward_time": 0.024868011474609375, "step": 3403 }, { "epoch": 5.19256591796875e-06, "step": 3403, "training_step_time": 0.15742945671081543 }, { "epoch": 5.194091796875e-06, "model_forward_time": 0.024973630905151367, "step": 3404 }, { "epoch": 5.194091796875e-06, "step": 3404, "training_step_time": 0.21064186096191406 }, { "epoch": 5.19561767578125e-06, "model_forward_time": 0.0255889892578125, "step": 3405 }, { "epoch": 5.19561767578125e-06, "step": 3405, "training_step_time": 0.19760489463806152 }, { "epoch": 5.1971435546875e-06, "model_forward_time": 0.024436473846435547, "step": 3406 }, { "epoch": 5.1971435546875e-06, "step": 3406, "training_step_time": 0.13222670555114746 }, { "epoch": 5.19866943359375e-06, "model_forward_time": 0.02452397346496582, "step": 3407 }, { "epoch": 5.19866943359375e-06, "step": 3407, "training_step_time": 0.19873738288879395 }, { "epoch": 5.2001953125e-06, "model_forward_time": 0.02526688575744629, "step": 3408 }, { "epoch": 5.2001953125e-06, "step": 3408, "training_step_time": 0.11535024642944336 }, { "epoch": 5.20172119140625e-06, "model_forward_time": 0.024330854415893555, "step": 3409 }, { "epoch": 5.20172119140625e-06, "step": 3409, "training_step_time": 0.19217872619628906 }, { "epoch": 5.2032470703125e-06, "grad_norm": 0.5298376083374023, "learning_rate": 9.889589113528809e-05, "loss": 0.1192, "step": 3410 }, { "epoch": 5.2032470703125e-06, "model_forward_time": 0.024539947509765625, "step": 3410 }, { "epoch": 5.2032470703125e-06, "step": 3410, "training_step_time": 0.10487961769104004 }, { "epoch": 5.20477294921875e-06, "model_forward_time": 0.0246279239654541, "step": 3411 }, { "epoch": 5.20477294921875e-06, "step": 3411, "training_step_time": 0.10378456115722656 }, { "epoch": 5.206298828125e-06, "model_forward_time": 0.02520275115966797, "step": 3412 }, { "epoch": 5.206298828125e-06, "step": 3412, "training_step_time": 0.10572314262390137 }, { "epoch": 5.20782470703125e-06, "model_forward_time": 0.026127338409423828, "step": 3413 }, { "epoch": 5.20782470703125e-06, "step": 3413, "training_step_time": 0.10959005355834961 }, { "epoch": 5.2093505859375e-06, "model_forward_time": 0.025098800659179688, "step": 3414 }, { "epoch": 5.2093505859375e-06, "step": 3414, "training_step_time": 0.17122721672058105 }, { "epoch": 5.21087646484375e-06, "model_forward_time": 0.02494668960571289, "step": 3415 }, { "epoch": 5.21087646484375e-06, "step": 3415, "training_step_time": 0.16518902778625488 }, { "epoch": 5.21240234375e-06, "model_forward_time": 0.024560928344726562, "step": 3416 }, { "epoch": 5.21240234375e-06, "step": 3416, "training_step_time": 0.10960793495178223 }, { "epoch": 5.21392822265625e-06, "model_forward_time": 0.025112628936767578, "step": 3417 }, { "epoch": 5.21392822265625e-06, "step": 3417, "training_step_time": 0.21888518333435059 }, { "epoch": 5.2154541015625e-06, "model_forward_time": 0.024595975875854492, "step": 3418 }, { "epoch": 5.2154541015625e-06, "step": 3418, "training_step_time": 0.10899710655212402 }, { "epoch": 5.21697998046875e-06, "model_forward_time": 0.02467513084411621, "step": 3419 }, { "epoch": 5.21697998046875e-06, "step": 3419, "training_step_time": 0.11002564430236816 }, { "epoch": 5.218505859375e-06, "grad_norm": 0.7561694979667664, "learning_rate": 9.888434282076758e-05, "loss": 0.1066, "step": 3420 }, { "epoch": 5.218505859375e-06, "model_forward_time": 0.02538609504699707, "step": 3420 }, { "epoch": 5.218505859375e-06, "step": 3420, "training_step_time": 0.10863089561462402 }, { "epoch": 5.22003173828125e-06, "model_forward_time": 0.02506852149963379, "step": 3421 }, { "epoch": 5.22003173828125e-06, "step": 3421, "training_step_time": 0.1073920726776123 }, { "epoch": 5.2215576171875e-06, "model_forward_time": 0.025315046310424805, "step": 3422 }, { "epoch": 5.2215576171875e-06, "step": 3422, "training_step_time": 0.10891890525817871 }, { "epoch": 5.22308349609375e-06, "model_forward_time": 0.0248110294342041, "step": 3423 }, { "epoch": 5.22308349609375e-06, "step": 3423, "training_step_time": 0.10869002342224121 }, { "epoch": 5.224609375e-06, "model_forward_time": 0.025775432586669922, "step": 3424 }, { "epoch": 5.224609375e-06, "step": 3424, "training_step_time": 0.11115384101867676 }, { "epoch": 5.22613525390625e-06, "model_forward_time": 0.024389028549194336, "step": 3425 }, { "epoch": 5.22613525390625e-06, "step": 3425, "training_step_time": 0.11342382431030273 }, { "epoch": 5.2276611328125e-06, "model_forward_time": 0.02429056167602539, "step": 3426 }, { "epoch": 5.2276611328125e-06, "step": 3426, "training_step_time": 0.1109933853149414 }, { "epoch": 5.22918701171875e-06, "model_forward_time": 0.02416706085205078, "step": 3427 }, { "epoch": 5.22918701171875e-06, "step": 3427, "training_step_time": 0.10983943939208984 }, { "epoch": 5.230712890625e-06, "model_forward_time": 0.025521278381347656, "step": 3428 }, { "epoch": 5.230712890625e-06, "step": 3428, "training_step_time": 0.11211776733398438 }, { "epoch": 5.23223876953125e-06, "model_forward_time": 0.025494098663330078, "step": 3429 }, { "epoch": 5.23223876953125e-06, "step": 3429, "training_step_time": 0.11052846908569336 }, { "epoch": 5.2337646484375e-06, "grad_norm": 0.6977363228797913, "learning_rate": 9.887273510716107e-05, "loss": 0.1366, "step": 3430 }, { "epoch": 5.2337646484375e-06, "model_forward_time": 0.025081157684326172, "step": 3430 }, { "epoch": 5.2337646484375e-06, "step": 3430, "training_step_time": 0.11130237579345703 }, { "epoch": 5.23529052734375e-06, "model_forward_time": 0.025034427642822266, "step": 3431 }, { "epoch": 5.23529052734375e-06, "step": 3431, "training_step_time": 0.1090855598449707 }, { "epoch": 5.23681640625e-06, "model_forward_time": 0.025266647338867188, "step": 3432 }, { "epoch": 5.23681640625e-06, "step": 3432, "training_step_time": 0.11012387275695801 }, { "epoch": 5.23834228515625e-06, "model_forward_time": 0.025238990783691406, "step": 3433 }, { "epoch": 5.23834228515625e-06, "step": 3433, "training_step_time": 0.11205697059631348 }, { "epoch": 5.2398681640625e-06, "model_forward_time": 0.02556133270263672, "step": 3434 }, { "epoch": 5.2398681640625e-06, "step": 3434, "training_step_time": 0.1110084056854248 }, { "epoch": 5.24139404296875e-06, "model_forward_time": 0.025668859481811523, "step": 3435 }, { "epoch": 5.24139404296875e-06, "step": 3435, "training_step_time": 0.10806846618652344 }, { "epoch": 5.242919921875e-06, "model_forward_time": 0.02575206756591797, "step": 3436 }, { "epoch": 5.242919921875e-06, "step": 3436, "training_step_time": 0.10979771614074707 }, { "epoch": 5.24444580078125e-06, "model_forward_time": 0.025369644165039062, "step": 3437 }, { "epoch": 5.24444580078125e-06, "step": 3437, "training_step_time": 0.11091947555541992 }, { "epoch": 5.2459716796875e-06, "model_forward_time": 0.025528669357299805, "step": 3438 }, { "epoch": 5.2459716796875e-06, "step": 3438, "training_step_time": 0.10814285278320312 }, { "epoch": 5.24749755859375e-06, "model_forward_time": 0.02489304542541504, "step": 3439 }, { "epoch": 5.24749755859375e-06, "step": 3439, "training_step_time": 0.10756397247314453 }, { "epoch": 5.2490234375e-06, "grad_norm": 0.8019386529922485, "learning_rate": 9.886106800857298e-05, "loss": 0.1324, "step": 3440 }, { "epoch": 5.2490234375e-06, "model_forward_time": 0.025530338287353516, "step": 3440 }, { "epoch": 5.2490234375e-06, "step": 3440, "training_step_time": 0.10972380638122559 }, { "epoch": 5.25054931640625e-06, "model_forward_time": 0.025622844696044922, "step": 3441 }, { "epoch": 5.25054931640625e-06, "step": 3441, "training_step_time": 0.11240863800048828 }, { "epoch": 5.2520751953125e-06, "model_forward_time": 0.025661468505859375, "step": 3442 }, { "epoch": 5.2520751953125e-06, "step": 3442, "training_step_time": 0.10773324966430664 }, { "epoch": 5.25360107421875e-06, "model_forward_time": 0.025330781936645508, "step": 3443 }, { "epoch": 5.25360107421875e-06, "step": 3443, "training_step_time": 0.10838794708251953 }, { "epoch": 5.255126953125e-06, "model_forward_time": 0.02551722526550293, "step": 3444 }, { "epoch": 5.255126953125e-06, "step": 3444, "training_step_time": 0.20933842658996582 }, { "epoch": 5.25665283203125e-06, "model_forward_time": 0.02483987808227539, "step": 3445 }, { "epoch": 5.25665283203125e-06, "step": 3445, "training_step_time": 0.18695831298828125 }, { "epoch": 5.2581787109375e-06, "model_forward_time": 0.024812936782836914, "step": 3446 }, { "epoch": 5.2581787109375e-06, "step": 3446, "training_step_time": 0.15917062759399414 }, { "epoch": 5.25970458984375e-06, "model_forward_time": 0.025088071823120117, "step": 3447 }, { "epoch": 5.25970458984375e-06, "step": 3447, "training_step_time": 0.19579410552978516 }, { "epoch": 5.26123046875e-06, "model_forward_time": 0.02516031265258789, "step": 3448 }, { "epoch": 5.26123046875e-06, "step": 3448, "training_step_time": 0.1849205493927002 }, { "epoch": 5.26275634765625e-06, "model_forward_time": 0.024431705474853516, "step": 3449 }, { "epoch": 5.26275634765625e-06, "step": 3449, "training_step_time": 0.17187905311584473 }, { "epoch": 5.2642822265625e-06, "grad_norm": 0.5509032607078552, "learning_rate": 9.884934153917997e-05, "loss": 0.1291, "step": 3450 }, { "epoch": 5.2642822265625e-06, "model_forward_time": 0.02470564842224121, "step": 3450 }, { "epoch": 5.2642822265625e-06, "step": 3450, "training_step_time": 0.18787860870361328 }, { "epoch": 5.26580810546875e-06, "model_forward_time": 0.024738788604736328, "step": 3451 }, { "epoch": 5.26580810546875e-06, "step": 3451, "training_step_time": 0.11341667175292969 }, { "epoch": 5.267333984375e-06, "model_forward_time": 0.02483677864074707, "step": 3452 }, { "epoch": 5.267333984375e-06, "step": 3452, "training_step_time": 0.10983419418334961 }, { "epoch": 5.26885986328125e-06, "model_forward_time": 0.02579498291015625, "step": 3453 }, { "epoch": 5.26885986328125e-06, "step": 3453, "training_step_time": 0.1969153881072998 }, { "epoch": 5.2703857421875e-06, "model_forward_time": 0.02483391761779785, "step": 3454 }, { "epoch": 5.2703857421875e-06, "step": 3454, "training_step_time": 0.11024641990661621 }, { "epoch": 5.27191162109375e-06, "model_forward_time": 0.024932861328125, "step": 3455 }, { "epoch": 5.27191162109375e-06, "step": 3455, "training_step_time": 0.10762500762939453 }, { "epoch": 5.2734375e-06, "model_forward_time": 0.02577996253967285, "step": 3456 }, { "epoch": 5.2734375e-06, "step": 3456, "training_step_time": 0.1114046573638916 }, { "epoch": 5.27496337890625e-06, "model_forward_time": 0.026149511337280273, "step": 3457 }, { "epoch": 5.27496337890625e-06, "step": 3457, "training_step_time": 0.10918879508972168 }, { "epoch": 5.2764892578125e-06, "model_forward_time": 0.02549576759338379, "step": 3458 }, { "epoch": 5.2764892578125e-06, "step": 3458, "training_step_time": 0.16745948791503906 }, { "epoch": 5.27801513671875e-06, "model_forward_time": 0.024671554565429688, "step": 3459 }, { "epoch": 5.27801513671875e-06, "step": 3459, "training_step_time": 0.16326236724853516 }, { "epoch": 5.279541015625e-06, "grad_norm": 0.6027151346206665, "learning_rate": 9.88375557132308e-05, "loss": 0.1113, "step": 3460 }, { "epoch": 5.279541015625e-06, "model_forward_time": 0.024532079696655273, "step": 3460 }, { "epoch": 5.279541015625e-06, "step": 3460, "training_step_time": 0.10580945014953613 }, { "epoch": 5.28106689453125e-06, "model_forward_time": 0.024826526641845703, "step": 3461 }, { "epoch": 5.28106689453125e-06, "step": 3461, "training_step_time": 0.17293524742126465 }, { "epoch": 5.2825927734375e-06, "model_forward_time": 0.02475595474243164, "step": 3462 }, { "epoch": 5.2825927734375e-06, "step": 3462, "training_step_time": 0.17767882347106934 }, { "epoch": 5.28411865234375e-06, "model_forward_time": 0.025221586227416992, "step": 3463 }, { "epoch": 5.28411865234375e-06, "step": 3463, "training_step_time": 0.10974311828613281 }, { "epoch": 5.28564453125e-06, "model_forward_time": 0.025029420852661133, "step": 3464 }, { "epoch": 5.28564453125e-06, "step": 3464, "training_step_time": 0.10975122451782227 }, { "epoch": 5.28717041015625e-06, "model_forward_time": 0.025022268295288086, "step": 3465 }, { "epoch": 5.28717041015625e-06, "step": 3465, "training_step_time": 0.10857844352722168 }, { "epoch": 5.2886962890625e-06, "model_forward_time": 0.025325536727905273, "step": 3466 }, { "epoch": 5.2886962890625e-06, "step": 3466, "training_step_time": 0.11226963996887207 }, { "epoch": 5.29022216796875e-06, "model_forward_time": 0.025382518768310547, "step": 3467 }, { "epoch": 5.29022216796875e-06, "step": 3467, "training_step_time": 0.11269545555114746 }, { "epoch": 5.291748046875e-06, "model_forward_time": 0.0255889892578125, "step": 3468 }, { "epoch": 5.291748046875e-06, "step": 3468, "training_step_time": 0.11371660232543945 }, { "epoch": 5.29327392578125e-06, "model_forward_time": 0.025280475616455078, "step": 3469 }, { "epoch": 5.29327392578125e-06, "step": 3469, "training_step_time": 0.1140754222869873 }, { "epoch": 5.2947998046875e-06, "grad_norm": 0.8043185472488403, "learning_rate": 9.882571054504636e-05, "loss": 0.1424, "step": 3470 }, { "epoch": 5.2947998046875e-06, "model_forward_time": 0.025569438934326172, "step": 3470 }, { "epoch": 5.2947998046875e-06, "step": 3470, "training_step_time": 0.11008691787719727 }, { "epoch": 5.29632568359375e-06, "model_forward_time": 0.0256044864654541, "step": 3471 }, { "epoch": 5.29632568359375e-06, "step": 3471, "training_step_time": 0.11122870445251465 }, { "epoch": 5.2978515625e-06, "model_forward_time": 0.02504444122314453, "step": 3472 }, { "epoch": 5.2978515625e-06, "step": 3472, "training_step_time": 0.11519360542297363 }, { "epoch": 5.29937744140625e-06, "model_forward_time": 0.025614261627197266, "step": 3473 }, { "epoch": 5.29937744140625e-06, "step": 3473, "training_step_time": 0.1095435619354248 }, { "epoch": 5.3009033203125e-06, "model_forward_time": 0.025815486907958984, "step": 3474 }, { "epoch": 5.3009033203125e-06, "step": 3474, "training_step_time": 0.1093592643737793 }, { "epoch": 5.30242919921875e-06, "model_forward_time": 0.024956464767456055, "step": 3475 }, { "epoch": 5.30242919921875e-06, "step": 3475, "training_step_time": 0.11590290069580078 }, { "epoch": 5.303955078125e-06, "model_forward_time": 0.025304317474365234, "step": 3476 }, { "epoch": 5.303955078125e-06, "step": 3476, "training_step_time": 0.11078190803527832 }, { "epoch": 5.30548095703125e-06, "model_forward_time": 0.025183677673339844, "step": 3477 }, { "epoch": 5.30548095703125e-06, "step": 3477, "training_step_time": 0.11088895797729492 }, { "epoch": 5.3070068359375e-06, "model_forward_time": 0.02527308464050293, "step": 3478 }, { "epoch": 5.3070068359375e-06, "step": 3478, "training_step_time": 0.11271524429321289 }, { "epoch": 5.30853271484375e-06, "model_forward_time": 0.025701522827148438, "step": 3479 }, { "epoch": 5.30853271484375e-06, "step": 3479, "training_step_time": 0.11104512214660645 }, { "epoch": 5.31005859375e-06, "grad_norm": 0.6473183035850525, "learning_rate": 9.881380604901964e-05, "loss": 0.1129, "step": 3480 }, { "epoch": 5.31005859375e-06, "model_forward_time": 0.025483369827270508, "step": 3480 }, { "epoch": 5.31005859375e-06, "step": 3480, "training_step_time": 0.10886645317077637 }, { "epoch": 5.31158447265625e-06, "model_forward_time": 0.025574445724487305, "step": 3481 }, { "epoch": 5.31158447265625e-06, "step": 3481, "training_step_time": 0.11009550094604492 }, { "epoch": 5.3131103515625e-06, "model_forward_time": 0.02535390853881836, "step": 3482 }, { "epoch": 5.3131103515625e-06, "step": 3482, "training_step_time": 0.10774850845336914 }, { "epoch": 5.31463623046875e-06, "model_forward_time": 0.024837493896484375, "step": 3483 }, { "epoch": 5.31463623046875e-06, "step": 3483, "training_step_time": 0.11113739013671875 }, { "epoch": 5.316162109375e-06, "model_forward_time": 0.0252535343170166, "step": 3484 }, { "epoch": 5.316162109375e-06, "step": 3484, "training_step_time": 0.11003375053405762 }, { "epoch": 5.31768798828125e-06, "model_forward_time": 0.02564072608947754, "step": 3485 }, { "epoch": 5.31768798828125e-06, "step": 3485, "training_step_time": 0.10724639892578125 }, { "epoch": 5.3192138671875e-06, "model_forward_time": 0.025247573852539062, "step": 3486 }, { "epoch": 5.3192138671875e-06, "step": 3486, "training_step_time": 0.11706209182739258 }, { "epoch": 5.32073974609375e-06, "model_forward_time": 0.027644872665405273, "step": 3487 }, { "epoch": 5.32073974609375e-06, "step": 3487, "training_step_time": 0.11270380020141602 }, { "epoch": 5.322265625e-06, "model_forward_time": 0.025614023208618164, "step": 3488 }, { "epoch": 5.322265625e-06, "step": 3488, "training_step_time": 0.15154242515563965 }, { "epoch": 5.32379150390625e-06, "model_forward_time": 0.024924755096435547, "step": 3489 }, { "epoch": 5.32379150390625e-06, "step": 3489, "training_step_time": 0.20758414268493652 }, { "epoch": 5.3253173828125e-06, "grad_norm": 0.48119691014289856, "learning_rate": 9.880184223961573e-05, "loss": 0.1275, "step": 3490 }, { "epoch": 5.3253173828125e-06, "model_forward_time": 0.025310754776000977, "step": 3490 }, { "epoch": 5.3253173828125e-06, "step": 3490, "training_step_time": 0.13411545753479004 }, { "epoch": 5.32684326171875e-06, "model_forward_time": 0.024502992630004883, "step": 3491 }, { "epoch": 5.32684326171875e-06, "step": 3491, "training_step_time": 0.20780038833618164 }, { "epoch": 5.328369140625e-06, "model_forward_time": 0.0245358943939209, "step": 3492 }, { "epoch": 5.328369140625e-06, "step": 3492, "training_step_time": 0.1842968463897705 }, { "epoch": 5.32989501953125e-06, "model_forward_time": 0.02517390251159668, "step": 3493 }, { "epoch": 5.32989501953125e-06, "step": 3493, "training_step_time": 0.17435359954833984 }, { "epoch": 5.3314208984375e-06, "model_forward_time": 0.024028778076171875, "step": 3494 }, { "epoch": 5.3314208984375e-06, "step": 3494, "training_step_time": 0.1887679100036621 }, { "epoch": 5.33294677734375e-06, "model_forward_time": 0.024264812469482422, "step": 3495 }, { "epoch": 5.33294677734375e-06, "step": 3495, "training_step_time": 0.11607527732849121 }, { "epoch": 5.33447265625e-06, "model_forward_time": 0.02495265007019043, "step": 3496 }, { "epoch": 5.33447265625e-06, "step": 3496, "training_step_time": 0.10937714576721191 }, { "epoch": 5.33599853515625e-06, "model_forward_time": 0.02533555030822754, "step": 3497 }, { "epoch": 5.33599853515625e-06, "step": 3497, "training_step_time": 0.19635248184204102 }, { "epoch": 5.3375244140625e-06, "model_forward_time": 0.024823665618896484, "step": 3498 }, { "epoch": 5.3375244140625e-06, "step": 3498, "training_step_time": 0.10367059707641602 }, { "epoch": 5.33905029296875e-06, "model_forward_time": 0.02450251579284668, "step": 3499 }, { "epoch": 5.33905029296875e-06, "step": 3499, "training_step_time": 0.11481595039367676 }, { "epoch": 5.340576171875e-06, "grad_norm": 0.6093948483467102, "learning_rate": 9.878981913137179e-05, "loss": 0.0988, "step": 3500 }, { "epoch": 5.340576171875e-06, "model_forward_time": 0.024262666702270508, "step": 3500 }, { "epoch": 5.340576171875e-06, "step": 3500, "training_step_time": 0.10814404487609863 }, { "epoch": 5.34210205078125e-06, "model_forward_time": 0.025707244873046875, "step": 3501 }, { "epoch": 5.34210205078125e-06, "step": 3501, "training_step_time": 0.10902523994445801 }, { "epoch": 5.3436279296875e-06, "model_forward_time": 0.026643991470336914, "step": 3502 }, { "epoch": 5.3436279296875e-06, "step": 3502, "training_step_time": 0.11229538917541504 }, { "epoch": 5.34515380859375e-06, "model_forward_time": 0.025742530822753906, "step": 3503 }, { "epoch": 5.34515380859375e-06, "step": 3503, "training_step_time": 0.11125755310058594 }, { "epoch": 5.3466796875e-06, "model_forward_time": 0.025651216506958008, "step": 3504 }, { "epoch": 5.3466796875e-06, "step": 3504, "training_step_time": 0.12004995346069336 }, { "epoch": 5.34820556640625e-06, "model_forward_time": 0.02694225311279297, "step": 3505 }, { "epoch": 5.34820556640625e-06, "step": 3505, "training_step_time": 0.13196349143981934 }, { "epoch": 5.3497314453125e-06, "model_forward_time": 0.025164127349853516, "step": 3506 }, { "epoch": 5.3497314453125e-06, "step": 3506, "training_step_time": 0.18841218948364258 }, { "epoch": 5.35125732421875e-06, "model_forward_time": 0.024602890014648438, "step": 3507 }, { "epoch": 5.35125732421875e-06, "step": 3507, "training_step_time": 0.11376333236694336 }, { "epoch": 5.352783203125e-06, "model_forward_time": 0.024126291275024414, "step": 3508 }, { "epoch": 5.352783203125e-06, "step": 3508, "training_step_time": 0.11130547523498535 }, { "epoch": 5.35430908203125e-06, "model_forward_time": 0.025876998901367188, "step": 3509 }, { "epoch": 5.35430908203125e-06, "step": 3509, "training_step_time": 0.14697885513305664 }, { "epoch": 5.3558349609375e-06, "grad_norm": 0.5382101535797119, "learning_rate": 9.877773673889701e-05, "loss": 0.1, "step": 3510 }, { "epoch": 5.3558349609375e-06, "model_forward_time": 0.02559971809387207, "step": 3510 }, { "epoch": 5.3558349609375e-06, "step": 3510, "training_step_time": 0.1353740692138672 }, { "epoch": 5.35736083984375e-06, "model_forward_time": 0.025170326232910156, "step": 3511 }, { "epoch": 5.35736083984375e-06, "step": 3511, "training_step_time": 0.1329174041748047 }, { "epoch": 5.35888671875e-06, "model_forward_time": 0.025002241134643555, "step": 3512 }, { "epoch": 5.35888671875e-06, "step": 3512, "training_step_time": 0.1267375946044922 }, { "epoch": 5.36041259765625e-06, "model_forward_time": 0.025343894958496094, "step": 3513 }, { "epoch": 5.36041259765625e-06, "step": 3513, "training_step_time": 0.1313920021057129 }, { "epoch": 5.3619384765625e-06, "model_forward_time": 0.02546834945678711, "step": 3514 }, { "epoch": 5.3619384765625e-06, "step": 3514, "training_step_time": 0.12313628196716309 }, { "epoch": 5.36346435546875e-06, "model_forward_time": 0.025356531143188477, "step": 3515 }, { "epoch": 5.36346435546875e-06, "step": 3515, "training_step_time": 0.11833810806274414 }, { "epoch": 5.364990234375e-06, "model_forward_time": 0.024939775466918945, "step": 3516 }, { "epoch": 5.364990234375e-06, "step": 3516, "training_step_time": 0.11993956565856934 }, { "epoch": 5.36651611328125e-06, "model_forward_time": 0.025254249572753906, "step": 3517 }, { "epoch": 5.36651611328125e-06, "step": 3517, "training_step_time": 0.1139516830444336 }, { "epoch": 5.3680419921875e-06, "model_forward_time": 0.025592565536499023, "step": 3518 }, { "epoch": 5.3680419921875e-06, "step": 3518, "training_step_time": 0.11178326606750488 }, { "epoch": 5.36956787109375e-06, "model_forward_time": 0.024964332580566406, "step": 3519 }, { "epoch": 5.36956787109375e-06, "step": 3519, "training_step_time": 0.11345934867858887 }, { "epoch": 5.37109375e-06, "grad_norm": 0.7704569101333618, "learning_rate": 9.876559507687267e-05, "loss": 0.1345, "step": 3520 }, { "epoch": 5.37109375e-06, "model_forward_time": 0.024475574493408203, "step": 3520 }, { "epoch": 5.37109375e-06, "step": 3520, "training_step_time": 0.1101231575012207 }, { "epoch": 5.37261962890625e-06, "model_forward_time": 0.02429819107055664, "step": 3521 }, { "epoch": 5.37261962890625e-06, "step": 3521, "training_step_time": 0.11268067359924316 }, { "epoch": 5.3741455078125e-06, "model_forward_time": 0.02555990219116211, "step": 3522 }, { "epoch": 5.3741455078125e-06, "step": 3522, "training_step_time": 0.11799907684326172 }, { "epoch": 5.37567138671875e-06, "model_forward_time": 0.025043010711669922, "step": 3523 }, { "epoch": 5.37567138671875e-06, "step": 3523, "training_step_time": 0.11258649826049805 }, { "epoch": 5.377197265625e-06, "model_forward_time": 0.025302648544311523, "step": 3524 }, { "epoch": 5.377197265625e-06, "step": 3524, "training_step_time": 0.1079258918762207 }, { "epoch": 5.37872314453125e-06, "model_forward_time": 0.025361299514770508, "step": 3525 }, { "epoch": 5.37872314453125e-06, "step": 3525, "training_step_time": 0.11121892929077148 }, { "epoch": 5.3802490234375e-06, "model_forward_time": 0.0252227783203125, "step": 3526 }, { "epoch": 5.3802490234375e-06, "step": 3526, "training_step_time": 0.11141633987426758 }, { "epoch": 5.38177490234375e-06, "model_forward_time": 0.025115013122558594, "step": 3527 }, { "epoch": 5.38177490234375e-06, "step": 3527, "training_step_time": 0.10831260681152344 }, { "epoch": 5.38330078125e-06, "model_forward_time": 0.025998830795288086, "step": 3528 }, { "epoch": 5.38330078125e-06, "step": 3528, "training_step_time": 0.1098785400390625 }, { "epoch": 5.38482666015625e-06, "model_forward_time": 0.02529764175415039, "step": 3529 }, { "epoch": 5.38482666015625e-06, "step": 3529, "training_step_time": 0.10957956314086914 }, { "epoch": 5.3863525390625e-06, "grad_norm": 0.7169075608253479, "learning_rate": 9.875339416005202e-05, "loss": 0.1231, "step": 3530 }, { "epoch": 5.3863525390625e-06, "model_forward_time": 0.027828216552734375, "step": 3530 }, { "epoch": 5.3863525390625e-06, "step": 3530, "training_step_time": 0.11369132995605469 }, { "epoch": 5.38787841796875e-06, "model_forward_time": 0.02522420883178711, "step": 3531 }, { "epoch": 5.38787841796875e-06, "step": 3531, "training_step_time": 0.11010885238647461 }, { "epoch": 5.389404296875e-06, "model_forward_time": 0.025552988052368164, "step": 3532 }, { "epoch": 5.389404296875e-06, "step": 3532, "training_step_time": 0.1084141731262207 }, { "epoch": 5.39093017578125e-06, "model_forward_time": 0.025548934936523438, "step": 3533 }, { "epoch": 5.39093017578125e-06, "step": 3533, "training_step_time": 0.17070627212524414 }, { "epoch": 5.3924560546875e-06, "model_forward_time": 0.024975061416625977, "step": 3534 }, { "epoch": 5.3924560546875e-06, "step": 3534, "training_step_time": 0.18741536140441895 }, { "epoch": 5.39398193359375e-06, "model_forward_time": 0.024165630340576172, "step": 3535 }, { "epoch": 5.39398193359375e-06, "step": 3535, "training_step_time": 0.15580201148986816 }, { "epoch": 5.3955078125e-06, "model_forward_time": 0.025069236755371094, "step": 3536 }, { "epoch": 5.3955078125e-06, "step": 3536, "training_step_time": 0.2123889923095703 }, { "epoch": 5.39703369140625e-06, "model_forward_time": 0.0242156982421875, "step": 3537 }, { "epoch": 5.39703369140625e-06, "step": 3537, "training_step_time": 0.1835479736328125 }, { "epoch": 5.3985595703125e-06, "model_forward_time": 0.02461862564086914, "step": 3538 }, { "epoch": 5.3985595703125e-06, "step": 3538, "training_step_time": 0.15091466903686523 }, { "epoch": 5.40008544921875e-06, "model_forward_time": 0.024627685546875, "step": 3539 }, { "epoch": 5.40008544921875e-06, "step": 3539, "training_step_time": 0.18666601181030273 }, { "epoch": 5.401611328125e-06, "grad_norm": 0.9182036519050598, "learning_rate": 9.87411340032603e-05, "loss": 0.1243, "step": 3540 }, { "epoch": 5.401611328125e-06, "model_forward_time": 0.02465224266052246, "step": 3540 }, { "epoch": 5.401611328125e-06, "step": 3540, "training_step_time": 0.10913467407226562 }, { "epoch": 5.40313720703125e-06, "model_forward_time": 0.024746417999267578, "step": 3541 }, { "epoch": 5.40313720703125e-06, "step": 3541, "training_step_time": 0.19028735160827637 }, { "epoch": 5.4046630859375e-06, "model_forward_time": 0.024817943572998047, "step": 3542 }, { "epoch": 5.4046630859375e-06, "step": 3542, "training_step_time": 0.10945010185241699 }, { "epoch": 5.40618896484375e-06, "model_forward_time": 0.024428367614746094, "step": 3543 }, { "epoch": 5.40618896484375e-06, "step": 3543, "training_step_time": 0.10692286491394043 }, { "epoch": 5.40771484375e-06, "model_forward_time": 0.02686452865600586, "step": 3544 }, { "epoch": 5.40771484375e-06, "step": 3544, "training_step_time": 0.10947251319885254 }, { "epoch": 5.40924072265625e-06, "model_forward_time": 0.02622079849243164, "step": 3545 }, { "epoch": 5.40924072265625e-06, "step": 3545, "training_step_time": 0.1086728572845459 }, { "epoch": 5.4107666015625e-06, "model_forward_time": 0.026522159576416016, "step": 3546 }, { "epoch": 5.4107666015625e-06, "step": 3546, "training_step_time": 0.10749149322509766 }, { "epoch": 5.41229248046875e-06, "model_forward_time": 0.02649402618408203, "step": 3547 }, { "epoch": 5.41229248046875e-06, "step": 3547, "training_step_time": 0.10960555076599121 }, { "epoch": 5.413818359375e-06, "model_forward_time": 0.026070117950439453, "step": 3548 }, { "epoch": 5.413818359375e-06, "step": 3548, "training_step_time": 0.11558008193969727 }, { "epoch": 5.41534423828125e-06, "model_forward_time": 0.02567911148071289, "step": 3549 }, { "epoch": 5.41534423828125e-06, "step": 3549, "training_step_time": 0.1335005760192871 }, { "epoch": 5.4168701171875e-06, "grad_norm": 0.9292612075805664, "learning_rate": 9.872881462139479e-05, "loss": 0.1365, "step": 3550 }, { "epoch": 5.4168701171875e-06, "model_forward_time": 0.025362730026245117, "step": 3550 }, { "epoch": 5.4168701171875e-06, "step": 3550, "training_step_time": 0.20851397514343262 }, { "epoch": 5.41839599609375e-06, "model_forward_time": 0.024652719497680664, "step": 3551 }, { "epoch": 5.41839599609375e-06, "step": 3551, "training_step_time": 0.14301204681396484 }, { "epoch": 5.419921875e-06, "model_forward_time": 0.024757862091064453, "step": 3552 }, { "epoch": 5.419921875e-06, "step": 3552, "training_step_time": 0.1331467628479004 }, { "epoch": 5.42144775390625e-06, "model_forward_time": 0.024792194366455078, "step": 3553 }, { "epoch": 5.42144775390625e-06, "step": 3553, "training_step_time": 0.1241154670715332 }, { "epoch": 5.4229736328125e-06, "model_forward_time": 0.02486109733581543, "step": 3554 }, { "epoch": 5.4229736328125e-06, "step": 3554, "training_step_time": 0.11758589744567871 }, { "epoch": 5.42449951171875e-06, "model_forward_time": 0.025557518005371094, "step": 3555 }, { "epoch": 5.42449951171875e-06, "step": 3555, "training_step_time": 0.12010407447814941 }, { "epoch": 5.426025390625e-06, "model_forward_time": 0.025249004364013672, "step": 3556 }, { "epoch": 5.426025390625e-06, "step": 3556, "training_step_time": 0.11636829376220703 }, { "epoch": 5.42755126953125e-06, "model_forward_time": 0.025246143341064453, "step": 3557 }, { "epoch": 5.42755126953125e-06, "step": 3557, "training_step_time": 0.1151571273803711 }, { "epoch": 5.4290771484375e-06, "model_forward_time": 0.025602340698242188, "step": 3558 }, { "epoch": 5.4290771484375e-06, "step": 3558, "training_step_time": 0.11262130737304688 }, { "epoch": 5.43060302734375e-06, "model_forward_time": 0.02532672882080078, "step": 3559 }, { "epoch": 5.43060302734375e-06, "step": 3559, "training_step_time": 0.11426997184753418 }, { "epoch": 5.43212890625e-06, "grad_norm": 0.7721486687660217, "learning_rate": 9.871643602942469e-05, "loss": 0.0937, "step": 3560 }, { "epoch": 5.43212890625e-06, "model_forward_time": 0.025256633758544922, "step": 3560 }, { "epoch": 5.43212890625e-06, "step": 3560, "training_step_time": 0.10849881172180176 }, { "epoch": 5.43365478515625e-06, "model_forward_time": 0.025162935256958008, "step": 3561 }, { "epoch": 5.43365478515625e-06, "step": 3561, "training_step_time": 0.1158592700958252 }, { "epoch": 5.4351806640625e-06, "model_forward_time": 0.02406764030456543, "step": 3562 }, { "epoch": 5.4351806640625e-06, "step": 3562, "training_step_time": 0.11043691635131836 }, { "epoch": 5.43670654296875e-06, "model_forward_time": 0.025604724884033203, "step": 3563 }, { "epoch": 5.43670654296875e-06, "step": 3563, "training_step_time": 0.11426353454589844 }, { "epoch": 5.438232421875e-06, "model_forward_time": 0.02533245086669922, "step": 3564 }, { "epoch": 5.438232421875e-06, "step": 3564, "training_step_time": 0.11117672920227051 }, { "epoch": 5.43975830078125e-06, "model_forward_time": 0.025414466857910156, "step": 3565 }, { "epoch": 5.43975830078125e-06, "step": 3565, "training_step_time": 0.11493635177612305 }, { "epoch": 5.4412841796875e-06, "model_forward_time": 0.02533102035522461, "step": 3566 }, { "epoch": 5.4412841796875e-06, "step": 3566, "training_step_time": 0.10959219932556152 }, { "epoch": 5.44281005859375e-06, "model_forward_time": 0.025224924087524414, "step": 3567 }, { "epoch": 5.44281005859375e-06, "step": 3567, "training_step_time": 0.10678339004516602 }, { "epoch": 5.4443359375e-06, "model_forward_time": 0.025284290313720703, "step": 3568 }, { "epoch": 5.4443359375e-06, "step": 3568, "training_step_time": 0.10858750343322754 }, { "epoch": 5.44586181640625e-06, "model_forward_time": 0.024285554885864258, "step": 3569 }, { "epoch": 5.44586181640625e-06, "step": 3569, "training_step_time": 0.11386609077453613 }, { "epoch": 5.4473876953125e-06, "grad_norm": 0.5761857628822327, "learning_rate": 9.870399824239117e-05, "loss": 0.1072, "step": 3570 }, { "epoch": 5.4473876953125e-06, "model_forward_time": 0.025664806365966797, "step": 3570 }, { "epoch": 5.4473876953125e-06, "step": 3570, "training_step_time": 0.10714960098266602 }, { "epoch": 5.44891357421875e-06, "model_forward_time": 0.024098634719848633, "step": 3571 }, { "epoch": 5.44891357421875e-06, "step": 3571, "training_step_time": 0.11076879501342773 }, { "epoch": 5.450439453125e-06, "model_forward_time": 0.02530503273010254, "step": 3572 }, { "epoch": 5.450439453125e-06, "step": 3572, "training_step_time": 0.10745954513549805 }, { "epoch": 5.45196533203125e-06, "model_forward_time": 0.02584552764892578, "step": 3573 }, { "epoch": 5.45196533203125e-06, "step": 3573, "training_step_time": 0.10828232765197754 }, { "epoch": 5.4534912109375e-06, "model_forward_time": 0.025543212890625, "step": 3574 }, { "epoch": 5.4534912109375e-06, "step": 3574, "training_step_time": 0.11190414428710938 }, { "epoch": 5.45501708984375e-06, "model_forward_time": 0.025513410568237305, "step": 3575 }, { "epoch": 5.45501708984375e-06, "step": 3575, "training_step_time": 0.10922098159790039 }, { "epoch": 5.45654296875e-06, "model_forward_time": 0.02511143684387207, "step": 3576 }, { "epoch": 5.45654296875e-06, "step": 3576, "training_step_time": 0.10804390907287598 }, { "epoch": 5.45806884765625e-06, "model_forward_time": 0.02501535415649414, "step": 3577 }, { "epoch": 5.45806884765625e-06, "step": 3577, "training_step_time": 0.1902482509613037 }, { "epoch": 5.4595947265625e-06, "model_forward_time": 0.02481818199157715, "step": 3578 }, { "epoch": 5.4595947265625e-06, "step": 3578, "training_step_time": 0.12123632431030273 }, { "epoch": 5.46112060546875e-06, "model_forward_time": 0.025467395782470703, "step": 3579 }, { "epoch": 5.46112060546875e-06, "step": 3579, "training_step_time": 0.11988615989685059 }, { "epoch": 5.462646484375e-06, "grad_norm": 0.8048961162567139, "learning_rate": 9.869150127540727e-05, "loss": 0.1586, "step": 3580 }, { "epoch": 5.462646484375e-06, "model_forward_time": 0.026096820831298828, "step": 3580 }, { "epoch": 5.462646484375e-06, "step": 3580, "training_step_time": 0.2304058074951172 }, { "epoch": 5.46417236328125e-06, "model_forward_time": 0.024451017379760742, "step": 3581 }, { "epoch": 5.46417236328125e-06, "step": 3581, "training_step_time": 0.15493535995483398 }, { "epoch": 5.4656982421875e-06, "model_forward_time": 0.024720430374145508, "step": 3582 }, { "epoch": 5.4656982421875e-06, "step": 3582, "training_step_time": 0.20496821403503418 }, { "epoch": 5.46722412109375e-06, "model_forward_time": 0.024452924728393555, "step": 3583 }, { "epoch": 5.46722412109375e-06, "step": 3583, "training_step_time": 0.1382906436920166 }, { "epoch": 5.46875e-06, "model_forward_time": 0.024465084075927734, "step": 3584 }, { "epoch": 5.46875e-06, "step": 3584, "training_step_time": 0.12224531173706055 }, { "epoch": 5.47027587890625e-06, "model_forward_time": 0.024843215942382812, "step": 3585 }, { "epoch": 5.47027587890625e-06, "step": 3585, "training_step_time": 0.10573434829711914 }, { "epoch": 5.4718017578125e-06, "model_forward_time": 0.02535557746887207, "step": 3586 }, { "epoch": 5.4718017578125e-06, "step": 3586, "training_step_time": 0.20642828941345215 }, { "epoch": 5.47332763671875e-06, "model_forward_time": 0.024872541427612305, "step": 3587 }, { "epoch": 5.47332763671875e-06, "step": 3587, "training_step_time": 0.10535335540771484 }, { "epoch": 5.474853515625e-06, "model_forward_time": 0.024709701538085938, "step": 3588 }, { "epoch": 5.474853515625e-06, "step": 3588, "training_step_time": 0.10942673683166504 }, { "epoch": 5.47637939453125e-06, "model_forward_time": 0.025477886199951172, "step": 3589 }, { "epoch": 5.47637939453125e-06, "step": 3589, "training_step_time": 0.10938739776611328 }, { "epoch": 5.4779052734375e-06, "grad_norm": 0.9959975481033325, "learning_rate": 9.867894514365802e-05, "loss": 0.1271, "step": 3590 }, { "epoch": 5.4779052734375e-06, "model_forward_time": 0.02608323097229004, "step": 3590 }, { "epoch": 5.4779052734375e-06, "step": 3590, "training_step_time": 0.11232542991638184 }, { "epoch": 5.47943115234375e-06, "model_forward_time": 0.025028467178344727, "step": 3591 }, { "epoch": 5.47943115234375e-06, "step": 3591, "training_step_time": 0.20829033851623535 }, { "epoch": 5.48095703125e-06, "model_forward_time": 0.025417566299438477, "step": 3592 }, { "epoch": 5.48095703125e-06, "step": 3592, "training_step_time": 0.13596153259277344 }, { "epoch": 5.48248291015625e-06, "model_forward_time": 0.02444148063659668, "step": 3593 }, { "epoch": 5.48248291015625e-06, "step": 3593, "training_step_time": 0.14188671112060547 }, { "epoch": 5.4840087890625e-06, "model_forward_time": 0.025122642517089844, "step": 3594 }, { "epoch": 5.4840087890625e-06, "step": 3594, "training_step_time": 0.18038082122802734 }, { "epoch": 5.48553466796875e-06, "model_forward_time": 0.026125192642211914, "step": 3595 }, { "epoch": 5.48553466796875e-06, "step": 3595, "training_step_time": 0.12324070930480957 }, { "epoch": 5.487060546875e-06, "model_forward_time": 0.026318788528442383, "step": 3596 }, { "epoch": 5.487060546875e-06, "step": 3596, "training_step_time": 0.1178884506225586 }, { "epoch": 5.48858642578125e-06, "model_forward_time": 0.025536537170410156, "step": 3597 }, { "epoch": 5.48858642578125e-06, "step": 3597, "training_step_time": 0.11551213264465332 }, { "epoch": 5.4901123046875e-06, "model_forward_time": 0.02524590492248535, "step": 3598 }, { "epoch": 5.4901123046875e-06, "step": 3598, "training_step_time": 0.11681175231933594 }, { "epoch": 5.49163818359375e-06, "model_forward_time": 0.025385379791259766, "step": 3599 }, { "epoch": 5.49163818359375e-06, "step": 3599, "training_step_time": 0.11041831970214844 }, { "epoch": 5.4931640625e-06, "grad_norm": 0.5958221554756165, "learning_rate": 9.86663298624003e-05, "loss": 0.127, "step": 3600 }, { "epoch": 5.4931640625e-06, "model_forward_time": 0.02586841583251953, "step": 3600 }, { "epoch": 5.4931640625e-06, "step": 3600, "training_step_time": 0.11036968231201172 }, { "epoch": 5.49468994140625e-06, "model_forward_time": 0.025845050811767578, "step": 3601 }, { "epoch": 5.49468994140625e-06, "step": 3601, "training_step_time": 0.11132287979125977 }, { "epoch": 5.4962158203125e-06, "model_forward_time": 0.025480031967163086, "step": 3602 }, { "epoch": 5.4962158203125e-06, "step": 3602, "training_step_time": 0.10953688621520996 }, { "epoch": 5.49774169921875e-06, "model_forward_time": 0.0252835750579834, "step": 3603 }, { "epoch": 5.49774169921875e-06, "step": 3603, "training_step_time": 0.10858917236328125 }, { "epoch": 5.499267578125e-06, "model_forward_time": 0.02533864974975586, "step": 3604 }, { "epoch": 5.499267578125e-06, "step": 3604, "training_step_time": 0.11181068420410156 }, { "epoch": 5.50079345703125e-06, "model_forward_time": 0.02553248405456543, "step": 3605 }, { "epoch": 5.50079345703125e-06, "step": 3605, "training_step_time": 0.10823225975036621 }, { "epoch": 5.5023193359375e-06, "model_forward_time": 0.0254366397857666, "step": 3606 }, { "epoch": 5.5023193359375e-06, "step": 3606, "training_step_time": 0.10924696922302246 }, { "epoch": 5.50384521484375e-06, "model_forward_time": 0.025003671646118164, "step": 3607 }, { "epoch": 5.50384521484375e-06, "step": 3607, "training_step_time": 0.11053276062011719 }, { "epoch": 5.50537109375e-06, "model_forward_time": 0.0250704288482666, "step": 3608 }, { "epoch": 5.50537109375e-06, "step": 3608, "training_step_time": 0.10642480850219727 }, { "epoch": 5.50689697265625e-06, "model_forward_time": 0.024239540100097656, "step": 3609 }, { "epoch": 5.50689697265625e-06, "step": 3609, "training_step_time": 0.11119461059570312 }, { "epoch": 5.5084228515625e-06, "grad_norm": 0.7978876829147339, "learning_rate": 9.865365544696285e-05, "loss": 0.1218, "step": 3610 }, { "epoch": 5.5084228515625e-06, "model_forward_time": 0.024227380752563477, "step": 3610 }, { "epoch": 5.5084228515625e-06, "step": 3610, "training_step_time": 0.10892510414123535 }, { "epoch": 5.50994873046875e-06, "model_forward_time": 0.024201393127441406, "step": 3611 }, { "epoch": 5.50994873046875e-06, "step": 3611, "training_step_time": 0.11036515235900879 }, { "epoch": 5.511474609375e-06, "model_forward_time": 0.024261951446533203, "step": 3612 }, { "epoch": 5.511474609375e-06, "step": 3612, "training_step_time": 0.11133956909179688 }, { "epoch": 5.51300048828125e-06, "model_forward_time": 0.025452375411987305, "step": 3613 }, { "epoch": 5.51300048828125e-06, "step": 3613, "training_step_time": 0.11284828186035156 }, { "epoch": 5.5145263671875e-06, "model_forward_time": 0.02516007423400879, "step": 3614 }, { "epoch": 5.5145263671875e-06, "step": 3614, "training_step_time": 0.11010622978210449 }, { "epoch": 5.51605224609375e-06, "model_forward_time": 0.025304794311523438, "step": 3615 }, { "epoch": 5.51605224609375e-06, "step": 3615, "training_step_time": 0.1081843376159668 }, { "epoch": 5.517578125e-06, "model_forward_time": 0.025708436965942383, "step": 3616 }, { "epoch": 5.517578125e-06, "step": 3616, "training_step_time": 0.11047792434692383 }, { "epoch": 5.51910400390625e-06, "model_forward_time": 0.025622844696044922, "step": 3617 }, { "epoch": 5.51910400390625e-06, "step": 3617, "training_step_time": 0.11481785774230957 }, { "epoch": 5.5206298828125e-06, "model_forward_time": 0.025615692138671875, "step": 3618 }, { "epoch": 5.5206298828125e-06, "step": 3618, "training_step_time": 0.11126589775085449 }, { "epoch": 5.52215576171875e-06, "model_forward_time": 0.02544879913330078, "step": 3619 }, { "epoch": 5.52215576171875e-06, "step": 3619, "training_step_time": 0.10785293579101562 }, { "epoch": 5.523681640625e-06, "grad_norm": 0.7753174304962158, "learning_rate": 9.864092191274632e-05, "loss": 0.1153, "step": 3620 }, { "epoch": 5.523681640625e-06, "model_forward_time": 0.025243282318115234, "step": 3620 }, { "epoch": 5.523681640625e-06, "step": 3620, "training_step_time": 0.11166644096374512 }, { "epoch": 5.52520751953125e-06, "model_forward_time": 0.02500152587890625, "step": 3621 }, { "epoch": 5.52520751953125e-06, "step": 3621, "training_step_time": 0.22475814819335938 }, { "epoch": 5.5267333984375e-06, "model_forward_time": 0.024393796920776367, "step": 3622 }, { "epoch": 5.5267333984375e-06, "step": 3622, "training_step_time": 0.2043933868408203 }, { "epoch": 5.52825927734375e-06, "model_forward_time": 0.025471925735473633, "step": 3623 }, { "epoch": 5.52825927734375e-06, "step": 3623, "training_step_time": 0.14416766166687012 }, { "epoch": 5.52978515625e-06, "model_forward_time": 0.024672985076904297, "step": 3624 }, { "epoch": 5.52978515625e-06, "step": 3624, "training_step_time": 0.20978093147277832 }, { "epoch": 5.53131103515625e-06, "model_forward_time": 0.024283885955810547, "step": 3625 }, { "epoch": 5.53131103515625e-06, "step": 3625, "training_step_time": 0.18009114265441895 }, { "epoch": 5.5328369140625e-06, "model_forward_time": 0.02435612678527832, "step": 3626 }, { "epoch": 5.5328369140625e-06, "step": 3626, "training_step_time": 0.1759657859802246 }, { "epoch": 5.53436279296875e-06, "model_forward_time": 0.02431797981262207, "step": 3627 }, { "epoch": 5.53436279296875e-06, "step": 3627, "training_step_time": 0.14286208152770996 }, { "epoch": 5.535888671875e-06, "model_forward_time": 0.02447199821472168, "step": 3628 }, { "epoch": 5.535888671875e-06, "step": 3628, "training_step_time": 0.12184834480285645 }, { "epoch": 5.53741455078125e-06, "model_forward_time": 0.02477264404296875, "step": 3629 }, { "epoch": 5.53741455078125e-06, "step": 3629, "training_step_time": 0.10953545570373535 }, { "epoch": 5.5389404296875e-06, "grad_norm": 0.5163019895553589, "learning_rate": 9.862812927522309e-05, "loss": 0.0957, "step": 3630 }, { "epoch": 5.5389404296875e-06, "model_forward_time": 0.02468729019165039, "step": 3630 }, { "epoch": 5.5389404296875e-06, "step": 3630, "training_step_time": 0.19427728652954102 }, { "epoch": 5.54046630859375e-06, "model_forward_time": 0.024411678314208984, "step": 3631 }, { "epoch": 5.54046630859375e-06, "step": 3631, "training_step_time": 0.10506319999694824 }, { "epoch": 5.5419921875e-06, "model_forward_time": 0.024384260177612305, "step": 3632 }, { "epoch": 5.5419921875e-06, "step": 3632, "training_step_time": 0.10987353324890137 }, { "epoch": 5.54351806640625e-06, "model_forward_time": 0.025397777557373047, "step": 3633 }, { "epoch": 5.54351806640625e-06, "step": 3633, "training_step_time": 0.11170649528503418 }, { "epoch": 5.5450439453125e-06, "model_forward_time": 0.025423049926757812, "step": 3634 }, { "epoch": 5.5450439453125e-06, "step": 3634, "training_step_time": 0.11060476303100586 }, { "epoch": 5.54656982421875e-06, "model_forward_time": 0.025203466415405273, "step": 3635 }, { "epoch": 5.54656982421875e-06, "step": 3635, "training_step_time": 0.21367692947387695 }, { "epoch": 5.548095703125e-06, "model_forward_time": 0.02469801902770996, "step": 3636 }, { "epoch": 5.548095703125e-06, "step": 3636, "training_step_time": 0.11488771438598633 }, { "epoch": 5.54962158203125e-06, "model_forward_time": 0.02514028549194336, "step": 3637 }, { "epoch": 5.54962158203125e-06, "step": 3637, "training_step_time": 0.10862874984741211 }, { "epoch": 5.5511474609375e-06, "model_forward_time": 0.0251615047454834, "step": 3638 }, { "epoch": 5.5511474609375e-06, "step": 3638, "training_step_time": 0.17350268363952637 }, { "epoch": 5.55267333984375e-06, "model_forward_time": 0.025249242782592773, "step": 3639 }, { "epoch": 5.55267333984375e-06, "step": 3639, "training_step_time": 0.16251111030578613 }, { "epoch": 5.55419921875e-06, "grad_norm": 0.40915581583976746, "learning_rate": 9.861527754993749e-05, "loss": 0.1081, "step": 3640 }, { "epoch": 5.55419921875e-06, "model_forward_time": 0.02501988410949707, "step": 3640 }, { "epoch": 5.55419921875e-06, "step": 3640, "training_step_time": 0.10607171058654785 }, { "epoch": 5.55572509765625e-06, "model_forward_time": 0.025009632110595703, "step": 3641 }, { "epoch": 5.55572509765625e-06, "step": 3641, "training_step_time": 0.10691547393798828 }, { "epoch": 5.5572509765625e-06, "model_forward_time": 0.02519702911376953, "step": 3642 }, { "epoch": 5.5572509765625e-06, "step": 3642, "training_step_time": 0.10802531242370605 }, { "epoch": 5.55877685546875e-06, "model_forward_time": 0.026309967041015625, "step": 3643 }, { "epoch": 5.55877685546875e-06, "step": 3643, "training_step_time": 0.10938906669616699 }, { "epoch": 5.560302734375e-06, "model_forward_time": 0.026763916015625, "step": 3644 }, { "epoch": 5.560302734375e-06, "step": 3644, "training_step_time": 0.11163735389709473 }, { "epoch": 5.56182861328125e-06, "model_forward_time": 0.02516317367553711, "step": 3645 }, { "epoch": 5.56182861328125e-06, "step": 3645, "training_step_time": 0.11041426658630371 }, { "epoch": 5.5633544921875e-06, "model_forward_time": 0.02609705924987793, "step": 3646 }, { "epoch": 5.5633544921875e-06, "step": 3646, "training_step_time": 0.1114192008972168 }, { "epoch": 5.56488037109375e-06, "model_forward_time": 0.02512669563293457, "step": 3647 }, { "epoch": 5.56488037109375e-06, "step": 3647, "training_step_time": 0.11021924018859863 }, { "epoch": 5.56640625e-06, "model_forward_time": 0.02504587173461914, "step": 3648 }, { "epoch": 5.56640625e-06, "step": 3648, "training_step_time": 0.1109774112701416 }, { "epoch": 5.56793212890625e-06, "model_forward_time": 0.02516317367553711, "step": 3649 }, { "epoch": 5.56793212890625e-06, "step": 3649, "training_step_time": 0.10928463935852051 }, { "epoch": 5.5694580078125e-06, "grad_norm": 0.5303035378456116, "learning_rate": 9.860236675250552e-05, "loss": 0.1355, "step": 3650 }, { "epoch": 5.5694580078125e-06, "model_forward_time": 0.025303363800048828, "step": 3650 }, { "epoch": 5.5694580078125e-06, "step": 3650, "training_step_time": 0.10702753067016602 }, { "epoch": 5.57098388671875e-06, "model_forward_time": 0.025582075119018555, "step": 3651 }, { "epoch": 5.57098388671875e-06, "step": 3651, "training_step_time": 0.10846829414367676 }, { "epoch": 5.572509765625e-06, "model_forward_time": 0.025735139846801758, "step": 3652 }, { "epoch": 5.572509765625e-06, "step": 3652, "training_step_time": 0.10896635055541992 }, { "epoch": 5.57403564453125e-06, "model_forward_time": 0.025242328643798828, "step": 3653 }, { "epoch": 5.57403564453125e-06, "step": 3653, "training_step_time": 0.10647916793823242 }, { "epoch": 5.5755615234375e-06, "model_forward_time": 0.025364160537719727, "step": 3654 }, { "epoch": 5.5755615234375e-06, "step": 3654, "training_step_time": 0.11582779884338379 }, { "epoch": 5.57708740234375e-06, "model_forward_time": 0.02546095848083496, "step": 3655 }, { "epoch": 5.57708740234375e-06, "step": 3655, "training_step_time": 0.10675239562988281 }, { "epoch": 5.57861328125e-06, "model_forward_time": 0.025429964065551758, "step": 3656 }, { "epoch": 5.57861328125e-06, "step": 3656, "training_step_time": 0.11009716987609863 }, { "epoch": 5.58013916015625e-06, "model_forward_time": 0.025098800659179688, "step": 3657 }, { "epoch": 5.58013916015625e-06, "step": 3657, "training_step_time": 0.10571575164794922 }, { "epoch": 5.5816650390625e-06, "model_forward_time": 0.025443315505981445, "step": 3658 }, { "epoch": 5.5816650390625e-06, "step": 3658, "training_step_time": 0.10824227333068848 }, { "epoch": 5.58319091796875e-06, "model_forward_time": 0.025310754776000977, "step": 3659 }, { "epoch": 5.58319091796875e-06, "step": 3659, "training_step_time": 0.1077120304107666 }, { "epoch": 5.584716796875e-06, "grad_norm": 0.4611889123916626, "learning_rate": 9.858939689861506e-05, "loss": 0.1111, "step": 3660 }, { "epoch": 5.584716796875e-06, "model_forward_time": 0.02488565444946289, "step": 3660 }, { "epoch": 5.584716796875e-06, "step": 3660, "training_step_time": 0.10646867752075195 }, { "epoch": 5.58624267578125e-06, "model_forward_time": 0.027025461196899414, "step": 3661 }, { "epoch": 5.58624267578125e-06, "step": 3661, "training_step_time": 0.1088418960571289 }, { "epoch": 5.5877685546875e-06, "model_forward_time": 0.025281667709350586, "step": 3662 }, { "epoch": 5.5877685546875e-06, "step": 3662, "training_step_time": 0.10543608665466309 }, { "epoch": 5.58929443359375e-06, "model_forward_time": 0.02533245086669922, "step": 3663 }, { "epoch": 5.58929443359375e-06, "step": 3663, "training_step_time": 0.11092305183410645 }, { "epoch": 5.5908203125e-06, "model_forward_time": 0.02594447135925293, "step": 3664 }, { "epoch": 5.5908203125e-06, "step": 3664, "training_step_time": 0.10593390464782715 }, { "epoch": 5.59234619140625e-06, "model_forward_time": 0.025614261627197266, "step": 3665 }, { "epoch": 5.59234619140625e-06, "step": 3665, "training_step_time": 0.10619735717773438 }, { "epoch": 5.5938720703125e-06, "model_forward_time": 0.02579641342163086, "step": 3666 }, { "epoch": 5.5938720703125e-06, "step": 3666, "training_step_time": 0.12946867942810059 }, { "epoch": 5.59539794921875e-06, "model_forward_time": 0.02540898323059082, "step": 3667 }, { "epoch": 5.59539794921875e-06, "step": 3667, "training_step_time": 0.11614346504211426 }, { "epoch": 5.596923828125e-06, "model_forward_time": 0.02584099769592285, "step": 3668 }, { "epoch": 5.596923828125e-06, "step": 3668, "training_step_time": 0.15394020080566406 }, { "epoch": 5.59844970703125e-06, "model_forward_time": 0.025391578674316406, "step": 3669 }, { "epoch": 5.59844970703125e-06, "step": 3669, "training_step_time": 0.15090489387512207 }, { "epoch": 5.5999755859375e-06, "grad_norm": 0.5910298824310303, "learning_rate": 9.857636800402568e-05, "loss": 0.1179, "step": 3670 }, { "epoch": 5.5999755859375e-06, "model_forward_time": 0.024151086807250977, "step": 3670 }, { "epoch": 5.5999755859375e-06, "step": 3670, "training_step_time": 0.2219538688659668 }, { "epoch": 5.60150146484375e-06, "model_forward_time": 0.02447652816772461, "step": 3671 }, { "epoch": 5.60150146484375e-06, "step": 3671, "training_step_time": 0.2072756290435791 }, { "epoch": 5.60302734375e-06, "model_forward_time": 0.02445077896118164, "step": 3672 }, { "epoch": 5.60302734375e-06, "step": 3672, "training_step_time": 0.13475346565246582 }, { "epoch": 5.60455322265625e-06, "model_forward_time": 0.024463653564453125, "step": 3673 }, { "epoch": 5.60455322265625e-06, "step": 3673, "training_step_time": 0.20469093322753906 }, { "epoch": 5.6060791015625e-06, "model_forward_time": 0.024940013885498047, "step": 3674 }, { "epoch": 5.6060791015625e-06, "step": 3674, "training_step_time": 0.1187291145324707 }, { "epoch": 5.60760498046875e-06, "model_forward_time": 0.024410486221313477, "step": 3675 }, { "epoch": 5.60760498046875e-06, "step": 3675, "training_step_time": 0.17871761322021484 }, { "epoch": 5.609130859375e-06, "model_forward_time": 0.024702072143554688, "step": 3676 }, { "epoch": 5.609130859375e-06, "step": 3676, "training_step_time": 0.11275410652160645 }, { "epoch": 5.61065673828125e-06, "model_forward_time": 0.024342775344848633, "step": 3677 }, { "epoch": 5.61065673828125e-06, "step": 3677, "training_step_time": 0.11335635185241699 }, { "epoch": 5.6121826171875e-06, "model_forward_time": 0.02553391456604004, "step": 3678 }, { "epoch": 5.6121826171875e-06, "step": 3678, "training_step_time": 0.11303186416625977 }, { "epoch": 5.61370849609375e-06, "model_forward_time": 0.02550482749938965, "step": 3679 }, { "epoch": 5.61370849609375e-06, "step": 3679, "training_step_time": 0.11442041397094727 }, { "epoch": 5.615234375e-06, "grad_norm": 0.9671534299850464, "learning_rate": 9.856328008456872e-05, "loss": 0.1244, "step": 3680 }, { "epoch": 5.615234375e-06, "model_forward_time": 0.02512955665588379, "step": 3680 }, { "epoch": 5.615234375e-06, "step": 3680, "training_step_time": 0.21998977661132812 }, { "epoch": 5.61676025390625e-06, "model_forward_time": 0.02484583854675293, "step": 3681 }, { "epoch": 5.61676025390625e-06, "step": 3681, "training_step_time": 0.2035667896270752 }, { "epoch": 5.6182861328125e-06, "model_forward_time": 0.024566650390625, "step": 3682 }, { "epoch": 5.6182861328125e-06, "step": 3682, "training_step_time": 0.13980603218078613 }, { "epoch": 5.61981201171875e-06, "model_forward_time": 0.026850223541259766, "step": 3683 }, { "epoch": 5.61981201171875e-06, "step": 3683, "training_step_time": 0.10766053199768066 }, { "epoch": 5.621337890625e-06, "model_forward_time": 0.025412797927856445, "step": 3684 }, { "epoch": 5.621337890625e-06, "step": 3684, "training_step_time": 0.10948824882507324 }, { "epoch": 5.62286376953125e-06, "model_forward_time": 0.025035381317138672, "step": 3685 }, { "epoch": 5.62286376953125e-06, "step": 3685, "training_step_time": 0.10767197608947754 }, { "epoch": 5.6243896484375e-06, "model_forward_time": 0.025219202041625977, "step": 3686 }, { "epoch": 5.6243896484375e-06, "step": 3686, "training_step_time": 0.10927557945251465 }, { "epoch": 5.62591552734375e-06, "model_forward_time": 0.025241374969482422, "step": 3687 }, { "epoch": 5.62591552734375e-06, "step": 3687, "training_step_time": 0.10854768753051758 }, { "epoch": 5.62744140625e-06, "model_forward_time": 0.02672266960144043, "step": 3688 }, { "epoch": 5.62744140625e-06, "step": 3688, "training_step_time": 0.11433148384094238 }, { "epoch": 5.62896728515625e-06, "model_forward_time": 0.02520918846130371, "step": 3689 }, { "epoch": 5.62896728515625e-06, "step": 3689, "training_step_time": 0.10828781127929688 }, { "epoch": 5.6304931640625e-06, "grad_norm": 0.5670982599258423, "learning_rate": 9.855013315614725e-05, "loss": 0.1106, "step": 3690 }, { "epoch": 5.6304931640625e-06, "model_forward_time": 0.025037765502929688, "step": 3690 }, { "epoch": 5.6304931640625e-06, "step": 3690, "training_step_time": 0.10764527320861816 }, { "epoch": 5.63201904296875e-06, "model_forward_time": 0.026458740234375, "step": 3691 }, { "epoch": 5.63201904296875e-06, "step": 3691, "training_step_time": 0.10952997207641602 }, { "epoch": 5.633544921875e-06, "model_forward_time": 0.02515268325805664, "step": 3692 }, { "epoch": 5.633544921875e-06, "step": 3692, "training_step_time": 0.10698437690734863 }, { "epoch": 5.63507080078125e-06, "model_forward_time": 0.025123119354248047, "step": 3693 }, { "epoch": 5.63507080078125e-06, "step": 3693, "training_step_time": 0.11066079139709473 }, { "epoch": 5.6365966796875e-06, "model_forward_time": 0.025208711624145508, "step": 3694 }, { "epoch": 5.6365966796875e-06, "step": 3694, "training_step_time": 0.10919427871704102 }, { "epoch": 5.63812255859375e-06, "model_forward_time": 0.02528691291809082, "step": 3695 }, { "epoch": 5.63812255859375e-06, "step": 3695, "training_step_time": 0.10968923568725586 }, { "epoch": 5.6396484375e-06, "model_forward_time": 0.025018930435180664, "step": 3696 }, { "epoch": 5.6396484375e-06, "step": 3696, "training_step_time": 0.10768294334411621 }, { "epoch": 5.64117431640625e-06, "model_forward_time": 0.02568364143371582, "step": 3697 }, { "epoch": 5.64117431640625e-06, "step": 3697, "training_step_time": 0.1093449592590332 }, { "epoch": 5.6427001953125e-06, "model_forward_time": 0.025207042694091797, "step": 3698 }, { "epoch": 5.6427001953125e-06, "step": 3698, "training_step_time": 0.11206626892089844 }, { "epoch": 5.64422607421875e-06, "model_forward_time": 0.02564263343811035, "step": 3699 }, { "epoch": 5.64422607421875e-06, "step": 3699, "training_step_time": 0.1087958812713623 }, { "epoch": 5.645751953125e-06, "grad_norm": 0.7125267386436462, "learning_rate": 9.8536927234736e-05, "loss": 0.1251, "step": 3700 }, { "epoch": 5.645751953125e-06, "model_forward_time": 0.024998903274536133, "step": 3700 }, { "epoch": 5.645751953125e-06, "step": 3700, "training_step_time": 0.10839509963989258 }, { "epoch": 5.64727783203125e-06, "model_forward_time": 0.0266721248626709, "step": 3701 }, { "epoch": 5.64727783203125e-06, "step": 3701, "training_step_time": 0.10779452323913574 }, { "epoch": 5.6488037109375e-06, "model_forward_time": 0.025252342224121094, "step": 3702 }, { "epoch": 5.6488037109375e-06, "step": 3702, "training_step_time": 0.10919785499572754 }, { "epoch": 5.65032958984375e-06, "model_forward_time": 0.025144338607788086, "step": 3703 }, { "epoch": 5.65032958984375e-06, "step": 3703, "training_step_time": 0.10828804969787598 }, { "epoch": 5.65185546875e-06, "model_forward_time": 0.02506279945373535, "step": 3704 }, { "epoch": 5.65185546875e-06, "step": 3704, "training_step_time": 0.10794401168823242 }, { "epoch": 5.65338134765625e-06, "model_forward_time": 0.027426719665527344, "step": 3705 }, { "epoch": 5.65338134765625e-06, "step": 3705, "training_step_time": 0.11094355583190918 }, { "epoch": 5.6549072265625e-06, "model_forward_time": 0.025046348571777344, "step": 3706 }, { "epoch": 5.6549072265625e-06, "step": 3706, "training_step_time": 0.10791707038879395 }, { "epoch": 5.65643310546875e-06, "model_forward_time": 0.025409936904907227, "step": 3707 }, { "epoch": 5.65643310546875e-06, "step": 3707, "training_step_time": 0.11396574974060059 }, { "epoch": 5.657958984375e-06, "model_forward_time": 0.025937795639038086, "step": 3708 }, { "epoch": 5.657958984375e-06, "step": 3708, "training_step_time": 0.10824179649353027 }, { "epoch": 5.65948486328125e-06, "model_forward_time": 0.025185823440551758, "step": 3709 }, { "epoch": 5.65948486328125e-06, "step": 3709, "training_step_time": 0.1088399887084961 }, { "epoch": 5.6610107421875e-06, "grad_norm": 0.5925086140632629, "learning_rate": 9.852366233638144e-05, "loss": 0.1192, "step": 3710 }, { "epoch": 5.6610107421875e-06, "model_forward_time": 0.0253448486328125, "step": 3710 }, { "epoch": 5.6610107421875e-06, "step": 3710, "training_step_time": 0.18607091903686523 }, { "epoch": 5.66253662109375e-06, "model_forward_time": 0.024074792861938477, "step": 3711 }, { "epoch": 5.66253662109375e-06, "step": 3711, "training_step_time": 0.11555075645446777 }, { "epoch": 5.6640625e-06, "model_forward_time": 0.024435043334960938, "step": 3712 }, { "epoch": 5.6640625e-06, "step": 3712, "training_step_time": 0.17337560653686523 }, { "epoch": 5.66558837890625e-06, "model_forward_time": 0.024907827377319336, "step": 3713 }, { "epoch": 5.66558837890625e-06, "step": 3713, "training_step_time": 0.13090872764587402 }, { "epoch": 5.6671142578125e-06, "model_forward_time": 0.02438497543334961, "step": 3714 }, { "epoch": 5.6671142578125e-06, "step": 3714, "training_step_time": 0.21389508247375488 }, { "epoch": 5.66864013671875e-06, "model_forward_time": 0.024282217025756836, "step": 3715 }, { "epoch": 5.66864013671875e-06, "step": 3715, "training_step_time": 0.13632965087890625 }, { "epoch": 5.670166015625e-06, "model_forward_time": 0.024297714233398438, "step": 3716 }, { "epoch": 5.670166015625e-06, "step": 3716, "training_step_time": 0.11049032211303711 }, { "epoch": 5.67169189453125e-06, "model_forward_time": 0.02519822120666504, "step": 3717 }, { "epoch": 5.67169189453125e-06, "step": 3717, "training_step_time": 0.20998024940490723 }, { "epoch": 5.6732177734375e-06, "model_forward_time": 0.02444624900817871, "step": 3718 }, { "epoch": 5.6732177734375e-06, "step": 3718, "training_step_time": 0.12879157066345215 }, { "epoch": 5.67474365234375e-06, "model_forward_time": 0.024034500122070312, "step": 3719 }, { "epoch": 5.67474365234375e-06, "step": 3719, "training_step_time": 0.18405604362487793 }, { "epoch": 5.67626953125e-06, "grad_norm": 0.6077547669410706, "learning_rate": 9.851033847720166e-05, "loss": 0.1196, "step": 3720 }, { "epoch": 5.67626953125e-06, "model_forward_time": 0.024678945541381836, "step": 3720 }, { "epoch": 5.67626953125e-06, "step": 3720, "training_step_time": 0.12668275833129883 }, { "epoch": 5.67779541015625e-06, "model_forward_time": 0.023871421813964844, "step": 3721 }, { "epoch": 5.67779541015625e-06, "step": 3721, "training_step_time": 0.11775374412536621 }, { "epoch": 5.6793212890625e-06, "model_forward_time": 0.02523636817932129, "step": 3722 }, { "epoch": 5.6793212890625e-06, "step": 3722, "training_step_time": 0.1190195083618164 }, { "epoch": 5.68084716796875e-06, "model_forward_time": 0.025907278060913086, "step": 3723 }, { "epoch": 5.68084716796875e-06, "step": 3723, "training_step_time": 0.12054204940795898 }, { "epoch": 5.682373046875e-06, "model_forward_time": 0.028300046920776367, "step": 3724 }, { "epoch": 5.682373046875e-06, "step": 3724, "training_step_time": 0.11446428298950195 }, { "epoch": 5.68389892578125e-06, "model_forward_time": 0.02431631088256836, "step": 3725 }, { "epoch": 5.68389892578125e-06, "step": 3725, "training_step_time": 0.14249157905578613 }, { "epoch": 5.6854248046875e-06, "model_forward_time": 0.024969816207885742, "step": 3726 }, { "epoch": 5.6854248046875e-06, "step": 3726, "training_step_time": 0.1646106243133545 }, { "epoch": 5.68695068359375e-06, "model_forward_time": 0.024561166763305664, "step": 3727 }, { "epoch": 5.68695068359375e-06, "step": 3727, "training_step_time": 0.11510896682739258 }, { "epoch": 5.6884765625e-06, "model_forward_time": 0.025491714477539062, "step": 3728 }, { "epoch": 5.6884765625e-06, "step": 3728, "training_step_time": 0.1345815658569336 }, { "epoch": 5.69000244140625e-06, "model_forward_time": 0.025413990020751953, "step": 3729 }, { "epoch": 5.69000244140625e-06, "step": 3729, "training_step_time": 0.21203207969665527 }, { "epoch": 5.6915283203125e-06, "grad_norm": 0.7592687606811523, "learning_rate": 9.849695567338639e-05, "loss": 0.1035, "step": 3730 }, { "epoch": 5.6915283203125e-06, "model_forward_time": 0.02472972869873047, "step": 3730 }, { "epoch": 5.6915283203125e-06, "step": 3730, "training_step_time": 0.10725569725036621 }, { "epoch": 5.69305419921875e-06, "model_forward_time": 0.024759292602539062, "step": 3731 }, { "epoch": 5.69305419921875e-06, "step": 3731, "training_step_time": 0.10465645790100098 }, { "epoch": 5.694580078125e-06, "model_forward_time": 0.025850296020507812, "step": 3732 }, { "epoch": 5.694580078125e-06, "step": 3732, "training_step_time": 0.10812211036682129 }, { "epoch": 5.69610595703125e-06, "model_forward_time": 0.025228500366210938, "step": 3733 }, { "epoch": 5.69610595703125e-06, "step": 3733, "training_step_time": 0.11033248901367188 }, { "epoch": 5.6976318359375e-06, "model_forward_time": 0.02506709098815918, "step": 3734 }, { "epoch": 5.6976318359375e-06, "step": 3734, "training_step_time": 0.10956788063049316 }, { "epoch": 5.69915771484375e-06, "model_forward_time": 0.024998188018798828, "step": 3735 }, { "epoch": 5.69915771484375e-06, "step": 3735, "training_step_time": 0.10901808738708496 }, { "epoch": 5.70068359375e-06, "model_forward_time": 0.025314807891845703, "step": 3736 }, { "epoch": 5.70068359375e-06, "step": 3736, "training_step_time": 0.10727381706237793 }, { "epoch": 5.70220947265625e-06, "model_forward_time": 0.025140762329101562, "step": 3737 }, { "epoch": 5.70220947265625e-06, "step": 3737, "training_step_time": 0.10681843757629395 }, { "epoch": 5.7037353515625e-06, "model_forward_time": 0.025690317153930664, "step": 3738 }, { "epoch": 5.7037353515625e-06, "step": 3738, "training_step_time": 0.11525821685791016 }, { "epoch": 5.70526123046875e-06, "model_forward_time": 0.0267941951751709, "step": 3739 }, { "epoch": 5.70526123046875e-06, "step": 3739, "training_step_time": 0.139298677444458 }, { "epoch": 5.706787109375e-06, "grad_norm": 0.6188333034515381, "learning_rate": 9.848351394119704e-05, "loss": 0.115, "step": 3740 }, { "epoch": 5.706787109375e-06, "model_forward_time": 0.027582645416259766, "step": 3740 }, { "epoch": 5.706787109375e-06, "step": 3740, "training_step_time": 0.18263745307922363 }, { "epoch": 5.70831298828125e-06, "model_forward_time": 0.02722024917602539, "step": 3741 }, { "epoch": 5.70831298828125e-06, "step": 3741, "training_step_time": 0.21411895751953125 }, { "epoch": 5.7098388671875e-06, "model_forward_time": 0.028301715850830078, "step": 3742 }, { "epoch": 5.7098388671875e-06, "step": 3742, "training_step_time": 0.23530244827270508 }, { "epoch": 5.71136474609375e-06, "model_forward_time": 0.029034852981567383, "step": 3743 }, { "epoch": 5.71136474609375e-06, "step": 3743, "training_step_time": 0.2810180187225342 }, { "epoch": 5.712890625e-06, "model_forward_time": 0.02951979637145996, "step": 3744 }, { "epoch": 5.712890625e-06, "step": 3744, "training_step_time": 0.2805464267730713 }, { "epoch": 5.71441650390625e-06, "model_forward_time": 0.031998395919799805, "step": 3745 }, { "epoch": 5.71441650390625e-06, "step": 3745, "training_step_time": 0.30208683013916016 }, { "epoch": 5.7159423828125e-06, "model_forward_time": 0.028746366500854492, "step": 3746 }, { "epoch": 5.7159423828125e-06, "step": 3746, "training_step_time": 0.3071131706237793 }, { "epoch": 5.71746826171875e-06, "model_forward_time": 0.036649465560913086, "step": 3747 }, { "epoch": 5.71746826171875e-06, "step": 3747, "training_step_time": 0.3875277042388916 }, { "epoch": 5.718994140625e-06, "model_forward_time": 0.030651569366455078, "step": 3748 }, { "epoch": 5.718994140625e-06, "step": 3748, "training_step_time": 0.3340270519256592 }, { "epoch": 5.72052001953125e-06, "model_forward_time": 0.030715465545654297, "step": 3749 }, { "epoch": 5.72052001953125e-06, "step": 3749, "training_step_time": 0.3963041305541992 }, { "epoch": 5.7220458984375e-06, "grad_norm": 0.7434552907943726, "learning_rate": 9.847001329696653e-05, "loss": 0.1309, "step": 3750 }, { "epoch": 5.7220458984375e-06, "model_forward_time": 0.031092405319213867, "step": 3750 }, { "epoch": 5.7220458984375e-06, "step": 3750, "training_step_time": 0.28721141815185547 }, { "epoch": 5.72357177734375e-06, "model_forward_time": 0.03343534469604492, "step": 3751 }, { "epoch": 5.72357177734375e-06, "step": 3751, "training_step_time": 0.2773294448852539 }, { "epoch": 5.72509765625e-06, "model_forward_time": 0.0326848030090332, "step": 3752 }, { "epoch": 5.72509765625e-06, "step": 3752, "training_step_time": 0.2819547653198242 }, { "epoch": 5.72662353515625e-06, "model_forward_time": 0.03075122833251953, "step": 3753 }, { "epoch": 5.72662353515625e-06, "step": 3753, "training_step_time": 0.14244890213012695 }, { "epoch": 5.7281494140625e-06, "model_forward_time": 0.030855417251586914, "step": 3754 }, { "epoch": 5.7281494140625e-06, "step": 3754, "training_step_time": 0.17639660835266113 }, { "epoch": 5.72967529296875e-06, "model_forward_time": 0.030652523040771484, "step": 3755 }, { "epoch": 5.72967529296875e-06, "step": 3755, "training_step_time": 0.17862939834594727 }, { "epoch": 5.731201171875e-06, "model_forward_time": 0.029019594192504883, "step": 3756 }, { "epoch": 5.731201171875e-06, "step": 3756, "training_step_time": 0.13964104652404785 }, { "epoch": 5.73272705078125e-06, "model_forward_time": 0.028353452682495117, "step": 3757 }, { "epoch": 5.73272705078125e-06, "step": 3757, "training_step_time": 0.15268564224243164 }, { "epoch": 5.7342529296875e-06, "model_forward_time": 0.027714252471923828, "step": 3758 }, { "epoch": 5.7342529296875e-06, "step": 3758, "training_step_time": 0.1486358642578125 }, { "epoch": 5.73577880859375e-06, "model_forward_time": 0.026096343994140625, "step": 3759 }, { "epoch": 5.73577880859375e-06, "step": 3759, "training_step_time": 0.1441047191619873 }, { "epoch": 5.7373046875e-06, "grad_norm": 0.40131035447120667, "learning_rate": 9.845645375709945e-05, "loss": 0.0955, "step": 3760 }, { "epoch": 5.7373046875e-06, "model_forward_time": 0.026035547256469727, "step": 3760 }, { "epoch": 5.7373046875e-06, "step": 3760, "training_step_time": 0.13003277778625488 }, { "epoch": 5.73883056640625e-06, "model_forward_time": 0.02599048614501953, "step": 3761 }, { "epoch": 5.73883056640625e-06, "step": 3761, "training_step_time": 0.11291670799255371 }, { "epoch": 5.7403564453125e-06, "model_forward_time": 0.02551126480102539, "step": 3762 }, { "epoch": 5.7403564453125e-06, "step": 3762, "training_step_time": 0.1160287857055664 }, { "epoch": 5.74188232421875e-06, "model_forward_time": 0.0249636173248291, "step": 3763 }, { "epoch": 5.74188232421875e-06, "step": 3763, "training_step_time": 0.11399531364440918 }, { "epoch": 5.743408203125e-06, "model_forward_time": 0.02434396743774414, "step": 3764 }, { "epoch": 5.743408203125e-06, "step": 3764, "training_step_time": 0.11345434188842773 }, { "epoch": 5.74493408203125e-06, "model_forward_time": 0.02477431297302246, "step": 3765 }, { "epoch": 5.74493408203125e-06, "step": 3765, "training_step_time": 0.11107635498046875 }, { "epoch": 5.7464599609375e-06, "model_forward_time": 0.025433778762817383, "step": 3766 }, { "epoch": 5.7464599609375e-06, "step": 3766, "training_step_time": 0.11537432670593262 }, { "epoch": 5.74798583984375e-06, "model_forward_time": 0.02428722381591797, "step": 3767 }, { "epoch": 5.74798583984375e-06, "step": 3767, "training_step_time": 0.10775327682495117 }, { "epoch": 5.74951171875e-06, "model_forward_time": 0.025365591049194336, "step": 3768 }, { "epoch": 5.74951171875e-06, "step": 3768, "training_step_time": 0.11297845840454102 }, { "epoch": 5.75103759765625e-06, "model_forward_time": 0.024929046630859375, "step": 3769 }, { "epoch": 5.75103759765625e-06, "step": 3769, "training_step_time": 0.10987067222595215 }, { "epoch": 5.7525634765625e-06, "grad_norm": 0.6926906108856201, "learning_rate": 9.84428353380719e-05, "loss": 0.1233, "step": 3770 }, { "epoch": 5.7525634765625e-06, "model_forward_time": 0.02530646324157715, "step": 3770 }, { "epoch": 5.7525634765625e-06, "step": 3770, "training_step_time": 0.11096477508544922 }, { "epoch": 5.75408935546875e-06, "model_forward_time": 0.025210857391357422, "step": 3771 }, { "epoch": 5.75408935546875e-06, "step": 3771, "training_step_time": 0.10978221893310547 }, { "epoch": 5.755615234375e-06, "model_forward_time": 0.02512955665588379, "step": 3772 }, { "epoch": 5.755615234375e-06, "step": 3772, "training_step_time": 0.11090469360351562 }, { "epoch": 5.75714111328125e-06, "model_forward_time": 0.025104045867919922, "step": 3773 }, { "epoch": 5.75714111328125e-06, "step": 3773, "training_step_time": 0.10835027694702148 }, { "epoch": 5.7586669921875e-06, "model_forward_time": 0.02537703514099121, "step": 3774 }, { "epoch": 5.7586669921875e-06, "step": 3774, "training_step_time": 0.10801815986633301 }, { "epoch": 5.76019287109375e-06, "model_forward_time": 0.025606632232666016, "step": 3775 }, { "epoch": 5.76019287109375e-06, "step": 3775, "training_step_time": 0.11304402351379395 }, { "epoch": 5.76171875e-06, "model_forward_time": 0.025969743728637695, "step": 3776 }, { "epoch": 5.76171875e-06, "step": 3776, "training_step_time": 0.11148881912231445 }, { "epoch": 5.76324462890625e-06, "model_forward_time": 0.029139041900634766, "step": 3777 }, { "epoch": 5.76324462890625e-06, "step": 3777, "training_step_time": 0.11367225646972656 }, { "epoch": 5.7647705078125e-06, "model_forward_time": 0.025634288787841797, "step": 3778 }, { "epoch": 5.7647705078125e-06, "step": 3778, "training_step_time": 0.11123108863830566 }, { "epoch": 5.76629638671875e-06, "model_forward_time": 0.0255887508392334, "step": 3779 }, { "epoch": 5.76629638671875e-06, "step": 3779, "training_step_time": 0.10942316055297852 }, { "epoch": 5.767822265625e-06, "grad_norm": 0.45874303579330444, "learning_rate": 9.842915805643155e-05, "loss": 0.111, "step": 3780 }, { "epoch": 5.767822265625e-06, "model_forward_time": 0.02534317970275879, "step": 3780 }, { "epoch": 5.767822265625e-06, "step": 3780, "training_step_time": 0.10964488983154297 }, { "epoch": 5.76934814453125e-06, "model_forward_time": 0.025545835494995117, "step": 3781 }, { "epoch": 5.76934814453125e-06, "step": 3781, "training_step_time": 0.1132357120513916 }, { "epoch": 5.7708740234375e-06, "model_forward_time": 0.02583479881286621, "step": 3782 }, { "epoch": 5.7708740234375e-06, "step": 3782, "training_step_time": 0.11069869995117188 }, { "epoch": 5.77239990234375e-06, "model_forward_time": 0.02854323387145996, "step": 3783 }, { "epoch": 5.77239990234375e-06, "step": 3783, "training_step_time": 0.11237072944641113 }, { "epoch": 5.77392578125e-06, "model_forward_time": 0.025372743606567383, "step": 3784 }, { "epoch": 5.77392578125e-06, "step": 3784, "training_step_time": 0.10759711265563965 }, { "epoch": 5.77545166015625e-06, "model_forward_time": 0.025457382202148438, "step": 3785 }, { "epoch": 5.77545166015625e-06, "step": 3785, "training_step_time": 0.13559818267822266 }, { "epoch": 5.7769775390625e-06, "model_forward_time": 0.02561330795288086, "step": 3786 }, { "epoch": 5.7769775390625e-06, "step": 3786, "training_step_time": 0.10781168937683105 }, { "epoch": 5.77850341796875e-06, "model_forward_time": 0.025604724884033203, "step": 3787 }, { "epoch": 5.77850341796875e-06, "step": 3787, "training_step_time": 0.1326909065246582 }, { "epoch": 5.780029296875e-06, "model_forward_time": 0.025356531143188477, "step": 3788 }, { "epoch": 5.780029296875e-06, "step": 3788, "training_step_time": 0.19426393508911133 }, { "epoch": 5.78155517578125e-06, "model_forward_time": 0.02521491050720215, "step": 3789 }, { "epoch": 5.78155517578125e-06, "step": 3789, "training_step_time": 0.19021892547607422 }, { "epoch": 5.7830810546875e-06, "grad_norm": 0.3816434442996979, "learning_rate": 9.841542192879762e-05, "loss": 0.0942, "step": 3790 }, { "epoch": 5.7830810546875e-06, "model_forward_time": 0.026805639266967773, "step": 3790 }, { "epoch": 5.7830810546875e-06, "step": 3790, "training_step_time": 0.1793689727783203 }, { "epoch": 5.78460693359375e-06, "model_forward_time": 0.02460956573486328, "step": 3791 }, { "epoch": 5.78460693359375e-06, "step": 3791, "training_step_time": 0.19040393829345703 }, { "epoch": 5.7861328125e-06, "model_forward_time": 0.025027990341186523, "step": 3792 }, { "epoch": 5.7861328125e-06, "step": 3792, "training_step_time": 0.13988256454467773 }, { "epoch": 5.78765869140625e-06, "model_forward_time": 0.02476334571838379, "step": 3793 }, { "epoch": 5.78765869140625e-06, "step": 3793, "training_step_time": 0.18755531311035156 }, { "epoch": 5.7891845703125e-06, "model_forward_time": 0.024788856506347656, "step": 3794 }, { "epoch": 5.7891845703125e-06, "step": 3794, "training_step_time": 0.1333937644958496 }, { "epoch": 5.79071044921875e-06, "model_forward_time": 0.02428746223449707, "step": 3795 }, { "epoch": 5.79071044921875e-06, "step": 3795, "training_step_time": 0.12100100517272949 }, { "epoch": 5.792236328125e-06, "model_forward_time": 0.025979042053222656, "step": 3796 }, { "epoch": 5.792236328125e-06, "step": 3796, "training_step_time": 0.11063575744628906 }, { "epoch": 5.79376220703125e-06, "model_forward_time": 0.025365591049194336, "step": 3797 }, { "epoch": 5.79376220703125e-06, "step": 3797, "training_step_time": 0.10877561569213867 }, { "epoch": 5.7952880859375e-06, "model_forward_time": 0.025317907333374023, "step": 3798 }, { "epoch": 5.7952880859375e-06, "step": 3798, "training_step_time": 0.11102175712585449 }, { "epoch": 5.79681396484375e-06, "model_forward_time": 0.024837017059326172, "step": 3799 }, { "epoch": 5.79681396484375e-06, "step": 3799, "training_step_time": 0.10570573806762695 }, { "epoch": 5.79833984375e-06, "grad_norm": 0.8299492001533508, "learning_rate": 9.840162697186075e-05, "loss": 0.1299, "step": 3800 }, { "epoch": 5.79833984375e-06, "model_forward_time": 0.02722620964050293, "step": 3800 }, { "epoch": 5.79833984375e-06, "step": 3800, "training_step_time": 0.10987448692321777 }, { "epoch": 5.79986572265625e-06, "model_forward_time": 0.02478814125061035, "step": 3801 }, { "epoch": 5.79986572265625e-06, "step": 3801, "training_step_time": 0.10673379898071289 }, { "epoch": 5.8013916015625e-06, "model_forward_time": 0.025382041931152344, "step": 3802 }, { "epoch": 5.8013916015625e-06, "step": 3802, "training_step_time": 0.11433601379394531 }, { "epoch": 5.80291748046875e-06, "model_forward_time": 0.026944637298583984, "step": 3803 }, { "epoch": 5.80291748046875e-06, "step": 3803, "training_step_time": 0.11736917495727539 }, { "epoch": 5.804443359375e-06, "model_forward_time": 0.025789260864257812, "step": 3804 }, { "epoch": 5.804443359375e-06, "step": 3804, "training_step_time": 0.21269869804382324 }, { "epoch": 5.80596923828125e-06, "model_forward_time": 0.02446603775024414, "step": 3805 }, { "epoch": 5.80596923828125e-06, "step": 3805, "training_step_time": 0.11800098419189453 }, { "epoch": 5.8074951171875e-06, "model_forward_time": 0.024841785430908203, "step": 3806 }, { "epoch": 5.8074951171875e-06, "step": 3806, "training_step_time": 0.1126713752746582 }, { "epoch": 5.80902099609375e-06, "model_forward_time": 0.02555990219116211, "step": 3807 }, { "epoch": 5.80902099609375e-06, "step": 3807, "training_step_time": 0.10752463340759277 }, { "epoch": 5.810546875e-06, "model_forward_time": 0.025375843048095703, "step": 3808 }, { "epoch": 5.810546875e-06, "step": 3808, "training_step_time": 0.10749697685241699 }, { "epoch": 5.81207275390625e-06, "model_forward_time": 0.02549290657043457, "step": 3809 }, { "epoch": 5.81207275390625e-06, "step": 3809, "training_step_time": 0.11079144477844238 }, { "epoch": 5.8135986328125e-06, "grad_norm": 0.49644649028778076, "learning_rate": 9.838777320238312e-05, "loss": 0.0955, "step": 3810 }, { "epoch": 5.8135986328125e-06, "model_forward_time": 0.02574944496154785, "step": 3810 }, { "epoch": 5.8135986328125e-06, "step": 3810, "training_step_time": 0.12449502944946289 }, { "epoch": 5.81512451171875e-06, "model_forward_time": 0.025311946868896484, "step": 3811 }, { "epoch": 5.81512451171875e-06, "step": 3811, "training_step_time": 0.1859893798828125 }, { "epoch": 5.816650390625e-06, "model_forward_time": 0.025127649307250977, "step": 3812 }, { "epoch": 5.816650390625e-06, "step": 3812, "training_step_time": 0.16502833366394043 }, { "epoch": 5.81817626953125e-06, "model_forward_time": 0.024425983428955078, "step": 3813 }, { "epoch": 5.81817626953125e-06, "step": 3813, "training_step_time": 0.13900041580200195 }, { "epoch": 5.8197021484375e-06, "model_forward_time": 0.02517104148864746, "step": 3814 }, { "epoch": 5.8197021484375e-06, "step": 3814, "training_step_time": 0.14062261581420898 }, { "epoch": 5.82122802734375e-06, "model_forward_time": 0.02514815330505371, "step": 3815 }, { "epoch": 5.82122802734375e-06, "step": 3815, "training_step_time": 0.12988805770874023 }, { "epoch": 5.82275390625e-06, "model_forward_time": 0.025272846221923828, "step": 3816 }, { "epoch": 5.82275390625e-06, "step": 3816, "training_step_time": 0.1257495880126953 }, { "epoch": 5.82427978515625e-06, "model_forward_time": 0.02889394760131836, "step": 3817 }, { "epoch": 5.82427978515625e-06, "step": 3817, "training_step_time": 0.11177563667297363 }, { "epoch": 5.8258056640625e-06, "model_forward_time": 0.025252342224121094, "step": 3818 }, { "epoch": 5.8258056640625e-06, "step": 3818, "training_step_time": 0.10793757438659668 }, { "epoch": 5.82733154296875e-06, "model_forward_time": 0.02523183822631836, "step": 3819 }, { "epoch": 5.82733154296875e-06, "step": 3819, "training_step_time": 0.1123511791229248 }, { "epoch": 5.828857421875e-06, "grad_norm": 0.6049166321754456, "learning_rate": 9.83738606371984e-05, "loss": 0.1118, "step": 3820 }, { "epoch": 5.828857421875e-06, "model_forward_time": 0.024628877639770508, "step": 3820 }, { "epoch": 5.828857421875e-06, "step": 3820, "training_step_time": 0.10770964622497559 }, { "epoch": 5.83038330078125e-06, "model_forward_time": 0.02467513084411621, "step": 3821 }, { "epoch": 5.83038330078125e-06, "step": 3821, "training_step_time": 0.10818028450012207 }, { "epoch": 5.8319091796875e-06, "model_forward_time": 0.024209022521972656, "step": 3822 }, { "epoch": 5.8319091796875e-06, "step": 3822, "training_step_time": 0.11073660850524902 }, { "epoch": 5.83343505859375e-06, "model_forward_time": 0.024425983428955078, "step": 3823 }, { "epoch": 5.83343505859375e-06, "step": 3823, "training_step_time": 0.10981321334838867 }, { "epoch": 5.8349609375e-06, "model_forward_time": 0.02442169189453125, "step": 3824 }, { "epoch": 5.8349609375e-06, "step": 3824, "training_step_time": 0.10732245445251465 }, { "epoch": 5.83648681640625e-06, "model_forward_time": 0.024677753448486328, "step": 3825 }, { "epoch": 5.83648681640625e-06, "step": 3825, "training_step_time": 0.11009669303894043 }, { "epoch": 5.8380126953125e-06, "model_forward_time": 0.024405956268310547, "step": 3826 }, { "epoch": 5.8380126953125e-06, "step": 3826, "training_step_time": 0.11384105682373047 }, { "epoch": 5.83953857421875e-06, "model_forward_time": 0.024467945098876953, "step": 3827 }, { "epoch": 5.83953857421875e-06, "step": 3827, "training_step_time": 0.12632226943969727 }, { "epoch": 5.841064453125e-06, "model_forward_time": 0.024417400360107422, "step": 3828 }, { "epoch": 5.841064453125e-06, "step": 3828, "training_step_time": 0.15962743759155273 }, { "epoch": 5.84259033203125e-06, "model_forward_time": 0.023672819137573242, "step": 3829 }, { "epoch": 5.84259033203125e-06, "step": 3829, "training_step_time": 0.24622869491577148 }, { "epoch": 5.8441162109375e-06, "grad_norm": 0.5268808603286743, "learning_rate": 9.835988929321165e-05, "loss": 0.093, "step": 3830 }, { "epoch": 5.8441162109375e-06, "model_forward_time": 0.02365422248840332, "step": 3830 }, { "epoch": 5.8441162109375e-06, "step": 3830, "training_step_time": 0.18918681144714355 }, { "epoch": 5.84564208984375e-06, "model_forward_time": 0.023884057998657227, "step": 3831 }, { "epoch": 5.84564208984375e-06, "step": 3831, "training_step_time": 0.273115873336792 }, { "epoch": 5.84716796875e-06, "model_forward_time": 0.02313375473022461, "step": 3832 }, { "epoch": 5.84716796875e-06, "step": 3832, "training_step_time": 0.19476556777954102 }, { "epoch": 5.84869384765625e-06, "model_forward_time": 0.02361607551574707, "step": 3833 }, { "epoch": 5.84869384765625e-06, "step": 3833, "training_step_time": 0.2110886573791504 }, { "epoch": 5.8502197265625e-06, "model_forward_time": 0.0235598087310791, "step": 3834 }, { "epoch": 5.8502197265625e-06, "step": 3834, "training_step_time": 0.1524813175201416 }, { "epoch": 5.85174560546875e-06, "model_forward_time": 0.02361130714416504, "step": 3835 }, { "epoch": 5.85174560546875e-06, "step": 3835, "training_step_time": 0.15381860733032227 }, { "epoch": 5.853271484375e-06, "model_forward_time": 0.02613091468811035, "step": 3836 }, { "epoch": 5.853271484375e-06, "step": 3836, "training_step_time": 0.11510372161865234 }, { "epoch": 5.85479736328125e-06, "model_forward_time": 0.02428603172302246, "step": 3837 }, { "epoch": 5.85479736328125e-06, "step": 3837, "training_step_time": 0.10776138305664062 }, { "epoch": 5.8563232421875e-06, "model_forward_time": 0.02475881576538086, "step": 3838 }, { "epoch": 5.8563232421875e-06, "step": 3838, "training_step_time": 0.10681581497192383 }, { "epoch": 5.85784912109375e-06, "model_forward_time": 0.026027441024780273, "step": 3839 }, { "epoch": 5.85784912109375e-06, "step": 3839, "training_step_time": 0.10921716690063477 }, { "epoch": 5.859375e-06, "grad_norm": 0.4340176582336426, "learning_rate": 9.834585918739936e-05, "loss": 0.0772, "step": 3840 }, { "epoch": 5.859375e-06, "model_forward_time": 0.024509429931640625, "step": 3840 }, { "epoch": 5.859375e-06, "step": 3840, "training_step_time": 0.10770916938781738 }, { "epoch": 5.86090087890625e-06, "model_forward_time": 0.026287555694580078, "step": 3841 }, { "epoch": 5.86090087890625e-06, "step": 3841, "training_step_time": 0.10759139060974121 }, { "epoch": 5.8624267578125e-06, "model_forward_time": 0.024283885955810547, "step": 3842 }, { "epoch": 5.8624267578125e-06, "step": 3842, "training_step_time": 0.10953569412231445 }, { "epoch": 5.86395263671875e-06, "model_forward_time": 0.02478623390197754, "step": 3843 }, { "epoch": 5.86395263671875e-06, "step": 3843, "training_step_time": 0.11611151695251465 }, { "epoch": 5.865478515625e-06, "model_forward_time": 0.02423691749572754, "step": 3844 }, { "epoch": 5.865478515625e-06, "step": 3844, "training_step_time": 0.11401200294494629 }, { "epoch": 5.86700439453125e-06, "model_forward_time": 0.02481245994567871, "step": 3845 }, { "epoch": 5.86700439453125e-06, "step": 3845, "training_step_time": 0.1250908374786377 }, { "epoch": 5.8685302734375e-06, "model_forward_time": 0.025206804275512695, "step": 3846 }, { "epoch": 5.8685302734375e-06, "step": 3846, "training_step_time": 0.13276171684265137 }, { "epoch": 5.87005615234375e-06, "model_forward_time": 0.02493739128112793, "step": 3847 }, { "epoch": 5.87005615234375e-06, "step": 3847, "training_step_time": 0.10967016220092773 }, { "epoch": 5.87158203125e-06, "model_forward_time": 0.025101423263549805, "step": 3848 }, { "epoch": 5.87158203125e-06, "step": 3848, "training_step_time": 0.11724591255187988 }, { "epoch": 5.87310791015625e-06, "model_forward_time": 0.02477860450744629, "step": 3849 }, { "epoch": 5.87310791015625e-06, "step": 3849, "training_step_time": 0.10789847373962402 }, { "epoch": 5.8746337890625e-06, "grad_norm": 0.5973440408706665, "learning_rate": 9.833177033680944e-05, "loss": 0.1038, "step": 3850 }, { "epoch": 5.8746337890625e-06, "model_forward_time": 0.024653196334838867, "step": 3850 }, { "epoch": 5.8746337890625e-06, "step": 3850, "training_step_time": 0.10861515998840332 }, { "epoch": 5.87615966796875e-06, "model_forward_time": 0.024451255798339844, "step": 3851 }, { "epoch": 5.87615966796875e-06, "step": 3851, "training_step_time": 0.1444549560546875 }, { "epoch": 5.877685546875e-06, "model_forward_time": 0.02789020538330078, "step": 3852 }, { "epoch": 5.877685546875e-06, "step": 3852, "training_step_time": 0.12726712226867676 }, { "epoch": 5.87921142578125e-06, "model_forward_time": 0.02399921417236328, "step": 3853 }, { "epoch": 5.87921142578125e-06, "step": 3853, "training_step_time": 0.1215059757232666 }, { "epoch": 5.8807373046875e-06, "model_forward_time": 0.024276256561279297, "step": 3854 }, { "epoch": 5.8807373046875e-06, "step": 3854, "training_step_time": 0.12537002563476562 }, { "epoch": 5.88226318359375e-06, "model_forward_time": 0.024267196655273438, "step": 3855 }, { "epoch": 5.88226318359375e-06, "step": 3855, "training_step_time": 0.11988329887390137 }, { "epoch": 5.8837890625e-06, "model_forward_time": 0.024638652801513672, "step": 3856 }, { "epoch": 5.8837890625e-06, "step": 3856, "training_step_time": 0.11623358726501465 }, { "epoch": 5.88531494140625e-06, "model_forward_time": 0.024324893951416016, "step": 3857 }, { "epoch": 5.88531494140625e-06, "step": 3857, "training_step_time": 0.1108846664428711 }, { "epoch": 5.8868408203125e-06, "model_forward_time": 0.02426743507385254, "step": 3858 }, { "epoch": 5.8868408203125e-06, "step": 3858, "training_step_time": 0.11066842079162598 }, { "epoch": 5.88836669921875e-06, "model_forward_time": 0.024240493774414062, "step": 3859 }, { "epoch": 5.88836669921875e-06, "step": 3859, "training_step_time": 0.11228013038635254 }, { "epoch": 5.889892578125e-06, "grad_norm": 0.4913652241230011, "learning_rate": 9.831762275856118e-05, "loss": 0.093, "step": 3860 }, { "epoch": 5.889892578125e-06, "model_forward_time": 0.024737119674682617, "step": 3860 }, { "epoch": 5.889892578125e-06, "step": 3860, "training_step_time": 0.11471986770629883 }, { "epoch": 5.89141845703125e-06, "model_forward_time": 0.024394750595092773, "step": 3861 }, { "epoch": 5.89141845703125e-06, "step": 3861, "training_step_time": 0.10892915725708008 }, { "epoch": 5.8929443359375e-06, "model_forward_time": 0.024424076080322266, "step": 3862 }, { "epoch": 5.8929443359375e-06, "step": 3862, "training_step_time": 0.10992598533630371 }, { "epoch": 5.89447021484375e-06, "model_forward_time": 0.024509191513061523, "step": 3863 }, { "epoch": 5.89447021484375e-06, "step": 3863, "training_step_time": 0.10797333717346191 }, { "epoch": 5.89599609375e-06, "model_forward_time": 0.025216102600097656, "step": 3864 }, { "epoch": 5.89599609375e-06, "step": 3864, "training_step_time": 0.10838007926940918 }, { "epoch": 5.89752197265625e-06, "model_forward_time": 0.024764537811279297, "step": 3865 }, { "epoch": 5.89752197265625e-06, "step": 3865, "training_step_time": 0.11017775535583496 }, { "epoch": 5.8990478515625e-06, "model_forward_time": 0.024631738662719727, "step": 3866 }, { "epoch": 5.8990478515625e-06, "step": 3866, "training_step_time": 0.10675692558288574 }, { "epoch": 5.90057373046875e-06, "model_forward_time": 0.024269819259643555, "step": 3867 }, { "epoch": 5.90057373046875e-06, "step": 3867, "training_step_time": 0.10904502868652344 }, { "epoch": 5.902099609375e-06, "model_forward_time": 0.025847434997558594, "step": 3868 }, { "epoch": 5.902099609375e-06, "step": 3868, "training_step_time": 0.11874961853027344 }, { "epoch": 5.90362548828125e-06, "model_forward_time": 0.024465322494506836, "step": 3869 }, { "epoch": 5.90362548828125e-06, "step": 3869, "training_step_time": 0.10709261894226074 }, { "epoch": 5.9051513671875e-06, "grad_norm": 0.3654991686344147, "learning_rate": 9.830341646984521e-05, "loss": 0.0947, "step": 3870 }, { "epoch": 5.9051513671875e-06, "model_forward_time": 0.024537324905395508, "step": 3870 }, { "epoch": 5.9051513671875e-06, "step": 3870, "training_step_time": 0.10721540451049805 }, { "epoch": 5.90667724609375e-06, "model_forward_time": 0.025421619415283203, "step": 3871 }, { "epoch": 5.90667724609375e-06, "step": 3871, "training_step_time": 0.10730719566345215 }, { "epoch": 5.908203125e-06, "model_forward_time": 0.02541041374206543, "step": 3872 }, { "epoch": 5.908203125e-06, "step": 3872, "training_step_time": 0.13470244407653809 }, { "epoch": 5.90972900390625e-06, "model_forward_time": 0.025674104690551758, "step": 3873 }, { "epoch": 5.90972900390625e-06, "step": 3873, "training_step_time": 0.11907005310058594 }, { "epoch": 5.9112548828125e-06, "model_forward_time": 0.025252342224121094, "step": 3874 }, { "epoch": 5.9112548828125e-06, "step": 3874, "training_step_time": 0.13324189186096191 }, { "epoch": 5.91278076171875e-06, "model_forward_time": 0.025063037872314453, "step": 3875 }, { "epoch": 5.91278076171875e-06, "step": 3875, "training_step_time": 0.13765406608581543 }, { "epoch": 5.914306640625e-06, "model_forward_time": 0.02518320083618164, "step": 3876 }, { "epoch": 5.914306640625e-06, "step": 3876, "training_step_time": 0.1413424015045166 }, { "epoch": 5.91583251953125e-06, "model_forward_time": 0.025068283081054688, "step": 3877 }, { "epoch": 5.91583251953125e-06, "step": 3877, "training_step_time": 0.20303750038146973 }, { "epoch": 5.9173583984375e-06, "model_forward_time": 0.024590492248535156, "step": 3878 }, { "epoch": 5.9173583984375e-06, "step": 3878, "training_step_time": 0.13485956192016602 }, { "epoch": 5.91888427734375e-06, "model_forward_time": 0.02463674545288086, "step": 3879 }, { "epoch": 5.91888427734375e-06, "step": 3879, "training_step_time": 0.11011409759521484 }, { "epoch": 5.92041015625e-06, "grad_norm": 0.286504864692688, "learning_rate": 9.828915148792352e-05, "loss": 0.0737, "step": 3880 }, { "epoch": 5.92041015625e-06, "model_forward_time": 0.025462865829467773, "step": 3880 }, { "epoch": 5.92041015625e-06, "step": 3880, "training_step_time": 0.11486101150512695 }, { "epoch": 5.92193603515625e-06, "model_forward_time": 0.025619983673095703, "step": 3881 }, { "epoch": 5.92193603515625e-06, "step": 3881, "training_step_time": 0.10775518417358398 }, { "epoch": 5.9234619140625e-06, "model_forward_time": 0.02527928352355957, "step": 3882 }, { "epoch": 5.9234619140625e-06, "step": 3882, "training_step_time": 0.1199493408203125 }, { "epoch": 5.92498779296875e-06, "model_forward_time": 0.0256345272064209, "step": 3883 }, { "epoch": 5.92498779296875e-06, "step": 3883, "training_step_time": 0.1963350772857666 }, { "epoch": 5.926513671875e-06, "model_forward_time": 0.02484750747680664, "step": 3884 }, { "epoch": 5.926513671875e-06, "step": 3884, "training_step_time": 0.10986566543579102 }, { "epoch": 5.92803955078125e-06, "model_forward_time": 0.025430679321289062, "step": 3885 }, { "epoch": 5.92803955078125e-06, "step": 3885, "training_step_time": 0.1064310073852539 }, { "epoch": 5.9295654296875e-06, "model_forward_time": 0.025760650634765625, "step": 3886 }, { "epoch": 5.9295654296875e-06, "step": 3886, "training_step_time": 0.11071205139160156 }, { "epoch": 5.93109130859375e-06, "model_forward_time": 0.02566838264465332, "step": 3887 }, { "epoch": 5.93109130859375e-06, "step": 3887, "training_step_time": 0.10995817184448242 }, { "epoch": 5.9326171875e-06, "model_forward_time": 0.024211883544921875, "step": 3888 }, { "epoch": 5.9326171875e-06, "step": 3888, "training_step_time": 0.10772705078125 }, { "epoch": 5.93414306640625e-06, "model_forward_time": 0.02414703369140625, "step": 3889 }, { "epoch": 5.93414306640625e-06, "step": 3889, "training_step_time": 0.1589820384979248 }, { "epoch": 5.9356689453125e-06, "grad_norm": 0.4652571976184845, "learning_rate": 9.82748278301294e-05, "loss": 0.1036, "step": 3890 }, { "epoch": 5.9356689453125e-06, "model_forward_time": 0.02413654327392578, "step": 3890 }, { "epoch": 5.9356689453125e-06, "step": 3890, "training_step_time": 0.1816256046295166 }, { "epoch": 5.93719482421875e-06, "model_forward_time": 0.023906946182250977, "step": 3891 }, { "epoch": 5.93719482421875e-06, "step": 3891, "training_step_time": 0.1257925033569336 }, { "epoch": 5.938720703125e-06, "model_forward_time": 0.023796558380126953, "step": 3892 }, { "epoch": 5.938720703125e-06, "step": 3892, "training_step_time": 0.13409900665283203 }, { "epoch": 5.94024658203125e-06, "model_forward_time": 0.024081707000732422, "step": 3893 }, { "epoch": 5.94024658203125e-06, "step": 3893, "training_step_time": 0.21071481704711914 }, { "epoch": 5.9417724609375e-06, "model_forward_time": 0.024678945541381836, "step": 3894 }, { "epoch": 5.9417724609375e-06, "step": 3894, "training_step_time": 0.11780500411987305 }, { "epoch": 5.94329833984375e-06, "model_forward_time": 0.025034427642822266, "step": 3895 }, { "epoch": 5.94329833984375e-06, "step": 3895, "training_step_time": 0.1179201602935791 }, { "epoch": 5.94482421875e-06, "model_forward_time": 0.025400638580322266, "step": 3896 }, { "epoch": 5.94482421875e-06, "step": 3896, "training_step_time": 0.11397099494934082 }, { "epoch": 5.94635009765625e-06, "model_forward_time": 0.0254518985748291, "step": 3897 }, { "epoch": 5.94635009765625e-06, "step": 3897, "training_step_time": 0.11244606971740723 }, { "epoch": 5.9478759765625e-06, "model_forward_time": 0.02551722526550293, "step": 3898 }, { "epoch": 5.9478759765625e-06, "step": 3898, "training_step_time": 0.11413455009460449 }, { "epoch": 5.94940185546875e-06, "model_forward_time": 0.02581644058227539, "step": 3899 }, { "epoch": 5.94940185546875e-06, "step": 3899, "training_step_time": 0.10853981971740723 }, { "epoch": 5.950927734375e-06, "grad_norm": 0.6859496235847473, "learning_rate": 9.826044551386744e-05, "loss": 0.1011, "step": 3900 }, { "epoch": 5.950927734375e-06, "model_forward_time": 0.02557682991027832, "step": 3900 }, { "epoch": 5.950927734375e-06, "step": 3900, "training_step_time": 0.10816025733947754 }, { "epoch": 5.95245361328125e-06, "model_forward_time": 0.025686264038085938, "step": 3901 }, { "epoch": 5.95245361328125e-06, "step": 3901, "training_step_time": 0.10817980766296387 }, { "epoch": 5.9539794921875e-06, "model_forward_time": 0.02534937858581543, "step": 3902 }, { "epoch": 5.9539794921875e-06, "step": 3902, "training_step_time": 0.10775613784790039 }, { "epoch": 5.95550537109375e-06, "model_forward_time": 0.025724172592163086, "step": 3903 }, { "epoch": 5.95550537109375e-06, "step": 3903, "training_step_time": 0.10944342613220215 }, { "epoch": 5.95703125e-06, "model_forward_time": 0.026932239532470703, "step": 3904 }, { "epoch": 5.95703125e-06, "step": 3904, "training_step_time": 0.11585474014282227 }, { "epoch": 5.95855712890625e-06, "model_forward_time": 0.025278329849243164, "step": 3905 }, { "epoch": 5.95855712890625e-06, "step": 3905, "training_step_time": 0.11381697654724121 }, { "epoch": 5.9600830078125e-06, "model_forward_time": 0.025643587112426758, "step": 3906 }, { "epoch": 5.9600830078125e-06, "step": 3906, "training_step_time": 0.10903525352478027 }, { "epoch": 5.96160888671875e-06, "model_forward_time": 0.025649309158325195, "step": 3907 }, { "epoch": 5.96160888671875e-06, "step": 3907, "training_step_time": 0.11130738258361816 }, { "epoch": 5.963134765625e-06, "model_forward_time": 0.02526068687438965, "step": 3908 }, { "epoch": 5.963134765625e-06, "step": 3908, "training_step_time": 0.10825371742248535 }, { "epoch": 5.96466064453125e-06, "model_forward_time": 0.025218725204467773, "step": 3909 }, { "epoch": 5.96466064453125e-06, "step": 3909, "training_step_time": 0.10562324523925781 }, { "epoch": 5.9661865234375e-06, "grad_norm": 0.5915921330451965, "learning_rate": 9.824600455661353e-05, "loss": 0.0832, "step": 3910 }, { "epoch": 5.9661865234375e-06, "model_forward_time": 0.025377511978149414, "step": 3910 }, { "epoch": 5.9661865234375e-06, "step": 3910, "training_step_time": 0.10834217071533203 }, { "epoch": 5.96771240234375e-06, "model_forward_time": 0.025218963623046875, "step": 3911 }, { "epoch": 5.96771240234375e-06, "step": 3911, "training_step_time": 0.10789036750793457 }, { "epoch": 5.96923828125e-06, "model_forward_time": 0.025722265243530273, "step": 3912 }, { "epoch": 5.96923828125e-06, "step": 3912, "training_step_time": 0.11448001861572266 }, { "epoch": 5.97076416015625e-06, "model_forward_time": 0.025814056396484375, "step": 3913 }, { "epoch": 5.97076416015625e-06, "step": 3913, "training_step_time": 0.10883164405822754 }, { "epoch": 5.9722900390625e-06, "model_forward_time": 0.02528858184814453, "step": 3914 }, { "epoch": 5.9722900390625e-06, "step": 3914, "training_step_time": 0.10640740394592285 }, { "epoch": 5.97381591796875e-06, "model_forward_time": 0.025534629821777344, "step": 3915 }, { "epoch": 5.97381591796875e-06, "step": 3915, "training_step_time": 0.11209321022033691 }, { "epoch": 5.975341796875e-06, "model_forward_time": 0.025533437728881836, "step": 3916 }, { "epoch": 5.975341796875e-06, "step": 3916, "training_step_time": 0.11191463470458984 }, { "epoch": 5.97686767578125e-06, "model_forward_time": 0.025388479232788086, "step": 3917 }, { "epoch": 5.97686767578125e-06, "step": 3917, "training_step_time": 0.1418931484222412 }, { "epoch": 5.9783935546875e-06, "model_forward_time": 0.024953842163085938, "step": 3918 }, { "epoch": 5.9783935546875e-06, "step": 3918, "training_step_time": 0.1179511547088623 }, { "epoch": 5.97991943359375e-06, "model_forward_time": 0.02536630630493164, "step": 3919 }, { "epoch": 5.97991943359375e-06, "step": 3919, "training_step_time": 0.12344479560852051 }, { "epoch": 5.9814453125e-06, "grad_norm": 0.3807622790336609, "learning_rate": 9.823150497591476e-05, "loss": 0.093, "step": 3920 }, { "epoch": 5.9814453125e-06, "model_forward_time": 0.028074264526367188, "step": 3920 }, { "epoch": 5.9814453125e-06, "step": 3920, "training_step_time": 0.16993212699890137 }, { "epoch": 5.98297119140625e-06, "model_forward_time": 0.02316451072692871, "step": 3921 }, { "epoch": 5.98297119140625e-06, "step": 3921, "training_step_time": 0.21246814727783203 }, { "epoch": 5.9844970703125e-06, "model_forward_time": 0.024144649505615234, "step": 3922 }, { "epoch": 5.9844970703125e-06, "step": 3922, "training_step_time": 0.11600542068481445 }, { "epoch": 5.98602294921875e-06, "model_forward_time": 0.023773670196533203, "step": 3923 }, { "epoch": 5.98602294921875e-06, "step": 3923, "training_step_time": 0.13034272193908691 }, { "epoch": 5.987548828125e-06, "model_forward_time": 0.025002717971801758, "step": 3924 }, { "epoch": 5.987548828125e-06, "step": 3924, "training_step_time": 0.1120913028717041 }, { "epoch": 5.98907470703125e-06, "model_forward_time": 0.024188995361328125, "step": 3925 }, { "epoch": 5.98907470703125e-06, "step": 3925, "training_step_time": 0.12143492698669434 }, { "epoch": 5.9906005859375e-06, "model_forward_time": 0.024413585662841797, "step": 3926 }, { "epoch": 5.9906005859375e-06, "step": 3926, "training_step_time": 0.10945916175842285 }, { "epoch": 5.99212646484375e-06, "model_forward_time": 0.024651288986206055, "step": 3927 }, { "epoch": 5.99212646484375e-06, "step": 3927, "training_step_time": 0.18586015701293945 }, { "epoch": 5.99365234375e-06, "model_forward_time": 0.02423095703125, "step": 3928 }, { "epoch": 5.99365234375e-06, "step": 3928, "training_step_time": 0.12376689910888672 }, { "epoch": 5.99517822265625e-06, "model_forward_time": 0.0237886905670166, "step": 3929 }, { "epoch": 5.99517822265625e-06, "step": 3929, "training_step_time": 0.10692405700683594 }, { "epoch": 5.9967041015625e-06, "grad_norm": 0.431569367647171, "learning_rate": 9.821694678938953e-05, "loss": 0.0773, "step": 3930 }, { "epoch": 5.9967041015625e-06, "model_forward_time": 0.024218320846557617, "step": 3930 }, { "epoch": 5.9967041015625e-06, "step": 3930, "training_step_time": 0.11264729499816895 }, { "epoch": 5.99822998046875e-06, "model_forward_time": 0.02421712875366211, "step": 3931 }, { "epoch": 5.99822998046875e-06, "step": 3931, "training_step_time": 0.1077718734741211 }, { "epoch": 5.999755859375e-06, "model_forward_time": 0.02498316764831543, "step": 3932 }, { "epoch": 5.999755859375e-06, "step": 3932, "training_step_time": 0.1098182201385498 }, { "epoch": 6.00128173828125e-06, "model_forward_time": 0.02467203140258789, "step": 3933 }, { "epoch": 6.00128173828125e-06, "step": 3933, "training_step_time": 0.10857200622558594 }, { "epoch": 6.0028076171875e-06, "model_forward_time": 0.02423882484436035, "step": 3934 }, { "epoch": 6.0028076171875e-06, "step": 3934, "training_step_time": 0.11143040657043457 }, { "epoch": 6.00433349609375e-06, "model_forward_time": 0.02446913719177246, "step": 3935 }, { "epoch": 6.00433349609375e-06, "step": 3935, "training_step_time": 0.10392570495605469 }, { "epoch": 6.005859375e-06, "model_forward_time": 0.0241239070892334, "step": 3936 }, { "epoch": 6.005859375e-06, "step": 3936, "training_step_time": 0.12522149085998535 }, { "epoch": 6.00738525390625e-06, "model_forward_time": 0.024126291275024414, "step": 3937 }, { "epoch": 6.00738525390625e-06, "step": 3937, "training_step_time": 0.12706422805786133 }, { "epoch": 6.0089111328125e-06, "model_forward_time": 0.024190425872802734, "step": 3938 }, { "epoch": 6.0089111328125e-06, "step": 3938, "training_step_time": 0.11108613014221191 }, { "epoch": 6.01043701171875e-06, "model_forward_time": 0.02630615234375, "step": 3939 }, { "epoch": 6.01043701171875e-06, "step": 3939, "training_step_time": 0.1137075424194336 }, { "epoch": 6.011962890625e-06, "grad_norm": 0.9006413817405701, "learning_rate": 9.820233001472738e-05, "loss": 0.117, "step": 3940 }, { "epoch": 6.011962890625e-06, "model_forward_time": 0.024637460708618164, "step": 3940 }, { "epoch": 6.011962890625e-06, "step": 3940, "training_step_time": 0.11525750160217285 }, { "epoch": 6.01348876953125e-06, "model_forward_time": 0.02448296546936035, "step": 3941 }, { "epoch": 6.01348876953125e-06, "step": 3941, "training_step_time": 0.12529683113098145 }, { "epoch": 6.0150146484375e-06, "model_forward_time": 0.024582624435424805, "step": 3942 }, { "epoch": 6.0150146484375e-06, "step": 3942, "training_step_time": 0.12486410140991211 }, { "epoch": 6.01654052734375e-06, "model_forward_time": 0.024431228637695312, "step": 3943 }, { "epoch": 6.01654052734375e-06, "step": 3943, "training_step_time": 0.11432290077209473 }, { "epoch": 6.01806640625e-06, "model_forward_time": 0.02467179298400879, "step": 3944 }, { "epoch": 6.01806640625e-06, "step": 3944, "training_step_time": 0.11053967475891113 }, { "epoch": 6.01959228515625e-06, "model_forward_time": 0.02424025535583496, "step": 3945 }, { "epoch": 6.01959228515625e-06, "step": 3945, "training_step_time": 0.11434650421142578 }, { "epoch": 6.0211181640625e-06, "model_forward_time": 0.02490830421447754, "step": 3946 }, { "epoch": 6.0211181640625e-06, "step": 3946, "training_step_time": 0.10954451560974121 }, { "epoch": 6.02264404296875e-06, "model_forward_time": 0.024805307388305664, "step": 3947 }, { "epoch": 6.02264404296875e-06, "step": 3947, "training_step_time": 0.11148619651794434 }, { "epoch": 6.024169921875e-06, "model_forward_time": 0.024556636810302734, "step": 3948 }, { "epoch": 6.024169921875e-06, "step": 3948, "training_step_time": 0.11260271072387695 }, { "epoch": 6.02569580078125e-06, "model_forward_time": 0.02450084686279297, "step": 3949 }, { "epoch": 6.02569580078125e-06, "step": 3949, "training_step_time": 0.11630487442016602 }, { "epoch": 6.0272216796875e-06, "grad_norm": 0.5529266595840454, "learning_rate": 9.818765466968909e-05, "loss": 0.0928, "step": 3950 }, { "epoch": 6.0272216796875e-06, "model_forward_time": 0.0244143009185791, "step": 3950 }, { "epoch": 6.0272216796875e-06, "step": 3950, "training_step_time": 0.11397242546081543 }, { "epoch": 6.02874755859375e-06, "model_forward_time": 0.0248258113861084, "step": 3951 }, { "epoch": 6.02874755859375e-06, "step": 3951, "training_step_time": 0.1118779182434082 }, { "epoch": 6.0302734375e-06, "model_forward_time": 0.024424314498901367, "step": 3952 }, { "epoch": 6.0302734375e-06, "step": 3952, "training_step_time": 0.11092114448547363 }, { "epoch": 6.03179931640625e-06, "model_forward_time": 0.024285078048706055, "step": 3953 }, { "epoch": 6.03179931640625e-06, "step": 3953, "training_step_time": 0.1101067066192627 }, { "epoch": 6.0333251953125e-06, "model_forward_time": 0.024440765380859375, "step": 3954 }, { "epoch": 6.0333251953125e-06, "step": 3954, "training_step_time": 0.11153817176818848 }, { "epoch": 6.03485107421875e-06, "model_forward_time": 0.024410486221313477, "step": 3955 }, { "epoch": 6.03485107421875e-06, "step": 3955, "training_step_time": 0.11780023574829102 }, { "epoch": 6.036376953125e-06, "model_forward_time": 0.024688720703125, "step": 3956 }, { "epoch": 6.036376953125e-06, "step": 3956, "training_step_time": 0.10722041130065918 }, { "epoch": 6.03790283203125e-06, "model_forward_time": 0.024570465087890625, "step": 3957 }, { "epoch": 6.03790283203125e-06, "step": 3957, "training_step_time": 0.11048007011413574 }, { "epoch": 6.0394287109375e-06, "model_forward_time": 0.024798870086669922, "step": 3958 }, { "epoch": 6.0394287109375e-06, "step": 3958, "training_step_time": 0.11306428909301758 }, { "epoch": 6.04095458984375e-06, "model_forward_time": 0.02511310577392578, "step": 3959 }, { "epoch": 6.04095458984375e-06, "step": 3959, "training_step_time": 0.10807299613952637 }, { "epoch": 6.04248046875e-06, "grad_norm": 0.26172083616256714, "learning_rate": 9.817292077210659e-05, "loss": 0.1002, "step": 3960 }, { "epoch": 6.04248046875e-06, "model_forward_time": 0.024895906448364258, "step": 3960 }, { "epoch": 6.04248046875e-06, "step": 3960, "training_step_time": 0.11130285263061523 }, { "epoch": 6.04400634765625e-06, "model_forward_time": 0.02480936050415039, "step": 3961 }, { "epoch": 6.04400634765625e-06, "step": 3961, "training_step_time": 0.11148691177368164 }, { "epoch": 6.0455322265625e-06, "model_forward_time": 0.024486064910888672, "step": 3962 }, { "epoch": 6.0455322265625e-06, "step": 3962, "training_step_time": 0.10804605484008789 }, { "epoch": 6.04705810546875e-06, "model_forward_time": 0.024596452713012695, "step": 3963 }, { "epoch": 6.04705810546875e-06, "step": 3963, "training_step_time": 0.11092400550842285 }, { "epoch": 6.048583984375e-06, "model_forward_time": 0.02443718910217285, "step": 3964 }, { "epoch": 6.048583984375e-06, "step": 3964, "training_step_time": 0.11322522163391113 }, { "epoch": 6.05010986328125e-06, "model_forward_time": 0.024370670318603516, "step": 3965 }, { "epoch": 6.05010986328125e-06, "step": 3965, "training_step_time": 0.11672163009643555 }, { "epoch": 6.0516357421875e-06, "model_forward_time": 0.028784513473510742, "step": 3966 }, { "epoch": 6.0516357421875e-06, "step": 3966, "training_step_time": 0.11127829551696777 }, { "epoch": 6.05316162109375e-06, "model_forward_time": 0.024376869201660156, "step": 3967 }, { "epoch": 6.05316162109375e-06, "step": 3967, "training_step_time": 0.1616535186767578 }, { "epoch": 6.0546875e-06, "model_forward_time": 0.02426314353942871, "step": 3968 }, { "epoch": 6.0546875e-06, "step": 3968, "training_step_time": 0.1816425323486328 }, { "epoch": 6.05621337890625e-06, "model_forward_time": 0.02413773536682129, "step": 3969 }, { "epoch": 6.05621337890625e-06, "step": 3969, "training_step_time": 0.16640210151672363 }, { "epoch": 6.0577392578125e-06, "grad_norm": 0.6211804747581482, "learning_rate": 9.815812833988291e-05, "loss": 0.0915, "step": 3970 }, { "epoch": 6.0577392578125e-06, "model_forward_time": 0.023595571517944336, "step": 3970 }, { "epoch": 6.0577392578125e-06, "step": 3970, "training_step_time": 0.12278890609741211 }, { "epoch": 6.05926513671875e-06, "model_forward_time": 0.023621320724487305, "step": 3971 }, { "epoch": 6.05926513671875e-06, "step": 3971, "training_step_time": 0.11238932609558105 }, { "epoch": 6.060791015625e-06, "model_forward_time": 0.02471137046813965, "step": 3972 }, { "epoch": 6.060791015625e-06, "step": 3972, "training_step_time": 0.12447094917297363 }, { "epoch": 6.06231689453125e-06, "model_forward_time": 0.024735212326049805, "step": 3973 }, { "epoch": 6.06231689453125e-06, "step": 3973, "training_step_time": 0.10941576957702637 }, { "epoch": 6.0638427734375e-06, "model_forward_time": 0.02462005615234375, "step": 3974 }, { "epoch": 6.0638427734375e-06, "step": 3974, "training_step_time": 0.19194483757019043 }, { "epoch": 6.06536865234375e-06, "model_forward_time": 0.023949146270751953, "step": 3975 }, { "epoch": 6.06536865234375e-06, "step": 3975, "training_step_time": 0.1175544261932373 }, { "epoch": 6.06689453125e-06, "model_forward_time": 0.024126529693603516, "step": 3976 }, { "epoch": 6.06689453125e-06, "step": 3976, "training_step_time": 0.10764050483703613 }, { "epoch": 6.06842041015625e-06, "model_forward_time": 0.024681568145751953, "step": 3977 }, { "epoch": 6.06842041015625e-06, "step": 3977, "training_step_time": 0.10711884498596191 }, { "epoch": 6.0699462890625e-06, "model_forward_time": 0.024898052215576172, "step": 3978 }, { "epoch": 6.0699462890625e-06, "step": 3978, "training_step_time": 0.10722756385803223 }, { "epoch": 6.07147216796875e-06, "model_forward_time": 0.02580738067626953, "step": 3979 }, { "epoch": 6.07147216796875e-06, "step": 3979, "training_step_time": 0.1071929931640625 }, { "epoch": 6.072998046875e-06, "grad_norm": 0.3808532655239105, "learning_rate": 9.81432773909923e-05, "loss": 0.0931, "step": 3980 }, { "epoch": 6.072998046875e-06, "model_forward_time": 0.024674415588378906, "step": 3980 }, { "epoch": 6.072998046875e-06, "step": 3980, "training_step_time": 0.11172676086425781 }, { "epoch": 6.07452392578125e-06, "model_forward_time": 0.02452707290649414, "step": 3981 }, { "epoch": 6.07452392578125e-06, "step": 3981, "training_step_time": 0.10771393775939941 }, { "epoch": 6.0760498046875e-06, "model_forward_time": 0.025019168853759766, "step": 3982 }, { "epoch": 6.0760498046875e-06, "step": 3982, "training_step_time": 0.10748434066772461 }, { "epoch": 6.07757568359375e-06, "model_forward_time": 0.024686336517333984, "step": 3983 }, { "epoch": 6.07757568359375e-06, "step": 3983, "training_step_time": 0.10730838775634766 }, { "epoch": 6.0791015625e-06, "model_forward_time": 0.02414727210998535, "step": 3984 }, { "epoch": 6.0791015625e-06, "step": 3984, "training_step_time": 0.10560250282287598 }, { "epoch": 6.08062744140625e-06, "model_forward_time": 0.02419281005859375, "step": 3985 }, { "epoch": 6.08062744140625e-06, "step": 3985, "training_step_time": 0.11469411849975586 }, { "epoch": 6.0821533203125e-06, "model_forward_time": 0.025173187255859375, "step": 3986 }, { "epoch": 6.0821533203125e-06, "step": 3986, "training_step_time": 0.10912871360778809 }, { "epoch": 6.08367919921875e-06, "model_forward_time": 0.024566173553466797, "step": 3987 }, { "epoch": 6.08367919921875e-06, "step": 3987, "training_step_time": 0.1077573299407959 }, { "epoch": 6.085205078125e-06, "model_forward_time": 0.024798154830932617, "step": 3988 }, { "epoch": 6.085205078125e-06, "step": 3988, "training_step_time": 0.17505693435668945 }, { "epoch": 6.08673095703125e-06, "model_forward_time": 0.02389669418334961, "step": 3989 }, { "epoch": 6.08673095703125e-06, "step": 3989, "training_step_time": 0.16470623016357422 }, { "epoch": 6.0882568359375e-06, "grad_norm": 0.48456794023513794, "learning_rate": 9.812836794348004e-05, "loss": 0.0919, "step": 3990 }, { "epoch": 6.0882568359375e-06, "model_forward_time": 0.02516961097717285, "step": 3990 }, { "epoch": 6.0882568359375e-06, "step": 3990, "training_step_time": 0.10391426086425781 }, { "epoch": 6.08978271484375e-06, "model_forward_time": 0.024981021881103516, "step": 3991 }, { "epoch": 6.08978271484375e-06, "step": 3991, "training_step_time": 0.10563421249389648 }, { "epoch": 6.09130859375e-06, "model_forward_time": 0.025179147720336914, "step": 3992 }, { "epoch": 6.09130859375e-06, "step": 3992, "training_step_time": 0.10694766044616699 }, { "epoch": 6.09283447265625e-06, "model_forward_time": 0.02774214744567871, "step": 3993 }, { "epoch": 6.09283447265625e-06, "step": 3993, "training_step_time": 0.11646103858947754 }, { "epoch": 6.0943603515625e-06, "model_forward_time": 0.025507211685180664, "step": 3994 }, { "epoch": 6.0943603515625e-06, "step": 3994, "training_step_time": 0.11000514030456543 }, { "epoch": 6.09588623046875e-06, "model_forward_time": 0.02602076530456543, "step": 3995 }, { "epoch": 6.09588623046875e-06, "step": 3995, "training_step_time": 0.1121220588684082 }, { "epoch": 6.097412109375e-06, "model_forward_time": 0.025761842727661133, "step": 3996 }, { "epoch": 6.097412109375e-06, "step": 3996, "training_step_time": 0.11238551139831543 }, { "epoch": 6.09893798828125e-06, "model_forward_time": 0.025362014770507812, "step": 3997 }, { "epoch": 6.09893798828125e-06, "step": 3997, "training_step_time": 0.10671401023864746 }, { "epoch": 6.1004638671875e-06, "model_forward_time": 0.02540135383605957, "step": 3998 }, { "epoch": 6.1004638671875e-06, "step": 3998, "training_step_time": 0.10659956932067871 }, { "epoch": 6.10198974609375e-06, "model_forward_time": 0.025347232818603516, "step": 3999 }, { "epoch": 6.10198974609375e-06, "step": 3999, "training_step_time": 0.10850644111633301 }, { "epoch": 6.103515625e-06, "grad_norm": 0.45659807324409485, "learning_rate": 9.811340001546251e-05, "loss": 0.0894, "step": 4000 }, { "epoch": 6.103515625e-06, "model_forward_time": 0.024462461471557617, "step": 4000 }, { "epoch": 6.103515625e-06, "step": 4000, "training_step_time": 0.10233044624328613 }, { "epoch": 6.10504150390625e-06, "model_forward_time": 0.023958683013916016, "step": 4001 }, { "epoch": 6.10504150390625e-06, "step": 4001, "training_step_time": 0.10288405418395996 }, { "epoch": 6.1065673828125e-06, "model_forward_time": 0.023969650268554688, "step": 4002 }, { "epoch": 6.1065673828125e-06, "step": 4002, "training_step_time": 0.10452413558959961 }, { "epoch": 6.10809326171875e-06, "model_forward_time": 0.02502155303955078, "step": 4003 }, { "epoch": 6.10809326171875e-06, "step": 4003, "training_step_time": 0.10599780082702637 }, { "epoch": 6.109619140625e-06, "model_forward_time": 0.024352312088012695, "step": 4004 }, { "epoch": 6.109619140625e-06, "step": 4004, "training_step_time": 0.10656309127807617 }, { "epoch": 6.11114501953125e-06, "model_forward_time": 0.02449321746826172, "step": 4005 }, { "epoch": 6.11114501953125e-06, "step": 4005, "training_step_time": 0.11162996292114258 }, { "epoch": 6.1126708984375e-06, "model_forward_time": 0.02432084083557129, "step": 4006 }, { "epoch": 6.1126708984375e-06, "step": 4006, "training_step_time": 0.10883665084838867 }, { "epoch": 6.11419677734375e-06, "model_forward_time": 0.024701833724975586, "step": 4007 }, { "epoch": 6.11419677734375e-06, "step": 4007, "training_step_time": 0.10908937454223633 }, { "epoch": 6.11572265625e-06, "model_forward_time": 0.02397751808166504, "step": 4008 }, { "epoch": 6.11572265625e-06, "step": 4008, "training_step_time": 0.10703110694885254 }, { "epoch": 6.11724853515625e-06, "model_forward_time": 0.024418354034423828, "step": 4009 }, { "epoch": 6.11724853515625e-06, "step": 4009, "training_step_time": 0.10777568817138672 }, { "epoch": 6.1187744140625e-06, "grad_norm": 0.46199831366539, "learning_rate": 9.80983736251272e-05, "loss": 0.0817, "step": 4010 }, { "epoch": 6.1187744140625e-06, "model_forward_time": 0.024737119674682617, "step": 4010 }, { "epoch": 6.1187744140625e-06, "step": 4010, "training_step_time": 0.10531497001647949 }, { "epoch": 6.12030029296875e-06, "model_forward_time": 0.024564743041992188, "step": 4011 }, { "epoch": 6.12030029296875e-06, "step": 4011, "training_step_time": 0.10743546485900879 }, { "epoch": 6.121826171875e-06, "model_forward_time": 0.02490520477294922, "step": 4012 }, { "epoch": 6.121826171875e-06, "step": 4012, "training_step_time": 0.10643982887268066 }, { "epoch": 6.12335205078125e-06, "model_forward_time": 0.024359703063964844, "step": 4013 }, { "epoch": 6.12335205078125e-06, "step": 4013, "training_step_time": 0.10345602035522461 }, { "epoch": 6.1248779296875e-06, "model_forward_time": 0.024936914443969727, "step": 4014 }, { "epoch": 6.1248779296875e-06, "step": 4014, "training_step_time": 0.11194539070129395 }, { "epoch": 6.12640380859375e-06, "model_forward_time": 0.02465963363647461, "step": 4015 }, { "epoch": 6.12640380859375e-06, "step": 4015, "training_step_time": 0.11056137084960938 }, { "epoch": 6.1279296875e-06, "model_forward_time": 0.024490833282470703, "step": 4016 }, { "epoch": 6.1279296875e-06, "step": 4016, "training_step_time": 0.10954999923706055 }, { "epoch": 6.12945556640625e-06, "model_forward_time": 0.02495884895324707, "step": 4017 }, { "epoch": 6.12945556640625e-06, "step": 4017, "training_step_time": 0.11131048202514648 }, { "epoch": 6.1309814453125e-06, "model_forward_time": 0.02453017234802246, "step": 4018 }, { "epoch": 6.1309814453125e-06, "step": 4018, "training_step_time": 0.1126103401184082 }, { "epoch": 6.13250732421875e-06, "model_forward_time": 0.02529168128967285, "step": 4019 }, { "epoch": 6.13250732421875e-06, "step": 4019, "training_step_time": 0.11054158210754395 }, { "epoch": 6.134033203125e-06, "grad_norm": 0.2944953143596649, "learning_rate": 9.808328879073251e-05, "loss": 0.095, "step": 4020 }, { "epoch": 6.134033203125e-06, "model_forward_time": 0.024592161178588867, "step": 4020 }, { "epoch": 6.134033203125e-06, "step": 4020, "training_step_time": 0.1082160472869873 }, { "epoch": 6.13555908203125e-06, "model_forward_time": 0.024862051010131836, "step": 4021 }, { "epoch": 6.13555908203125e-06, "step": 4021, "training_step_time": 0.17844057083129883 }, { "epoch": 6.1370849609375e-06, "model_forward_time": 0.024178266525268555, "step": 4022 }, { "epoch": 6.1370849609375e-06, "step": 4022, "training_step_time": 0.1128687858581543 }, { "epoch": 6.13861083984375e-06, "model_forward_time": 0.02386164665222168, "step": 4023 }, { "epoch": 6.13861083984375e-06, "step": 4023, "training_step_time": 0.21379590034484863 }, { "epoch": 6.14013671875e-06, "model_forward_time": 0.02419304847717285, "step": 4024 }, { "epoch": 6.14013671875e-06, "step": 4024, "training_step_time": 0.18358683586120605 }, { "epoch": 6.14166259765625e-06, "model_forward_time": 0.02388167381286621, "step": 4025 }, { "epoch": 6.14166259765625e-06, "step": 4025, "training_step_time": 0.19825243949890137 }, { "epoch": 6.1431884765625e-06, "model_forward_time": 0.023534059524536133, "step": 4026 }, { "epoch": 6.1431884765625e-06, "step": 4026, "training_step_time": 0.19337844848632812 }, { "epoch": 6.14471435546875e-06, "model_forward_time": 0.0263822078704834, "step": 4027 }, { "epoch": 6.14471435546875e-06, "step": 4027, "training_step_time": 0.1702885627746582 }, { "epoch": 6.146240234375e-06, "model_forward_time": 0.023482561111450195, "step": 4028 }, { "epoch": 6.146240234375e-06, "step": 4028, "training_step_time": 0.16748619079589844 }, { "epoch": 6.14776611328125e-06, "model_forward_time": 0.023217439651489258, "step": 4029 }, { "epoch": 6.14776611328125e-06, "step": 4029, "training_step_time": 0.10916352272033691 }, { "epoch": 6.1492919921875e-06, "grad_norm": 0.5035075545310974, "learning_rate": 9.806814553060801e-05, "loss": 0.1049, "step": 4030 }, { "epoch": 6.1492919921875e-06, "model_forward_time": 0.024348974227905273, "step": 4030 }, { "epoch": 6.1492919921875e-06, "step": 4030, "training_step_time": 0.11574149131774902 }, { "epoch": 6.15081787109375e-06, "model_forward_time": 0.02473759651184082, "step": 4031 }, { "epoch": 6.15081787109375e-06, "step": 4031, "training_step_time": 0.10732030868530273 }, { "epoch": 6.15234375e-06, "model_forward_time": 0.0256803035736084, "step": 4032 }, { "epoch": 6.15234375e-06, "step": 4032, "training_step_time": 0.1124420166015625 }, { "epoch": 6.15386962890625e-06, "model_forward_time": 0.025347471237182617, "step": 4033 }, { "epoch": 6.15386962890625e-06, "step": 4033, "training_step_time": 0.11317825317382812 }, { "epoch": 6.1553955078125e-06, "model_forward_time": 0.025418519973754883, "step": 4034 }, { "epoch": 6.1553955078125e-06, "step": 4034, "training_step_time": 0.11313104629516602 }, { "epoch": 6.15692138671875e-06, "model_forward_time": 0.024643659591674805, "step": 4035 }, { "epoch": 6.15692138671875e-06, "step": 4035, "training_step_time": 0.11036419868469238 }, { "epoch": 6.158447265625e-06, "model_forward_time": 0.02450418472290039, "step": 4036 }, { "epoch": 6.158447265625e-06, "step": 4036, "training_step_time": 0.1073148250579834 }, { "epoch": 6.15997314453125e-06, "model_forward_time": 0.024698734283447266, "step": 4037 }, { "epoch": 6.15997314453125e-06, "step": 4037, "training_step_time": 0.10583186149597168 }, { "epoch": 6.1614990234375e-06, "model_forward_time": 0.024637460708618164, "step": 4038 }, { "epoch": 6.1614990234375e-06, "step": 4038, "training_step_time": 0.1082921028137207 }, { "epoch": 6.16302490234375e-06, "model_forward_time": 0.024567604064941406, "step": 4039 }, { "epoch": 6.16302490234375e-06, "step": 4039, "training_step_time": 0.10560321807861328 }, { "epoch": 6.16455078125e-06, "grad_norm": 0.2992149293422699, "learning_rate": 9.805294386315415e-05, "loss": 0.0681, "step": 4040 }, { "epoch": 6.16455078125e-06, "model_forward_time": 0.024450302124023438, "step": 4040 }, { "epoch": 6.16455078125e-06, "step": 4040, "training_step_time": 0.21399307250976562 }, { "epoch": 6.16607666015625e-06, "model_forward_time": 0.023947715759277344, "step": 4041 }, { "epoch": 6.16607666015625e-06, "step": 4041, "training_step_time": 0.10506224632263184 }, { "epoch": 6.1676025390625e-06, "model_forward_time": 0.027935028076171875, "step": 4042 }, { "epoch": 6.1676025390625e-06, "step": 4042, "training_step_time": 0.11345887184143066 }, { "epoch": 6.16912841796875e-06, "model_forward_time": 0.024759531021118164, "step": 4043 }, { "epoch": 6.16912841796875e-06, "step": 4043, "training_step_time": 0.1062314510345459 }, { "epoch": 6.170654296875e-06, "model_forward_time": 0.02488231658935547, "step": 4044 }, { "epoch": 6.170654296875e-06, "step": 4044, "training_step_time": 0.11951088905334473 }, { "epoch": 6.17218017578125e-06, "model_forward_time": 0.0244748592376709, "step": 4045 }, { "epoch": 6.17218017578125e-06, "step": 4045, "training_step_time": 0.10823583602905273 }, { "epoch": 6.1737060546875e-06, "model_forward_time": 0.02515125274658203, "step": 4046 }, { "epoch": 6.1737060546875e-06, "step": 4046, "training_step_time": 0.108154296875 }, { "epoch": 6.17523193359375e-06, "model_forward_time": 0.025255441665649414, "step": 4047 }, { "epoch": 6.17523193359375e-06, "step": 4047, "training_step_time": 0.11082696914672852 }, { "epoch": 6.1767578125e-06, "model_forward_time": 0.0252077579498291, "step": 4048 }, { "epoch": 6.1767578125e-06, "step": 4048, "training_step_time": 0.10903191566467285 }, { "epoch": 6.17828369140625e-06, "model_forward_time": 0.024511337280273438, "step": 4049 }, { "epoch": 6.17828369140625e-06, "step": 4049, "training_step_time": 0.10984539985656738 }, { "epoch": 6.1798095703125e-06, "grad_norm": 0.3301985561847687, "learning_rate": 9.803768380684242e-05, "loss": 0.1019, "step": 4050 }, { "epoch": 6.1798095703125e-06, "model_forward_time": 0.02468109130859375, "step": 4050 }, { "epoch": 6.1798095703125e-06, "step": 4050, "training_step_time": 0.1150820255279541 }, { "epoch": 6.18133544921875e-06, "model_forward_time": 0.02499532699584961, "step": 4051 }, { "epoch": 6.18133544921875e-06, "step": 4051, "training_step_time": 0.1070556640625 }, { "epoch": 6.182861328125e-06, "model_forward_time": 0.024591684341430664, "step": 4052 }, { "epoch": 6.182861328125e-06, "step": 4052, "training_step_time": 0.11088418960571289 }, { "epoch": 6.18438720703125e-06, "model_forward_time": 0.02476358413696289, "step": 4053 }, { "epoch": 6.18438720703125e-06, "step": 4053, "training_step_time": 0.11005401611328125 }, { "epoch": 6.1859130859375e-06, "model_forward_time": 0.025078773498535156, "step": 4054 }, { "epoch": 6.1859130859375e-06, "step": 4054, "training_step_time": 0.11010098457336426 }, { "epoch": 6.18743896484375e-06, "model_forward_time": 0.026221036911010742, "step": 4055 }, { "epoch": 6.18743896484375e-06, "step": 4055, "training_step_time": 0.10863089561462402 }, { "epoch": 6.18896484375e-06, "model_forward_time": 0.02585911750793457, "step": 4056 }, { "epoch": 6.18896484375e-06, "step": 4056, "training_step_time": 0.10872244834899902 }, { "epoch": 6.19049072265625e-06, "model_forward_time": 0.02552509307861328, "step": 4057 }, { "epoch": 6.19049072265625e-06, "step": 4057, "training_step_time": 0.10986971855163574 }, { "epoch": 6.1920166015625e-06, "model_forward_time": 0.02529597282409668, "step": 4058 }, { "epoch": 6.1920166015625e-06, "step": 4058, "training_step_time": 0.10926675796508789 }, { "epoch": 6.19354248046875e-06, "model_forward_time": 0.02483081817626953, "step": 4059 }, { "epoch": 6.19354248046875e-06, "step": 4059, "training_step_time": 0.11312460899353027 }, { "epoch": 6.195068359375e-06, "grad_norm": 0.4493350386619568, "learning_rate": 9.802236538021518e-05, "loss": 0.0866, "step": 4060 }, { "epoch": 6.195068359375e-06, "model_forward_time": 0.024946928024291992, "step": 4060 }, { "epoch": 6.195068359375e-06, "step": 4060, "training_step_time": 0.11127257347106934 }, { "epoch": 6.19659423828125e-06, "model_forward_time": 0.025792598724365234, "step": 4061 }, { "epoch": 6.19659423828125e-06, "step": 4061, "training_step_time": 0.1120147705078125 }, { "epoch": 6.1981201171875e-06, "model_forward_time": 0.025043964385986328, "step": 4062 }, { "epoch": 6.1981201171875e-06, "step": 4062, "training_step_time": 0.10994243621826172 }, { "epoch": 6.19964599609375e-06, "model_forward_time": 0.0253140926361084, "step": 4063 }, { "epoch": 6.19964599609375e-06, "step": 4063, "training_step_time": 0.11213088035583496 }, { "epoch": 6.201171875e-06, "model_forward_time": 0.025109052658081055, "step": 4064 }, { "epoch": 6.201171875e-06, "step": 4064, "training_step_time": 0.10786986351013184 }, { "epoch": 6.20269775390625e-06, "model_forward_time": 0.025150537490844727, "step": 4065 }, { "epoch": 6.20269775390625e-06, "step": 4065, "training_step_time": 0.11389446258544922 }, { "epoch": 6.2042236328125e-06, "model_forward_time": 0.02537369728088379, "step": 4066 }, { "epoch": 6.2042236328125e-06, "step": 4066, "training_step_time": 0.11095404624938965 }, { "epoch": 6.20574951171875e-06, "model_forward_time": 0.02577805519104004, "step": 4067 }, { "epoch": 6.20574951171875e-06, "step": 4067, "training_step_time": 0.14825224876403809 }, { "epoch": 6.207275390625e-06, "model_forward_time": 0.026848316192626953, "step": 4068 }, { "epoch": 6.207275390625e-06, "step": 4068, "training_step_time": 0.11350083351135254 }, { "epoch": 6.20880126953125e-06, "model_forward_time": 0.02527761459350586, "step": 4069 }, { "epoch": 6.20880126953125e-06, "step": 4069, "training_step_time": 0.13245630264282227 }, { "epoch": 6.2103271484375e-06, "grad_norm": 0.3537490963935852, "learning_rate": 9.80069886018858e-05, "loss": 0.0774, "step": 4070 }, { "epoch": 6.2103271484375e-06, "model_forward_time": 0.02567887306213379, "step": 4070 }, { "epoch": 6.2103271484375e-06, "step": 4070, "training_step_time": 0.13516759872436523 }, { "epoch": 6.21185302734375e-06, "model_forward_time": 0.02460169792175293, "step": 4071 }, { "epoch": 6.21185302734375e-06, "step": 4071, "training_step_time": 0.1274712085723877 }, { "epoch": 6.21337890625e-06, "model_forward_time": 0.02482295036315918, "step": 4072 }, { "epoch": 6.21337890625e-06, "step": 4072, "training_step_time": 0.21318578720092773 }, { "epoch": 6.21490478515625e-06, "model_forward_time": 0.02386617660522461, "step": 4073 }, { "epoch": 6.21490478515625e-06, "step": 4073, "training_step_time": 0.17571640014648438 }, { "epoch": 6.2164306640625e-06, "model_forward_time": 0.02415299415588379, "step": 4074 }, { "epoch": 6.2164306640625e-06, "step": 4074, "training_step_time": 0.19040417671203613 }, { "epoch": 6.21795654296875e-06, "model_forward_time": 0.024153947830200195, "step": 4075 }, { "epoch": 6.21795654296875e-06, "step": 4075, "training_step_time": 0.14196443557739258 }, { "epoch": 6.219482421875e-06, "model_forward_time": 0.024690866470336914, "step": 4076 }, { "epoch": 6.219482421875e-06, "step": 4076, "training_step_time": 0.10529947280883789 }, { "epoch": 6.22100830078125e-06, "model_forward_time": 0.025324583053588867, "step": 4077 }, { "epoch": 6.22100830078125e-06, "step": 4077, "training_step_time": 0.10925817489624023 }, { "epoch": 6.2225341796875e-06, "model_forward_time": 0.02523040771484375, "step": 4078 }, { "epoch": 6.2225341796875e-06, "step": 4078, "training_step_time": 0.10924935340881348 }, { "epoch": 6.22406005859375e-06, "model_forward_time": 0.025370359420776367, "step": 4079 }, { "epoch": 6.22406005859375e-06, "step": 4079, "training_step_time": 0.10969829559326172 }, { "epoch": 6.2255859375e-06, "grad_norm": 0.38843709230422974, "learning_rate": 9.799155349053851e-05, "loss": 0.1, "step": 4080 }, { "epoch": 6.2255859375e-06, "model_forward_time": 0.02525162696838379, "step": 4080 }, { "epoch": 6.2255859375e-06, "step": 4080, "training_step_time": 0.1104423999786377 }, { "epoch": 6.22711181640625e-06, "model_forward_time": 0.025877952575683594, "step": 4081 }, { "epoch": 6.22711181640625e-06, "step": 4081, "training_step_time": 0.1095893383026123 }, { "epoch": 6.2286376953125e-06, "model_forward_time": 0.025164365768432617, "step": 4082 }, { "epoch": 6.2286376953125e-06, "step": 4082, "training_step_time": 0.10735297203063965 }, { "epoch": 6.23016357421875e-06, "model_forward_time": 0.02541065216064453, "step": 4083 }, { "epoch": 6.23016357421875e-06, "step": 4083, "training_step_time": 0.1112053394317627 }, { "epoch": 6.231689453125e-06, "model_forward_time": 0.025327205657958984, "step": 4084 }, { "epoch": 6.231689453125e-06, "step": 4084, "training_step_time": 0.11129474639892578 }, { "epoch": 6.23321533203125e-06, "model_forward_time": 0.025369644165039062, "step": 4085 }, { "epoch": 6.23321533203125e-06, "step": 4085, "training_step_time": 0.11371779441833496 }, { "epoch": 6.2347412109375e-06, "model_forward_time": 0.025699853897094727, "step": 4086 }, { "epoch": 6.2347412109375e-06, "step": 4086, "training_step_time": 0.11709117889404297 }, { "epoch": 6.23626708984375e-06, "model_forward_time": 0.02540421485900879, "step": 4087 }, { "epoch": 6.23626708984375e-06, "step": 4087, "training_step_time": 0.22353315353393555 }, { "epoch": 6.23779296875e-06, "model_forward_time": 0.025333642959594727, "step": 4088 }, { "epoch": 6.23779296875e-06, "step": 4088, "training_step_time": 0.1325850486755371 }, { "epoch": 6.23931884765625e-06, "model_forward_time": 0.024460792541503906, "step": 4089 }, { "epoch": 6.23931884765625e-06, "step": 4089, "training_step_time": 0.18323898315429688 }, { "epoch": 6.2408447265625e-06, "grad_norm": 0.429690957069397, "learning_rate": 9.797606006492841e-05, "loss": 0.0834, "step": 4090 }, { "epoch": 6.2408447265625e-06, "model_forward_time": 0.02521371841430664, "step": 4090 }, { "epoch": 6.2408447265625e-06, "step": 4090, "training_step_time": 0.1265251636505127 }, { "epoch": 6.24237060546875e-06, "model_forward_time": 0.025280237197875977, "step": 4091 }, { "epoch": 6.24237060546875e-06, "step": 4091, "training_step_time": 0.11684131622314453 }, { "epoch": 6.243896484375e-06, "model_forward_time": 0.024863243103027344, "step": 4092 }, { "epoch": 6.243896484375e-06, "step": 4092, "training_step_time": 0.11471700668334961 }, { "epoch": 6.24542236328125e-06, "model_forward_time": 0.025652408599853516, "step": 4093 }, { "epoch": 6.24542236328125e-06, "step": 4093, "training_step_time": 0.11330246925354004 }, { "epoch": 6.2469482421875e-06, "model_forward_time": 0.025325298309326172, "step": 4094 }, { "epoch": 6.2469482421875e-06, "step": 4094, "training_step_time": 0.11207771301269531 }, { "epoch": 6.24847412109375e-06, "model_forward_time": 0.029282331466674805, "step": 4095 }, { "epoch": 6.24847412109375e-06, "step": 4095, "training_step_time": 0.11543917655944824 }, { "epoch": 6.25e-06, "model_forward_time": 0.025379657745361328, "step": 4096 }, { "epoch": 6.25e-06, "step": 4096, "training_step_time": 0.11293745040893555 }, { "epoch": 6.25152587890625e-06, "model_forward_time": 0.025273561477661133, "step": 4097 }, { "epoch": 6.25152587890625e-06, "step": 4097, "training_step_time": 0.11040186882019043 }, { "epoch": 6.2530517578125e-06, "model_forward_time": 0.02525925636291504, "step": 4098 }, { "epoch": 6.2530517578125e-06, "step": 4098, "training_step_time": 0.10831952095031738 }, { "epoch": 6.25457763671875e-06, "model_forward_time": 0.02610492706298828, "step": 4099 }, { "epoch": 6.25457763671875e-06, "step": 4099, "training_step_time": 0.11008810997009277 }, { "epoch": 6.256103515625e-06, "grad_norm": 0.49703845381736755, "learning_rate": 9.796050834388149e-05, "loss": 0.0782, "step": 4100 }, { "epoch": 6.256103515625e-06, "model_forward_time": 0.02568793296813965, "step": 4100 }, { "epoch": 6.256103515625e-06, "step": 4100, "training_step_time": 0.10692286491394043 }, { "epoch": 6.25762939453125e-06, "model_forward_time": 0.025857210159301758, "step": 4101 }, { "epoch": 6.25762939453125e-06, "step": 4101, "training_step_time": 0.10795879364013672 }, { "epoch": 6.2591552734375e-06, "model_forward_time": 0.0258638858795166, "step": 4102 }, { "epoch": 6.2591552734375e-06, "step": 4102, "training_step_time": 0.11413073539733887 }, { "epoch": 6.26068115234375e-06, "model_forward_time": 0.02537846565246582, "step": 4103 }, { "epoch": 6.26068115234375e-06, "step": 4103, "training_step_time": 0.11638569831848145 }, { "epoch": 6.26220703125e-06, "model_forward_time": 0.02515888214111328, "step": 4104 }, { "epoch": 6.26220703125e-06, "step": 4104, "training_step_time": 0.1065373420715332 }, { "epoch": 6.26373291015625e-06, "model_forward_time": 0.025812387466430664, "step": 4105 }, { "epoch": 6.26373291015625e-06, "step": 4105, "training_step_time": 0.1092829704284668 }, { "epoch": 6.2652587890625e-06, "model_forward_time": 0.02547621726989746, "step": 4106 }, { "epoch": 6.2652587890625e-06, "step": 4106, "training_step_time": 0.10764813423156738 }, { "epoch": 6.26678466796875e-06, "model_forward_time": 0.02538156509399414, "step": 4107 }, { "epoch": 6.26678466796875e-06, "step": 4107, "training_step_time": 0.10922741889953613 }, { "epoch": 6.268310546875e-06, "model_forward_time": 0.02568960189819336, "step": 4108 }, { "epoch": 6.268310546875e-06, "step": 4108, "training_step_time": 0.11070084571838379 }, { "epoch": 6.26983642578125e-06, "model_forward_time": 0.02570319175720215, "step": 4109 }, { "epoch": 6.26983642578125e-06, "step": 4109, "training_step_time": 0.10958242416381836 }, { "epoch": 6.2713623046875e-06, "grad_norm": 0.310067743062973, "learning_rate": 9.794489834629455e-05, "loss": 0.0907, "step": 4110 }, { "epoch": 6.2713623046875e-06, "model_forward_time": 0.02605748176574707, "step": 4110 }, { "epoch": 6.2713623046875e-06, "step": 4110, "training_step_time": 0.10885930061340332 }, { "epoch": 6.27288818359375e-06, "model_forward_time": 0.025922298431396484, "step": 4111 }, { "epoch": 6.27288818359375e-06, "step": 4111, "training_step_time": 0.11516809463500977 }, { "epoch": 6.2744140625e-06, "model_forward_time": 0.026095151901245117, "step": 4112 }, { "epoch": 6.2744140625e-06, "step": 4112, "training_step_time": 0.18856191635131836 }, { "epoch": 6.27593994140625e-06, "model_forward_time": 0.02498459815979004, "step": 4113 }, { "epoch": 6.27593994140625e-06, "step": 4113, "training_step_time": 0.11460161209106445 }, { "epoch": 6.2774658203125e-06, "model_forward_time": 0.0253603458404541, "step": 4114 }, { "epoch": 6.2774658203125e-06, "step": 4114, "training_step_time": 0.14036059379577637 }, { "epoch": 6.27899169921875e-06, "model_forward_time": 0.02765345573425293, "step": 4115 }, { "epoch": 6.27899169921875e-06, "step": 4115, "training_step_time": 0.1616523265838623 }, { "epoch": 6.280517578125e-06, "model_forward_time": 0.02487802505493164, "step": 4116 }, { "epoch": 6.280517578125e-06, "step": 4116, "training_step_time": 0.21881699562072754 }, { "epoch": 6.28204345703125e-06, "model_forward_time": 0.024625539779663086, "step": 4117 }, { "epoch": 6.28204345703125e-06, "step": 4117, "training_step_time": 0.20145440101623535 }, { "epoch": 6.2835693359375e-06, "model_forward_time": 0.024894237518310547, "step": 4118 }, { "epoch": 6.2835693359375e-06, "step": 4118, "training_step_time": 0.13270807266235352 }, { "epoch": 6.28509521484375e-06, "model_forward_time": 0.02697134017944336, "step": 4119 }, { "epoch": 6.28509521484375e-06, "step": 4119, "training_step_time": 0.2041921615600586 }, { "epoch": 6.28662109375e-06, "grad_norm": 0.6953232884407043, "learning_rate": 9.792923009113522e-05, "loss": 0.0958, "step": 4120 }, { "epoch": 6.28662109375e-06, "model_forward_time": 0.024796724319458008, "step": 4120 }, { "epoch": 6.28662109375e-06, "step": 4120, "training_step_time": 0.11383175849914551 }, { "epoch": 6.28814697265625e-06, "model_forward_time": 0.024889707565307617, "step": 4121 }, { "epoch": 6.28814697265625e-06, "step": 4121, "training_step_time": 0.10593914985656738 }, { "epoch": 6.2896728515625e-06, "model_forward_time": 0.025442838668823242, "step": 4122 }, { "epoch": 6.2896728515625e-06, "step": 4122, "training_step_time": 0.1990976333618164 }, { "epoch": 6.29119873046875e-06, "model_forward_time": 0.02472996711730957, "step": 4123 }, { "epoch": 6.29119873046875e-06, "step": 4123, "training_step_time": 0.10743141174316406 }, { "epoch": 6.292724609375e-06, "model_forward_time": 0.02549004554748535, "step": 4124 }, { "epoch": 6.292724609375e-06, "step": 4124, "training_step_time": 0.10535788536071777 }, { "epoch": 6.29425048828125e-06, "model_forward_time": 0.02464461326599121, "step": 4125 }, { "epoch": 6.29425048828125e-06, "step": 4125, "training_step_time": 0.11254453659057617 }, { "epoch": 6.2957763671875e-06, "model_forward_time": 0.025549888610839844, "step": 4126 }, { "epoch": 6.2957763671875e-06, "step": 4126, "training_step_time": 0.11060237884521484 }, { "epoch": 6.29730224609375e-06, "model_forward_time": 0.025659799575805664, "step": 4127 }, { "epoch": 6.29730224609375e-06, "step": 4127, "training_step_time": 0.109893798828125 }, { "epoch": 6.298828125e-06, "model_forward_time": 0.025371551513671875, "step": 4128 }, { "epoch": 6.298828125e-06, "step": 4128, "training_step_time": 0.10962367057800293 }, { "epoch": 6.30035400390625e-06, "model_forward_time": 0.025635957717895508, "step": 4129 }, { "epoch": 6.30035400390625e-06, "step": 4129, "training_step_time": 0.1079559326171875 }, { "epoch": 6.3018798828125e-06, "grad_norm": 0.3230479657649994, "learning_rate": 9.791350359744189e-05, "loss": 0.0931, "step": 4130 }, { "epoch": 6.3018798828125e-06, "model_forward_time": 0.025505542755126953, "step": 4130 }, { "epoch": 6.3018798828125e-06, "step": 4130, "training_step_time": 0.10912442207336426 }, { "epoch": 6.30340576171875e-06, "model_forward_time": 0.025497913360595703, "step": 4131 }, { "epoch": 6.30340576171875e-06, "step": 4131, "training_step_time": 0.1731739044189453 }, { "epoch": 6.304931640625e-06, "model_forward_time": 0.025216102600097656, "step": 4132 }, { "epoch": 6.304931640625e-06, "step": 4132, "training_step_time": 0.15938234329223633 }, { "epoch": 6.30645751953125e-06, "model_forward_time": 0.024874210357666016, "step": 4133 }, { "epoch": 6.30645751953125e-06, "step": 4133, "training_step_time": 0.11167740821838379 }, { "epoch": 6.3079833984375e-06, "model_forward_time": 0.025465965270996094, "step": 4134 }, { "epoch": 6.3079833984375e-06, "step": 4134, "training_step_time": 0.1725161075592041 }, { "epoch": 6.30950927734375e-06, "model_forward_time": 0.02478313446044922, "step": 4135 }, { "epoch": 6.30950927734375e-06, "step": 4135, "training_step_time": 0.16680645942687988 }, { "epoch": 6.31103515625e-06, "model_forward_time": 0.024608612060546875, "step": 4136 }, { "epoch": 6.31103515625e-06, "step": 4136, "training_step_time": 0.10528993606567383 }, { "epoch": 6.31256103515625e-06, "model_forward_time": 0.025942087173461914, "step": 4137 }, { "epoch": 6.31256103515625e-06, "step": 4137, "training_step_time": 0.11053037643432617 }, { "epoch": 6.3140869140625e-06, "model_forward_time": 0.02563190460205078, "step": 4138 }, { "epoch": 6.3140869140625e-06, "step": 4138, "training_step_time": 0.11240792274475098 }, { "epoch": 6.31561279296875e-06, "model_forward_time": 0.02590155601501465, "step": 4139 }, { "epoch": 6.31561279296875e-06, "step": 4139, "training_step_time": 0.1082000732421875 }, { "epoch": 6.317138671875e-06, "grad_norm": 0.33710578083992004, "learning_rate": 9.789771888432375e-05, "loss": 0.0952, "step": 4140 }, { "epoch": 6.317138671875e-06, "model_forward_time": 0.02593541145324707, "step": 4140 }, { "epoch": 6.317138671875e-06, "step": 4140, "training_step_time": 0.1146235466003418 }, { "epoch": 6.31866455078125e-06, "model_forward_time": 0.02570939064025879, "step": 4141 }, { "epoch": 6.31866455078125e-06, "step": 4141, "training_step_time": 0.10707736015319824 }, { "epoch": 6.3201904296875e-06, "model_forward_time": 0.025591611862182617, "step": 4142 }, { "epoch": 6.3201904296875e-06, "step": 4142, "training_step_time": 0.10786318778991699 }, { "epoch": 6.32171630859375e-06, "model_forward_time": 0.025484561920166016, "step": 4143 }, { "epoch": 6.32171630859375e-06, "step": 4143, "training_step_time": 0.10895299911499023 }, { "epoch": 6.3232421875e-06, "model_forward_time": 0.026085853576660156, "step": 4144 }, { "epoch": 6.3232421875e-06, "step": 4144, "training_step_time": 0.11407089233398438 }, { "epoch": 6.32476806640625e-06, "model_forward_time": 0.024844884872436523, "step": 4145 }, { "epoch": 6.32476806640625e-06, "step": 4145, "training_step_time": 0.11085891723632812 }, { "epoch": 6.3262939453125e-06, "model_forward_time": 0.02576899528503418, "step": 4146 }, { "epoch": 6.3262939453125e-06, "step": 4146, "training_step_time": 0.11180472373962402 }, { "epoch": 6.32781982421875e-06, "model_forward_time": 0.02591681480407715, "step": 4147 }, { "epoch": 6.32781982421875e-06, "step": 4147, "training_step_time": 0.11078500747680664 }, { "epoch": 6.329345703125e-06, "model_forward_time": 0.02558112144470215, "step": 4148 }, { "epoch": 6.329345703125e-06, "step": 4148, "training_step_time": 0.10840368270874023 }, { "epoch": 6.33087158203125e-06, "model_forward_time": 0.025533676147460938, "step": 4149 }, { "epoch": 6.33087158203125e-06, "step": 4149, "training_step_time": 0.10827779769897461 }, { "epoch": 6.3323974609375e-06, "grad_norm": 0.43281710147857666, "learning_rate": 9.788187597096069e-05, "loss": 0.1034, "step": 4150 }, { "epoch": 6.3323974609375e-06, "model_forward_time": 0.025478601455688477, "step": 4150 }, { "epoch": 6.3323974609375e-06, "step": 4150, "training_step_time": 0.11036348342895508 }, { "epoch": 6.33392333984375e-06, "model_forward_time": 0.02601313591003418, "step": 4151 }, { "epoch": 6.33392333984375e-06, "step": 4151, "training_step_time": 0.1087794303894043 }, { "epoch": 6.33544921875e-06, "model_forward_time": 0.025410890579223633, "step": 4152 }, { "epoch": 6.33544921875e-06, "step": 4152, "training_step_time": 0.11059260368347168 }, { "epoch": 6.33697509765625e-06, "model_forward_time": 0.025416851043701172, "step": 4153 }, { "epoch": 6.33697509765625e-06, "step": 4153, "training_step_time": 0.145219087600708 }, { "epoch": 6.3385009765625e-06, "model_forward_time": 0.0247952938079834, "step": 4154 }, { "epoch": 6.3385009765625e-06, "step": 4154, "training_step_time": 0.1537766456604004 }, { "epoch": 6.34002685546875e-06, "model_forward_time": 0.024566173553466797, "step": 4155 }, { "epoch": 6.34002685546875e-06, "step": 4155, "training_step_time": 0.14045000076293945 }, { "epoch": 6.341552734375e-06, "model_forward_time": 0.024799108505249023, "step": 4156 }, { "epoch": 6.341552734375e-06, "step": 4156, "training_step_time": 0.12798380851745605 }, { "epoch": 6.34307861328125e-06, "model_forward_time": 0.025618314743041992, "step": 4157 }, { "epoch": 6.34307861328125e-06, "step": 4157, "training_step_time": 0.21854376792907715 }, { "epoch": 6.3446044921875e-06, "model_forward_time": 0.024955272674560547, "step": 4158 }, { "epoch": 6.3446044921875e-06, "step": 4158, "training_step_time": 0.172349214553833 }, { "epoch": 6.34613037109375e-06, "model_forward_time": 0.024769306182861328, "step": 4159 }, { "epoch": 6.34613037109375e-06, "step": 4159, "training_step_time": 0.21823573112487793 }, { "epoch": 6.34765625e-06, "grad_norm": 0.6297304630279541, "learning_rate": 9.786597487660337e-05, "loss": 0.0858, "step": 4160 }, { "epoch": 6.34765625e-06, "model_forward_time": 0.02901601791381836, "step": 4160 }, { "epoch": 6.34765625e-06, "step": 4160, "training_step_time": 0.18529033660888672 }, { "epoch": 6.34918212890625e-06, "model_forward_time": 0.026053667068481445, "step": 4161 }, { "epoch": 6.34918212890625e-06, "step": 4161, "training_step_time": 0.2175889015197754 }, { "epoch": 6.3507080078125e-06, "model_forward_time": 0.02538609504699707, "step": 4162 }, { "epoch": 6.3507080078125e-06, "step": 4162, "training_step_time": 0.14953351020812988 }, { "epoch": 6.35223388671875e-06, "model_forward_time": 0.024985790252685547, "step": 4163 }, { "epoch": 6.35223388671875e-06, "step": 4163, "training_step_time": 0.1788921356201172 }, { "epoch": 6.353759765625e-06, "model_forward_time": 0.02517533302307129, "step": 4164 }, { "epoch": 6.353759765625e-06, "step": 4164, "training_step_time": 0.14028716087341309 }, { "epoch": 6.35528564453125e-06, "model_forward_time": 0.02542567253112793, "step": 4165 }, { "epoch": 6.35528564453125e-06, "step": 4165, "training_step_time": 0.11775922775268555 }, { "epoch": 6.3568115234375e-06, "model_forward_time": 0.02551102638244629, "step": 4166 }, { "epoch": 6.3568115234375e-06, "step": 4166, "training_step_time": 0.10731697082519531 }, { "epoch": 6.35833740234375e-06, "model_forward_time": 0.025676965713500977, "step": 4167 }, { "epoch": 6.35833740234375e-06, "step": 4167, "training_step_time": 0.10850191116333008 }, { "epoch": 6.35986328125e-06, "model_forward_time": 0.025908470153808594, "step": 4168 }, { "epoch": 6.35986328125e-06, "step": 4168, "training_step_time": 0.1078636646270752 }, { "epoch": 6.36138916015625e-06, "model_forward_time": 0.02579975128173828, "step": 4169 }, { "epoch": 6.36138916015625e-06, "step": 4169, "training_step_time": 0.10861635208129883 }, { "epoch": 6.3629150390625e-06, "grad_norm": 0.5773082375526428, "learning_rate": 9.785001562057309e-05, "loss": 0.0785, "step": 4170 }, { "epoch": 6.3629150390625e-06, "model_forward_time": 0.025102853775024414, "step": 4170 }, { "epoch": 6.3629150390625e-06, "step": 4170, "training_step_time": 0.11014986038208008 }, { "epoch": 6.36444091796875e-06, "model_forward_time": 0.025821685791015625, "step": 4171 }, { "epoch": 6.36444091796875e-06, "step": 4171, "training_step_time": 0.10789680480957031 }, { "epoch": 6.365966796875e-06, "model_forward_time": 0.02501654624938965, "step": 4172 }, { "epoch": 6.365966796875e-06, "step": 4172, "training_step_time": 0.10932159423828125 }, { "epoch": 6.36749267578125e-06, "model_forward_time": 0.026610851287841797, "step": 4173 }, { "epoch": 6.36749267578125e-06, "step": 4173, "training_step_time": 0.10804438591003418 }, { "epoch": 6.3690185546875e-06, "model_forward_time": 0.029104232788085938, "step": 4174 }, { "epoch": 6.3690185546875e-06, "step": 4174, "training_step_time": 0.11399102210998535 }, { "epoch": 6.37054443359375e-06, "model_forward_time": 0.025867462158203125, "step": 4175 }, { "epoch": 6.37054443359375e-06, "step": 4175, "training_step_time": 0.10845398902893066 }, { "epoch": 6.3720703125e-06, "model_forward_time": 0.025876283645629883, "step": 4176 }, { "epoch": 6.3720703125e-06, "step": 4176, "training_step_time": 0.1079716682434082 }, { "epoch": 6.37359619140625e-06, "model_forward_time": 0.025553464889526367, "step": 4177 }, { "epoch": 6.37359619140625e-06, "step": 4177, "training_step_time": 0.11165976524353027 }, { "epoch": 6.3751220703125e-06, "model_forward_time": 0.025737285614013672, "step": 4178 }, { "epoch": 6.3751220703125e-06, "step": 4178, "training_step_time": 0.17055392265319824 }, { "epoch": 6.37664794921875e-06, "model_forward_time": 0.02431488037109375, "step": 4179 }, { "epoch": 6.37664794921875e-06, "step": 4179, "training_step_time": 0.17862343788146973 }, { "epoch": 6.378173828125e-06, "grad_norm": 0.38035139441490173, "learning_rate": 9.783399822226189e-05, "loss": 0.0876, "step": 4180 }, { "epoch": 6.378173828125e-06, "model_forward_time": 0.024325847625732422, "step": 4180 }, { "epoch": 6.378173828125e-06, "step": 4180, "training_step_time": 0.10556364059448242 }, { "epoch": 6.37969970703125e-06, "model_forward_time": 0.02483963966369629, "step": 4181 }, { "epoch": 6.37969970703125e-06, "step": 4181, "training_step_time": 0.1257801055908203 }, { "epoch": 6.3812255859375e-06, "model_forward_time": 0.024977445602416992, "step": 4182 }, { "epoch": 6.3812255859375e-06, "step": 4182, "training_step_time": 0.12380051612854004 }, { "epoch": 6.38275146484375e-06, "model_forward_time": 0.025151968002319336, "step": 4183 }, { "epoch": 6.38275146484375e-06, "step": 4183, "training_step_time": 0.11729049682617188 }, { "epoch": 6.38427734375e-06, "model_forward_time": 0.02647566795349121, "step": 4184 }, { "epoch": 6.38427734375e-06, "step": 4184, "training_step_time": 0.11842012405395508 }, { "epoch": 6.38580322265625e-06, "model_forward_time": 0.025264501571655273, "step": 4185 }, { "epoch": 6.38580322265625e-06, "step": 4185, "training_step_time": 0.11367273330688477 }, { "epoch": 6.3873291015625e-06, "model_forward_time": 0.025666475296020508, "step": 4186 }, { "epoch": 6.3873291015625e-06, "step": 4186, "training_step_time": 0.11475920677185059 }, { "epoch": 6.38885498046875e-06, "model_forward_time": 0.02521038055419922, "step": 4187 }, { "epoch": 6.38885498046875e-06, "step": 4187, "training_step_time": 0.10945248603820801 }, { "epoch": 6.390380859375e-06, "model_forward_time": 0.025188207626342773, "step": 4188 }, { "epoch": 6.390380859375e-06, "step": 4188, "training_step_time": 0.10903692245483398 }, { "epoch": 6.39190673828125e-06, "model_forward_time": 0.025120019912719727, "step": 4189 }, { "epoch": 6.39190673828125e-06, "step": 4189, "training_step_time": 0.11234569549560547 }, { "epoch": 6.3934326171875e-06, "grad_norm": 0.5089605450630188, "learning_rate": 9.781792270113241e-05, "loss": 0.0875, "step": 4190 }, { "epoch": 6.3934326171875e-06, "model_forward_time": 0.025568246841430664, "step": 4190 }, { "epoch": 6.3934326171875e-06, "step": 4190, "training_step_time": 0.10845589637756348 }, { "epoch": 6.39495849609375e-06, "model_forward_time": 0.025587081909179688, "step": 4191 }, { "epoch": 6.39495849609375e-06, "step": 4191, "training_step_time": 0.10786962509155273 }, { "epoch": 6.396484375e-06, "model_forward_time": 0.025639772415161133, "step": 4192 }, { "epoch": 6.396484375e-06, "step": 4192, "training_step_time": 0.10895895957946777 }, { "epoch": 6.39801025390625e-06, "model_forward_time": 0.025166749954223633, "step": 4193 }, { "epoch": 6.39801025390625e-06, "step": 4193, "training_step_time": 0.11327099800109863 }, { "epoch": 6.3995361328125e-06, "model_forward_time": 0.025104761123657227, "step": 4194 }, { "epoch": 6.3995361328125e-06, "step": 4194, "training_step_time": 0.10799121856689453 }, { "epoch": 6.40106201171875e-06, "model_forward_time": 0.025744199752807617, "step": 4195 }, { "epoch": 6.40106201171875e-06, "step": 4195, "training_step_time": 0.10897135734558105 }, { "epoch": 6.402587890625e-06, "model_forward_time": 0.026881933212280273, "step": 4196 }, { "epoch": 6.402587890625e-06, "step": 4196, "training_step_time": 0.10857796669006348 }, { "epoch": 6.40411376953125e-06, "model_forward_time": 0.024936676025390625, "step": 4197 }, { "epoch": 6.40411376953125e-06, "step": 4197, "training_step_time": 0.11081933975219727 }, { "epoch": 6.4056396484375e-06, "model_forward_time": 0.025452613830566406, "step": 4198 }, { "epoch": 6.4056396484375e-06, "step": 4198, "training_step_time": 0.11033487319946289 }, { "epoch": 6.40716552734375e-06, "model_forward_time": 0.025463342666625977, "step": 4199 }, { "epoch": 6.40716552734375e-06, "step": 4199, "training_step_time": 0.10696649551391602 }, { "epoch": 6.40869140625e-06, "grad_norm": 0.3083915710449219, "learning_rate": 9.780178907671789e-05, "loss": 0.0864, "step": 4200 }, { "epoch": 6.40869140625e-06, "model_forward_time": 0.025232315063476562, "step": 4200 }, { "epoch": 6.40869140625e-06, "step": 4200, "training_step_time": 0.15863680839538574 }, { "epoch": 6.41021728515625e-06, "model_forward_time": 0.024700164794921875, "step": 4201 }, { "epoch": 6.41021728515625e-06, "step": 4201, "training_step_time": 0.11150646209716797 }, { "epoch": 6.4117431640625e-06, "model_forward_time": 0.024940013885498047, "step": 4202 }, { "epoch": 6.4117431640625e-06, "step": 4202, "training_step_time": 0.1314857006072998 }, { "epoch": 6.41326904296875e-06, "model_forward_time": 0.02516341209411621, "step": 4203 }, { "epoch": 6.41326904296875e-06, "step": 4203, "training_step_time": 0.14148187637329102 }, { "epoch": 6.414794921875e-06, "model_forward_time": 0.02490544319152832, "step": 4204 }, { "epoch": 6.414794921875e-06, "step": 4204, "training_step_time": 0.20806312561035156 }, { "epoch": 6.41632080078125e-06, "model_forward_time": 0.024341583251953125, "step": 4205 }, { "epoch": 6.41632080078125e-06, "step": 4205, "training_step_time": 0.16859102249145508 }, { "epoch": 6.4178466796875e-06, "model_forward_time": 0.024672985076904297, "step": 4206 }, { "epoch": 6.4178466796875e-06, "step": 4206, "training_step_time": 0.1752152442932129 }, { "epoch": 6.41937255859375e-06, "model_forward_time": 0.024674415588378906, "step": 4207 }, { "epoch": 6.41937255859375e-06, "step": 4207, "training_step_time": 0.12914180755615234 }, { "epoch": 6.4208984375e-06, "model_forward_time": 0.02481985092163086, "step": 4208 }, { "epoch": 6.4208984375e-06, "step": 4208, "training_step_time": 0.10686182975769043 }, { "epoch": 6.42242431640625e-06, "model_forward_time": 0.02546072006225586, "step": 4209 }, { "epoch": 6.42242431640625e-06, "step": 4209, "training_step_time": 0.12917852401733398 }, { "epoch": 6.4239501953125e-06, "grad_norm": 0.37601393461227417, "learning_rate": 9.778559736862223e-05, "loss": 0.0863, "step": 4210 }, { "epoch": 6.4239501953125e-06, "model_forward_time": 0.02543807029724121, "step": 4210 }, { "epoch": 6.4239501953125e-06, "step": 4210, "training_step_time": 0.1768176555633545 }, { "epoch": 6.42547607421875e-06, "model_forward_time": 0.025008678436279297, "step": 4211 }, { "epoch": 6.42547607421875e-06, "step": 4211, "training_step_time": 0.10656189918518066 }, { "epoch": 6.427001953125e-06, "model_forward_time": 0.02434086799621582, "step": 4212 }, { "epoch": 6.427001953125e-06, "step": 4212, "training_step_time": 0.10739660263061523 }, { "epoch": 6.42852783203125e-06, "model_forward_time": 0.024610042572021484, "step": 4213 }, { "epoch": 6.42852783203125e-06, "step": 4213, "training_step_time": 0.10789179801940918 }, { "epoch": 6.4300537109375e-06, "model_forward_time": 0.025213956832885742, "step": 4214 }, { "epoch": 6.4300537109375e-06, "step": 4214, "training_step_time": 0.11526608467102051 }, { "epoch": 6.43157958984375e-06, "model_forward_time": 0.02553081512451172, "step": 4215 }, { "epoch": 6.43157958984375e-06, "step": 4215, "training_step_time": 0.11276459693908691 }, { "epoch": 6.43310546875e-06, "model_forward_time": 0.025235652923583984, "step": 4216 }, { "epoch": 6.43310546875e-06, "step": 4216, "training_step_time": 0.10721158981323242 }, { "epoch": 6.43463134765625e-06, "model_forward_time": 0.025502443313598633, "step": 4217 }, { "epoch": 6.43463134765625e-06, "step": 4217, "training_step_time": 0.10886263847351074 }, { "epoch": 6.4361572265625e-06, "model_forward_time": 0.02545952796936035, "step": 4218 }, { "epoch": 6.4361572265625e-06, "step": 4218, "training_step_time": 0.10973095893859863 }, { "epoch": 6.43768310546875e-06, "model_forward_time": 0.025874853134155273, "step": 4219 }, { "epoch": 6.43768310546875e-06, "step": 4219, "training_step_time": 0.11324596405029297 }, { "epoch": 6.439208984375e-06, "grad_norm": 0.38798025250434875, "learning_rate": 9.776934759651988e-05, "loss": 0.101, "step": 4220 }, { "epoch": 6.439208984375e-06, "model_forward_time": 0.024994611740112305, "step": 4220 }, { "epoch": 6.439208984375e-06, "step": 4220, "training_step_time": 0.2174069881439209 }, { "epoch": 6.44073486328125e-06, "model_forward_time": 0.02433609962463379, "step": 4221 }, { "epoch": 6.44073486328125e-06, "step": 4221, "training_step_time": 0.12962818145751953 }, { "epoch": 6.4422607421875e-06, "model_forward_time": 0.02396702766418457, "step": 4222 }, { "epoch": 6.4422607421875e-06, "step": 4222, "training_step_time": 0.18463897705078125 }, { "epoch": 6.44378662109375e-06, "model_forward_time": 0.024483919143676758, "step": 4223 }, { "epoch": 6.44378662109375e-06, "step": 4223, "training_step_time": 0.13622665405273438 }, { "epoch": 6.4453125e-06, "model_forward_time": 0.02459263801574707, "step": 4224 }, { "epoch": 6.4453125e-06, "step": 4224, "training_step_time": 0.1140594482421875 }, { "epoch": 6.44683837890625e-06, "model_forward_time": 0.025010108947753906, "step": 4225 }, { "epoch": 6.44683837890625e-06, "step": 4225, "training_step_time": 0.11551117897033691 }, { "epoch": 6.4483642578125e-06, "model_forward_time": 0.025216102600097656, "step": 4226 }, { "epoch": 6.4483642578125e-06, "step": 4226, "training_step_time": 0.11089873313903809 }, { "epoch": 6.44989013671875e-06, "model_forward_time": 0.0251767635345459, "step": 4227 }, { "epoch": 6.44989013671875e-06, "step": 4227, "training_step_time": 0.10906553268432617 }, { "epoch": 6.451416015625e-06, "model_forward_time": 0.025974035263061523, "step": 4228 }, { "epoch": 6.451416015625e-06, "step": 4228, "training_step_time": 0.11062121391296387 }, { "epoch": 6.45294189453125e-06, "model_forward_time": 0.02478337287902832, "step": 4229 }, { "epoch": 6.45294189453125e-06, "step": 4229, "training_step_time": 0.10900616645812988 }, { "epoch": 6.4544677734375e-06, "grad_norm": 0.49198153614997864, "learning_rate": 9.775303978015585e-05, "loss": 0.0974, "step": 4230 }, { "epoch": 6.4544677734375e-06, "model_forward_time": 0.02502274513244629, "step": 4230 }, { "epoch": 6.4544677734375e-06, "step": 4230, "training_step_time": 0.10780000686645508 }, { "epoch": 6.45599365234375e-06, "model_forward_time": 0.027882099151611328, "step": 4231 }, { "epoch": 6.45599365234375e-06, "step": 4231, "training_step_time": 0.11106705665588379 }, { "epoch": 6.45751953125e-06, "model_forward_time": 0.025277137756347656, "step": 4232 }, { "epoch": 6.45751953125e-06, "step": 4232, "training_step_time": 0.1104590892791748 }, { "epoch": 6.45904541015625e-06, "model_forward_time": 0.02507781982421875, "step": 4233 }, { "epoch": 6.45904541015625e-06, "step": 4233, "training_step_time": 0.11197614669799805 }, { "epoch": 6.4605712890625e-06, "model_forward_time": 0.02429938316345215, "step": 4234 }, { "epoch": 6.4605712890625e-06, "step": 4234, "training_step_time": 0.1090998649597168 }, { "epoch": 6.46209716796875e-06, "model_forward_time": 0.025304079055786133, "step": 4235 }, { "epoch": 6.46209716796875e-06, "step": 4235, "training_step_time": 0.10630369186401367 }, { "epoch": 6.463623046875e-06, "model_forward_time": 0.024771451950073242, "step": 4236 }, { "epoch": 6.463623046875e-06, "step": 4236, "training_step_time": 0.10446619987487793 }, { "epoch": 6.46514892578125e-06, "model_forward_time": 0.02440619468688965, "step": 4237 }, { "epoch": 6.46514892578125e-06, "step": 4237, "training_step_time": 0.10984206199645996 }, { "epoch": 6.4666748046875e-06, "model_forward_time": 0.0257875919342041, "step": 4238 }, { "epoch": 6.4666748046875e-06, "step": 4238, "training_step_time": 0.1082451343536377 }, { "epoch": 6.46820068359375e-06, "model_forward_time": 0.024472475051879883, "step": 4239 }, { "epoch": 6.46820068359375e-06, "step": 4239, "training_step_time": 0.1058354377746582 }, { "epoch": 6.4697265625e-06, "grad_norm": 0.5333660244941711, "learning_rate": 9.773667393934567e-05, "loss": 0.1021, "step": 4240 }, { "epoch": 6.4697265625e-06, "model_forward_time": 0.024556875228881836, "step": 4240 }, { "epoch": 6.4697265625e-06, "step": 4240, "training_step_time": 0.10894632339477539 }, { "epoch": 6.47125244140625e-06, "model_forward_time": 0.024594783782958984, "step": 4241 }, { "epoch": 6.47125244140625e-06, "step": 4241, "training_step_time": 0.10696077346801758 }, { "epoch": 6.4727783203125e-06, "model_forward_time": 0.028829336166381836, "step": 4242 }, { "epoch": 6.4727783203125e-06, "step": 4242, "training_step_time": 0.11257076263427734 }, { "epoch": 6.47430419921875e-06, "model_forward_time": 0.024416208267211914, "step": 4243 }, { "epoch": 6.47430419921875e-06, "step": 4243, "training_step_time": 0.11062979698181152 }, { "epoch": 6.475830078125e-06, "model_forward_time": 0.024642229080200195, "step": 4244 }, { "epoch": 6.475830078125e-06, "step": 4244, "training_step_time": 0.10557866096496582 }, { "epoch": 6.47735595703125e-06, "model_forward_time": 0.024239540100097656, "step": 4245 }, { "epoch": 6.47735595703125e-06, "step": 4245, "training_step_time": 0.22206473350524902 }, { "epoch": 6.4788818359375e-06, "model_forward_time": 0.023090124130249023, "step": 4246 }, { "epoch": 6.4788818359375e-06, "step": 4246, "training_step_time": 0.10763239860534668 }, { "epoch": 6.48040771484375e-06, "model_forward_time": 0.02409815788269043, "step": 4247 }, { "epoch": 6.48040771484375e-06, "step": 4247, "training_step_time": 0.13665151596069336 }, { "epoch": 6.48193359375e-06, "model_forward_time": 0.02477860450744629, "step": 4248 }, { "epoch": 6.48193359375e-06, "step": 4248, "training_step_time": 0.1581439971923828 }, { "epoch": 6.48345947265625e-06, "model_forward_time": 0.026182889938354492, "step": 4249 }, { "epoch": 6.48345947265625e-06, "step": 4249, "training_step_time": 0.18472933769226074 }, { "epoch": 6.4849853515625e-06, "grad_norm": 0.686373770236969, "learning_rate": 9.772025009397537e-05, "loss": 0.0912, "step": 4250 }, { "epoch": 6.4849853515625e-06, "model_forward_time": 0.024106502532958984, "step": 4250 }, { "epoch": 6.4849853515625e-06, "step": 4250, "training_step_time": 0.18533730506896973 }, { "epoch": 6.48651123046875e-06, "model_forward_time": 0.02342534065246582, "step": 4251 }, { "epoch": 6.48651123046875e-06, "step": 4251, "training_step_time": 0.15431475639343262 }, { "epoch": 6.488037109375e-06, "model_forward_time": 0.023751258850097656, "step": 4252 }, { "epoch": 6.488037109375e-06, "step": 4252, "training_step_time": 0.1993701457977295 }, { "epoch": 6.48956298828125e-06, "model_forward_time": 0.02353668212890625, "step": 4253 }, { "epoch": 6.48956298828125e-06, "step": 4253, "training_step_time": 0.17065000534057617 }, { "epoch": 6.4910888671875e-06, "model_forward_time": 0.023382902145385742, "step": 4254 }, { "epoch": 6.4910888671875e-06, "step": 4254, "training_step_time": 0.15351653099060059 }, { "epoch": 6.49261474609375e-06, "model_forward_time": 0.023538827896118164, "step": 4255 }, { "epoch": 6.49261474609375e-06, "step": 4255, "training_step_time": 0.10841250419616699 }, { "epoch": 6.494140625e-06, "model_forward_time": 0.023777246475219727, "step": 4256 }, { "epoch": 6.494140625e-06, "step": 4256, "training_step_time": 0.11066818237304688 }, { "epoch": 6.49566650390625e-06, "model_forward_time": 0.024583101272583008, "step": 4257 }, { "epoch": 6.49566650390625e-06, "step": 4257, "training_step_time": 0.11031103134155273 }, { "epoch": 6.4971923828125e-06, "model_forward_time": 0.024266481399536133, "step": 4258 }, { "epoch": 6.4971923828125e-06, "step": 4258, "training_step_time": 0.11453986167907715 }, { "epoch": 6.49871826171875e-06, "model_forward_time": 0.024438858032226562, "step": 4259 }, { "epoch": 6.49871826171875e-06, "step": 4259, "training_step_time": 0.10804557800292969 }, { "epoch": 6.500244140625e-06, "grad_norm": 0.7169239521026611, "learning_rate": 9.77037682640015e-05, "loss": 0.0958, "step": 4260 }, { "epoch": 6.500244140625e-06, "model_forward_time": 0.024553537368774414, "step": 4260 }, { "epoch": 6.500244140625e-06, "step": 4260, "training_step_time": 0.11106705665588379 }, { "epoch": 6.50177001953125e-06, "model_forward_time": 0.02481675148010254, "step": 4261 }, { "epoch": 6.50177001953125e-06, "step": 4261, "training_step_time": 0.11205410957336426 }, { "epoch": 6.5032958984375e-06, "model_forward_time": 0.02467060089111328, "step": 4262 }, { "epoch": 6.5032958984375e-06, "step": 4262, "training_step_time": 0.11338663101196289 }, { "epoch": 6.50482177734375e-06, "model_forward_time": 0.025105714797973633, "step": 4263 }, { "epoch": 6.50482177734375e-06, "step": 4263, "training_step_time": 0.10671401023864746 }, { "epoch": 6.50634765625e-06, "model_forward_time": 0.024008512496948242, "step": 4264 }, { "epoch": 6.50634765625e-06, "step": 4264, "training_step_time": 0.10971808433532715 }, { "epoch": 6.50787353515625e-06, "model_forward_time": 0.024332046508789062, "step": 4265 }, { "epoch": 6.50787353515625e-06, "step": 4265, "training_step_time": 0.10995745658874512 }, { "epoch": 6.5093994140625e-06, "model_forward_time": 0.0247652530670166, "step": 4266 }, { "epoch": 6.5093994140625e-06, "step": 4266, "training_step_time": 0.11231565475463867 }, { "epoch": 6.51092529296875e-06, "model_forward_time": 0.02478504180908203, "step": 4267 }, { "epoch": 6.51092529296875e-06, "step": 4267, "training_step_time": 0.10709953308105469 }, { "epoch": 6.512451171875e-06, "model_forward_time": 0.02458953857421875, "step": 4268 }, { "epoch": 6.512451171875e-06, "step": 4268, "training_step_time": 0.10959029197692871 }, { "epoch": 6.51397705078125e-06, "model_forward_time": 0.024944067001342773, "step": 4269 }, { "epoch": 6.51397705078125e-06, "step": 4269, "training_step_time": 0.11844182014465332 }, { "epoch": 6.5155029296875e-06, "grad_norm": 0.5746374726295471, "learning_rate": 9.7687228469451e-05, "loss": 0.1016, "step": 4270 }, { "epoch": 6.5155029296875e-06, "model_forward_time": 0.02433943748474121, "step": 4270 }, { "epoch": 6.5155029296875e-06, "step": 4270, "training_step_time": 0.11708927154541016 }, { "epoch": 6.51702880859375e-06, "model_forward_time": 0.024867773056030273, "step": 4271 }, { "epoch": 6.51702880859375e-06, "step": 4271, "training_step_time": 0.11280155181884766 }, { "epoch": 6.5185546875e-06, "model_forward_time": 0.025377273559570312, "step": 4272 }, { "epoch": 6.5185546875e-06, "step": 4272, "training_step_time": 0.11459827423095703 }, { "epoch": 6.52008056640625e-06, "model_forward_time": 0.024292707443237305, "step": 4273 }, { "epoch": 6.52008056640625e-06, "step": 4273, "training_step_time": 0.1119072437286377 }, { "epoch": 6.5216064453125e-06, "model_forward_time": 0.024770498275756836, "step": 4274 }, { "epoch": 6.5216064453125e-06, "step": 4274, "training_step_time": 0.10953712463378906 }, { "epoch": 6.52313232421875e-06, "model_forward_time": 0.024352550506591797, "step": 4275 }, { "epoch": 6.52313232421875e-06, "step": 4275, "training_step_time": 0.10800933837890625 }, { "epoch": 6.524658203125e-06, "model_forward_time": 0.025091171264648438, "step": 4276 }, { "epoch": 6.524658203125e-06, "step": 4276, "training_step_time": 0.1094820499420166 }, { "epoch": 6.52618408203125e-06, "model_forward_time": 0.024437665939331055, "step": 4277 }, { "epoch": 6.52618408203125e-06, "step": 4277, "training_step_time": 0.10814356803894043 }, { "epoch": 6.5277099609375e-06, "model_forward_time": 0.02449822425842285, "step": 4278 }, { "epoch": 6.5277099609375e-06, "step": 4278, "training_step_time": 0.11236715316772461 }, { "epoch": 6.52923583984375e-06, "model_forward_time": 0.024829387664794922, "step": 4279 }, { "epoch": 6.52923583984375e-06, "step": 4279, "training_step_time": 0.10782623291015625 }, { "epoch": 6.53076171875e-06, "grad_norm": 0.3435329496860504, "learning_rate": 9.76706307304213e-05, "loss": 0.084, "step": 4280 }, { "epoch": 6.53076171875e-06, "model_forward_time": 0.0246884822845459, "step": 4280 }, { "epoch": 6.53076171875e-06, "step": 4280, "training_step_time": 0.10952377319335938 }, { "epoch": 6.53228759765625e-06, "model_forward_time": 0.024920940399169922, "step": 4281 }, { "epoch": 6.53228759765625e-06, "step": 4281, "training_step_time": 0.10822224617004395 }, { "epoch": 6.5338134765625e-06, "model_forward_time": 0.024636507034301758, "step": 4282 }, { "epoch": 6.5338134765625e-06, "step": 4282, "training_step_time": 0.10974931716918945 }, { "epoch": 6.53533935546875e-06, "model_forward_time": 0.02486419677734375, "step": 4283 }, { "epoch": 6.53533935546875e-06, "step": 4283, "training_step_time": 0.10721540451049805 }, { "epoch": 6.536865234375e-06, "model_forward_time": 0.024460315704345703, "step": 4284 }, { "epoch": 6.536865234375e-06, "step": 4284, "training_step_time": 0.10967063903808594 }, { "epoch": 6.53839111328125e-06, "model_forward_time": 0.024729490280151367, "step": 4285 }, { "epoch": 6.53839111328125e-06, "step": 4285, "training_step_time": 0.10894012451171875 }, { "epoch": 6.5399169921875e-06, "model_forward_time": 0.024495363235473633, "step": 4286 }, { "epoch": 6.5399169921875e-06, "step": 4286, "training_step_time": 0.10609817504882812 }, { "epoch": 6.54144287109375e-06, "model_forward_time": 0.0285031795501709, "step": 4287 }, { "epoch": 6.54144287109375e-06, "step": 4287, "training_step_time": 0.11117935180664062 }, { "epoch": 6.54296875e-06, "model_forward_time": 0.02460789680480957, "step": 4288 }, { "epoch": 6.54296875e-06, "step": 4288, "training_step_time": 0.1117103099822998 }, { "epoch": 6.54449462890625e-06, "model_forward_time": 0.024452686309814453, "step": 4289 }, { "epoch": 6.54449462890625e-06, "step": 4289, "training_step_time": 0.10788917541503906 }, { "epoch": 6.5460205078125e-06, "grad_norm": 0.6555113792419434, "learning_rate": 9.765397506708023e-05, "loss": 0.0917, "step": 4290 }, { "epoch": 6.5460205078125e-06, "model_forward_time": 0.024636030197143555, "step": 4290 }, { "epoch": 6.5460205078125e-06, "step": 4290, "training_step_time": 0.10979056358337402 }, { "epoch": 6.54754638671875e-06, "model_forward_time": 0.0249631404876709, "step": 4291 }, { "epoch": 6.54754638671875e-06, "step": 4291, "training_step_time": 0.17656445503234863 }, { "epoch": 6.549072265625e-06, "model_forward_time": 0.023801803588867188, "step": 4292 }, { "epoch": 6.549072265625e-06, "step": 4292, "training_step_time": 0.12189984321594238 }, { "epoch": 6.55059814453125e-06, "model_forward_time": 0.02409076690673828, "step": 4293 }, { "epoch": 6.55059814453125e-06, "step": 4293, "training_step_time": 0.11351752281188965 }, { "epoch": 6.5521240234375e-06, "model_forward_time": 0.02458930015563965, "step": 4294 }, { "epoch": 6.5521240234375e-06, "step": 4294, "training_step_time": 0.1238248348236084 }, { "epoch": 6.55364990234375e-06, "model_forward_time": 0.02507162094116211, "step": 4295 }, { "epoch": 6.55364990234375e-06, "step": 4295, "training_step_time": 0.17083096504211426 }, { "epoch": 6.55517578125e-06, "model_forward_time": 0.024291515350341797, "step": 4296 }, { "epoch": 6.55517578125e-06, "step": 4296, "training_step_time": 0.18315649032592773 }, { "epoch": 6.55670166015625e-06, "model_forward_time": 0.023514509201049805, "step": 4297 }, { "epoch": 6.55670166015625e-06, "step": 4297, "training_step_time": 0.21477389335632324 }, { "epoch": 6.5582275390625e-06, "model_forward_time": 0.02351832389831543, "step": 4298 }, { "epoch": 6.5582275390625e-06, "step": 4298, "training_step_time": 0.1544947624206543 }, { "epoch": 6.55975341796875e-06, "model_forward_time": 0.02344512939453125, "step": 4299 }, { "epoch": 6.55975341796875e-06, "step": 4299, "training_step_time": 0.17835426330566406 }, { "epoch": 6.561279296875e-06, "grad_norm": 1.0753647089004517, "learning_rate": 9.763726149966596e-05, "loss": 0.0747, "step": 4300 }, { "epoch": 6.561279296875e-06, "model_forward_time": 0.02583622932434082, "step": 4300 }, { "epoch": 6.561279296875e-06, "step": 4300, "training_step_time": 0.1500256061553955 }, { "epoch": 6.56280517578125e-06, "model_forward_time": 0.024130582809448242, "step": 4301 }, { "epoch": 6.56280517578125e-06, "step": 4301, "training_step_time": 0.1619892120361328 }, { "epoch": 6.5643310546875e-06, "model_forward_time": 0.023835420608520508, "step": 4302 }, { "epoch": 6.5643310546875e-06, "step": 4302, "training_step_time": 0.11077260971069336 }, { "epoch": 6.56585693359375e-06, "model_forward_time": 0.024020910263061523, "step": 4303 }, { "epoch": 6.56585693359375e-06, "step": 4303, "training_step_time": 0.11815953254699707 }, { "epoch": 6.5673828125e-06, "model_forward_time": 0.02437567710876465, "step": 4304 }, { "epoch": 6.5673828125e-06, "step": 4304, "training_step_time": 0.12411046028137207 }, { "epoch": 6.56890869140625e-06, "model_forward_time": 0.0261995792388916, "step": 4305 }, { "epoch": 6.56890869140625e-06, "step": 4305, "training_step_time": 0.12511682510375977 }, { "epoch": 6.5704345703125e-06, "model_forward_time": 0.024224519729614258, "step": 4306 }, { "epoch": 6.5704345703125e-06, "step": 4306, "training_step_time": 0.1197059154510498 }, { "epoch": 6.57196044921875e-06, "model_forward_time": 0.02500128746032715, "step": 4307 }, { "epoch": 6.57196044921875e-06, "step": 4307, "training_step_time": 0.12176656723022461 }, { "epoch": 6.573486328125e-06, "model_forward_time": 0.024811983108520508, "step": 4308 }, { "epoch": 6.573486328125e-06, "step": 4308, "training_step_time": 0.12289047241210938 }, { "epoch": 6.57501220703125e-06, "model_forward_time": 0.025588512420654297, "step": 4309 }, { "epoch": 6.57501220703125e-06, "step": 4309, "training_step_time": 0.1091775894165039 }, { "epoch": 6.5765380859375e-06, "grad_norm": 0.2607550323009491, "learning_rate": 9.762049004848706e-05, "loss": 0.0866, "step": 4310 }, { "epoch": 6.5765380859375e-06, "model_forward_time": 0.024265766143798828, "step": 4310 }, { "epoch": 6.5765380859375e-06, "step": 4310, "training_step_time": 0.17173528671264648 }, { "epoch": 6.57806396484375e-06, "model_forward_time": 0.02408146858215332, "step": 4311 }, { "epoch": 6.57806396484375e-06, "step": 4311, "training_step_time": 0.16152739524841309 }, { "epoch": 6.57958984375e-06, "model_forward_time": 0.023791790008544922, "step": 4312 }, { "epoch": 6.57958984375e-06, "step": 4312, "training_step_time": 0.11660385131835938 }, { "epoch": 6.58111572265625e-06, "model_forward_time": 0.024196863174438477, "step": 4313 }, { "epoch": 6.58111572265625e-06, "step": 4313, "training_step_time": 0.22089767456054688 }, { "epoch": 6.5826416015625e-06, "model_forward_time": 0.02404475212097168, "step": 4314 }, { "epoch": 6.5826416015625e-06, "step": 4314, "training_step_time": 0.10869741439819336 }, { "epoch": 6.58416748046875e-06, "model_forward_time": 0.02373504638671875, "step": 4315 }, { "epoch": 6.58416748046875e-06, "step": 4315, "training_step_time": 0.10488748550415039 }, { "epoch": 6.585693359375e-06, "model_forward_time": 0.02417778968811035, "step": 4316 }, { "epoch": 6.585693359375e-06, "step": 4316, "training_step_time": 0.10684585571289062 }, { "epoch": 6.58721923828125e-06, "model_forward_time": 0.024859905242919922, "step": 4317 }, { "epoch": 6.58721923828125e-06, "step": 4317, "training_step_time": 0.10750150680541992 }, { "epoch": 6.5887451171875e-06, "model_forward_time": 0.024730205535888672, "step": 4318 }, { "epoch": 6.5887451171875e-06, "step": 4318, "training_step_time": 0.10858845710754395 }, { "epoch": 6.59027099609375e-06, "model_forward_time": 0.0245969295501709, "step": 4319 }, { "epoch": 6.59027099609375e-06, "step": 4319, "training_step_time": 0.10819029808044434 }, { "epoch": 6.591796875e-06, "grad_norm": 0.3992424011230469, "learning_rate": 9.760366073392246e-05, "loss": 0.1157, "step": 4320 }, { "epoch": 6.591796875e-06, "model_forward_time": 0.0243532657623291, "step": 4320 }, { "epoch": 6.591796875e-06, "step": 4320, "training_step_time": 0.10850739479064941 }, { "epoch": 6.59332275390625e-06, "model_forward_time": 0.02482748031616211, "step": 4321 }, { "epoch": 6.59332275390625e-06, "step": 4321, "training_step_time": 0.10821819305419922 }, { "epoch": 6.5948486328125e-06, "model_forward_time": 0.02773761749267578, "step": 4322 }, { "epoch": 6.5948486328125e-06, "step": 4322, "training_step_time": 0.11365103721618652 }, { "epoch": 6.59637451171875e-06, "model_forward_time": 0.024251699447631836, "step": 4323 }, { "epoch": 6.59637451171875e-06, "step": 4323, "training_step_time": 0.10649824142456055 }, { "epoch": 6.597900390625e-06, "model_forward_time": 0.024547338485717773, "step": 4324 }, { "epoch": 6.597900390625e-06, "step": 4324, "training_step_time": 0.10802435874938965 }, { "epoch": 6.59942626953125e-06, "model_forward_time": 0.024929046630859375, "step": 4325 }, { "epoch": 6.59942626953125e-06, "step": 4325, "training_step_time": 0.10693168640136719 }, { "epoch": 6.6009521484375e-06, "model_forward_time": 0.024374008178710938, "step": 4326 }, { "epoch": 6.6009521484375e-06, "step": 4326, "training_step_time": 0.10775017738342285 }, { "epoch": 6.60247802734375e-06, "model_forward_time": 0.024434566497802734, "step": 4327 }, { "epoch": 6.60247802734375e-06, "step": 4327, "training_step_time": 0.11057329177856445 }, { "epoch": 6.60400390625e-06, "model_forward_time": 0.024326324462890625, "step": 4328 }, { "epoch": 6.60400390625e-06, "step": 4328, "training_step_time": 0.10746598243713379 }, { "epoch": 6.60552978515625e-06, "model_forward_time": 0.02614116668701172, "step": 4329 }, { "epoch": 6.60552978515625e-06, "step": 4329, "training_step_time": 0.1083993911743164 }, { "epoch": 6.6070556640625e-06, "grad_norm": 0.4989405572414398, "learning_rate": 9.758677357642131e-05, "loss": 0.076, "step": 4330 }, { "epoch": 6.6070556640625e-06, "model_forward_time": 0.0244901180267334, "step": 4330 }, { "epoch": 6.6070556640625e-06, "step": 4330, "training_step_time": 0.11144828796386719 }, { "epoch": 6.60858154296875e-06, "model_forward_time": 0.025089740753173828, "step": 4331 }, { "epoch": 6.60858154296875e-06, "step": 4331, "training_step_time": 0.11182928085327148 }, { "epoch": 6.610107421875e-06, "model_forward_time": 0.02445054054260254, "step": 4332 }, { "epoch": 6.610107421875e-06, "step": 4332, "training_step_time": 0.10628414154052734 }, { "epoch": 6.61163330078125e-06, "model_forward_time": 0.02442789077758789, "step": 4333 }, { "epoch": 6.61163330078125e-06, "step": 4333, "training_step_time": 0.10907697677612305 }, { "epoch": 6.6131591796875e-06, "model_forward_time": 0.02432870864868164, "step": 4334 }, { "epoch": 6.6131591796875e-06, "step": 4334, "training_step_time": 0.10872054100036621 }, { "epoch": 6.61468505859375e-06, "model_forward_time": 0.024495363235473633, "step": 4335 }, { "epoch": 6.61468505859375e-06, "step": 4335, "training_step_time": 0.18063569068908691 }, { "epoch": 6.6162109375e-06, "model_forward_time": 0.023784637451171875, "step": 4336 }, { "epoch": 6.6162109375e-06, "step": 4336, "training_step_time": 0.10920238494873047 }, { "epoch": 6.61773681640625e-06, "model_forward_time": 0.024243831634521484, "step": 4337 }, { "epoch": 6.61773681640625e-06, "step": 4337, "training_step_time": 0.22081351280212402 }, { "epoch": 6.6192626953125e-06, "model_forward_time": 0.02374100685119629, "step": 4338 }, { "epoch": 6.6192626953125e-06, "step": 4338, "training_step_time": 0.10631680488586426 }, { "epoch": 6.62078857421875e-06, "model_forward_time": 0.023685693740844727, "step": 4339 }, { "epoch": 6.62078857421875e-06, "step": 4339, "training_step_time": 0.1123359203338623 }, { "epoch": 6.622314453125e-06, "grad_norm": 0.4247824549674988, "learning_rate": 9.756982859650314e-05, "loss": 0.0757, "step": 4340 }, { "epoch": 6.622314453125e-06, "model_forward_time": 0.024617910385131836, "step": 4340 }, { "epoch": 6.622314453125e-06, "step": 4340, "training_step_time": 0.19577360153198242 }, { "epoch": 6.62384033203125e-06, "model_forward_time": 0.023773670196533203, "step": 4341 }, { "epoch": 6.62384033203125e-06, "step": 4341, "training_step_time": 0.20076966285705566 }, { "epoch": 6.6253662109375e-06, "model_forward_time": 0.023917675018310547, "step": 4342 }, { "epoch": 6.6253662109375e-06, "step": 4342, "training_step_time": 0.18840265274047852 }, { "epoch": 6.62689208984375e-06, "model_forward_time": 0.02486562728881836, "step": 4343 }, { "epoch": 6.62689208984375e-06, "step": 4343, "training_step_time": 0.15751051902770996 }, { "epoch": 6.62841796875e-06, "model_forward_time": 0.023736000061035156, "step": 4344 }, { "epoch": 6.62841796875e-06, "step": 4344, "training_step_time": 0.1656327247619629 }, { "epoch": 6.62994384765625e-06, "model_forward_time": 0.023743391036987305, "step": 4345 }, { "epoch": 6.62994384765625e-06, "step": 4345, "training_step_time": 0.1403505802154541 }, { "epoch": 6.6314697265625e-06, "model_forward_time": 0.023730993270874023, "step": 4346 }, { "epoch": 6.6314697265625e-06, "step": 4346, "training_step_time": 0.1095430850982666 }, { "epoch": 6.63299560546875e-06, "model_forward_time": 0.024067401885986328, "step": 4347 }, { "epoch": 6.63299560546875e-06, "step": 4347, "training_step_time": 0.1119375228881836 }, { "epoch": 6.634521484375e-06, "model_forward_time": 0.02450871467590332, "step": 4348 }, { "epoch": 6.634521484375e-06, "step": 4348, "training_step_time": 0.11150598526000977 }, { "epoch": 6.63604736328125e-06, "model_forward_time": 0.023972034454345703, "step": 4349 }, { "epoch": 6.63604736328125e-06, "step": 4349, "training_step_time": 0.11609840393066406 }, { "epoch": 6.6375732421875e-06, "grad_norm": 0.35964709520339966, "learning_rate": 9.755282581475769e-05, "loss": 0.0697, "step": 4350 }, { "epoch": 6.6375732421875e-06, "model_forward_time": 0.02448415756225586, "step": 4350 }, { "epoch": 6.6375732421875e-06, "step": 4350, "training_step_time": 0.11177849769592285 }, { "epoch": 6.63909912109375e-06, "model_forward_time": 0.02499866485595703, "step": 4351 }, { "epoch": 6.63909912109375e-06, "step": 4351, "training_step_time": 0.1087958812713623 }, { "epoch": 6.640625e-06, "model_forward_time": 0.024538516998291016, "step": 4352 }, { "epoch": 6.640625e-06, "step": 4352, "training_step_time": 0.11056375503540039 }, { "epoch": 6.64215087890625e-06, "model_forward_time": 0.024620532989501953, "step": 4353 }, { "epoch": 6.64215087890625e-06, "step": 4353, "training_step_time": 0.11011075973510742 }, { "epoch": 6.6436767578125e-06, "model_forward_time": 0.02473735809326172, "step": 4354 }, { "epoch": 6.6436767578125e-06, "step": 4354, "training_step_time": 0.1092836856842041 }, { "epoch": 6.64520263671875e-06, "model_forward_time": 0.024943828582763672, "step": 4355 }, { "epoch": 6.64520263671875e-06, "step": 4355, "training_step_time": 0.2120351791381836 }, { "epoch": 6.646728515625e-06, "model_forward_time": 0.024372339248657227, "step": 4356 }, { "epoch": 6.646728515625e-06, "step": 4356, "training_step_time": 0.11501884460449219 }, { "epoch": 6.64825439453125e-06, "model_forward_time": 0.02452254295349121, "step": 4357 }, { "epoch": 6.64825439453125e-06, "step": 4357, "training_step_time": 0.10913777351379395 }, { "epoch": 6.6497802734375e-06, "model_forward_time": 0.024715662002563477, "step": 4358 }, { "epoch": 6.6497802734375e-06, "step": 4358, "training_step_time": 0.18436503410339355 }, { "epoch": 6.65130615234375e-06, "model_forward_time": 0.02393198013305664, "step": 4359 }, { "epoch": 6.65130615234375e-06, "step": 4359, "training_step_time": 0.1616814136505127 }, { "epoch": 6.65283203125e-06, "grad_norm": 0.49390262365341187, "learning_rate": 9.753576525184492e-05, "loss": 0.0804, "step": 4360 }, { "epoch": 6.65283203125e-06, "model_forward_time": 0.02370905876159668, "step": 4360 }, { "epoch": 6.65283203125e-06, "step": 4360, "training_step_time": 0.13049793243408203 }, { "epoch": 6.65435791015625e-06, "model_forward_time": 0.023852109909057617, "step": 4361 }, { "epoch": 6.65435791015625e-06, "step": 4361, "training_step_time": 0.1284928321838379 }, { "epoch": 6.6558837890625e-06, "model_forward_time": 0.024065017700195312, "step": 4362 }, { "epoch": 6.6558837890625e-06, "step": 4362, "training_step_time": 0.12180685997009277 }, { "epoch": 6.65740966796875e-06, "model_forward_time": 0.023887157440185547, "step": 4363 }, { "epoch": 6.65740966796875e-06, "step": 4363, "training_step_time": 0.11612677574157715 }, { "epoch": 6.658935546875e-06, "model_forward_time": 0.024860858917236328, "step": 4364 }, { "epoch": 6.658935546875e-06, "step": 4364, "training_step_time": 0.11703896522521973 }, { "epoch": 6.66046142578125e-06, "model_forward_time": 0.028395891189575195, "step": 4365 }, { "epoch": 6.66046142578125e-06, "step": 4365, "training_step_time": 0.11362886428833008 }, { "epoch": 6.6619873046875e-06, "model_forward_time": 0.024801254272460938, "step": 4366 }, { "epoch": 6.6619873046875e-06, "step": 4366, "training_step_time": 0.11715197563171387 }, { "epoch": 6.66351318359375e-06, "model_forward_time": 0.024309873580932617, "step": 4367 }, { "epoch": 6.66351318359375e-06, "step": 4367, "training_step_time": 0.10759758949279785 }, { "epoch": 6.6650390625e-06, "model_forward_time": 0.024828195571899414, "step": 4368 }, { "epoch": 6.6650390625e-06, "step": 4368, "training_step_time": 0.11082744598388672 }, { "epoch": 6.66656494140625e-06, "model_forward_time": 0.02506875991821289, "step": 4369 }, { "epoch": 6.66656494140625e-06, "step": 4369, "training_step_time": 0.11076688766479492 }, { "epoch": 6.6680908203125e-06, "grad_norm": 0.580703854560852, "learning_rate": 9.751864692849504e-05, "loss": 0.084, "step": 4370 }, { "epoch": 6.6680908203125e-06, "model_forward_time": 0.025594472885131836, "step": 4370 }, { "epoch": 6.6680908203125e-06, "step": 4370, "training_step_time": 0.1085672378540039 }, { "epoch": 6.66961669921875e-06, "model_forward_time": 0.02512383460998535, "step": 4371 }, { "epoch": 6.66961669921875e-06, "step": 4371, "training_step_time": 0.10904216766357422 }, { "epoch": 6.671142578125e-06, "model_forward_time": 0.025000810623168945, "step": 4372 }, { "epoch": 6.671142578125e-06, "step": 4372, "training_step_time": 0.12130069732666016 }, { "epoch": 6.67266845703125e-06, "model_forward_time": 0.024539470672607422, "step": 4373 }, { "epoch": 6.67266845703125e-06, "step": 4373, "training_step_time": 0.11635756492614746 }, { "epoch": 6.6741943359375e-06, "model_forward_time": 0.024425506591796875, "step": 4374 }, { "epoch": 6.6741943359375e-06, "step": 4374, "training_step_time": 0.10800528526306152 }, { "epoch": 6.67572021484375e-06, "model_forward_time": 0.02487349510192871, "step": 4375 }, { "epoch": 6.67572021484375e-06, "step": 4375, "training_step_time": 0.10786843299865723 }, { "epoch": 6.67724609375e-06, "model_forward_time": 0.024628400802612305, "step": 4376 }, { "epoch": 6.67724609375e-06, "step": 4376, "training_step_time": 0.11495780944824219 }, { "epoch": 6.67877197265625e-06, "model_forward_time": 0.024732112884521484, "step": 4377 }, { "epoch": 6.67877197265625e-06, "step": 4377, "training_step_time": 0.11054325103759766 }, { "epoch": 6.6802978515625e-06, "model_forward_time": 0.024713754653930664, "step": 4378 }, { "epoch": 6.6802978515625e-06, "step": 4378, "training_step_time": 0.10920143127441406 }, { "epoch": 6.68182373046875e-06, "model_forward_time": 0.02444314956665039, "step": 4379 }, { "epoch": 6.68182373046875e-06, "step": 4379, "training_step_time": 0.12654709815979004 }, { "epoch": 6.683349609375e-06, "grad_norm": 0.4432964324951172, "learning_rate": 9.750147086550844e-05, "loss": 0.0828, "step": 4380 }, { "epoch": 6.683349609375e-06, "model_forward_time": 0.02497124671936035, "step": 4380 }, { "epoch": 6.683349609375e-06, "step": 4380, "training_step_time": 0.11072230339050293 }, { "epoch": 6.68487548828125e-06, "model_forward_time": 0.024817705154418945, "step": 4381 }, { "epoch": 6.68487548828125e-06, "step": 4381, "training_step_time": 0.22046279907226562 }, { "epoch": 6.6864013671875e-06, "model_forward_time": 0.023878812789916992, "step": 4382 }, { "epoch": 6.6864013671875e-06, "step": 4382, "training_step_time": 0.18476247787475586 }, { "epoch": 6.68792724609375e-06, "model_forward_time": 0.023889541625976562, "step": 4383 }, { "epoch": 6.68792724609375e-06, "step": 4383, "training_step_time": 0.1253042221069336 }, { "epoch": 6.689453125e-06, "model_forward_time": 0.02368640899658203, "step": 4384 }, { "epoch": 6.689453125e-06, "step": 4384, "training_step_time": 0.1708064079284668 }, { "epoch": 6.69097900390625e-06, "model_forward_time": 0.024415969848632812, "step": 4385 }, { "epoch": 6.69097900390625e-06, "step": 4385, "training_step_time": 0.14625191688537598 }, { "epoch": 6.6925048828125e-06, "model_forward_time": 0.023844003677368164, "step": 4386 }, { "epoch": 6.6925048828125e-06, "step": 4386, "training_step_time": 0.12918376922607422 }, { "epoch": 6.69403076171875e-06, "model_forward_time": 0.023836612701416016, "step": 4387 }, { "epoch": 6.69403076171875e-06, "step": 4387, "training_step_time": 0.2027883529663086 }, { "epoch": 6.695556640625e-06, "model_forward_time": 0.0232546329498291, "step": 4388 }, { "epoch": 6.695556640625e-06, "step": 4388, "training_step_time": 0.15815496444702148 }, { "epoch": 6.69708251953125e-06, "model_forward_time": 0.02433037757873535, "step": 4389 }, { "epoch": 6.69708251953125e-06, "step": 4389, "training_step_time": 0.15130901336669922 }, { "epoch": 6.6986083984375e-06, "grad_norm": 0.4692542254924774, "learning_rate": 9.748423708375563e-05, "loss": 0.0777, "step": 4390 }, { "epoch": 6.6986083984375e-06, "model_forward_time": 0.023773670196533203, "step": 4390 }, { "epoch": 6.6986083984375e-06, "step": 4390, "training_step_time": 0.10547184944152832 }, { "epoch": 6.70013427734375e-06, "model_forward_time": 0.0239410400390625, "step": 4391 }, { "epoch": 6.70013427734375e-06, "step": 4391, "training_step_time": 0.10596251487731934 }, { "epoch": 6.70166015625e-06, "model_forward_time": 0.02483224868774414, "step": 4392 }, { "epoch": 6.70166015625e-06, "step": 4392, "training_step_time": 0.10634493827819824 }, { "epoch": 6.70318603515625e-06, "model_forward_time": 0.02451300621032715, "step": 4393 }, { "epoch": 6.70318603515625e-06, "step": 4393, "training_step_time": 0.11151385307312012 }, { "epoch": 6.7047119140625e-06, "model_forward_time": 0.024956226348876953, "step": 4394 }, { "epoch": 6.7047119140625e-06, "step": 4394, "training_step_time": 0.11178135871887207 }, { "epoch": 6.70623779296875e-06, "model_forward_time": 0.024352312088012695, "step": 4395 }, { "epoch": 6.70623779296875e-06, "step": 4395, "training_step_time": 0.1072533130645752 }, { "epoch": 6.707763671875e-06, "model_forward_time": 0.02488875389099121, "step": 4396 }, { "epoch": 6.707763671875e-06, "step": 4396, "training_step_time": 0.10666418075561523 }, { "epoch": 6.70928955078125e-06, "model_forward_time": 0.02468276023864746, "step": 4397 }, { "epoch": 6.70928955078125e-06, "step": 4397, "training_step_time": 0.11514735221862793 }, { "epoch": 6.7108154296875e-06, "model_forward_time": 0.02582573890686035, "step": 4398 }, { "epoch": 6.7108154296875e-06, "step": 4398, "training_step_time": 0.10753536224365234 }, { "epoch": 6.71234130859375e-06, "model_forward_time": 0.025196313858032227, "step": 4399 }, { "epoch": 6.71234130859375e-06, "step": 4399, "training_step_time": 0.10587358474731445 }, { "epoch": 6.7138671875e-06, "grad_norm": 0.3315945863723755, "learning_rate": 9.746694560417731e-05, "loss": 0.1073, "step": 4400 }, { "epoch": 6.7138671875e-06, "model_forward_time": 0.025259733200073242, "step": 4400 }, { "epoch": 6.7138671875e-06, "step": 4400, "training_step_time": 0.1230614185333252 }, { "epoch": 6.71539306640625e-06, "model_forward_time": 0.025791406631469727, "step": 4401 }, { "epoch": 6.71539306640625e-06, "step": 4401, "training_step_time": 0.11361527442932129 }, { "epoch": 6.7169189453125e-06, "model_forward_time": 0.028496265411376953, "step": 4402 }, { "epoch": 6.7169189453125e-06, "step": 4402, "training_step_time": 0.11051273345947266 }, { "epoch": 6.71844482421875e-06, "model_forward_time": 0.02579665184020996, "step": 4403 }, { "epoch": 6.71844482421875e-06, "step": 4403, "training_step_time": 0.20893192291259766 }, { "epoch": 6.719970703125e-06, "model_forward_time": 0.02483367919921875, "step": 4404 }, { "epoch": 6.719970703125e-06, "step": 4404, "training_step_time": 0.13214373588562012 }, { "epoch": 6.72149658203125e-06, "model_forward_time": 0.024660348892211914, "step": 4405 }, { "epoch": 6.72149658203125e-06, "step": 4405, "training_step_time": 0.12876605987548828 }, { "epoch": 6.7230224609375e-06, "model_forward_time": 0.024568557739257812, "step": 4406 }, { "epoch": 6.7230224609375e-06, "step": 4406, "training_step_time": 0.12694811820983887 }, { "epoch": 6.72454833984375e-06, "model_forward_time": 0.025042057037353516, "step": 4407 }, { "epoch": 6.72454833984375e-06, "step": 4407, "training_step_time": 0.12434959411621094 }, { "epoch": 6.72607421875e-06, "model_forward_time": 0.025457382202148438, "step": 4408 }, { "epoch": 6.72607421875e-06, "step": 4408, "training_step_time": 0.1208031177520752 }, { "epoch": 6.72760009765625e-06, "model_forward_time": 0.028746843338012695, "step": 4409 }, { "epoch": 6.72760009765625e-06, "step": 4409, "training_step_time": 0.11565351486206055 }, { "epoch": 6.7291259765625e-06, "grad_norm": 0.44205155968666077, "learning_rate": 9.744959644778422e-05, "loss": 0.0855, "step": 4410 }, { "epoch": 6.7291259765625e-06, "model_forward_time": 0.025493621826171875, "step": 4410 }, { "epoch": 6.7291259765625e-06, "step": 4410, "training_step_time": 0.11760306358337402 }, { "epoch": 6.73065185546875e-06, "model_forward_time": 0.025439023971557617, "step": 4411 }, { "epoch": 6.73065185546875e-06, "step": 4411, "training_step_time": 0.11013936996459961 }, { "epoch": 6.732177734375e-06, "model_forward_time": 0.025485754013061523, "step": 4412 }, { "epoch": 6.732177734375e-06, "step": 4412, "training_step_time": 0.10869908332824707 }, { "epoch": 6.73370361328125e-06, "model_forward_time": 0.025135517120361328, "step": 4413 }, { "epoch": 6.73370361328125e-06, "step": 4413, "training_step_time": 0.11286568641662598 }, { "epoch": 6.7352294921875e-06, "model_forward_time": 0.025252103805541992, "step": 4414 }, { "epoch": 6.7352294921875e-06, "step": 4414, "training_step_time": 0.10966968536376953 }, { "epoch": 6.73675537109375e-06, "model_forward_time": 0.025339126586914062, "step": 4415 }, { "epoch": 6.73675537109375e-06, "step": 4415, "training_step_time": 0.10991954803466797 }, { "epoch": 6.73828125e-06, "model_forward_time": 0.024278879165649414, "step": 4416 }, { "epoch": 6.73828125e-06, "step": 4416, "training_step_time": 0.11007261276245117 }, { "epoch": 6.73980712890625e-06, "model_forward_time": 0.024446964263916016, "step": 4417 }, { "epoch": 6.73980712890625e-06, "step": 4417, "training_step_time": 0.11290287971496582 }, { "epoch": 6.7413330078125e-06, "model_forward_time": 0.025385379791259766, "step": 4418 }, { "epoch": 6.7413330078125e-06, "step": 4418, "training_step_time": 0.10982131958007812 }, { "epoch": 6.74285888671875e-06, "model_forward_time": 0.025242328643798828, "step": 4419 }, { "epoch": 6.74285888671875e-06, "step": 4419, "training_step_time": 0.1099998950958252 }, { "epoch": 6.744384765625e-06, "grad_norm": 0.6597705483436584, "learning_rate": 9.743218963565725e-05, "loss": 0.1024, "step": 4420 }, { "epoch": 6.744384765625e-06, "model_forward_time": 0.02510809898376465, "step": 4420 }, { "epoch": 6.744384765625e-06, "step": 4420, "training_step_time": 0.11147475242614746 }, { "epoch": 6.74591064453125e-06, "model_forward_time": 0.027329444885253906, "step": 4421 }, { "epoch": 6.74591064453125e-06, "step": 4421, "training_step_time": 0.11153841018676758 }, { "epoch": 6.7474365234375e-06, "model_forward_time": 0.02553081512451172, "step": 4422 }, { "epoch": 6.7474365234375e-06, "step": 4422, "training_step_time": 0.10847020149230957 }, { "epoch": 6.74896240234375e-06, "model_forward_time": 0.02446913719177246, "step": 4423 }, { "epoch": 6.74896240234375e-06, "step": 4423, "training_step_time": 0.23080039024353027 }, { "epoch": 6.75048828125e-06, "model_forward_time": 0.02478313446044922, "step": 4424 }, { "epoch": 6.75048828125e-06, "step": 4424, "training_step_time": 0.12435793876647949 }, { "epoch": 6.75201416015625e-06, "model_forward_time": 0.024983644485473633, "step": 4425 }, { "epoch": 6.75201416015625e-06, "step": 4425, "training_step_time": 0.12787890434265137 }, { "epoch": 6.7535400390625e-06, "model_forward_time": 0.025197744369506836, "step": 4426 }, { "epoch": 6.7535400390625e-06, "step": 4426, "training_step_time": 0.11344385147094727 }, { "epoch": 6.75506591796875e-06, "model_forward_time": 0.025916099548339844, "step": 4427 }, { "epoch": 6.75506591796875e-06, "step": 4427, "training_step_time": 0.16034555435180664 }, { "epoch": 6.756591796875e-06, "model_forward_time": 0.024791955947875977, "step": 4428 }, { "epoch": 6.756591796875e-06, "step": 4428, "training_step_time": 0.12233471870422363 }, { "epoch": 6.75811767578125e-06, "model_forward_time": 0.02547454833984375, "step": 4429 }, { "epoch": 6.75811767578125e-06, "step": 4429, "training_step_time": 0.1455223560333252 }, { "epoch": 6.7596435546875e-06, "grad_norm": 0.3992781639099121, "learning_rate": 9.74147251889473e-05, "loss": 0.0782, "step": 4430 }, { "epoch": 6.7596435546875e-06, "model_forward_time": 0.025011777877807617, "step": 4430 }, { "epoch": 6.7596435546875e-06, "step": 4430, "training_step_time": 0.17708420753479004 }, { "epoch": 6.76116943359375e-06, "model_forward_time": 0.02461719512939453, "step": 4431 }, { "epoch": 6.76116943359375e-06, "step": 4431, "training_step_time": 0.18003058433532715 }, { "epoch": 6.7626953125e-06, "model_forward_time": 0.024953365325927734, "step": 4432 }, { "epoch": 6.7626953125e-06, "step": 4432, "training_step_time": 0.1970045566558838 }, { "epoch": 6.76422119140625e-06, "model_forward_time": 0.025161266326904297, "step": 4433 }, { "epoch": 6.76422119140625e-06, "step": 4433, "training_step_time": 0.11963486671447754 }, { "epoch": 6.7657470703125e-06, "model_forward_time": 0.023435115814208984, "step": 4434 }, { "epoch": 6.7657470703125e-06, "step": 4434, "training_step_time": 0.11079859733581543 }, { "epoch": 6.76727294921875e-06, "model_forward_time": 0.024517297744750977, "step": 4435 }, { "epoch": 6.76727294921875e-06, "step": 4435, "training_step_time": 0.1070866584777832 }, { "epoch": 6.768798828125e-06, "model_forward_time": 0.025110244750976562, "step": 4436 }, { "epoch": 6.768798828125e-06, "step": 4436, "training_step_time": 0.10853815078735352 }, { "epoch": 6.77032470703125e-06, "model_forward_time": 0.025333404541015625, "step": 4437 }, { "epoch": 6.77032470703125e-06, "step": 4437, "training_step_time": 0.1096348762512207 }, { "epoch": 6.7718505859375e-06, "model_forward_time": 0.025403261184692383, "step": 4438 }, { "epoch": 6.7718505859375e-06, "step": 4438, "training_step_time": 0.16763520240783691 }, { "epoch": 6.77337646484375e-06, "model_forward_time": 0.02462315559387207, "step": 4439 }, { "epoch": 6.77337646484375e-06, "step": 4439, "training_step_time": 0.1921398639678955 }, { "epoch": 6.77490234375e-06, "grad_norm": 0.3353877067565918, "learning_rate": 9.739720312887535e-05, "loss": 0.0888, "step": 4440 }, { "epoch": 6.77490234375e-06, "model_forward_time": 0.02477550506591797, "step": 4440 }, { "epoch": 6.77490234375e-06, "step": 4440, "training_step_time": 0.18576407432556152 }, { "epoch": 6.77642822265625e-06, "model_forward_time": 0.024771690368652344, "step": 4441 }, { "epoch": 6.77642822265625e-06, "step": 4441, "training_step_time": 0.1255650520324707 }, { "epoch": 6.7779541015625e-06, "model_forward_time": 0.024149417877197266, "step": 4442 }, { "epoch": 6.7779541015625e-06, "step": 4442, "training_step_time": 0.16859841346740723 }, { "epoch": 6.77947998046875e-06, "model_forward_time": 0.024303913116455078, "step": 4443 }, { "epoch": 6.77947998046875e-06, "step": 4443, "training_step_time": 0.1571030616760254 }, { "epoch": 6.781005859375e-06, "model_forward_time": 0.025760173797607422, "step": 4444 }, { "epoch": 6.781005859375e-06, "step": 4444, "training_step_time": 0.10898613929748535 }, { "epoch": 6.78253173828125e-06, "model_forward_time": 0.025356054306030273, "step": 4445 }, { "epoch": 6.78253173828125e-06, "step": 4445, "training_step_time": 0.13817977905273438 }, { "epoch": 6.7840576171875e-06, "model_forward_time": 0.025652647018432617, "step": 4446 }, { "epoch": 6.7840576171875e-06, "step": 4446, "training_step_time": 0.11881566047668457 }, { "epoch": 6.78558349609375e-06, "model_forward_time": 0.02527785301208496, "step": 4447 }, { "epoch": 6.78558349609375e-06, "step": 4447, "training_step_time": 0.11393356323242188 }, { "epoch": 6.787109375e-06, "model_forward_time": 0.02528858184814453, "step": 4448 }, { "epoch": 6.787109375e-06, "step": 4448, "training_step_time": 0.10541319847106934 }, { "epoch": 6.78863525390625e-06, "model_forward_time": 0.025516986846923828, "step": 4449 }, { "epoch": 6.78863525390625e-06, "step": 4449, "training_step_time": 0.10833263397216797 }, { "epoch": 6.7901611328125e-06, "grad_norm": 0.5004896521568298, "learning_rate": 9.737962347673231e-05, "loss": 0.1129, "step": 4450 }, { "epoch": 6.7901611328125e-06, "model_forward_time": 0.025115013122558594, "step": 4450 }, { "epoch": 6.7901611328125e-06, "step": 4450, "training_step_time": 0.10771918296813965 }, { "epoch": 6.79168701171875e-06, "model_forward_time": 0.025381803512573242, "step": 4451 }, { "epoch": 6.79168701171875e-06, "step": 4451, "training_step_time": 0.11132264137268066 }, { "epoch": 6.793212890625e-06, "model_forward_time": 0.025698423385620117, "step": 4452 }, { "epoch": 6.793212890625e-06, "step": 4452, "training_step_time": 0.1142113208770752 }, { "epoch": 6.79473876953125e-06, "model_forward_time": 0.02529311180114746, "step": 4453 }, { "epoch": 6.79473876953125e-06, "step": 4453, "training_step_time": 0.10810112953186035 }, { "epoch": 6.7962646484375e-06, "model_forward_time": 0.02525639533996582, "step": 4454 }, { "epoch": 6.7962646484375e-06, "step": 4454, "training_step_time": 0.10873818397521973 }, { "epoch": 6.79779052734375e-06, "model_forward_time": 0.025655269622802734, "step": 4455 }, { "epoch": 6.79779052734375e-06, "step": 4455, "training_step_time": 0.11636924743652344 }, { "epoch": 6.79931640625e-06, "model_forward_time": 0.024880170822143555, "step": 4456 }, { "epoch": 6.79931640625e-06, "step": 4456, "training_step_time": 0.12848615646362305 }, { "epoch": 6.80084228515625e-06, "model_forward_time": 0.025758981704711914, "step": 4457 }, { "epoch": 6.80084228515625e-06, "step": 4457, "training_step_time": 0.16585397720336914 }, { "epoch": 6.8023681640625e-06, "model_forward_time": 0.024266481399536133, "step": 4458 }, { "epoch": 6.8023681640625e-06, "step": 4458, "training_step_time": 0.15198183059692383 }, { "epoch": 6.80389404296875e-06, "model_forward_time": 0.025344371795654297, "step": 4459 }, { "epoch": 6.80389404296875e-06, "step": 4459, "training_step_time": 0.15577244758605957 }, { "epoch": 6.805419921875e-06, "grad_norm": 0.4311482012271881, "learning_rate": 9.736198625387916e-05, "loss": 0.0864, "step": 4460 }, { "epoch": 6.805419921875e-06, "model_forward_time": 0.024741172790527344, "step": 4460 }, { "epoch": 6.805419921875e-06, "step": 4460, "training_step_time": 0.1317591667175293 }, { "epoch": 6.80694580078125e-06, "model_forward_time": 0.02438831329345703, "step": 4461 }, { "epoch": 6.80694580078125e-06, "step": 4461, "training_step_time": 0.1337747573852539 }, { "epoch": 6.8084716796875e-06, "model_forward_time": 0.024546146392822266, "step": 4462 }, { "epoch": 6.8084716796875e-06, "step": 4462, "training_step_time": 0.1287229061126709 }, { "epoch": 6.80999755859375e-06, "model_forward_time": 0.02500629425048828, "step": 4463 }, { "epoch": 6.80999755859375e-06, "step": 4463, "training_step_time": 0.11901473999023438 }, { "epoch": 6.8115234375e-06, "model_forward_time": 0.02517843246459961, "step": 4464 }, { "epoch": 6.8115234375e-06, "step": 4464, "training_step_time": 0.12088823318481445 }, { "epoch": 6.81304931640625e-06, "model_forward_time": 0.025204181671142578, "step": 4465 }, { "epoch": 6.81304931640625e-06, "step": 4465, "training_step_time": 0.18205976486206055 }, { "epoch": 6.8145751953125e-06, "model_forward_time": 0.0249481201171875, "step": 4466 }, { "epoch": 6.8145751953125e-06, "step": 4466, "training_step_time": 0.10940718650817871 }, { "epoch": 6.81610107421875e-06, "model_forward_time": 0.0248568058013916, "step": 4467 }, { "epoch": 6.81610107421875e-06, "step": 4467, "training_step_time": 0.138472318649292 }, { "epoch": 6.817626953125e-06, "model_forward_time": 0.025551795959472656, "step": 4468 }, { "epoch": 6.817626953125e-06, "step": 4468, "training_step_time": 0.16276073455810547 }, { "epoch": 6.81915283203125e-06, "model_forward_time": 0.024810075759887695, "step": 4469 }, { "epoch": 6.81915283203125e-06, "step": 4469, "training_step_time": 0.21833467483520508 }, { "epoch": 6.8206787109375e-06, "grad_norm": 0.43905940651893616, "learning_rate": 9.734429148174675e-05, "loss": 0.0785, "step": 4470 }, { "epoch": 6.8206787109375e-06, "model_forward_time": 0.02467513084411621, "step": 4470 }, { "epoch": 6.8206787109375e-06, "step": 4470, "training_step_time": 0.1417839527130127 }, { "epoch": 6.82220458984375e-06, "model_forward_time": 0.024777889251708984, "step": 4471 }, { "epoch": 6.82220458984375e-06, "step": 4471, "training_step_time": 0.13990020751953125 }, { "epoch": 6.82373046875e-06, "model_forward_time": 0.026890039443969727, "step": 4472 }, { "epoch": 6.82373046875e-06, "step": 4472, "training_step_time": 0.20887351036071777 }, { "epoch": 6.82525634765625e-06, "model_forward_time": 0.02489614486694336, "step": 4473 }, { "epoch": 6.82525634765625e-06, "step": 4473, "training_step_time": 0.13181209564208984 }, { "epoch": 6.8267822265625e-06, "model_forward_time": 0.024866580963134766, "step": 4474 }, { "epoch": 6.8267822265625e-06, "step": 4474, "training_step_time": 0.1098332405090332 }, { "epoch": 6.82830810546875e-06, "model_forward_time": 0.025820255279541016, "step": 4475 }, { "epoch": 6.82830810546875e-06, "step": 4475, "training_step_time": 0.19210577011108398 }, { "epoch": 6.829833984375e-06, "model_forward_time": 0.024728775024414062, "step": 4476 }, { "epoch": 6.829833984375e-06, "step": 4476, "training_step_time": 0.1051633358001709 }, { "epoch": 6.83135986328125e-06, "model_forward_time": 0.024882078170776367, "step": 4477 }, { "epoch": 6.83135986328125e-06, "step": 4477, "training_step_time": 0.10464668273925781 }, { "epoch": 6.8328857421875e-06, "model_forward_time": 0.027993202209472656, "step": 4478 }, { "epoch": 6.8328857421875e-06, "step": 4478, "training_step_time": 0.11600089073181152 }, { "epoch": 6.83441162109375e-06, "model_forward_time": 0.027083396911621094, "step": 4479 }, { "epoch": 6.83441162109375e-06, "step": 4479, "training_step_time": 0.11164259910583496 }, { "epoch": 6.8359375e-06, "grad_norm": 0.5180590748786926, "learning_rate": 9.732653918183592e-05, "loss": 0.0885, "step": 4480 }, { "epoch": 6.8359375e-06, "model_forward_time": 0.02594733238220215, "step": 4480 }, { "epoch": 6.8359375e-06, "step": 4480, "training_step_time": 0.11564159393310547 }, { "epoch": 6.83746337890625e-06, "model_forward_time": 0.026409626007080078, "step": 4481 }, { "epoch": 6.83746337890625e-06, "step": 4481, "training_step_time": 0.11275362968444824 }, { "epoch": 6.8389892578125e-06, "model_forward_time": 0.025445938110351562, "step": 4482 }, { "epoch": 6.8389892578125e-06, "step": 4482, "training_step_time": 0.11760067939758301 }, { "epoch": 6.84051513671875e-06, "model_forward_time": 0.025847673416137695, "step": 4483 }, { "epoch": 6.84051513671875e-06, "step": 4483, "training_step_time": 0.11334729194641113 }, { "epoch": 6.842041015625e-06, "model_forward_time": 0.02576470375061035, "step": 4484 }, { "epoch": 6.842041015625e-06, "step": 4484, "training_step_time": 0.10874128341674805 }, { "epoch": 6.84356689453125e-06, "model_forward_time": 0.025194644927978516, "step": 4485 }, { "epoch": 6.84356689453125e-06, "step": 4485, "training_step_time": 0.11578106880187988 }, { "epoch": 6.8450927734375e-06, "model_forward_time": 0.028726577758789062, "step": 4486 }, { "epoch": 6.8450927734375e-06, "step": 4486, "training_step_time": 0.12242484092712402 }, { "epoch": 6.84661865234375e-06, "model_forward_time": 0.026030302047729492, "step": 4487 }, { "epoch": 6.84661865234375e-06, "step": 4487, "training_step_time": 0.11379861831665039 }, { "epoch": 6.84814453125e-06, "model_forward_time": 0.025763988494873047, "step": 4488 }, { "epoch": 6.84814453125e-06, "step": 4488, "training_step_time": 0.1197657585144043 }, { "epoch": 6.84967041015625e-06, "model_forward_time": 0.02585458755493164, "step": 4489 }, { "epoch": 6.84967041015625e-06, "step": 4489, "training_step_time": 0.11082053184509277 }, { "epoch": 6.8511962890625e-06, "grad_norm": 0.5689246654510498, "learning_rate": 9.730872937571739e-05, "loss": 0.0898, "step": 4490 }, { "epoch": 6.8511962890625e-06, "model_forward_time": 0.025580644607543945, "step": 4490 }, { "epoch": 6.8511962890625e-06, "step": 4490, "training_step_time": 0.11792278289794922 }, { "epoch": 6.85272216796875e-06, "model_forward_time": 0.02572941780090332, "step": 4491 }, { "epoch": 6.85272216796875e-06, "step": 4491, "training_step_time": 0.10931515693664551 }, { "epoch": 6.854248046875e-06, "model_forward_time": 0.02576756477355957, "step": 4492 }, { "epoch": 6.854248046875e-06, "step": 4492, "training_step_time": 0.10883283615112305 }, { "epoch": 6.85577392578125e-06, "model_forward_time": 0.02566671371459961, "step": 4493 }, { "epoch": 6.85577392578125e-06, "step": 4493, "training_step_time": 0.10997486114501953 }, { "epoch": 6.8572998046875e-06, "model_forward_time": 0.0287020206451416, "step": 4494 }, { "epoch": 6.8572998046875e-06, "step": 4494, "training_step_time": 0.11341476440429688 }, { "epoch": 6.85882568359375e-06, "model_forward_time": 0.025387048721313477, "step": 4495 }, { "epoch": 6.85882568359375e-06, "step": 4495, "training_step_time": 0.1075284481048584 }, { "epoch": 6.8603515625e-06, "model_forward_time": 0.025438308715820312, "step": 4496 }, { "epoch": 6.8603515625e-06, "step": 4496, "training_step_time": 0.11192560195922852 }, { "epoch": 6.86187744140625e-06, "model_forward_time": 0.025542020797729492, "step": 4497 }, { "epoch": 6.86187744140625e-06, "step": 4497, "training_step_time": 0.10827088356018066 }, { "epoch": 6.8634033203125e-06, "model_forward_time": 0.025399446487426758, "step": 4498 }, { "epoch": 6.8634033203125e-06, "step": 4498, "training_step_time": 0.10962748527526855 }, { "epoch": 6.86492919921875e-06, "model_forward_time": 0.02547430992126465, "step": 4499 }, { "epoch": 6.86492919921875e-06, "step": 4499, "training_step_time": 0.11098170280456543 }, { "epoch": 6.866455078125e-06, "grad_norm": 0.5847257971763611, "learning_rate": 9.729086208503174e-05, "loss": 0.0886, "step": 4500 }, { "epoch": 6.866455078125e-06, "model_forward_time": 0.024441003799438477, "step": 4500 }, { "epoch": 6.866455078125e-06, "step": 4500, "training_step_time": 0.10938405990600586 }, { "epoch": 6.86798095703125e-06, "model_forward_time": 0.024552345275878906, "step": 4501 }, { "epoch": 6.86798095703125e-06, "step": 4501, "training_step_time": 0.10780000686645508 }, { "epoch": 6.8695068359375e-06, "model_forward_time": 0.025714635848999023, "step": 4502 }, { "epoch": 6.8695068359375e-06, "step": 4502, "training_step_time": 0.11142683029174805 }, { "epoch": 6.87103271484375e-06, "model_forward_time": 0.025624513626098633, "step": 4503 }, { "epoch": 6.87103271484375e-06, "step": 4503, "training_step_time": 0.1113278865814209 }, { "epoch": 6.87255859375e-06, "model_forward_time": 0.02548956871032715, "step": 4504 }, { "epoch": 6.87255859375e-06, "step": 4504, "training_step_time": 0.1092061996459961 }, { "epoch": 6.87408447265625e-06, "model_forward_time": 0.025415658950805664, "step": 4505 }, { "epoch": 6.87408447265625e-06, "step": 4505, "training_step_time": 0.10877656936645508 }, { "epoch": 6.8756103515625e-06, "model_forward_time": 0.025763988494873047, "step": 4506 }, { "epoch": 6.8756103515625e-06, "step": 4506, "training_step_time": 0.11118531227111816 }, { "epoch": 6.87713623046875e-06, "model_forward_time": 0.02573561668395996, "step": 4507 }, { "epoch": 6.87713623046875e-06, "step": 4507, "training_step_time": 0.10768461227416992 }, { "epoch": 6.878662109375e-06, "model_forward_time": 0.025513887405395508, "step": 4508 }, { "epoch": 6.878662109375e-06, "step": 4508, "training_step_time": 0.1124882698059082 }, { "epoch": 6.88018798828125e-06, "model_forward_time": 0.025415897369384766, "step": 4509 }, { "epoch": 6.88018798828125e-06, "step": 4509, "training_step_time": 0.10879039764404297 }, { "epoch": 6.8817138671875e-06, "grad_norm": 0.30061620473861694, "learning_rate": 9.727293733148942e-05, "loss": 0.0993, "step": 4510 }, { "epoch": 6.8817138671875e-06, "model_forward_time": 0.025926589965820312, "step": 4510 }, { "epoch": 6.8817138671875e-06, "step": 4510, "training_step_time": 0.11031818389892578 }, { "epoch": 6.88323974609375e-06, "model_forward_time": 0.02551746368408203, "step": 4511 }, { "epoch": 6.88323974609375e-06, "step": 4511, "training_step_time": 0.12284135818481445 }, { "epoch": 6.884765625e-06, "model_forward_time": 0.025502681732177734, "step": 4512 }, { "epoch": 6.884765625e-06, "step": 4512, "training_step_time": 0.1155092716217041 }, { "epoch": 6.88629150390625e-06, "model_forward_time": 0.025554180145263672, "step": 4513 }, { "epoch": 6.88629150390625e-06, "step": 4513, "training_step_time": 0.13530898094177246 }, { "epoch": 6.8878173828125e-06, "model_forward_time": 0.02549600601196289, "step": 4514 }, { "epoch": 6.8878173828125e-06, "step": 4514, "training_step_time": 0.1698307991027832 }, { "epoch": 6.88934326171875e-06, "model_forward_time": 0.024968385696411133, "step": 4515 }, { "epoch": 6.88934326171875e-06, "step": 4515, "training_step_time": 0.2276754379272461 }, { "epoch": 6.890869140625e-06, "model_forward_time": 0.024751901626586914, "step": 4516 }, { "epoch": 6.890869140625e-06, "step": 4516, "training_step_time": 0.18165802955627441 }, { "epoch": 6.89239501953125e-06, "model_forward_time": 0.025082826614379883, "step": 4517 }, { "epoch": 6.89239501953125e-06, "step": 4517, "training_step_time": 0.15519428253173828 }, { "epoch": 6.8939208984375e-06, "model_forward_time": 0.024975299835205078, "step": 4518 }, { "epoch": 6.8939208984375e-06, "step": 4518, "training_step_time": 0.19071102142333984 }, { "epoch": 6.89544677734375e-06, "model_forward_time": 0.024646282196044922, "step": 4519 }, { "epoch": 6.89544677734375e-06, "step": 4519, "training_step_time": 0.14542627334594727 }, { "epoch": 6.89697265625e-06, "grad_norm": 0.556941568851471, "learning_rate": 9.72549551368707e-05, "loss": 0.0898, "step": 4520 }, { "epoch": 6.89697265625e-06, "model_forward_time": 0.02476954460144043, "step": 4520 }, { "epoch": 6.89697265625e-06, "step": 4520, "training_step_time": 0.1619255542755127 }, { "epoch": 6.89849853515625e-06, "model_forward_time": 0.024382591247558594, "step": 4521 }, { "epoch": 6.89849853515625e-06, "step": 4521, "training_step_time": 0.16274809837341309 }, { "epoch": 6.9000244140625e-06, "model_forward_time": 0.025071382522583008, "step": 4522 }, { "epoch": 6.9000244140625e-06, "step": 4522, "training_step_time": 0.1696312427520752 }, { "epoch": 6.90155029296875e-06, "model_forward_time": 0.024303674697875977, "step": 4523 }, { "epoch": 6.90155029296875e-06, "step": 4523, "training_step_time": 0.15805792808532715 }, { "epoch": 6.903076171875e-06, "model_forward_time": 0.024580717086791992, "step": 4524 }, { "epoch": 6.903076171875e-06, "step": 4524, "training_step_time": 0.14712285995483398 }, { "epoch": 6.90460205078125e-06, "model_forward_time": 0.024869203567504883, "step": 4525 }, { "epoch": 6.90460205078125e-06, "step": 4525, "training_step_time": 0.12766027450561523 }, { "epoch": 6.9061279296875e-06, "model_forward_time": 0.026020050048828125, "step": 4526 }, { "epoch": 6.9061279296875e-06, "step": 4526, "training_step_time": 0.12654900550842285 }, { "epoch": 6.90765380859375e-06, "model_forward_time": 0.02553701400756836, "step": 4527 }, { "epoch": 6.90765380859375e-06, "step": 4527, "training_step_time": 0.1243131160736084 }, { "epoch": 6.9091796875e-06, "model_forward_time": 0.02545905113220215, "step": 4528 }, { "epoch": 6.9091796875e-06, "step": 4528, "training_step_time": 0.12047910690307617 }, { "epoch": 6.91070556640625e-06, "model_forward_time": 0.02570509910583496, "step": 4529 }, { "epoch": 6.91070556640625e-06, "step": 4529, "training_step_time": 0.1938011646270752 }, { "epoch": 6.9122314453125e-06, "grad_norm": 0.3187704384326935, "learning_rate": 9.723691552302562e-05, "loss": 0.0912, "step": 4530 }, { "epoch": 6.9122314453125e-06, "model_forward_time": 0.024973154067993164, "step": 4530 }, { "epoch": 6.9122314453125e-06, "step": 4530, "training_step_time": 0.12613987922668457 }, { "epoch": 6.91375732421875e-06, "model_forward_time": 0.025076866149902344, "step": 4531 }, { "epoch": 6.91375732421875e-06, "step": 4531, "training_step_time": 0.10954952239990234 }, { "epoch": 6.915283203125e-06, "model_forward_time": 0.02588176727294922, "step": 4532 }, { "epoch": 6.915283203125e-06, "step": 4532, "training_step_time": 0.11190438270568848 }, { "epoch": 6.91680908203125e-06, "model_forward_time": 0.025758981704711914, "step": 4533 }, { "epoch": 6.91680908203125e-06, "step": 4533, "training_step_time": 0.2253270149230957 }, { "epoch": 6.9183349609375e-06, "model_forward_time": 0.024959325790405273, "step": 4534 }, { "epoch": 6.9183349609375e-06, "step": 4534, "training_step_time": 0.10701322555541992 }, { "epoch": 6.91986083984375e-06, "model_forward_time": 0.024904727935791016, "step": 4535 }, { "epoch": 6.91986083984375e-06, "step": 4535, "training_step_time": 0.1057734489440918 }, { "epoch": 6.92138671875e-06, "model_forward_time": 0.027437448501586914, "step": 4536 }, { "epoch": 6.92138671875e-06, "step": 4536, "training_step_time": 0.11645674705505371 }, { "epoch": 6.92291259765625e-06, "model_forward_time": 0.025565385818481445, "step": 4537 }, { "epoch": 6.92291259765625e-06, "step": 4537, "training_step_time": 0.1094202995300293 }, { "epoch": 6.9244384765625e-06, "model_forward_time": 0.025213241577148438, "step": 4538 }, { "epoch": 6.9244384765625e-06, "step": 4538, "training_step_time": 0.11330199241638184 }, { "epoch": 6.92596435546875e-06, "model_forward_time": 0.026393651962280273, "step": 4539 }, { "epoch": 6.92596435546875e-06, "step": 4539, "training_step_time": 0.11123394966125488 }, { "epoch": 6.927490234375e-06, "grad_norm": 0.38522833585739136, "learning_rate": 9.721881851187406e-05, "loss": 0.1091, "step": 4540 }, { "epoch": 6.927490234375e-06, "model_forward_time": 0.0257723331451416, "step": 4540 }, { "epoch": 6.927490234375e-06, "step": 4540, "training_step_time": 0.11416125297546387 }, { "epoch": 6.92901611328125e-06, "model_forward_time": 0.02500319480895996, "step": 4541 }, { "epoch": 6.92901611328125e-06, "step": 4541, "training_step_time": 0.11039328575134277 }, { "epoch": 6.9305419921875e-06, "model_forward_time": 0.025172710418701172, "step": 4542 }, { "epoch": 6.9305419921875e-06, "step": 4542, "training_step_time": 0.10945510864257812 }, { "epoch": 6.93206787109375e-06, "model_forward_time": 0.02553391456604004, "step": 4543 }, { "epoch": 6.93206787109375e-06, "step": 4543, "training_step_time": 0.11800885200500488 }, { "epoch": 6.93359375e-06, "model_forward_time": 0.02550029754638672, "step": 4544 }, { "epoch": 6.93359375e-06, "step": 4544, "training_step_time": 0.11364555358886719 }, { "epoch": 6.93511962890625e-06, "model_forward_time": 0.025170326232910156, "step": 4545 }, { "epoch": 6.93511962890625e-06, "step": 4545, "training_step_time": 0.1092677116394043 }, { "epoch": 6.9366455078125e-06, "model_forward_time": 0.025568485260009766, "step": 4546 }, { "epoch": 6.9366455078125e-06, "step": 4546, "training_step_time": 0.11096715927124023 }, { "epoch": 6.93817138671875e-06, "model_forward_time": 0.025572776794433594, "step": 4547 }, { "epoch": 6.93817138671875e-06, "step": 4547, "training_step_time": 0.10884761810302734 }, { "epoch": 6.939697265625e-06, "model_forward_time": 0.025199174880981445, "step": 4548 }, { "epoch": 6.939697265625e-06, "step": 4548, "training_step_time": 0.10773277282714844 }, { "epoch": 6.94122314453125e-06, "model_forward_time": 0.025654315948486328, "step": 4549 }, { "epoch": 6.94122314453125e-06, "step": 4549, "training_step_time": 0.11041855812072754 }, { "epoch": 6.9427490234375e-06, "grad_norm": 0.3061712086200714, "learning_rate": 9.720066412540554e-05, "loss": 0.1097, "step": 4550 }, { "epoch": 6.9427490234375e-06, "model_forward_time": 0.02569866180419922, "step": 4550 }, { "epoch": 6.9427490234375e-06, "step": 4550, "training_step_time": 0.10866689682006836 }, { "epoch": 6.94427490234375e-06, "model_forward_time": 0.025941133499145508, "step": 4551 }, { "epoch": 6.94427490234375e-06, "step": 4551, "training_step_time": 0.10921406745910645 }, { "epoch": 6.94580078125e-06, "model_forward_time": 0.025366544723510742, "step": 4552 }, { "epoch": 6.94580078125e-06, "step": 4552, "training_step_time": 0.11448407173156738 }, { "epoch": 6.94732666015625e-06, "model_forward_time": 0.02539515495300293, "step": 4553 }, { "epoch": 6.94732666015625e-06, "step": 4553, "training_step_time": 0.2008509635925293 }, { "epoch": 6.9488525390625e-06, "model_forward_time": 0.024308204650878906, "step": 4554 }, { "epoch": 6.9488525390625e-06, "step": 4554, "training_step_time": 0.1146554946899414 }, { "epoch": 6.95037841796875e-06, "model_forward_time": 0.024580955505371094, "step": 4555 }, { "epoch": 6.95037841796875e-06, "step": 4555, "training_step_time": 0.1324918270111084 }, { "epoch": 6.951904296875e-06, "model_forward_time": 0.025465726852416992, "step": 4556 }, { "epoch": 6.951904296875e-06, "step": 4556, "training_step_time": 0.16433191299438477 }, { "epoch": 6.95343017578125e-06, "model_forward_time": 0.02474212646484375, "step": 4557 }, { "epoch": 6.95343017578125e-06, "step": 4557, "training_step_time": 0.21648859977722168 }, { "epoch": 6.9549560546875e-06, "model_forward_time": 0.024992704391479492, "step": 4558 }, { "epoch": 6.9549560546875e-06, "step": 4558, "training_step_time": 0.10788702964782715 }, { "epoch": 6.95648193359375e-06, "model_forward_time": 0.025012731552124023, "step": 4559 }, { "epoch": 6.95648193359375e-06, "step": 4559, "training_step_time": 0.14208555221557617 }, { "epoch": 6.9580078125e-06, "grad_norm": 0.4172661602497101, "learning_rate": 9.718245238567939e-05, "loss": 0.0835, "step": 4560 }, { "epoch": 6.9580078125e-06, "model_forward_time": 0.025568008422851562, "step": 4560 }, { "epoch": 6.9580078125e-06, "step": 4560, "training_step_time": 0.10921645164489746 }, { "epoch": 6.95953369140625e-06, "model_forward_time": 0.02555108070373535, "step": 4561 }, { "epoch": 6.95953369140625e-06, "step": 4561, "training_step_time": 0.1126859188079834 }, { "epoch": 6.9610595703125e-06, "model_forward_time": 0.026616811752319336, "step": 4562 }, { "epoch": 6.9610595703125e-06, "step": 4562, "training_step_time": 0.12203764915466309 }, { "epoch": 6.96258544921875e-06, "model_forward_time": 0.027092456817626953, "step": 4563 }, { "epoch": 6.96258544921875e-06, "step": 4563, "training_step_time": 0.18268942832946777 }, { "epoch": 6.964111328125e-06, "model_forward_time": 0.02462482452392578, "step": 4564 }, { "epoch": 6.964111328125e-06, "step": 4564, "training_step_time": 0.18396282196044922 }, { "epoch": 6.96563720703125e-06, "model_forward_time": 0.024209260940551758, "step": 4565 }, { "epoch": 6.96563720703125e-06, "step": 4565, "training_step_time": 0.113128662109375 }, { "epoch": 6.9671630859375e-06, "model_forward_time": 0.024965286254882812, "step": 4566 }, { "epoch": 6.9671630859375e-06, "step": 4566, "training_step_time": 0.10502767562866211 }, { "epoch": 6.96868896484375e-06, "model_forward_time": 0.025495052337646484, "step": 4567 }, { "epoch": 6.96868896484375e-06, "step": 4567, "training_step_time": 0.1083064079284668 }, { "epoch": 6.97021484375e-06, "model_forward_time": 0.02565455436706543, "step": 4568 }, { "epoch": 6.97021484375e-06, "step": 4568, "training_step_time": 0.1105339527130127 }, { "epoch": 6.97174072265625e-06, "model_forward_time": 0.026070356369018555, "step": 4569 }, { "epoch": 6.97174072265625e-06, "step": 4569, "training_step_time": 0.10763764381408691 }, { "epoch": 6.9732666015625e-06, "grad_norm": 0.6116529703140259, "learning_rate": 9.716418331482458e-05, "loss": 0.0924, "step": 4570 }, { "epoch": 6.9732666015625e-06, "model_forward_time": 0.025993824005126953, "step": 4570 }, { "epoch": 6.9732666015625e-06, "step": 4570, "training_step_time": 0.11006593704223633 }, { "epoch": 6.97479248046875e-06, "model_forward_time": 0.02561354637145996, "step": 4571 }, { "epoch": 6.97479248046875e-06, "step": 4571, "training_step_time": 0.11244559288024902 }, { "epoch": 6.976318359375e-06, "model_forward_time": 0.02550506591796875, "step": 4572 }, { "epoch": 6.976318359375e-06, "step": 4572, "training_step_time": 0.10854148864746094 }, { "epoch": 6.97784423828125e-06, "model_forward_time": 0.024932146072387695, "step": 4573 }, { "epoch": 6.97784423828125e-06, "step": 4573, "training_step_time": 0.11133050918579102 }, { "epoch": 6.9793701171875e-06, "model_forward_time": 0.025921106338500977, "step": 4574 }, { "epoch": 6.9793701171875e-06, "step": 4574, "training_step_time": 0.11167144775390625 }, { "epoch": 6.98089599609375e-06, "model_forward_time": 0.02514338493347168, "step": 4575 }, { "epoch": 6.98089599609375e-06, "step": 4575, "training_step_time": 0.21238327026367188 }, { "epoch": 6.982421875e-06, "model_forward_time": 0.02506232261657715, "step": 4576 }, { "epoch": 6.982421875e-06, "step": 4576, "training_step_time": 0.11548423767089844 }, { "epoch": 6.98394775390625e-06, "model_forward_time": 0.024883747100830078, "step": 4577 }, { "epoch": 6.98394775390625e-06, "step": 4577, "training_step_time": 0.11183857917785645 }, { "epoch": 6.9854736328125e-06, "model_forward_time": 0.025638103485107422, "step": 4578 }, { "epoch": 6.9854736328125e-06, "step": 4578, "training_step_time": 0.17650318145751953 }, { "epoch": 6.98699951171875e-06, "model_forward_time": 0.02788567543029785, "step": 4579 }, { "epoch": 6.98699951171875e-06, "step": 4579, "training_step_time": 0.16304922103881836 }, { "epoch": 6.988525390625e-06, "grad_norm": 0.46140211820602417, "learning_rate": 9.714585693503974e-05, "loss": 0.0848, "step": 4580 }, { "epoch": 6.988525390625e-06, "model_forward_time": 0.025172710418701172, "step": 4580 }, { "epoch": 6.988525390625e-06, "step": 4580, "training_step_time": 0.11041736602783203 }, { "epoch": 6.99005126953125e-06, "model_forward_time": 0.026709318161010742, "step": 4581 }, { "epoch": 6.99005126953125e-06, "step": 4581, "training_step_time": 0.11118030548095703 }, { "epoch": 6.9915771484375e-06, "model_forward_time": 0.025292634963989258, "step": 4582 }, { "epoch": 6.9915771484375e-06, "step": 4582, "training_step_time": 0.1097104549407959 }, { "epoch": 6.99310302734375e-06, "model_forward_time": 0.02509617805480957, "step": 4583 }, { "epoch": 6.99310302734375e-06, "step": 4583, "training_step_time": 0.1124885082244873 }, { "epoch": 6.99462890625e-06, "model_forward_time": 0.025370359420776367, "step": 4584 }, { "epoch": 6.99462890625e-06, "step": 4584, "training_step_time": 0.10934329032897949 }, { "epoch": 6.99615478515625e-06, "model_forward_time": 0.025284290313720703, "step": 4585 }, { "epoch": 6.99615478515625e-06, "step": 4585, "training_step_time": 0.11519885063171387 }, { "epoch": 6.9976806640625e-06, "model_forward_time": 0.025420665740966797, "step": 4586 }, { "epoch": 6.9976806640625e-06, "step": 4586, "training_step_time": 0.16600513458251953 }, { "epoch": 6.99920654296875e-06, "model_forward_time": 0.023598432540893555, "step": 4587 }, { "epoch": 6.99920654296875e-06, "step": 4587, "training_step_time": 0.18465733528137207 }, { "epoch": 7.000732421875e-06, "model_forward_time": 0.024540185928344727, "step": 4588 }, { "epoch": 7.000732421875e-06, "step": 4588, "training_step_time": 0.18309807777404785 }, { "epoch": 7.00225830078125e-06, "model_forward_time": 0.024094343185424805, "step": 4589 }, { "epoch": 7.00225830078125e-06, "step": 4589, "training_step_time": 0.16628265380859375 }, { "epoch": 7.0037841796875e-06, "grad_norm": 0.5488386750221252, "learning_rate": 9.712747326859315e-05, "loss": 0.0904, "step": 4590 }, { "epoch": 7.0037841796875e-06, "model_forward_time": 0.024530649185180664, "step": 4590 }, { "epoch": 7.0037841796875e-06, "step": 4590, "training_step_time": 0.15399384498596191 }, { "epoch": 7.00531005859375e-06, "model_forward_time": 0.024194002151489258, "step": 4591 }, { "epoch": 7.00531005859375e-06, "step": 4591, "training_step_time": 0.14029884338378906 }, { "epoch": 7.0068359375e-06, "model_forward_time": 0.024872541427612305, "step": 4592 }, { "epoch": 7.0068359375e-06, "step": 4592, "training_step_time": 0.1476421356201172 }, { "epoch": 7.00836181640625e-06, "model_forward_time": 0.024680614471435547, "step": 4593 }, { "epoch": 7.00836181640625e-06, "step": 4593, "training_step_time": 0.12543892860412598 }, { "epoch": 7.0098876953125e-06, "model_forward_time": 0.024805068969726562, "step": 4594 }, { "epoch": 7.0098876953125e-06, "step": 4594, "training_step_time": 0.12495279312133789 }, { "epoch": 7.01141357421875e-06, "model_forward_time": 0.02508997917175293, "step": 4595 }, { "epoch": 7.01141357421875e-06, "step": 4595, "training_step_time": 0.12061452865600586 }, { "epoch": 7.012939453125e-06, "model_forward_time": 0.024953126907348633, "step": 4596 }, { "epoch": 7.012939453125e-06, "step": 4596, "training_step_time": 0.11956357955932617 }, { "epoch": 7.01446533203125e-06, "model_forward_time": 0.02517080307006836, "step": 4597 }, { "epoch": 7.01446533203125e-06, "step": 4597, "training_step_time": 0.12378978729248047 }, { "epoch": 7.0159912109375e-06, "model_forward_time": 0.025837421417236328, "step": 4598 }, { "epoch": 7.0159912109375e-06, "step": 4598, "training_step_time": 0.15790629386901855 }, { "epoch": 7.01751708984375e-06, "model_forward_time": 0.025115013122558594, "step": 4599 }, { "epoch": 7.01751708984375e-06, "step": 4599, "training_step_time": 0.1769266128540039 }, { "epoch": 7.01904296875e-06, "grad_norm": 0.4443153142929077, "learning_rate": 9.710903233782272e-05, "loss": 0.0917, "step": 4600 }, { "epoch": 7.01904296875e-06, "model_forward_time": 0.02614116668701172, "step": 4600 }, { "epoch": 7.01904296875e-06, "step": 4600, "training_step_time": 0.1532728672027588 }, { "epoch": 7.02056884765625e-06, "model_forward_time": 0.02469921112060547, "step": 4601 }, { "epoch": 7.02056884765625e-06, "step": 4601, "training_step_time": 0.10748171806335449 }, { "epoch": 7.0220947265625e-06, "model_forward_time": 0.025840044021606445, "step": 4602 }, { "epoch": 7.0220947265625e-06, "step": 4602, "training_step_time": 0.11820530891418457 }, { "epoch": 7.02362060546875e-06, "model_forward_time": 0.025635480880737305, "step": 4603 }, { "epoch": 7.02362060546875e-06, "step": 4603, "training_step_time": 0.1128084659576416 }, { "epoch": 7.025146484375e-06, "model_forward_time": 0.02547478675842285, "step": 4604 }, { "epoch": 7.025146484375e-06, "step": 4604, "training_step_time": 0.17396831512451172 }, { "epoch": 7.02667236328125e-06, "model_forward_time": 0.025224924087524414, "step": 4605 }, { "epoch": 7.02667236328125e-06, "step": 4605, "training_step_time": 0.17707014083862305 }, { "epoch": 7.0281982421875e-06, "model_forward_time": 0.024532556533813477, "step": 4606 }, { "epoch": 7.0281982421875e-06, "step": 4606, "training_step_time": 0.11643719673156738 }, { "epoch": 7.02972412109375e-06, "model_forward_time": 0.028357267379760742, "step": 4607 }, { "epoch": 7.02972412109375e-06, "step": 4607, "training_step_time": 0.1205284595489502 }, { "epoch": 7.03125e-06, "model_forward_time": 0.02562546730041504, "step": 4608 }, { "epoch": 7.03125e-06, "step": 4608, "training_step_time": 0.10737967491149902 }, { "epoch": 7.03277587890625e-06, "model_forward_time": 0.025376558303833008, "step": 4609 }, { "epoch": 7.03277587890625e-06, "step": 4609, "training_step_time": 0.1080939769744873 }, { "epoch": 7.0343017578125e-06, "grad_norm": 0.5178048610687256, "learning_rate": 9.709053416513592e-05, "loss": 0.0848, "step": 4610 }, { "epoch": 7.0343017578125e-06, "model_forward_time": 0.025567054748535156, "step": 4610 }, { "epoch": 7.0343017578125e-06, "step": 4610, "training_step_time": 0.14335012435913086 }, { "epoch": 7.03582763671875e-06, "model_forward_time": 0.025458574295043945, "step": 4611 }, { "epoch": 7.03582763671875e-06, "step": 4611, "training_step_time": 0.16852974891662598 }, { "epoch": 7.037353515625e-06, "model_forward_time": 0.024663686752319336, "step": 4612 }, { "epoch": 7.037353515625e-06, "step": 4612, "training_step_time": 0.15189242362976074 }, { "epoch": 7.03887939453125e-06, "model_forward_time": 0.024314403533935547, "step": 4613 }, { "epoch": 7.03887939453125e-06, "step": 4613, "training_step_time": 0.1353294849395752 }, { "epoch": 7.0404052734375e-06, "model_forward_time": 0.028273820877075195, "step": 4614 }, { "epoch": 7.0404052734375e-06, "step": 4614, "training_step_time": 0.13442587852478027 }, { "epoch": 7.04193115234375e-06, "model_forward_time": 0.025510787963867188, "step": 4615 }, { "epoch": 7.04193115234375e-06, "step": 4615, "training_step_time": 0.11014699935913086 }, { "epoch": 7.04345703125e-06, "model_forward_time": 0.025209903717041016, "step": 4616 }, { "epoch": 7.04345703125e-06, "step": 4616, "training_step_time": 0.1221158504486084 }, { "epoch": 7.04498291015625e-06, "model_forward_time": 0.02506852149963379, "step": 4617 }, { "epoch": 7.04498291015625e-06, "step": 4617, "training_step_time": 0.11951327323913574 }, { "epoch": 7.0465087890625e-06, "model_forward_time": 0.02617192268371582, "step": 4618 }, { "epoch": 7.0465087890625e-06, "step": 4618, "training_step_time": 0.19681954383850098 }, { "epoch": 7.04803466796875e-06, "model_forward_time": 0.02478480339050293, "step": 4619 }, { "epoch": 7.04803466796875e-06, "step": 4619, "training_step_time": 0.1119844913482666 }, { "epoch": 7.049560546875e-06, "grad_norm": 0.37912517786026, "learning_rate": 9.707197877300974e-05, "loss": 0.0726, "step": 4620 }, { "epoch": 7.049560546875e-06, "model_forward_time": 0.025426626205444336, "step": 4620 }, { "epoch": 7.049560546875e-06, "step": 4620, "training_step_time": 0.11473941802978516 }, { "epoch": 7.05108642578125e-06, "model_forward_time": 0.02573418617248535, "step": 4621 }, { "epoch": 7.05108642578125e-06, "step": 4621, "training_step_time": 0.1118619441986084 }, { "epoch": 7.0526123046875e-06, "model_forward_time": 0.025964021682739258, "step": 4622 }, { "epoch": 7.0526123046875e-06, "step": 4622, "training_step_time": 0.11052846908569336 }, { "epoch": 7.05413818359375e-06, "model_forward_time": 0.025844573974609375, "step": 4623 }, { "epoch": 7.05413818359375e-06, "step": 4623, "training_step_time": 0.11656045913696289 }, { "epoch": 7.0556640625e-06, "model_forward_time": 0.02527451515197754, "step": 4624 }, { "epoch": 7.0556640625e-06, "step": 4624, "training_step_time": 0.11308622360229492 }, { "epoch": 7.05718994140625e-06, "model_forward_time": 0.025414705276489258, "step": 4625 }, { "epoch": 7.05718994140625e-06, "step": 4625, "training_step_time": 0.11145639419555664 }, { "epoch": 7.0587158203125e-06, "model_forward_time": 0.026253700256347656, "step": 4626 }, { "epoch": 7.0587158203125e-06, "step": 4626, "training_step_time": 0.11387109756469727 }, { "epoch": 7.06024169921875e-06, "model_forward_time": 0.025023221969604492, "step": 4627 }, { "epoch": 7.06024169921875e-06, "step": 4627, "training_step_time": 0.11048150062561035 }, { "epoch": 7.061767578125e-06, "model_forward_time": 0.025444746017456055, "step": 4628 }, { "epoch": 7.061767578125e-06, "step": 4628, "training_step_time": 0.11424756050109863 }, { "epoch": 7.06329345703125e-06, "model_forward_time": 0.025431156158447266, "step": 4629 }, { "epoch": 7.06329345703125e-06, "step": 4629, "training_step_time": 0.11282157897949219 }, { "epoch": 7.0648193359375e-06, "grad_norm": 0.703524649143219, "learning_rate": 9.705336618399077e-05, "loss": 0.0832, "step": 4630 }, { "epoch": 7.0648193359375e-06, "model_forward_time": 0.025510787963867188, "step": 4630 }, { "epoch": 7.0648193359375e-06, "step": 4630, "training_step_time": 0.10958480834960938 }, { "epoch": 7.06634521484375e-06, "model_forward_time": 0.025275468826293945, "step": 4631 }, { "epoch": 7.06634521484375e-06, "step": 4631, "training_step_time": 0.11008763313293457 }, { "epoch": 7.06787109375e-06, "model_forward_time": 0.025235891342163086, "step": 4632 }, { "epoch": 7.06787109375e-06, "step": 4632, "training_step_time": 0.10924983024597168 }, { "epoch": 7.06939697265625e-06, "model_forward_time": 0.02527475357055664, "step": 4633 }, { "epoch": 7.06939697265625e-06, "step": 4633, "training_step_time": 0.10968446731567383 }, { "epoch": 7.0709228515625e-06, "model_forward_time": 0.0252687931060791, "step": 4634 }, { "epoch": 7.0709228515625e-06, "step": 4634, "training_step_time": 0.1123650074005127 }, { "epoch": 7.07244873046875e-06, "model_forward_time": 0.025458097457885742, "step": 4635 }, { "epoch": 7.07244873046875e-06, "step": 4635, "training_step_time": 0.1084136962890625 }, { "epoch": 7.073974609375e-06, "model_forward_time": 0.02536296844482422, "step": 4636 }, { "epoch": 7.073974609375e-06, "step": 4636, "training_step_time": 0.11239242553710938 }, { "epoch": 7.07550048828125e-06, "model_forward_time": 0.029045820236206055, "step": 4637 }, { "epoch": 7.07550048828125e-06, "step": 4637, "training_step_time": 0.11421704292297363 }, { "epoch": 7.0770263671875e-06, "model_forward_time": 0.025288820266723633, "step": 4638 }, { "epoch": 7.0770263671875e-06, "step": 4638, "training_step_time": 0.11121821403503418 }, { "epoch": 7.07855224609375e-06, "model_forward_time": 0.026355981826782227, "step": 4639 }, { "epoch": 7.07855224609375e-06, "step": 4639, "training_step_time": 0.11396312713623047 }, { "epoch": 7.080078125e-06, "grad_norm": 0.5431896448135376, "learning_rate": 9.703469642069503e-05, "loss": 0.0866, "step": 4640 }, { "epoch": 7.080078125e-06, "model_forward_time": 0.025588512420654297, "step": 4640 }, { "epoch": 7.080078125e-06, "step": 4640, "training_step_time": 0.11531591415405273 }, { "epoch": 7.08160400390625e-06, "model_forward_time": 0.0265805721282959, "step": 4641 }, { "epoch": 7.08160400390625e-06, "step": 4641, "training_step_time": 0.11524343490600586 }, { "epoch": 7.0831298828125e-06, "model_forward_time": 0.02535414695739746, "step": 4642 }, { "epoch": 7.0831298828125e-06, "step": 4642, "training_step_time": 0.11068892478942871 }, { "epoch": 7.08465576171875e-06, "model_forward_time": 0.024876117706298828, "step": 4643 }, { "epoch": 7.08465576171875e-06, "step": 4643, "training_step_time": 0.12270236015319824 }, { "epoch": 7.086181640625e-06, "model_forward_time": 0.02570343017578125, "step": 4644 }, { "epoch": 7.086181640625e-06, "step": 4644, "training_step_time": 0.11148619651794434 }, { "epoch": 7.08770751953125e-06, "model_forward_time": 0.02771162986755371, "step": 4645 }, { "epoch": 7.08770751953125e-06, "step": 4645, "training_step_time": 0.11736011505126953 }, { "epoch": 7.0892333984375e-06, "model_forward_time": 0.02540302276611328, "step": 4646 }, { "epoch": 7.0892333984375e-06, "step": 4646, "training_step_time": 0.1275320053100586 }, { "epoch": 7.09075927734375e-06, "model_forward_time": 0.026554346084594727, "step": 4647 }, { "epoch": 7.09075927734375e-06, "step": 4647, "training_step_time": 0.12313508987426758 }, { "epoch": 7.09228515625e-06, "model_forward_time": 0.02524876594543457, "step": 4648 }, { "epoch": 7.09228515625e-06, "step": 4648, "training_step_time": 0.11437582969665527 }, { "epoch": 7.09381103515625e-06, "model_forward_time": 0.025593042373657227, "step": 4649 }, { "epoch": 7.09381103515625e-06, "step": 4649, "training_step_time": 0.20876097679138184 }, { "epoch": 7.0953369140625e-06, "grad_norm": 0.30929675698280334, "learning_rate": 9.701596950580806e-05, "loss": 0.0714, "step": 4650 }, { "epoch": 7.0953369140625e-06, "model_forward_time": 0.02449512481689453, "step": 4650 }, { "epoch": 7.0953369140625e-06, "step": 4650, "training_step_time": 0.1154944896697998 }, { "epoch": 7.09686279296875e-06, "model_forward_time": 0.024818897247314453, "step": 4651 }, { "epoch": 7.09686279296875e-06, "step": 4651, "training_step_time": 0.22406482696533203 }, { "epoch": 7.098388671875e-06, "model_forward_time": 0.026859521865844727, "step": 4652 }, { "epoch": 7.098388671875e-06, "step": 4652, "training_step_time": 0.13698959350585938 }, { "epoch": 7.09991455078125e-06, "model_forward_time": 0.024960041046142578, "step": 4653 }, { "epoch": 7.09991455078125e-06, "step": 4653, "training_step_time": 0.11091470718383789 }, { "epoch": 7.1014404296875e-06, "model_forward_time": 0.02565598487854004, "step": 4654 }, { "epoch": 7.1014404296875e-06, "step": 4654, "training_step_time": 0.11881065368652344 }, { "epoch": 7.10296630859375e-06, "model_forward_time": 0.025463581085205078, "step": 4655 }, { "epoch": 7.10296630859375e-06, "step": 4655, "training_step_time": 0.1113440990447998 }, { "epoch": 7.1044921875e-06, "model_forward_time": 0.025736570358276367, "step": 4656 }, { "epoch": 7.1044921875e-06, "step": 4656, "training_step_time": 0.11044645309448242 }, { "epoch": 7.10601806640625e-06, "model_forward_time": 0.025641202926635742, "step": 4657 }, { "epoch": 7.10601806640625e-06, "step": 4657, "training_step_time": 0.11413908004760742 }, { "epoch": 7.1075439453125e-06, "model_forward_time": 0.025508880615234375, "step": 4658 }, { "epoch": 7.1075439453125e-06, "step": 4658, "training_step_time": 0.11565136909484863 }, { "epoch": 7.10906982421875e-06, "model_forward_time": 0.02566838264465332, "step": 4659 }, { "epoch": 7.10906982421875e-06, "step": 4659, "training_step_time": 0.11598777770996094 }, { "epoch": 7.110595703125e-06, "grad_norm": 0.4373496472835541, "learning_rate": 9.699718546208484e-05, "loss": 0.1117, "step": 4660 }, { "epoch": 7.110595703125e-06, "model_forward_time": 0.02506399154663086, "step": 4660 }, { "epoch": 7.110595703125e-06, "step": 4660, "training_step_time": 0.1157388687133789 }, { "epoch": 7.11212158203125e-06, "model_forward_time": 0.0249176025390625, "step": 4661 }, { "epoch": 7.11212158203125e-06, "step": 4661, "training_step_time": 0.10822415351867676 }, { "epoch": 7.1136474609375e-06, "model_forward_time": 0.02518630027770996, "step": 4662 }, { "epoch": 7.1136474609375e-06, "step": 4662, "training_step_time": 0.17522430419921875 }, { "epoch": 7.11517333984375e-06, "model_forward_time": 0.024921894073486328, "step": 4663 }, { "epoch": 7.11517333984375e-06, "step": 4663, "training_step_time": 0.15344667434692383 }, { "epoch": 7.11669921875e-06, "model_forward_time": 0.02471017837524414, "step": 4664 }, { "epoch": 7.11669921875e-06, "step": 4664, "training_step_time": 0.10583686828613281 }, { "epoch": 7.11822509765625e-06, "model_forward_time": 0.02735304832458496, "step": 4665 }, { "epoch": 7.11822509765625e-06, "step": 4665, "training_step_time": 0.10798430442810059 }, { "epoch": 7.1197509765625e-06, "model_forward_time": 0.02536320686340332, "step": 4666 }, { "epoch": 7.1197509765625e-06, "step": 4666, "training_step_time": 0.21842479705810547 }, { "epoch": 7.12127685546875e-06, "model_forward_time": 0.024820804595947266, "step": 4667 }, { "epoch": 7.12127685546875e-06, "step": 4667, "training_step_time": 0.12018632888793945 }, { "epoch": 7.122802734375e-06, "model_forward_time": 0.02475118637084961, "step": 4668 }, { "epoch": 7.122802734375e-06, "step": 4668, "training_step_time": 0.10997509956359863 }, { "epoch": 7.12432861328125e-06, "model_forward_time": 0.025513648986816406, "step": 4669 }, { "epoch": 7.12432861328125e-06, "step": 4669, "training_step_time": 0.10856842994689941 }, { "epoch": 7.1258544921875e-06, "grad_norm": 0.5405192375183105, "learning_rate": 9.697834431234973e-05, "loss": 0.0827, "step": 4670 }, { "epoch": 7.1258544921875e-06, "model_forward_time": 0.025429725646972656, "step": 4670 }, { "epoch": 7.1258544921875e-06, "step": 4670, "training_step_time": 0.1075296401977539 }, { "epoch": 7.12738037109375e-06, "model_forward_time": 0.025585651397705078, "step": 4671 }, { "epoch": 7.12738037109375e-06, "step": 4671, "training_step_time": 0.10703492164611816 }, { "epoch": 7.12890625e-06, "model_forward_time": 0.02559947967529297, "step": 4672 }, { "epoch": 7.12890625e-06, "step": 4672, "training_step_time": 0.11008214950561523 }, { "epoch": 7.13043212890625e-06, "model_forward_time": 0.026738882064819336, "step": 4673 }, { "epoch": 7.13043212890625e-06, "step": 4673, "training_step_time": 0.10806107521057129 }, { "epoch": 7.1319580078125e-06, "model_forward_time": 0.025689125061035156, "step": 4674 }, { "epoch": 7.1319580078125e-06, "step": 4674, "training_step_time": 0.1119537353515625 }, { "epoch": 7.13348388671875e-06, "model_forward_time": 0.025330066680908203, "step": 4675 }, { "epoch": 7.13348388671875e-06, "step": 4675, "training_step_time": 0.10892415046691895 }, { "epoch": 7.135009765625e-06, "model_forward_time": 0.02545022964477539, "step": 4676 }, { "epoch": 7.135009765625e-06, "step": 4676, "training_step_time": 0.12022924423217773 }, { "epoch": 7.13653564453125e-06, "model_forward_time": 0.0248873233795166, "step": 4677 }, { "epoch": 7.13653564453125e-06, "step": 4677, "training_step_time": 0.11647558212280273 }, { "epoch": 7.1380615234375e-06, "model_forward_time": 0.025716304779052734, "step": 4678 }, { "epoch": 7.1380615234375e-06, "step": 4678, "training_step_time": 0.1166234016418457 }, { "epoch": 7.13958740234375e-06, "model_forward_time": 0.025435209274291992, "step": 4679 }, { "epoch": 7.13958740234375e-06, "step": 4679, "training_step_time": 0.11140942573547363 }, { "epoch": 7.14111328125e-06, "grad_norm": 0.4129830300807953, "learning_rate": 9.695944607949649e-05, "loss": 0.0891, "step": 4680 }, { "epoch": 7.14111328125e-06, "model_forward_time": 0.025557994842529297, "step": 4680 }, { "epoch": 7.14111328125e-06, "step": 4680, "training_step_time": 0.11357522010803223 }, { "epoch": 7.14263916015625e-06, "model_forward_time": 0.025053739547729492, "step": 4681 }, { "epoch": 7.14263916015625e-06, "step": 4681, "training_step_time": 0.11266279220581055 }, { "epoch": 7.1441650390625e-06, "model_forward_time": 0.025322914123535156, "step": 4682 }, { "epoch": 7.1441650390625e-06, "step": 4682, "training_step_time": 0.11516499519348145 }, { "epoch": 7.14569091796875e-06, "model_forward_time": 0.025264501571655273, "step": 4683 }, { "epoch": 7.14569091796875e-06, "step": 4683, "training_step_time": 0.11101269721984863 }, { "epoch": 7.147216796875e-06, "model_forward_time": 0.025271177291870117, "step": 4684 }, { "epoch": 7.147216796875e-06, "step": 4684, "training_step_time": 0.11160826683044434 }, { "epoch": 7.14874267578125e-06, "model_forward_time": 0.025448322296142578, "step": 4685 }, { "epoch": 7.14874267578125e-06, "step": 4685, "training_step_time": 0.11337518692016602 }, { "epoch": 7.1502685546875e-06, "model_forward_time": 0.025115013122558594, "step": 4686 }, { "epoch": 7.1502685546875e-06, "step": 4686, "training_step_time": 0.2000284194946289 }, { "epoch": 7.15179443359375e-06, "model_forward_time": 0.024370193481445312, "step": 4687 }, { "epoch": 7.15179443359375e-06, "step": 4687, "training_step_time": 0.10823369026184082 }, { "epoch": 7.1533203125e-06, "model_forward_time": 0.024539947509765625, "step": 4688 }, { "epoch": 7.1533203125e-06, "step": 4688, "training_step_time": 0.1290898323059082 }, { "epoch": 7.15484619140625e-06, "model_forward_time": 0.02523493766784668, "step": 4689 }, { "epoch": 7.15484619140625e-06, "step": 4689, "training_step_time": 0.10941481590270996 }, { "epoch": 7.1563720703125e-06, "grad_norm": 0.34348055720329285, "learning_rate": 9.69404907864883e-05, "loss": 0.0967, "step": 4690 }, { "epoch": 7.1563720703125e-06, "model_forward_time": 0.02564263343811035, "step": 4690 }, { "epoch": 7.1563720703125e-06, "step": 4690, "training_step_time": 0.17798900604248047 }, { "epoch": 7.15789794921875e-06, "model_forward_time": 0.02455449104309082, "step": 4691 }, { "epoch": 7.15789794921875e-06, "step": 4691, "training_step_time": 0.14079976081848145 }, { "epoch": 7.159423828125e-06, "model_forward_time": 0.02468395233154297, "step": 4692 }, { "epoch": 7.159423828125e-06, "step": 4692, "training_step_time": 0.1131138801574707 }, { "epoch": 7.16094970703125e-06, "model_forward_time": 0.024825572967529297, "step": 4693 }, { "epoch": 7.16094970703125e-06, "step": 4693, "training_step_time": 0.16120600700378418 }, { "epoch": 7.1624755859375e-06, "model_forward_time": 0.02475738525390625, "step": 4694 }, { "epoch": 7.1624755859375e-06, "step": 4694, "training_step_time": 0.18631601333618164 }, { "epoch": 7.16400146484375e-06, "model_forward_time": 0.024166345596313477, "step": 4695 }, { "epoch": 7.16400146484375e-06, "step": 4695, "training_step_time": 0.18218517303466797 }, { "epoch": 7.16552734375e-06, "model_forward_time": 0.02521371841430664, "step": 4696 }, { "epoch": 7.16552734375e-06, "step": 4696, "training_step_time": 0.1628427505493164 }, { "epoch": 7.16705322265625e-06, "model_forward_time": 0.024338722229003906, "step": 4697 }, { "epoch": 7.16705322265625e-06, "step": 4697, "training_step_time": 0.1306002140045166 }, { "epoch": 7.1685791015625e-06, "model_forward_time": 0.024640321731567383, "step": 4698 }, { "epoch": 7.1685791015625e-06, "step": 4698, "training_step_time": 0.12343811988830566 }, { "epoch": 7.17010498046875e-06, "model_forward_time": 0.02477407455444336, "step": 4699 }, { "epoch": 7.17010498046875e-06, "step": 4699, "training_step_time": 0.13312363624572754 }, { "epoch": 7.171630859375e-06, "grad_norm": 0.4429035186767578, "learning_rate": 9.692147845635761e-05, "loss": 0.085, "step": 4700 }, { "epoch": 7.171630859375e-06, "model_forward_time": 0.02480316162109375, "step": 4700 }, { "epoch": 7.171630859375e-06, "step": 4700, "training_step_time": 0.12159991264343262 }, { "epoch": 7.17315673828125e-06, "model_forward_time": 0.025419235229492188, "step": 4701 }, { "epoch": 7.17315673828125e-06, "step": 4701, "training_step_time": 0.11999964714050293 }, { "epoch": 7.1746826171875e-06, "model_forward_time": 0.025213956832885742, "step": 4702 }, { "epoch": 7.1746826171875e-06, "step": 4702, "training_step_time": 0.11420083045959473 }, { "epoch": 7.17620849609375e-06, "model_forward_time": 0.02863621711730957, "step": 4703 }, { "epoch": 7.17620849609375e-06, "step": 4703, "training_step_time": 0.11843442916870117 }, { "epoch": 7.177734375e-06, "model_forward_time": 0.025918245315551758, "step": 4704 }, { "epoch": 7.177734375e-06, "step": 4704, "training_step_time": 0.10933852195739746 }, { "epoch": 7.17926025390625e-06, "model_forward_time": 0.02541327476501465, "step": 4705 }, { "epoch": 7.17926025390625e-06, "step": 4705, "training_step_time": 0.11254739761352539 }, { "epoch": 7.1807861328125e-06, "model_forward_time": 0.02548956871032715, "step": 4706 }, { "epoch": 7.1807861328125e-06, "step": 4706, "training_step_time": 0.10585522651672363 }, { "epoch": 7.18231201171875e-06, "model_forward_time": 0.02466583251953125, "step": 4707 }, { "epoch": 7.18231201171875e-06, "step": 4707, "training_step_time": 0.10611462593078613 }, { "epoch": 7.183837890625e-06, "model_forward_time": 0.024873733520507812, "step": 4708 }, { "epoch": 7.183837890625e-06, "step": 4708, "training_step_time": 0.10912036895751953 }, { "epoch": 7.18536376953125e-06, "model_forward_time": 0.02541208267211914, "step": 4709 }, { "epoch": 7.18536376953125e-06, "step": 4709, "training_step_time": 0.11274433135986328 }, { "epoch": 7.1868896484375e-06, "grad_norm": 0.34829989075660706, "learning_rate": 9.690240911220618e-05, "loss": 0.0911, "step": 4710 }, { "epoch": 7.1868896484375e-06, "model_forward_time": 0.025482654571533203, "step": 4710 }, { "epoch": 7.1868896484375e-06, "step": 4710, "training_step_time": 0.11333894729614258 }, { "epoch": 7.18841552734375e-06, "model_forward_time": 0.025823116302490234, "step": 4711 }, { "epoch": 7.18841552734375e-06, "step": 4711, "training_step_time": 0.21737074851989746 }, { "epoch": 7.18994140625e-06, "model_forward_time": 0.024890422821044922, "step": 4712 }, { "epoch": 7.18994140625e-06, "step": 4712, "training_step_time": 0.11287426948547363 }, { "epoch": 7.19146728515625e-06, "model_forward_time": 0.02470231056213379, "step": 4713 }, { "epoch": 7.19146728515625e-06, "step": 4713, "training_step_time": 0.10760116577148438 }, { "epoch": 7.1929931640625e-06, "model_forward_time": 0.025428056716918945, "step": 4714 }, { "epoch": 7.1929931640625e-06, "step": 4714, "training_step_time": 0.10931229591369629 }, { "epoch": 7.19451904296875e-06, "model_forward_time": 0.025420188903808594, "step": 4715 }, { "epoch": 7.19451904296875e-06, "step": 4715, "training_step_time": 0.11054682731628418 }, { "epoch": 7.196044921875e-06, "model_forward_time": 0.025307178497314453, "step": 4716 }, { "epoch": 7.196044921875e-06, "step": 4716, "training_step_time": 0.10775995254516602 }, { "epoch": 7.19757080078125e-06, "model_forward_time": 0.025296926498413086, "step": 4717 }, { "epoch": 7.19757080078125e-06, "step": 4717, "training_step_time": 0.10805392265319824 }, { "epoch": 7.1990966796875e-06, "model_forward_time": 0.025362253189086914, "step": 4718 }, { "epoch": 7.1990966796875e-06, "step": 4718, "training_step_time": 0.11155128479003906 }, { "epoch": 7.20062255859375e-06, "model_forward_time": 0.02552938461303711, "step": 4719 }, { "epoch": 7.20062255859375e-06, "step": 4719, "training_step_time": 0.11119389533996582 }, { "epoch": 7.2021484375e-06, "grad_norm": 0.4955514669418335, "learning_rate": 9.688328277720507e-05, "loss": 0.0818, "step": 4720 }, { "epoch": 7.2021484375e-06, "model_forward_time": 0.024996280670166016, "step": 4720 }, { "epoch": 7.2021484375e-06, "step": 4720, "training_step_time": 0.1117095947265625 }, { "epoch": 7.20367431640625e-06, "model_forward_time": 0.02483820915222168, "step": 4721 }, { "epoch": 7.20367431640625e-06, "step": 4721, "training_step_time": 0.10961270332336426 }, { "epoch": 7.2052001953125e-06, "model_forward_time": 0.02516341209411621, "step": 4722 }, { "epoch": 7.2052001953125e-06, "step": 4722, "training_step_time": 0.10739707946777344 }, { "epoch": 7.20672607421875e-06, "model_forward_time": 0.024984359741210938, "step": 4723 }, { "epoch": 7.20672607421875e-06, "step": 4723, "training_step_time": 0.11313128471374512 }, { "epoch": 7.208251953125e-06, "model_forward_time": 0.025177955627441406, "step": 4724 }, { "epoch": 7.208251953125e-06, "step": 4724, "training_step_time": 0.10971808433532715 }, { "epoch": 7.20977783203125e-06, "model_forward_time": 0.024903535842895508, "step": 4725 }, { "epoch": 7.20977783203125e-06, "step": 4725, "training_step_time": 0.10666942596435547 }, { "epoch": 7.2113037109375e-06, "model_forward_time": 0.02503657341003418, "step": 4726 }, { "epoch": 7.2113037109375e-06, "step": 4726, "training_step_time": 0.11020350456237793 }, { "epoch": 7.21282958984375e-06, "model_forward_time": 0.025382280349731445, "step": 4727 }, { "epoch": 7.21282958984375e-06, "step": 4727, "training_step_time": 0.10953283309936523 }, { "epoch": 7.21435546875e-06, "model_forward_time": 0.025365352630615234, "step": 4728 }, { "epoch": 7.21435546875e-06, "step": 4728, "training_step_time": 0.10731363296508789 }, { "epoch": 7.21588134765625e-06, "model_forward_time": 0.025560379028320312, "step": 4729 }, { "epoch": 7.21588134765625e-06, "step": 4729, "training_step_time": 0.10726451873779297 }, { "epoch": 7.2174072265625e-06, "grad_norm": 0.2986734211444855, "learning_rate": 9.686409947459458e-05, "loss": 0.0894, "step": 4730 }, { "epoch": 7.2174072265625e-06, "model_forward_time": 0.025249958038330078, "step": 4730 }, { "epoch": 7.2174072265625e-06, "step": 4730, "training_step_time": 0.11115741729736328 }, { "epoch": 7.21893310546875e-06, "model_forward_time": 0.025550365447998047, "step": 4731 }, { "epoch": 7.21893310546875e-06, "step": 4731, "training_step_time": 0.20619535446166992 }, { "epoch": 7.220458984375e-06, "model_forward_time": 0.024132966995239258, "step": 4732 }, { "epoch": 7.220458984375e-06, "step": 4732, "training_step_time": 0.11654090881347656 }, { "epoch": 7.22198486328125e-06, "model_forward_time": 0.0243375301361084, "step": 4733 }, { "epoch": 7.22198486328125e-06, "step": 4733, "training_step_time": 0.14156818389892578 }, { "epoch": 7.2235107421875e-06, "model_forward_time": 0.025305747985839844, "step": 4734 }, { "epoch": 7.2235107421875e-06, "step": 4734, "training_step_time": 0.16042780876159668 }, { "epoch": 7.22503662109375e-06, "model_forward_time": 0.024780750274658203, "step": 4735 }, { "epoch": 7.22503662109375e-06, "step": 4735, "training_step_time": 0.22370696067810059 }, { "epoch": 7.2265625e-06, "model_forward_time": 0.02442646026611328, "step": 4736 }, { "epoch": 7.2265625e-06, "step": 4736, "training_step_time": 0.11294078826904297 }, { "epoch": 7.22808837890625e-06, "model_forward_time": 0.02524280548095703, "step": 4737 }, { "epoch": 7.22808837890625e-06, "step": 4737, "training_step_time": 0.10611438751220703 }, { "epoch": 7.2296142578125e-06, "model_forward_time": 0.025254249572753906, "step": 4738 }, { "epoch": 7.2296142578125e-06, "step": 4738, "training_step_time": 0.10901308059692383 }, { "epoch": 7.23114013671875e-06, "model_forward_time": 0.025461912155151367, "step": 4739 }, { "epoch": 7.23114013671875e-06, "step": 4739, "training_step_time": 0.17905044555664062 }, { "epoch": 7.232666015625e-06, "grad_norm": 0.4050094783306122, "learning_rate": 9.684485922768422e-05, "loss": 0.0831, "step": 4740 }, { "epoch": 7.232666015625e-06, "model_forward_time": 0.025724411010742188, "step": 4740 }, { "epoch": 7.232666015625e-06, "step": 4740, "training_step_time": 0.14004945755004883 }, { "epoch": 7.23419189453125e-06, "model_forward_time": 0.024344682693481445, "step": 4741 }, { "epoch": 7.23419189453125e-06, "step": 4741, "training_step_time": 0.10919427871704102 }, { "epoch": 7.2357177734375e-06, "model_forward_time": 0.02638101577758789, "step": 4742 }, { "epoch": 7.2357177734375e-06, "step": 4742, "training_step_time": 0.12328267097473145 }, { "epoch": 7.23724365234375e-06, "model_forward_time": 0.025105953216552734, "step": 4743 }, { "epoch": 7.23724365234375e-06, "step": 4743, "training_step_time": 0.12960028648376465 }, { "epoch": 7.23876953125e-06, "model_forward_time": 0.025790929794311523, "step": 4744 }, { "epoch": 7.23876953125e-06, "step": 4744, "training_step_time": 0.11344027519226074 }, { "epoch": 7.24029541015625e-06, "model_forward_time": 0.025326013565063477, "step": 4745 }, { "epoch": 7.24029541015625e-06, "step": 4745, "training_step_time": 0.12403416633605957 }, { "epoch": 7.2418212890625e-06, "model_forward_time": 0.025507688522338867, "step": 4746 }, { "epoch": 7.2418212890625e-06, "step": 4746, "training_step_time": 0.10900402069091797 }, { "epoch": 7.24334716796875e-06, "model_forward_time": 0.02517104148864746, "step": 4747 }, { "epoch": 7.24334716796875e-06, "step": 4747, "training_step_time": 0.10666775703430176 }, { "epoch": 7.244873046875e-06, "model_forward_time": 0.025615930557250977, "step": 4748 }, { "epoch": 7.244873046875e-06, "step": 4748, "training_step_time": 0.11224555969238281 }, { "epoch": 7.24639892578125e-06, "model_forward_time": 0.025099515914916992, "step": 4749 }, { "epoch": 7.24639892578125e-06, "step": 4749, "training_step_time": 0.14161157608032227 }, { "epoch": 7.2479248046875e-06, "grad_norm": 0.49814528226852417, "learning_rate": 9.682556205985274e-05, "loss": 0.0765, "step": 4750 }, { "epoch": 7.2479248046875e-06, "model_forward_time": 0.0240936279296875, "step": 4750 }, { "epoch": 7.2479248046875e-06, "step": 4750, "training_step_time": 0.170487642288208 }, { "epoch": 7.24945068359375e-06, "model_forward_time": 0.024842500686645508, "step": 4751 }, { "epoch": 7.24945068359375e-06, "step": 4751, "training_step_time": 0.11417365074157715 }, { "epoch": 7.2509765625e-06, "model_forward_time": 0.0250244140625, "step": 4752 }, { "epoch": 7.2509765625e-06, "step": 4752, "training_step_time": 0.2241358757019043 }, { "epoch": 7.25250244140625e-06, "model_forward_time": 0.024251222610473633, "step": 4753 }, { "epoch": 7.25250244140625e-06, "step": 4753, "training_step_time": 0.14681768417358398 }, { "epoch": 7.2540283203125e-06, "model_forward_time": 0.024530887603759766, "step": 4754 }, { "epoch": 7.2540283203125e-06, "step": 4754, "training_step_time": 0.1694498062133789 }, { "epoch": 7.25555419921875e-06, "model_forward_time": 0.024786710739135742, "step": 4755 }, { "epoch": 7.25555419921875e-06, "step": 4755, "training_step_time": 0.1360621452331543 }, { "epoch": 7.257080078125e-06, "model_forward_time": 0.025052785873413086, "step": 4756 }, { "epoch": 7.257080078125e-06, "step": 4756, "training_step_time": 0.12085866928100586 }, { "epoch": 7.25860595703125e-06, "model_forward_time": 0.024811744689941406, "step": 4757 }, { "epoch": 7.25860595703125e-06, "step": 4757, "training_step_time": 0.11929082870483398 }, { "epoch": 7.2601318359375e-06, "model_forward_time": 0.024414777755737305, "step": 4758 }, { "epoch": 7.2601318359375e-06, "step": 4758, "training_step_time": 0.1154484748840332 }, { "epoch": 7.26165771484375e-06, "model_forward_time": 0.024258136749267578, "step": 4759 }, { "epoch": 7.26165771484375e-06, "step": 4759, "training_step_time": 0.11262106895446777 }, { "epoch": 7.26318359375e-06, "grad_norm": 0.5395764112472534, "learning_rate": 9.6806207994548e-05, "loss": 0.0837, "step": 4760 }, { "epoch": 7.26318359375e-06, "model_forward_time": 0.024302244186401367, "step": 4760 }, { "epoch": 7.26318359375e-06, "step": 4760, "training_step_time": 0.10857677459716797 }, { "epoch": 7.26470947265625e-06, "model_forward_time": 0.02565765380859375, "step": 4761 }, { "epoch": 7.26470947265625e-06, "step": 4761, "training_step_time": 0.11458134651184082 }, { "epoch": 7.2662353515625e-06, "model_forward_time": 0.025076627731323242, "step": 4762 }, { "epoch": 7.2662353515625e-06, "step": 4762, "training_step_time": 0.11231398582458496 }, { "epoch": 7.26776123046875e-06, "model_forward_time": 0.02521228790283203, "step": 4763 }, { "epoch": 7.26776123046875e-06, "step": 4763, "training_step_time": 0.1078341007232666 }, { "epoch": 7.269287109375e-06, "model_forward_time": 0.025334596633911133, "step": 4764 }, { "epoch": 7.269287109375e-06, "step": 4764, "training_step_time": 0.11000871658325195 }, { "epoch": 7.27081298828125e-06, "model_forward_time": 0.024931907653808594, "step": 4765 }, { "epoch": 7.27081298828125e-06, "step": 4765, "training_step_time": 0.1079704761505127 }, { "epoch": 7.2723388671875e-06, "model_forward_time": 0.025057554244995117, "step": 4766 }, { "epoch": 7.2723388671875e-06, "step": 4766, "training_step_time": 0.11118173599243164 }, { "epoch": 7.27386474609375e-06, "model_forward_time": 0.025484561920166016, "step": 4767 }, { "epoch": 7.27386474609375e-06, "step": 4767, "training_step_time": 0.11023402214050293 }, { "epoch": 7.275390625e-06, "model_forward_time": 0.024859189987182617, "step": 4768 }, { "epoch": 7.275390625e-06, "step": 4768, "training_step_time": 0.10971426963806152 }, { "epoch": 7.27691650390625e-06, "model_forward_time": 0.025653600692749023, "step": 4769 }, { "epoch": 7.27691650390625e-06, "step": 4769, "training_step_time": 0.11468052864074707 }, { "epoch": 7.2784423828125e-06, "grad_norm": 0.4616451859474182, "learning_rate": 9.6786797055287e-05, "loss": 0.0988, "step": 4770 }, { "epoch": 7.2784423828125e-06, "model_forward_time": 0.02511739730834961, "step": 4770 }, { "epoch": 7.2784423828125e-06, "step": 4770, "training_step_time": 0.10827922821044922 }, { "epoch": 7.27996826171875e-06, "model_forward_time": 0.025227785110473633, "step": 4771 }, { "epoch": 7.27996826171875e-06, "step": 4771, "training_step_time": 0.10714459419250488 }, { "epoch": 7.281494140625e-06, "model_forward_time": 0.024961471557617188, "step": 4772 }, { "epoch": 7.281494140625e-06, "step": 4772, "training_step_time": 0.11166548728942871 }, { "epoch": 7.28302001953125e-06, "model_forward_time": 0.02535867691040039, "step": 4773 }, { "epoch": 7.28302001953125e-06, "step": 4773, "training_step_time": 0.10982370376586914 }, { "epoch": 7.2845458984375e-06, "model_forward_time": 0.0250699520111084, "step": 4774 }, { "epoch": 7.2845458984375e-06, "step": 4774, "training_step_time": 0.10844683647155762 }, { "epoch": 7.28607177734375e-06, "model_forward_time": 0.025260210037231445, "step": 4775 }, { "epoch": 7.28607177734375e-06, "step": 4775, "training_step_time": 0.10875964164733887 }, { "epoch": 7.28759765625e-06, "model_forward_time": 0.025423288345336914, "step": 4776 }, { "epoch": 7.28759765625e-06, "step": 4776, "training_step_time": 0.16981196403503418 }, { "epoch": 7.28912353515625e-06, "model_forward_time": 0.02450847625732422, "step": 4777 }, { "epoch": 7.28912353515625e-06, "step": 4777, "training_step_time": 0.12093997001647949 }, { "epoch": 7.2906494140625e-06, "model_forward_time": 0.025540828704833984, "step": 4778 }, { "epoch": 7.2906494140625e-06, "step": 4778, "training_step_time": 0.127671480178833 }, { "epoch": 7.29217529296875e-06, "model_forward_time": 0.025681734085083008, "step": 4779 }, { "epoch": 7.29217529296875e-06, "step": 4779, "training_step_time": 0.10622692108154297 }, { "epoch": 7.293701171875e-06, "grad_norm": 0.4182220697402954, "learning_rate": 9.676732926565585e-05, "loss": 0.0882, "step": 4780 }, { "epoch": 7.293701171875e-06, "model_forward_time": 0.025564908981323242, "step": 4780 }, { "epoch": 7.293701171875e-06, "step": 4780, "training_step_time": 0.1468055248260498 }, { "epoch": 7.29522705078125e-06, "model_forward_time": 0.02535271644592285, "step": 4781 }, { "epoch": 7.29522705078125e-06, "step": 4781, "training_step_time": 0.1356792449951172 }, { "epoch": 7.2967529296875e-06, "model_forward_time": 0.026181697845458984, "step": 4782 }, { "epoch": 7.2967529296875e-06, "step": 4782, "training_step_time": 0.11040091514587402 }, { "epoch": 7.29827880859375e-06, "model_forward_time": 0.024973154067993164, "step": 4783 }, { "epoch": 7.29827880859375e-06, "step": 4783, "training_step_time": 0.1093595027923584 }, { "epoch": 7.2998046875e-06, "model_forward_time": 0.02541971206665039, "step": 4784 }, { "epoch": 7.2998046875e-06, "step": 4784, "training_step_time": 0.1130976676940918 }, { "epoch": 7.30133056640625e-06, "model_forward_time": 0.025498390197753906, "step": 4785 }, { "epoch": 7.30133056640625e-06, "step": 4785, "training_step_time": 0.17387175559997559 }, { "epoch": 7.3028564453125e-06, "model_forward_time": 0.024752140045166016, "step": 4786 }, { "epoch": 7.3028564453125e-06, "step": 4786, "training_step_time": 0.13927507400512695 }, { "epoch": 7.30438232421875e-06, "model_forward_time": 0.025344371795654297, "step": 4787 }, { "epoch": 7.30438232421875e-06, "step": 4787, "training_step_time": 0.20563936233520508 }, { "epoch": 7.305908203125e-06, "model_forward_time": 0.024556636810302734, "step": 4788 }, { "epoch": 7.305908203125e-06, "step": 4788, "training_step_time": 0.13536930084228516 }, { "epoch": 7.30743408203125e-06, "model_forward_time": 0.02483654022216797, "step": 4789 }, { "epoch": 7.30743408203125e-06, "step": 4789, "training_step_time": 0.12777328491210938 }, { "epoch": 7.3089599609375e-06, "grad_norm": 0.6424593329429626, "learning_rate": 9.674780464930979e-05, "loss": 0.085, "step": 4790 }, { "epoch": 7.3089599609375e-06, "model_forward_time": 0.024939298629760742, "step": 4790 }, { "epoch": 7.3089599609375e-06, "step": 4790, "training_step_time": 0.18844985961914062 }, { "epoch": 7.31048583984375e-06, "model_forward_time": 0.024781465530395508, "step": 4791 }, { "epoch": 7.31048583984375e-06, "step": 4791, "training_step_time": 0.1173393726348877 }, { "epoch": 7.31201171875e-06, "model_forward_time": 0.02458333969116211, "step": 4792 }, { "epoch": 7.31201171875e-06, "step": 4792, "training_step_time": 0.11635994911193848 }, { "epoch": 7.31353759765625e-06, "model_forward_time": 0.025293588638305664, "step": 4793 }, { "epoch": 7.31353759765625e-06, "step": 4793, "training_step_time": 0.10970664024353027 }, { "epoch": 7.3150634765625e-06, "model_forward_time": 0.025192975997924805, "step": 4794 }, { "epoch": 7.3150634765625e-06, "step": 4794, "training_step_time": 0.11306452751159668 }, { "epoch": 7.31658935546875e-06, "model_forward_time": 0.02521681785583496, "step": 4795 }, { "epoch": 7.31658935546875e-06, "step": 4795, "training_step_time": 0.10897374153137207 }, { "epoch": 7.318115234375e-06, "model_forward_time": 0.025452136993408203, "step": 4796 }, { "epoch": 7.318115234375e-06, "step": 4796, "training_step_time": 0.1086111068725586 }, { "epoch": 7.31964111328125e-06, "model_forward_time": 0.025265216827392578, "step": 4797 }, { "epoch": 7.31964111328125e-06, "step": 4797, "training_step_time": 0.21549034118652344 }, { "epoch": 7.3211669921875e-06, "model_forward_time": 0.025507450103759766, "step": 4798 }, { "epoch": 7.3211669921875e-06, "step": 4798, "training_step_time": 0.12057685852050781 }, { "epoch": 7.32269287109375e-06, "model_forward_time": 0.02481245994567871, "step": 4799 }, { "epoch": 7.32269287109375e-06, "step": 4799, "training_step_time": 0.1065824031829834 }, { "epoch": 7.32421875e-06, "grad_norm": 0.7688208222389221, "learning_rate": 9.672822322997305e-05, "loss": 0.0851, "step": 4800 }, { "epoch": 7.32421875e-06, "model_forward_time": 0.025484085083007812, "step": 4800 }, { "epoch": 7.32421875e-06, "step": 4800, "training_step_time": 0.21938180923461914 }, { "epoch": 7.32574462890625e-06, "model_forward_time": 0.024980545043945312, "step": 4801 }, { "epoch": 7.32574462890625e-06, "step": 4801, "training_step_time": 0.10861349105834961 }, { "epoch": 7.3272705078125e-06, "model_forward_time": 0.024562597274780273, "step": 4802 }, { "epoch": 7.3272705078125e-06, "step": 4802, "training_step_time": 0.1035916805267334 }, { "epoch": 7.32879638671875e-06, "model_forward_time": 0.02525925636291504, "step": 4803 }, { "epoch": 7.32879638671875e-06, "step": 4803, "training_step_time": 0.11091804504394531 }, { "epoch": 7.330322265625e-06, "model_forward_time": 0.025448322296142578, "step": 4804 }, { "epoch": 7.330322265625e-06, "step": 4804, "training_step_time": 0.10921454429626465 }, { "epoch": 7.33184814453125e-06, "model_forward_time": 0.025190353393554688, "step": 4805 }, { "epoch": 7.33184814453125e-06, "step": 4805, "training_step_time": 0.1101534366607666 }, { "epoch": 7.3333740234375e-06, "model_forward_time": 0.025453805923461914, "step": 4806 }, { "epoch": 7.3333740234375e-06, "step": 4806, "training_step_time": 0.1105642318725586 }, { "epoch": 7.33489990234375e-06, "model_forward_time": 0.02539229393005371, "step": 4807 }, { "epoch": 7.33489990234375e-06, "step": 4807, "training_step_time": 0.10724973678588867 }, { "epoch": 7.33642578125e-06, "model_forward_time": 0.02509903907775879, "step": 4808 }, { "epoch": 7.33642578125e-06, "step": 4808, "training_step_time": 0.10509634017944336 }, { "epoch": 7.33795166015625e-06, "model_forward_time": 0.025491952896118164, "step": 4809 }, { "epoch": 7.33795166015625e-06, "step": 4809, "training_step_time": 0.10988926887512207 }, { "epoch": 7.3394775390625e-06, "grad_norm": 0.3652362525463104, "learning_rate": 9.67085850314389e-05, "loss": 0.0785, "step": 4810 }, { "epoch": 7.3394775390625e-06, "model_forward_time": 0.025037288665771484, "step": 4810 }, { "epoch": 7.3394775390625e-06, "step": 4810, "training_step_time": 0.10843038558959961 }, { "epoch": 7.34100341796875e-06, "model_forward_time": 0.025537729263305664, "step": 4811 }, { "epoch": 7.34100341796875e-06, "step": 4811, "training_step_time": 0.10916590690612793 }, { "epoch": 7.342529296875e-06, "model_forward_time": 0.02484130859375, "step": 4812 }, { "epoch": 7.342529296875e-06, "step": 4812, "training_step_time": 0.10716819763183594 }, { "epoch": 7.34405517578125e-06, "model_forward_time": 0.02549004554748535, "step": 4813 }, { "epoch": 7.34405517578125e-06, "step": 4813, "training_step_time": 0.10799288749694824 }, { "epoch": 7.3455810546875e-06, "model_forward_time": 0.026071548461914062, "step": 4814 }, { "epoch": 7.3455810546875e-06, "step": 4814, "training_step_time": 0.10906171798706055 }, { "epoch": 7.34710693359375e-06, "model_forward_time": 0.025719642639160156, "step": 4815 }, { "epoch": 7.34710693359375e-06, "step": 4815, "training_step_time": 0.10698795318603516 }, { "epoch": 7.3486328125e-06, "model_forward_time": 0.025721073150634766, "step": 4816 }, { "epoch": 7.3486328125e-06, "step": 4816, "training_step_time": 0.10698890686035156 }, { "epoch": 7.35015869140625e-06, "model_forward_time": 0.025378942489624023, "step": 4817 }, { "epoch": 7.35015869140625e-06, "step": 4817, "training_step_time": 0.10692906379699707 }, { "epoch": 7.3516845703125e-06, "model_forward_time": 0.025424957275390625, "step": 4818 }, { "epoch": 7.3516845703125e-06, "step": 4818, "training_step_time": 0.10910320281982422 }, { "epoch": 7.35321044921875e-06, "model_forward_time": 0.025832653045654297, "step": 4819 }, { "epoch": 7.35321044921875e-06, "step": 4819, "training_step_time": 0.10638761520385742 }, { "epoch": 7.354736328125e-06, "grad_norm": 0.6113207936286926, "learning_rate": 9.668889007756961e-05, "loss": 0.0911, "step": 4820 }, { "epoch": 7.354736328125e-06, "model_forward_time": 0.025240182876586914, "step": 4820 }, { "epoch": 7.354736328125e-06, "step": 4820, "training_step_time": 0.10725903511047363 }, { "epoch": 7.35626220703125e-06, "model_forward_time": 0.025260448455810547, "step": 4821 }, { "epoch": 7.35626220703125e-06, "step": 4821, "training_step_time": 0.10784530639648438 }, { "epoch": 7.3577880859375e-06, "model_forward_time": 0.025668859481811523, "step": 4822 }, { "epoch": 7.3577880859375e-06, "step": 4822, "training_step_time": 0.1378769874572754 }, { "epoch": 7.35931396484375e-06, "model_forward_time": 0.025341272354125977, "step": 4823 }, { "epoch": 7.35931396484375e-06, "step": 4823, "training_step_time": 0.1064598560333252 }, { "epoch": 7.36083984375e-06, "model_forward_time": 0.025493860244750977, "step": 4824 }, { "epoch": 7.36083984375e-06, "step": 4824, "training_step_time": 0.13395953178405762 }, { "epoch": 7.36236572265625e-06, "model_forward_time": 0.025141477584838867, "step": 4825 }, { "epoch": 7.36236572265625e-06, "step": 4825, "training_step_time": 0.10646414756774902 }, { "epoch": 7.3638916015625e-06, "model_forward_time": 0.025498151779174805, "step": 4826 }, { "epoch": 7.3638916015625e-06, "step": 4826, "training_step_time": 0.15715265274047852 }, { "epoch": 7.36541748046875e-06, "model_forward_time": 0.02454066276550293, "step": 4827 }, { "epoch": 7.36541748046875e-06, "step": 4827, "training_step_time": 0.1482715606689453 }, { "epoch": 7.366943359375e-06, "model_forward_time": 0.02443099021911621, "step": 4828 }, { "epoch": 7.366943359375e-06, "step": 4828, "training_step_time": 0.21246743202209473 }, { "epoch": 7.36846923828125e-06, "model_forward_time": 0.025068998336791992, "step": 4829 }, { "epoch": 7.36846923828125e-06, "step": 4829, "training_step_time": 0.1546189785003662 }, { "epoch": 7.3699951171875e-06, "grad_norm": 0.329088419675827, "learning_rate": 9.66691383922964e-05, "loss": 0.0792, "step": 4830 }, { "epoch": 7.3699951171875e-06, "model_forward_time": 0.024265289306640625, "step": 4830 }, { "epoch": 7.3699951171875e-06, "step": 4830, "training_step_time": 0.1656179428100586 }, { "epoch": 7.37152099609375e-06, "model_forward_time": 0.024873971939086914, "step": 4831 }, { "epoch": 7.37152099609375e-06, "step": 4831, "training_step_time": 0.18037652969360352 }, { "epoch": 7.373046875e-06, "model_forward_time": 0.025984764099121094, "step": 4832 }, { "epoch": 7.373046875e-06, "step": 4832, "training_step_time": 0.14254474639892578 }, { "epoch": 7.37457275390625e-06, "model_forward_time": 0.028557300567626953, "step": 4833 }, { "epoch": 7.37457275390625e-06, "step": 4833, "training_step_time": 0.12417340278625488 }, { "epoch": 7.3760986328125e-06, "model_forward_time": 0.024771928787231445, "step": 4834 }, { "epoch": 7.3760986328125e-06, "step": 4834, "training_step_time": 0.11800670623779297 }, { "epoch": 7.37762451171875e-06, "model_forward_time": 0.025444984436035156, "step": 4835 }, { "epoch": 7.37762451171875e-06, "step": 4835, "training_step_time": 0.16401004791259766 }, { "epoch": 7.379150390625e-06, "model_forward_time": 0.024940013885498047, "step": 4836 }, { "epoch": 7.379150390625e-06, "step": 4836, "training_step_time": 0.1127314567565918 }, { "epoch": 7.38067626953125e-06, "model_forward_time": 0.024539709091186523, "step": 4837 }, { "epoch": 7.38067626953125e-06, "step": 4837, "training_step_time": 0.11633038520812988 }, { "epoch": 7.3822021484375e-06, "model_forward_time": 0.02541804313659668, "step": 4838 }, { "epoch": 7.3822021484375e-06, "step": 4838, "training_step_time": 0.11622262001037598 }, { "epoch": 7.38372802734375e-06, "model_forward_time": 0.025007963180541992, "step": 4839 }, { "epoch": 7.38372802734375e-06, "step": 4839, "training_step_time": 0.11131119728088379 }, { "epoch": 7.38525390625e-06, "grad_norm": 0.32527342438697815, "learning_rate": 9.664932999961942e-05, "loss": 0.0816, "step": 4840 }, { "epoch": 7.38525390625e-06, "model_forward_time": 0.025327682495117188, "step": 4840 }, { "epoch": 7.38525390625e-06, "step": 4840, "training_step_time": 0.11251616477966309 }, { "epoch": 7.38677978515625e-06, "model_forward_time": 0.025928735733032227, "step": 4841 }, { "epoch": 7.38677978515625e-06, "step": 4841, "training_step_time": 0.10884833335876465 }, { "epoch": 7.3883056640625e-06, "model_forward_time": 0.02536630630493164, "step": 4842 }, { "epoch": 7.3883056640625e-06, "step": 4842, "training_step_time": 0.21310710906982422 }, { "epoch": 7.38983154296875e-06, "model_forward_time": 0.02477860450744629, "step": 4843 }, { "epoch": 7.38983154296875e-06, "step": 4843, "training_step_time": 0.11549973487854004 }, { "epoch": 7.391357421875e-06, "model_forward_time": 0.0248260498046875, "step": 4844 }, { "epoch": 7.391357421875e-06, "step": 4844, "training_step_time": 0.11368012428283691 }, { "epoch": 7.39288330078125e-06, "model_forward_time": 0.02546381950378418, "step": 4845 }, { "epoch": 7.39288330078125e-06, "step": 4845, "training_step_time": 0.17196416854858398 }, { "epoch": 7.3944091796875e-06, "model_forward_time": 0.025854110717773438, "step": 4846 }, { "epoch": 7.3944091796875e-06, "step": 4846, "training_step_time": 0.1599137783050537 }, { "epoch": 7.39593505859375e-06, "model_forward_time": 0.025415897369384766, "step": 4847 }, { "epoch": 7.39593505859375e-06, "step": 4847, "training_step_time": 0.10622811317443848 }, { "epoch": 7.3974609375e-06, "model_forward_time": 0.02519702911376953, "step": 4848 }, { "epoch": 7.3974609375e-06, "step": 4848, "training_step_time": 0.10799169540405273 }, { "epoch": 7.39898681640625e-06, "model_forward_time": 0.025924205780029297, "step": 4849 }, { "epoch": 7.39898681640625e-06, "step": 4849, "training_step_time": 0.11356973648071289 }, { "epoch": 7.4005126953125e-06, "grad_norm": 0.4368501901626587, "learning_rate": 9.662946492360776e-05, "loss": 0.0825, "step": 4850 }, { "epoch": 7.4005126953125e-06, "model_forward_time": 0.025371551513671875, "step": 4850 }, { "epoch": 7.4005126953125e-06, "step": 4850, "training_step_time": 0.1088564395904541 }, { "epoch": 7.40203857421875e-06, "model_forward_time": 0.025156736373901367, "step": 4851 }, { "epoch": 7.40203857421875e-06, "step": 4851, "training_step_time": 0.10562515258789062 }, { "epoch": 7.403564453125e-06, "model_forward_time": 0.025761842727661133, "step": 4852 }, { "epoch": 7.403564453125e-06, "step": 4852, "training_step_time": 0.10777449607849121 }, { "epoch": 7.40509033203125e-06, "model_forward_time": 0.02581501007080078, "step": 4853 }, { "epoch": 7.40509033203125e-06, "step": 4853, "training_step_time": 0.10723352432250977 }, { "epoch": 7.4066162109375e-06, "model_forward_time": 0.02555227279663086, "step": 4854 }, { "epoch": 7.4066162109375e-06, "step": 4854, "training_step_time": 0.10753989219665527 }, { "epoch": 7.40814208984375e-06, "model_forward_time": 0.025534629821777344, "step": 4855 }, { "epoch": 7.40814208984375e-06, "step": 4855, "training_step_time": 0.10834622383117676 }, { "epoch": 7.40966796875e-06, "model_forward_time": 0.025435924530029297, "step": 4856 }, { "epoch": 7.40966796875e-06, "step": 4856, "training_step_time": 0.10753273963928223 }, { "epoch": 7.41119384765625e-06, "model_forward_time": 0.02552199363708496, "step": 4857 }, { "epoch": 7.41119384765625e-06, "step": 4857, "training_step_time": 0.10619354248046875 }, { "epoch": 7.4127197265625e-06, "model_forward_time": 0.025669574737548828, "step": 4858 }, { "epoch": 7.4127197265625e-06, "step": 4858, "training_step_time": 0.11019539833068848 }, { "epoch": 7.41424560546875e-06, "model_forward_time": 0.02552938461303711, "step": 4859 }, { "epoch": 7.41424560546875e-06, "step": 4859, "training_step_time": 0.10848450660705566 }, { "epoch": 7.415771484375e-06, "grad_norm": 0.32892781496047974, "learning_rate": 9.660954318839933e-05, "loss": 0.078, "step": 4860 }, { "epoch": 7.415771484375e-06, "model_forward_time": 0.02518939971923828, "step": 4860 }, { "epoch": 7.415771484375e-06, "step": 4860, "training_step_time": 0.10766482353210449 }, { "epoch": 7.41729736328125e-06, "model_forward_time": 0.025138378143310547, "step": 4861 }, { "epoch": 7.41729736328125e-06, "step": 4861, "training_step_time": 0.10631537437438965 }, { "epoch": 7.4188232421875e-06, "model_forward_time": 0.025055408477783203, "step": 4862 }, { "epoch": 7.4188232421875e-06, "step": 4862, "training_step_time": 0.10762691497802734 }, { "epoch": 7.42034912109375e-06, "model_forward_time": 0.025050878524780273, "step": 4863 }, { "epoch": 7.42034912109375e-06, "step": 4863, "training_step_time": 0.1134636402130127 }, { "epoch": 7.421875e-06, "model_forward_time": 0.025411605834960938, "step": 4864 }, { "epoch": 7.421875e-06, "step": 4864, "training_step_time": 0.11272764205932617 }, { "epoch": 7.42340087890625e-06, "model_forward_time": 0.02487945556640625, "step": 4865 }, { "epoch": 7.42340087890625e-06, "step": 4865, "training_step_time": 0.10802841186523438 }, { "epoch": 7.4249267578125e-06, "model_forward_time": 0.025154829025268555, "step": 4866 }, { "epoch": 7.4249267578125e-06, "step": 4866, "training_step_time": 0.11296749114990234 }, { "epoch": 7.42645263671875e-06, "model_forward_time": 0.02513742446899414, "step": 4867 }, { "epoch": 7.42645263671875e-06, "step": 4867, "training_step_time": 0.133314847946167 }, { "epoch": 7.427978515625e-06, "model_forward_time": 0.02507185935974121, "step": 4868 }, { "epoch": 7.427978515625e-06, "step": 4868, "training_step_time": 0.12149262428283691 }, { "epoch": 7.42950439453125e-06, "model_forward_time": 0.025061845779418945, "step": 4869 }, { "epoch": 7.42950439453125e-06, "step": 4869, "training_step_time": 0.11511659622192383 }, { "epoch": 7.4310302734375e-06, "grad_norm": 0.5905614495277405, "learning_rate": 9.658956481820094e-05, "loss": 0.085, "step": 4870 }, { "epoch": 7.4310302734375e-06, "model_forward_time": 0.025396108627319336, "step": 4870 }, { "epoch": 7.4310302734375e-06, "step": 4870, "training_step_time": 0.11768198013305664 }, { "epoch": 7.43255615234375e-06, "model_forward_time": 0.025496482849121094, "step": 4871 }, { "epoch": 7.43255615234375e-06, "step": 4871, "training_step_time": 0.18574738502502441 }, { "epoch": 7.43408203125e-06, "model_forward_time": 0.025285720825195312, "step": 4872 }, { "epoch": 7.43408203125e-06, "step": 4872, "training_step_time": 0.14091873168945312 }, { "epoch": 7.43560791015625e-06, "model_forward_time": 0.024801969528198242, "step": 4873 }, { "epoch": 7.43560791015625e-06, "step": 4873, "training_step_time": 0.11728286743164062 }, { "epoch": 7.4371337890625e-06, "model_forward_time": 0.025107145309448242, "step": 4874 }, { "epoch": 7.4371337890625e-06, "step": 4874, "training_step_time": 0.11075830459594727 }, { "epoch": 7.43865966796875e-06, "model_forward_time": 0.02520155906677246, "step": 4875 }, { "epoch": 7.43865966796875e-06, "step": 4875, "training_step_time": 0.11309528350830078 }, { "epoch": 7.440185546875e-06, "model_forward_time": 0.025040864944458008, "step": 4876 }, { "epoch": 7.440185546875e-06, "step": 4876, "training_step_time": 0.19763612747192383 }, { "epoch": 7.44171142578125e-06, "model_forward_time": 0.0266268253326416, "step": 4877 }, { "epoch": 7.44171142578125e-06, "step": 4877, "training_step_time": 0.15999817848205566 }, { "epoch": 7.4432373046875e-06, "model_forward_time": 0.024660348892211914, "step": 4878 }, { "epoch": 7.4432373046875e-06, "step": 4878, "training_step_time": 0.1298222541809082 }, { "epoch": 7.44476318359375e-06, "model_forward_time": 0.024340391159057617, "step": 4879 }, { "epoch": 7.44476318359375e-06, "step": 4879, "training_step_time": 0.1301717758178711 }, { "epoch": 7.4462890625e-06, "grad_norm": 0.3333793878555298, "learning_rate": 9.65695298372882e-05, "loss": 0.0809, "step": 4880 }, { "epoch": 7.4462890625e-06, "model_forward_time": 0.024929046630859375, "step": 4880 }, { "epoch": 7.4462890625e-06, "step": 4880, "training_step_time": 0.11731147766113281 }, { "epoch": 7.44781494140625e-06, "model_forward_time": 0.025187969207763672, "step": 4881 }, { "epoch": 7.44781494140625e-06, "step": 4881, "training_step_time": 0.1163172721862793 }, { "epoch": 7.4493408203125e-06, "model_forward_time": 0.025133132934570312, "step": 4882 }, { "epoch": 7.4493408203125e-06, "step": 4882, "training_step_time": 0.11022019386291504 }, { "epoch": 7.45086669921875e-06, "model_forward_time": 0.026172876358032227, "step": 4883 }, { "epoch": 7.45086669921875e-06, "step": 4883, "training_step_time": 0.1091313362121582 }, { "epoch": 7.452392578125e-06, "model_forward_time": 0.025336503982543945, "step": 4884 }, { "epoch": 7.452392578125e-06, "step": 4884, "training_step_time": 0.10798287391662598 }, { "epoch": 7.45391845703125e-06, "model_forward_time": 0.025363445281982422, "step": 4885 }, { "epoch": 7.45391845703125e-06, "step": 4885, "training_step_time": 0.10889506340026855 }, { "epoch": 7.4554443359375e-06, "model_forward_time": 0.025667667388916016, "step": 4886 }, { "epoch": 7.4554443359375e-06, "step": 4886, "training_step_time": 0.10933065414428711 }, { "epoch": 7.45697021484375e-06, "model_forward_time": 0.025874614715576172, "step": 4887 }, { "epoch": 7.45697021484375e-06, "step": 4887, "training_step_time": 0.10619497299194336 }, { "epoch": 7.45849609375e-06, "model_forward_time": 0.0258944034576416, "step": 4888 }, { "epoch": 7.45849609375e-06, "step": 4888, "training_step_time": 0.11079168319702148 }, { "epoch": 7.46002197265625e-06, "model_forward_time": 0.025641679763793945, "step": 4889 }, { "epoch": 7.46002197265625e-06, "step": 4889, "training_step_time": 0.11134529113769531 }, { "epoch": 7.4615478515625e-06, "grad_norm": 0.5133267641067505, "learning_rate": 9.654943827000548e-05, "loss": 0.1024, "step": 4890 }, { "epoch": 7.4615478515625e-06, "model_forward_time": 0.02568507194519043, "step": 4890 }, { "epoch": 7.4615478515625e-06, "step": 4890, "training_step_time": 0.10720133781433105 }, { "epoch": 7.46307373046875e-06, "model_forward_time": 0.02544999122619629, "step": 4891 }, { "epoch": 7.46307373046875e-06, "step": 4891, "training_step_time": 0.10991883277893066 }, { "epoch": 7.464599609375e-06, "model_forward_time": 0.025607824325561523, "step": 4892 }, { "epoch": 7.464599609375e-06, "step": 4892, "training_step_time": 0.10703444480895996 }, { "epoch": 7.46612548828125e-06, "model_forward_time": 0.025355100631713867, "step": 4893 }, { "epoch": 7.46612548828125e-06, "step": 4893, "training_step_time": 0.10680198669433594 }, { "epoch": 7.4676513671875e-06, "model_forward_time": 0.025754928588867188, "step": 4894 }, { "epoch": 7.4676513671875e-06, "step": 4894, "training_step_time": 0.12208080291748047 }, { "epoch": 7.46917724609375e-06, "model_forward_time": 0.025475502014160156, "step": 4895 }, { "epoch": 7.46917724609375e-06, "step": 4895, "training_step_time": 0.10849285125732422 }, { "epoch": 7.470703125e-06, "model_forward_time": 0.02537083625793457, "step": 4896 }, { "epoch": 7.470703125e-06, "step": 4896, "training_step_time": 0.11135053634643555 }, { "epoch": 7.47222900390625e-06, "model_forward_time": 0.02474355697631836, "step": 4897 }, { "epoch": 7.47222900390625e-06, "step": 4897, "training_step_time": 0.11593842506408691 }, { "epoch": 7.4737548828125e-06, "model_forward_time": 0.0256803035736084, "step": 4898 }, { "epoch": 7.4737548828125e-06, "step": 4898, "training_step_time": 0.13811230659484863 }, { "epoch": 7.47528076171875e-06, "model_forward_time": 0.024883031845092773, "step": 4899 }, { "epoch": 7.47528076171875e-06, "step": 4899, "training_step_time": 0.1265702247619629 }, { "epoch": 7.476806640625e-06, "grad_norm": 0.44653648138046265, "learning_rate": 9.652929014076593e-05, "loss": 0.0724, "step": 4900 }, { "epoch": 7.476806640625e-06, "model_forward_time": 0.02468109130859375, "step": 4900 }, { "epoch": 7.476806640625e-06, "step": 4900, "training_step_time": 0.1215810775756836 }, { "epoch": 7.47833251953125e-06, "model_forward_time": 0.02508831024169922, "step": 4901 }, { "epoch": 7.47833251953125e-06, "step": 4901, "training_step_time": 0.1208188533782959 }, { "epoch": 7.4798583984375e-06, "model_forward_time": 0.02523493766784668, "step": 4902 }, { "epoch": 7.4798583984375e-06, "step": 4902, "training_step_time": 0.11416935920715332 }, { "epoch": 7.48138427734375e-06, "model_forward_time": 0.025150060653686523, "step": 4903 }, { "epoch": 7.48138427734375e-06, "step": 4903, "training_step_time": 0.11278820037841797 }, { "epoch": 7.48291015625e-06, "model_forward_time": 0.027357816696166992, "step": 4904 }, { "epoch": 7.48291015625e-06, "step": 4904, "training_step_time": 0.11539578437805176 }, { "epoch": 7.48443603515625e-06, "model_forward_time": 0.024977445602416992, "step": 4905 }, { "epoch": 7.48443603515625e-06, "step": 4905, "training_step_time": 0.1111152172088623 }, { "epoch": 7.4859619140625e-06, "model_forward_time": 0.025313854217529297, "step": 4906 }, { "epoch": 7.4859619140625e-06, "step": 4906, "training_step_time": 0.11122775077819824 }, { "epoch": 7.48748779296875e-06, "model_forward_time": 0.02551102638244629, "step": 4907 }, { "epoch": 7.48748779296875e-06, "step": 4907, "training_step_time": 0.10899519920349121 }, { "epoch": 7.489013671875e-06, "model_forward_time": 0.02572178840637207, "step": 4908 }, { "epoch": 7.489013671875e-06, "step": 4908, "training_step_time": 0.11268043518066406 }, { "epoch": 7.49053955078125e-06, "model_forward_time": 0.0253603458404541, "step": 4909 }, { "epoch": 7.49053955078125e-06, "step": 4909, "training_step_time": 0.11006927490234375 }, { "epoch": 7.4920654296875e-06, "grad_norm": 0.41987279057502747, "learning_rate": 9.650908547405144e-05, "loss": 0.0763, "step": 4910 }, { "epoch": 7.4920654296875e-06, "model_forward_time": 0.025511980056762695, "step": 4910 }, { "epoch": 7.4920654296875e-06, "step": 4910, "training_step_time": 0.11098289489746094 }, { "epoch": 7.49359130859375e-06, "model_forward_time": 0.02529740333557129, "step": 4911 }, { "epoch": 7.49359130859375e-06, "step": 4911, "training_step_time": 0.10816621780395508 }, { "epoch": 7.4951171875e-06, "model_forward_time": 0.02529740333557129, "step": 4912 }, { "epoch": 7.4951171875e-06, "step": 4912, "training_step_time": 0.11208987236022949 }, { "epoch": 7.49664306640625e-06, "model_forward_time": 0.025233745574951172, "step": 4913 }, { "epoch": 7.49664306640625e-06, "step": 4913, "training_step_time": 0.10738110542297363 }, { "epoch": 7.4981689453125e-06, "model_forward_time": 0.02541375160217285, "step": 4914 }, { "epoch": 7.4981689453125e-06, "step": 4914, "training_step_time": 0.14761805534362793 }, { "epoch": 7.49969482421875e-06, "model_forward_time": 0.025133371353149414, "step": 4915 }, { "epoch": 7.49969482421875e-06, "step": 4915, "training_step_time": 0.10699057579040527 }, { "epoch": 7.501220703125e-06, "model_forward_time": 0.02520298957824707, "step": 4916 }, { "epoch": 7.501220703125e-06, "step": 4916, "training_step_time": 0.10644841194152832 }, { "epoch": 7.50274658203125e-06, "model_forward_time": 0.0252382755279541, "step": 4917 }, { "epoch": 7.50274658203125e-06, "step": 4917, "training_step_time": 0.1490633487701416 }, { "epoch": 7.5042724609375e-06, "model_forward_time": 0.024939775466918945, "step": 4918 }, { "epoch": 7.5042724609375e-06, "step": 4918, "training_step_time": 0.18489670753479004 }, { "epoch": 7.50579833984375e-06, "model_forward_time": 0.024770736694335938, "step": 4919 }, { "epoch": 7.50579833984375e-06, "step": 4919, "training_step_time": 0.18448424339294434 }, { "epoch": 7.50732421875e-06, "grad_norm": 0.23898987472057343, "learning_rate": 9.648882429441257e-05, "loss": 0.0837, "step": 4920 }, { "epoch": 7.50732421875e-06, "model_forward_time": 0.024801254272460938, "step": 4920 }, { "epoch": 7.50732421875e-06, "step": 4920, "training_step_time": 0.11690568923950195 }, { "epoch": 7.50885009765625e-06, "model_forward_time": 0.02475285530090332, "step": 4921 }, { "epoch": 7.50885009765625e-06, "step": 4921, "training_step_time": 0.11158609390258789 }, { "epoch": 7.5103759765625e-06, "model_forward_time": 0.02538895606994629, "step": 4922 }, { "epoch": 7.5103759765625e-06, "step": 4922, "training_step_time": 0.11053919792175293 }, { "epoch": 7.51190185546875e-06, "model_forward_time": 0.025562047958374023, "step": 4923 }, { "epoch": 7.51190185546875e-06, "step": 4923, "training_step_time": 0.19273734092712402 }, { "epoch": 7.513427734375e-06, "model_forward_time": 0.024800777435302734, "step": 4924 }, { "epoch": 7.513427734375e-06, "step": 4924, "training_step_time": 0.13716959953308105 }, { "epoch": 7.51495361328125e-06, "model_forward_time": 0.02483987808227539, "step": 4925 }, { "epoch": 7.51495361328125e-06, "step": 4925, "training_step_time": 0.12864446640014648 }, { "epoch": 7.5164794921875e-06, "model_forward_time": 0.025580644607543945, "step": 4926 }, { "epoch": 7.5164794921875e-06, "step": 4926, "training_step_time": 0.1304795742034912 }, { "epoch": 7.51800537109375e-06, "model_forward_time": 0.0253751277923584, "step": 4927 }, { "epoch": 7.51800537109375e-06, "step": 4927, "training_step_time": 0.11570143699645996 }, { "epoch": 7.51953125e-06, "model_forward_time": 0.025242090225219727, "step": 4928 }, { "epoch": 7.51953125e-06, "step": 4928, "training_step_time": 0.11199760437011719 }, { "epoch": 7.52105712890625e-06, "model_forward_time": 0.02543330192565918, "step": 4929 }, { "epoch": 7.52105712890625e-06, "step": 4929, "training_step_time": 0.1078336238861084 }, { "epoch": 7.5225830078125e-06, "grad_norm": 0.28251850605010986, "learning_rate": 9.646850662646859e-05, "loss": 0.0887, "step": 4930 }, { "epoch": 7.5225830078125e-06, "model_forward_time": 0.02592945098876953, "step": 4930 }, { "epoch": 7.5225830078125e-06, "step": 4930, "training_step_time": 0.11133694648742676 }, { "epoch": 7.52410888671875e-06, "model_forward_time": 0.026216506958007812, "step": 4931 }, { "epoch": 7.52410888671875e-06, "step": 4931, "training_step_time": 0.11084461212158203 }, { "epoch": 7.525634765625e-06, "model_forward_time": 0.02541375160217285, "step": 4932 }, { "epoch": 7.525634765625e-06, "step": 4932, "training_step_time": 0.11019587516784668 }, { "epoch": 7.52716064453125e-06, "model_forward_time": 0.025500774383544922, "step": 4933 }, { "epoch": 7.52716064453125e-06, "step": 4933, "training_step_time": 0.11139369010925293 }, { "epoch": 7.5286865234375e-06, "model_forward_time": 0.02955341339111328, "step": 4934 }, { "epoch": 7.5286865234375e-06, "step": 4934, "training_step_time": 0.11224579811096191 }, { "epoch": 7.53021240234375e-06, "model_forward_time": 0.02563309669494629, "step": 4935 }, { "epoch": 7.53021240234375e-06, "step": 4935, "training_step_time": 0.2149190902709961 }, { "epoch": 7.53173828125e-06, "model_forward_time": 0.02507948875427246, "step": 4936 }, { "epoch": 7.53173828125e-06, "step": 4936, "training_step_time": 0.11234664916992188 }, { "epoch": 7.53326416015625e-06, "model_forward_time": 0.02517414093017578, "step": 4937 }, { "epoch": 7.53326416015625e-06, "step": 4937, "training_step_time": 0.10972094535827637 }, { "epoch": 7.5347900390625e-06, "model_forward_time": 0.025571346282958984, "step": 4938 }, { "epoch": 7.5347900390625e-06, "step": 4938, "training_step_time": 0.10860347747802734 }, { "epoch": 7.53631591796875e-06, "model_forward_time": 0.025551319122314453, "step": 4939 }, { "epoch": 7.53631591796875e-06, "step": 4939, "training_step_time": 0.10878515243530273 }, { "epoch": 7.537841796875e-06, "grad_norm": 0.39003708958625793, "learning_rate": 9.644813249490735e-05, "loss": 0.0709, "step": 4940 }, { "epoch": 7.537841796875e-06, "model_forward_time": 0.025496244430541992, "step": 4940 }, { "epoch": 7.537841796875e-06, "step": 4940, "training_step_time": 0.1107637882232666 }, { "epoch": 7.53936767578125e-06, "model_forward_time": 0.02537083625793457, "step": 4941 }, { "epoch": 7.53936767578125e-06, "step": 4941, "training_step_time": 0.11152291297912598 }, { "epoch": 7.5408935546875e-06, "model_forward_time": 0.02521800994873047, "step": 4942 }, { "epoch": 7.5408935546875e-06, "step": 4942, "training_step_time": 0.10835933685302734 }, { "epoch": 7.54241943359375e-06, "model_forward_time": 0.02482748031616211, "step": 4943 }, { "epoch": 7.54241943359375e-06, "step": 4943, "training_step_time": 0.10524463653564453 }, { "epoch": 7.5439453125e-06, "model_forward_time": 0.025475263595581055, "step": 4944 }, { "epoch": 7.5439453125e-06, "step": 4944, "training_step_time": 0.1106879711151123 }, { "epoch": 7.54547119140625e-06, "model_forward_time": 0.025432348251342773, "step": 4945 }, { "epoch": 7.54547119140625e-06, "step": 4945, "training_step_time": 0.10875391960144043 }, { "epoch": 7.5469970703125e-06, "model_forward_time": 0.025447607040405273, "step": 4946 }, { "epoch": 7.5469970703125e-06, "step": 4946, "training_step_time": 0.10958719253540039 }, { "epoch": 7.54852294921875e-06, "model_forward_time": 0.025499582290649414, "step": 4947 }, { "epoch": 7.54852294921875e-06, "step": 4947, "training_step_time": 0.11153674125671387 }, { "epoch": 7.550048828125e-06, "model_forward_time": 0.02516341209411621, "step": 4948 }, { "epoch": 7.550048828125e-06, "step": 4948, "training_step_time": 0.10888934135437012 }, { "epoch": 7.55157470703125e-06, "model_forward_time": 0.025346755981445312, "step": 4949 }, { "epoch": 7.55157470703125e-06, "step": 4949, "training_step_time": 0.11162805557250977 }, { "epoch": 7.5531005859375e-06, "grad_norm": 0.47882208228111267, "learning_rate": 9.642770192448536e-05, "loss": 0.0885, "step": 4950 }, { "epoch": 7.5531005859375e-06, "model_forward_time": 0.025487184524536133, "step": 4950 }, { "epoch": 7.5531005859375e-06, "step": 4950, "training_step_time": 0.11057901382446289 }, { "epoch": 7.55462646484375e-06, "model_forward_time": 0.02506732940673828, "step": 4951 }, { "epoch": 7.55462646484375e-06, "step": 4951, "training_step_time": 0.10875964164733887 }, { "epoch": 7.55615234375e-06, "model_forward_time": 0.024926424026489258, "step": 4952 }, { "epoch": 7.55615234375e-06, "step": 4952, "training_step_time": 0.10720324516296387 }, { "epoch": 7.55767822265625e-06, "model_forward_time": 0.025419950485229492, "step": 4953 }, { "epoch": 7.55767822265625e-06, "step": 4953, "training_step_time": 0.10667800903320312 }, { "epoch": 7.5592041015625e-06, "model_forward_time": 0.02504587173461914, "step": 4954 }, { "epoch": 7.5592041015625e-06, "step": 4954, "training_step_time": 0.10863447189331055 }, { "epoch": 7.56072998046875e-06, "model_forward_time": 0.025823354721069336, "step": 4955 }, { "epoch": 7.56072998046875e-06, "step": 4955, "training_step_time": 0.10754513740539551 }, { "epoch": 7.562255859375e-06, "model_forward_time": 0.025046110153198242, "step": 4956 }, { "epoch": 7.562255859375e-06, "step": 4956, "training_step_time": 0.14478564262390137 }, { "epoch": 7.56378173828125e-06, "model_forward_time": 0.02461862564086914, "step": 4957 }, { "epoch": 7.56378173828125e-06, "step": 4957, "training_step_time": 0.16135287284851074 }, { "epoch": 7.5653076171875e-06, "model_forward_time": 0.024411439895629883, "step": 4958 }, { "epoch": 7.5653076171875e-06, "step": 4958, "training_step_time": 0.15050053596496582 }, { "epoch": 7.56683349609375e-06, "model_forward_time": 0.02342987060546875, "step": 4959 }, { "epoch": 7.56683349609375e-06, "step": 4959, "training_step_time": 0.15452027320861816 }, { "epoch": 7.568359375e-06, "grad_norm": 0.31477200984954834, "learning_rate": 9.640721494002769e-05, "loss": 0.0724, "step": 4960 }, { "epoch": 7.568359375e-06, "model_forward_time": 0.02475118637084961, "step": 4960 }, { "epoch": 7.568359375e-06, "step": 4960, "training_step_time": 0.16670513153076172 }, { "epoch": 7.56988525390625e-06, "model_forward_time": 0.02426624298095703, "step": 4961 }, { "epoch": 7.56988525390625e-06, "step": 4961, "training_step_time": 0.14691925048828125 }, { "epoch": 7.5714111328125e-06, "model_forward_time": 0.02520608901977539, "step": 4962 }, { "epoch": 7.5714111328125e-06, "step": 4962, "training_step_time": 0.17913126945495605 }, { "epoch": 7.57293701171875e-06, "model_forward_time": 0.02399444580078125, "step": 4963 }, { "epoch": 7.57293701171875e-06, "step": 4963, "training_step_time": 0.11542296409606934 }, { "epoch": 7.574462890625e-06, "model_forward_time": 0.023713350296020508, "step": 4964 }, { "epoch": 7.574462890625e-06, "step": 4964, "training_step_time": 0.11583471298217773 }, { "epoch": 7.57598876953125e-06, "model_forward_time": 0.024901151657104492, "step": 4965 }, { "epoch": 7.57598876953125e-06, "step": 4965, "training_step_time": 0.11467123031616211 }, { "epoch": 7.5775146484375e-06, "model_forward_time": 0.02547764778137207, "step": 4966 }, { "epoch": 7.5775146484375e-06, "step": 4966, "training_step_time": 0.11422514915466309 }, { "epoch": 7.57904052734375e-06, "model_forward_time": 0.025173187255859375, "step": 4967 }, { "epoch": 7.57904052734375e-06, "step": 4967, "training_step_time": 0.12285208702087402 }, { "epoch": 7.58056640625e-06, "model_forward_time": 0.025578737258911133, "step": 4968 }, { "epoch": 7.58056640625e-06, "step": 4968, "training_step_time": 0.19126558303833008 }, { "epoch": 7.58209228515625e-06, "model_forward_time": 0.023888111114501953, "step": 4969 }, { "epoch": 7.58209228515625e-06, "step": 4969, "training_step_time": 0.15446972846984863 }, { "epoch": 7.5836181640625e-06, "grad_norm": 0.3941463530063629, "learning_rate": 9.638667156642794e-05, "loss": 0.0949, "step": 4970 }, { "epoch": 7.5836181640625e-06, "model_forward_time": 0.02462029457092285, "step": 4970 }, { "epoch": 7.5836181640625e-06, "step": 4970, "training_step_time": 0.12002444267272949 }, { "epoch": 7.58514404296875e-06, "model_forward_time": 0.024794816970825195, "step": 4971 }, { "epoch": 7.58514404296875e-06, "step": 4971, "training_step_time": 0.11831355094909668 }, { "epoch": 7.586669921875e-06, "model_forward_time": 0.025547266006469727, "step": 4972 }, { "epoch": 7.586669921875e-06, "step": 4972, "training_step_time": 0.11830830574035645 }, { "epoch": 7.58819580078125e-06, "model_forward_time": 0.025634765625, "step": 4973 }, { "epoch": 7.58819580078125e-06, "step": 4973, "training_step_time": 0.11304569244384766 }, { "epoch": 7.5897216796875e-06, "model_forward_time": 0.02518630027770996, "step": 4974 }, { "epoch": 7.5897216796875e-06, "step": 4974, "training_step_time": 0.11473965644836426 }, { "epoch": 7.59124755859375e-06, "model_forward_time": 0.025035858154296875, "step": 4975 }, { "epoch": 7.59124755859375e-06, "step": 4975, "training_step_time": 0.11049509048461914 }, { "epoch": 7.5927734375e-06, "model_forward_time": 0.025081157684326172, "step": 4976 }, { "epoch": 7.5927734375e-06, "step": 4976, "training_step_time": 0.11325383186340332 }, { "epoch": 7.59429931640625e-06, "model_forward_time": 0.02512669563293457, "step": 4977 }, { "epoch": 7.59429931640625e-06, "step": 4977, "training_step_time": 0.10912799835205078 }, { "epoch": 7.5958251953125e-06, "model_forward_time": 0.025065183639526367, "step": 4978 }, { "epoch": 7.5958251953125e-06, "step": 4978, "training_step_time": 0.10924196243286133 }, { "epoch": 7.59735107421875e-06, "model_forward_time": 0.025452613830566406, "step": 4979 }, { "epoch": 7.59735107421875e-06, "step": 4979, "training_step_time": 0.1107790470123291 }, { "epoch": 7.598876953125e-06, "grad_norm": 0.42142805457115173, "learning_rate": 9.636607182864827e-05, "loss": 0.0862, "step": 4980 }, { "epoch": 7.598876953125e-06, "model_forward_time": 0.025710344314575195, "step": 4980 }, { "epoch": 7.598876953125e-06, "step": 4980, "training_step_time": 0.10665369033813477 }, { "epoch": 7.60040283203125e-06, "model_forward_time": 0.02704477310180664, "step": 4981 }, { "epoch": 7.60040283203125e-06, "step": 4981, "training_step_time": 0.10802435874938965 }, { "epoch": 7.6019287109375e-06, "model_forward_time": 0.025354385375976562, "step": 4982 }, { "epoch": 7.6019287109375e-06, "step": 4982, "training_step_time": 0.11356186866760254 }, { "epoch": 7.60345458984375e-06, "model_forward_time": 0.025492429733276367, "step": 4983 }, { "epoch": 7.60345458984375e-06, "step": 4983, "training_step_time": 0.11075949668884277 }, { "epoch": 7.60498046875e-06, "model_forward_time": 0.025390625, "step": 4984 }, { "epoch": 7.60498046875e-06, "step": 4984, "training_step_time": 0.10998678207397461 }, { "epoch": 7.60650634765625e-06, "model_forward_time": 0.027068376541137695, "step": 4985 }, { "epoch": 7.60650634765625e-06, "step": 4985, "training_step_time": 0.1377875804901123 }, { "epoch": 7.6080322265625e-06, "model_forward_time": 0.02533698081970215, "step": 4986 }, { "epoch": 7.6080322265625e-06, "step": 4986, "training_step_time": 0.2097783088684082 }, { "epoch": 7.60955810546875e-06, "model_forward_time": 0.0262300968170166, "step": 4987 }, { "epoch": 7.60955810546875e-06, "step": 4987, "training_step_time": 0.1419978141784668 }, { "epoch": 7.611083984375e-06, "model_forward_time": 0.02506232261657715, "step": 4988 }, { "epoch": 7.611083984375e-06, "step": 4988, "training_step_time": 0.13737893104553223 }, { "epoch": 7.61260986328125e-06, "model_forward_time": 0.024457693099975586, "step": 4989 }, { "epoch": 7.61260986328125e-06, "step": 4989, "training_step_time": 0.12887787818908691 }, { "epoch": 7.6141357421875e-06, "grad_norm": 0.49523815512657166, "learning_rate": 9.634541575171929e-05, "loss": 0.0783, "step": 4990 }, { "epoch": 7.6141357421875e-06, "model_forward_time": 0.02469038963317871, "step": 4990 }, { "epoch": 7.6141357421875e-06, "step": 4990, "training_step_time": 0.1218104362487793 }, { "epoch": 7.61566162109375e-06, "model_forward_time": 0.02471137046813965, "step": 4991 }, { "epoch": 7.61566162109375e-06, "step": 4991, "training_step_time": 0.11813116073608398 }, { "epoch": 7.6171875e-06, "model_forward_time": 0.02520465850830078, "step": 4992 }, { "epoch": 7.6171875e-06, "step": 4992, "training_step_time": 0.12045454978942871 }, { "epoch": 7.61871337890625e-06, "model_forward_time": 0.02532052993774414, "step": 4993 }, { "epoch": 7.61871337890625e-06, "step": 4993, "training_step_time": 0.11654019355773926 }, { "epoch": 7.6202392578125e-06, "model_forward_time": 0.025274038314819336, "step": 4994 }, { "epoch": 7.6202392578125e-06, "step": 4994, "training_step_time": 0.1146087646484375 }, { "epoch": 7.62176513671875e-06, "model_forward_time": 0.025234460830688477, "step": 4995 }, { "epoch": 7.62176513671875e-06, "step": 4995, "training_step_time": 0.11065053939819336 }, { "epoch": 7.623291015625e-06, "model_forward_time": 0.025374174118041992, "step": 4996 }, { "epoch": 7.623291015625e-06, "step": 4996, "training_step_time": 0.10798358917236328 }, { "epoch": 7.62481689453125e-06, "model_forward_time": 0.026602745056152344, "step": 4997 }, { "epoch": 7.62481689453125e-06, "step": 4997, "training_step_time": 0.11116313934326172 }, { "epoch": 7.6263427734375e-06, "model_forward_time": 0.025016069412231445, "step": 4998 }, { "epoch": 7.6263427734375e-06, "step": 4998, "training_step_time": 0.10956907272338867 }, { "epoch": 7.62786865234375e-06, "model_forward_time": 0.025268077850341797, "step": 4999 }, { "epoch": 7.62786865234375e-06, "step": 4999, "training_step_time": 0.10773944854736328 }, { "epoch": 7.62939453125e-06, "grad_norm": 0.5361136794090271, "learning_rate": 9.632470336074009e-05, "loss": 0.0978, "step": 5000 }, { "epoch": 7.62939453125e-06, "model_forward_time": 0.026927947998046875, "step": 5000 }, { "epoch": 7.62939453125e-06, "step": 5000, "training_step_time": 0.10859799385070801 }, { "epoch": 7.63092041015625e-06, "model_forward_time": 0.023974895477294922, "step": 5001 }, { "epoch": 7.63092041015625e-06, "step": 5001, "training_step_time": 0.10182738304138184 }, { "epoch": 7.6324462890625e-06, "model_forward_time": 0.02519059181213379, "step": 5002 }, { "epoch": 7.6324462890625e-06, "step": 5002, "training_step_time": 0.10440278053283691 }, { "epoch": 7.63397216796875e-06, "model_forward_time": 0.024693965911865234, "step": 5003 }, { "epoch": 7.63397216796875e-06, "step": 5003, "training_step_time": 0.11014342308044434 }, { "epoch": 7.635498046875e-06, "model_forward_time": 0.025823354721069336, "step": 5004 }, { "epoch": 7.635498046875e-06, "step": 5004, "training_step_time": 0.10577702522277832 }, { "epoch": 7.63702392578125e-06, "model_forward_time": 0.025117158889770508, "step": 5005 }, { "epoch": 7.63702392578125e-06, "step": 5005, "training_step_time": 0.10631299018859863 }, { "epoch": 7.6385498046875e-06, "model_forward_time": 0.02579355239868164, "step": 5006 }, { "epoch": 7.6385498046875e-06, "step": 5006, "training_step_time": 0.10712122917175293 }, { "epoch": 7.64007568359375e-06, "model_forward_time": 0.02536940574645996, "step": 5007 }, { "epoch": 7.64007568359375e-06, "step": 5007, "training_step_time": 0.10812020301818848 }, { "epoch": 7.6416015625e-06, "model_forward_time": 0.028054475784301758, "step": 5008 }, { "epoch": 7.6416015625e-06, "step": 5008, "training_step_time": 0.11114716529846191 }, { "epoch": 7.64312744140625e-06, "model_forward_time": 0.02658390998840332, "step": 5009 }, { "epoch": 7.64312744140625e-06, "step": 5009, "training_step_time": 0.10692739486694336 }, { "epoch": 7.6446533203125e-06, "grad_norm": 0.3657020926475525, "learning_rate": 9.630393468087818e-05, "loss": 0.0795, "step": 5010 }, { "epoch": 7.6446533203125e-06, "model_forward_time": 0.025216341018676758, "step": 5010 }, { "epoch": 7.6446533203125e-06, "step": 5010, "training_step_time": 0.10995364189147949 }, { "epoch": 7.64617919921875e-06, "model_forward_time": 0.025510549545288086, "step": 5011 }, { "epoch": 7.64617919921875e-06, "step": 5011, "training_step_time": 0.11351227760314941 }, { "epoch": 7.647705078125e-06, "model_forward_time": 0.0254669189453125, "step": 5012 }, { "epoch": 7.647705078125e-06, "step": 5012, "training_step_time": 0.14704322814941406 }, { "epoch": 7.64923095703125e-06, "model_forward_time": 0.025402545928955078, "step": 5013 }, { "epoch": 7.64923095703125e-06, "step": 5013, "training_step_time": 0.12297320365905762 }, { "epoch": 7.6507568359375e-06, "model_forward_time": 0.025186777114868164, "step": 5014 }, { "epoch": 7.6507568359375e-06, "step": 5014, "training_step_time": 0.12395977973937988 }, { "epoch": 7.65228271484375e-06, "model_forward_time": 0.025627613067626953, "step": 5015 }, { "epoch": 7.65228271484375e-06, "step": 5015, "training_step_time": 0.14246249198913574 }, { "epoch": 7.65380859375e-06, "model_forward_time": 0.02500748634338379, "step": 5016 }, { "epoch": 7.65380859375e-06, "step": 5016, "training_step_time": 0.11869072914123535 }, { "epoch": 7.65533447265625e-06, "model_forward_time": 0.02583789825439453, "step": 5017 }, { "epoch": 7.65533447265625e-06, "step": 5017, "training_step_time": 0.12934136390686035 }, { "epoch": 7.6568603515625e-06, "model_forward_time": 0.024960756301879883, "step": 5018 }, { "epoch": 7.6568603515625e-06, "step": 5018, "training_step_time": 0.18588614463806152 }, { "epoch": 7.65838623046875e-06, "model_forward_time": 0.025450468063354492, "step": 5019 }, { "epoch": 7.65838623046875e-06, "step": 5019, "training_step_time": 0.12991809844970703 }, { "epoch": 7.659912109375e-06, "grad_norm": 0.38604509830474854, "learning_rate": 9.628310973736943e-05, "loss": 0.0876, "step": 5020 }, { "epoch": 7.659912109375e-06, "model_forward_time": 0.023944616317749023, "step": 5020 }, { "epoch": 7.659912109375e-06, "step": 5020, "training_step_time": 0.1101841926574707 }, { "epoch": 7.66143798828125e-06, "model_forward_time": 0.025321483612060547, "step": 5021 }, { "epoch": 7.66143798828125e-06, "step": 5021, "training_step_time": 0.11579370498657227 }, { "epoch": 7.6629638671875e-06, "model_forward_time": 0.025134801864624023, "step": 5022 }, { "epoch": 7.6629638671875e-06, "step": 5022, "training_step_time": 0.11626219749450684 }, { "epoch": 7.66448974609375e-06, "model_forward_time": 0.024925708770751953, "step": 5023 }, { "epoch": 7.66448974609375e-06, "step": 5023, "training_step_time": 0.1073770523071289 }, { "epoch": 7.666015625e-06, "model_forward_time": 0.025336027145385742, "step": 5024 }, { "epoch": 7.666015625e-06, "step": 5024, "training_step_time": 0.19783616065979004 }, { "epoch": 7.66754150390625e-06, "model_forward_time": 0.02418208122253418, "step": 5025 }, { "epoch": 7.66754150390625e-06, "step": 5025, "training_step_time": 0.1741950511932373 }, { "epoch": 7.6690673828125e-06, "model_forward_time": 0.02390313148498535, "step": 5026 }, { "epoch": 7.6690673828125e-06, "step": 5026, "training_step_time": 0.1154928207397461 }, { "epoch": 7.67059326171875e-06, "model_forward_time": 0.02483677864074707, "step": 5027 }, { "epoch": 7.67059326171875e-06, "step": 5027, "training_step_time": 0.11839485168457031 }, { "epoch": 7.672119140625e-06, "model_forward_time": 0.024872779846191406, "step": 5028 }, { "epoch": 7.672119140625e-06, "step": 5028, "training_step_time": 0.14141178131103516 }, { "epoch": 7.67364501953125e-06, "model_forward_time": 0.02516937255859375, "step": 5029 }, { "epoch": 7.67364501953125e-06, "step": 5029, "training_step_time": 0.13424110412597656 }, { "epoch": 7.6751708984375e-06, "grad_norm": 0.6332946419715881, "learning_rate": 9.626222855551816e-05, "loss": 0.0744, "step": 5030 }, { "epoch": 7.6751708984375e-06, "model_forward_time": 0.024690628051757812, "step": 5030 }, { "epoch": 7.6751708984375e-06, "step": 5030, "training_step_time": 0.1775212287902832 }, { "epoch": 7.67669677734375e-06, "model_forward_time": 0.02417445182800293, "step": 5031 }, { "epoch": 7.67669677734375e-06, "step": 5031, "training_step_time": 0.1160440444946289 }, { "epoch": 7.67822265625e-06, "model_forward_time": 0.023741960525512695, "step": 5032 }, { "epoch": 7.67822265625e-06, "step": 5032, "training_step_time": 0.11839485168457031 }, { "epoch": 7.67974853515625e-06, "model_forward_time": 0.024202823638916016, "step": 5033 }, { "epoch": 7.67974853515625e-06, "step": 5033, "training_step_time": 0.11390423774719238 }, { "epoch": 7.6812744140625e-06, "model_forward_time": 0.025263309478759766, "step": 5034 }, { "epoch": 7.6812744140625e-06, "step": 5034, "training_step_time": 0.11560988426208496 }, { "epoch": 7.68280029296875e-06, "model_forward_time": 0.025209426879882812, "step": 5035 }, { "epoch": 7.68280029296875e-06, "step": 5035, "training_step_time": 0.10874438285827637 }, { "epoch": 7.684326171875e-06, "model_forward_time": 0.02582263946533203, "step": 5036 }, { "epoch": 7.684326171875e-06, "step": 5036, "training_step_time": 0.2162775993347168 }, { "epoch": 7.68585205078125e-06, "model_forward_time": 0.025000572204589844, "step": 5037 }, { "epoch": 7.68585205078125e-06, "step": 5037, "training_step_time": 0.10718274116516113 }, { "epoch": 7.6873779296875e-06, "model_forward_time": 0.024698495864868164, "step": 5038 }, { "epoch": 7.6873779296875e-06, "step": 5038, "training_step_time": 0.10795450210571289 }, { "epoch": 7.68890380859375e-06, "model_forward_time": 0.02606987953186035, "step": 5039 }, { "epoch": 7.68890380859375e-06, "step": 5039, "training_step_time": 0.10921597480773926 }, { "epoch": 7.6904296875e-06, "grad_norm": 0.3652441203594208, "learning_rate": 9.624129116069694e-05, "loss": 0.0681, "step": 5040 }, { "epoch": 7.6904296875e-06, "model_forward_time": 0.025727510452270508, "step": 5040 }, { "epoch": 7.6904296875e-06, "step": 5040, "training_step_time": 0.11357378959655762 }, { "epoch": 7.69195556640625e-06, "model_forward_time": 0.02573108673095703, "step": 5041 }, { "epoch": 7.69195556640625e-06, "step": 5041, "training_step_time": 0.11369466781616211 }, { "epoch": 7.6934814453125e-06, "model_forward_time": 0.024309873580932617, "step": 5042 }, { "epoch": 7.6934814453125e-06, "step": 5042, "training_step_time": 0.10545206069946289 }, { "epoch": 7.69500732421875e-06, "model_forward_time": 0.02532052993774414, "step": 5043 }, { "epoch": 7.69500732421875e-06, "step": 5043, "training_step_time": 0.10808062553405762 }, { "epoch": 7.696533203125e-06, "model_forward_time": 0.025396347045898438, "step": 5044 }, { "epoch": 7.696533203125e-06, "step": 5044, "training_step_time": 0.10766434669494629 }, { "epoch": 7.69805908203125e-06, "model_forward_time": 0.025324106216430664, "step": 5045 }, { "epoch": 7.69805908203125e-06, "step": 5045, "training_step_time": 0.10772562026977539 }, { "epoch": 7.6995849609375e-06, "model_forward_time": 0.025367259979248047, "step": 5046 }, { "epoch": 7.6995849609375e-06, "step": 5046, "training_step_time": 0.10894322395324707 }, { "epoch": 7.70111083984375e-06, "model_forward_time": 0.025348663330078125, "step": 5047 }, { "epoch": 7.70111083984375e-06, "step": 5047, "training_step_time": 0.10654807090759277 }, { "epoch": 7.70263671875e-06, "model_forward_time": 0.025162220001220703, "step": 5048 }, { "epoch": 7.70263671875e-06, "step": 5048, "training_step_time": 0.11753630638122559 }, { "epoch": 7.70416259765625e-06, "model_forward_time": 0.02490830421447754, "step": 5049 }, { "epoch": 7.70416259765625e-06, "step": 5049, "training_step_time": 0.10760307312011719 }, { "epoch": 7.7056884765625e-06, "grad_norm": 0.3991225063800812, "learning_rate": 9.62202975783467e-05, "loss": 0.0871, "step": 5050 }, { "epoch": 7.7056884765625e-06, "model_forward_time": 0.024593353271484375, "step": 5050 }, { "epoch": 7.7056884765625e-06, "step": 5050, "training_step_time": 0.10759758949279785 }, { "epoch": 7.70721435546875e-06, "model_forward_time": 0.025439739227294922, "step": 5051 }, { "epoch": 7.70721435546875e-06, "step": 5051, "training_step_time": 0.10791921615600586 }, { "epoch": 7.708740234375e-06, "model_forward_time": 0.025187253952026367, "step": 5052 }, { "epoch": 7.708740234375e-06, "step": 5052, "training_step_time": 0.10755205154418945 }, { "epoch": 7.71026611328125e-06, "model_forward_time": 0.025600194931030273, "step": 5053 }, { "epoch": 7.71026611328125e-06, "step": 5053, "training_step_time": 0.11006999015808105 }, { "epoch": 7.7117919921875e-06, "model_forward_time": 0.024882078170776367, "step": 5054 }, { "epoch": 7.7117919921875e-06, "step": 5054, "training_step_time": 0.10697746276855469 }, { "epoch": 7.71331787109375e-06, "model_forward_time": 0.025122880935668945, "step": 5055 }, { "epoch": 7.71331787109375e-06, "step": 5055, "training_step_time": 0.10991668701171875 }, { "epoch": 7.71484375e-06, "model_forward_time": 0.024379968643188477, "step": 5056 }, { "epoch": 7.71484375e-06, "step": 5056, "training_step_time": 0.11037611961364746 }, { "epoch": 7.71636962890625e-06, "model_forward_time": 0.02559494972229004, "step": 5057 }, { "epoch": 7.71636962890625e-06, "step": 5057, "training_step_time": 0.21707653999328613 }, { "epoch": 7.7178955078125e-06, "model_forward_time": 0.024936199188232422, "step": 5058 }, { "epoch": 7.7178955078125e-06, "step": 5058, "training_step_time": 0.1100008487701416 }, { "epoch": 7.71942138671875e-06, "model_forward_time": 0.024688005447387695, "step": 5059 }, { "epoch": 7.71942138671875e-06, "step": 5059, "training_step_time": 0.12002801895141602 }, { "epoch": 7.720947265625e-06, "grad_norm": 0.46479085087776184, "learning_rate": 9.619924783397661e-05, "loss": 0.0693, "step": 5060 }, { "epoch": 7.720947265625e-06, "model_forward_time": 0.025546789169311523, "step": 5060 }, { "epoch": 7.720947265625e-06, "step": 5060, "training_step_time": 0.1540844440460205 }, { "epoch": 7.72247314453125e-06, "model_forward_time": 0.02441120147705078, "step": 5061 }, { "epoch": 7.72247314453125e-06, "step": 5061, "training_step_time": 0.21178293228149414 }, { "epoch": 7.7239990234375e-06, "model_forward_time": 0.02568340301513672, "step": 5062 }, { "epoch": 7.7239990234375e-06, "step": 5062, "training_step_time": 0.13126587867736816 }, { "epoch": 7.72552490234375e-06, "model_forward_time": 0.024960041046142578, "step": 5063 }, { "epoch": 7.72552490234375e-06, "step": 5063, "training_step_time": 0.12482142448425293 }, { "epoch": 7.72705078125e-06, "model_forward_time": 0.026921987533569336, "step": 5064 }, { "epoch": 7.72705078125e-06, "step": 5064, "training_step_time": 0.135298490524292 }, { "epoch": 7.72857666015625e-06, "model_forward_time": 0.02526688575744629, "step": 5065 }, { "epoch": 7.72857666015625e-06, "step": 5065, "training_step_time": 0.10916519165039062 }, { "epoch": 7.7301025390625e-06, "model_forward_time": 0.025776386260986328, "step": 5066 }, { "epoch": 7.7301025390625e-06, "step": 5066, "training_step_time": 0.11279296875 }, { "epoch": 7.73162841796875e-06, "model_forward_time": 0.025534868240356445, "step": 5067 }, { "epoch": 7.73162841796875e-06, "step": 5067, "training_step_time": 0.11626148223876953 }, { "epoch": 7.733154296875e-06, "model_forward_time": 0.02498793601989746, "step": 5068 }, { "epoch": 7.733154296875e-06, "step": 5068, "training_step_time": 0.1060945987701416 }, { "epoch": 7.73468017578125e-06, "model_forward_time": 0.02555680274963379, "step": 5069 }, { "epoch": 7.73468017578125e-06, "step": 5069, "training_step_time": 0.1988391876220703 }, { "epoch": 7.7362060546875e-06, "grad_norm": 0.4381895959377289, "learning_rate": 9.617814195316411e-05, "loss": 0.1059, "step": 5070 }, { "epoch": 7.7362060546875e-06, "model_forward_time": 0.026225805282592773, "step": 5070 }, { "epoch": 7.7362060546875e-06, "step": 5070, "training_step_time": 0.1167140007019043 }, { "epoch": 7.73773193359375e-06, "model_forward_time": 0.02488231658935547, "step": 5071 }, { "epoch": 7.73773193359375e-06, "step": 5071, "training_step_time": 0.15398955345153809 }, { "epoch": 7.7392578125e-06, "model_forward_time": 0.02504110336303711, "step": 5072 }, { "epoch": 7.7392578125e-06, "step": 5072, "training_step_time": 0.22089600563049316 }, { "epoch": 7.74078369140625e-06, "model_forward_time": 0.024670839309692383, "step": 5073 }, { "epoch": 7.74078369140625e-06, "step": 5073, "training_step_time": 0.17334461212158203 }, { "epoch": 7.7423095703125e-06, "model_forward_time": 0.02419114112854004, "step": 5074 }, { "epoch": 7.7423095703125e-06, "step": 5074, "training_step_time": 0.1717381477355957 }, { "epoch": 7.74383544921875e-06, "model_forward_time": 0.025037050247192383, "step": 5075 }, { "epoch": 7.74383544921875e-06, "step": 5075, "training_step_time": 0.1658949851989746 }, { "epoch": 7.745361328125e-06, "model_forward_time": 0.024564027786254883, "step": 5076 }, { "epoch": 7.745361328125e-06, "step": 5076, "training_step_time": 0.14272284507751465 }, { "epoch": 7.74688720703125e-06, "model_forward_time": 0.024655580520629883, "step": 5077 }, { "epoch": 7.74688720703125e-06, "step": 5077, "training_step_time": 0.1324291229248047 }, { "epoch": 7.7484130859375e-06, "model_forward_time": 0.025058507919311523, "step": 5078 }, { "epoch": 7.7484130859375e-06, "step": 5078, "training_step_time": 0.11436796188354492 }, { "epoch": 7.74993896484375e-06, "model_forward_time": 0.024914026260375977, "step": 5079 }, { "epoch": 7.74993896484375e-06, "step": 5079, "training_step_time": 0.1658475399017334 }, { "epoch": 7.75146484375e-06, "grad_norm": 0.3558763265609741, "learning_rate": 9.61569799615548e-05, "loss": 0.0959, "step": 5080 }, { "epoch": 7.75146484375e-06, "model_forward_time": 0.024217605590820312, "step": 5080 }, { "epoch": 7.75146484375e-06, "step": 5080, "training_step_time": 0.16362261772155762 }, { "epoch": 7.75299072265625e-06, "model_forward_time": 0.024431228637695312, "step": 5081 }, { "epoch": 7.75299072265625e-06, "step": 5081, "training_step_time": 0.10477352142333984 }, { "epoch": 7.7545166015625e-06, "model_forward_time": 0.02491021156311035, "step": 5082 }, { "epoch": 7.7545166015625e-06, "step": 5082, "training_step_time": 0.10980844497680664 }, { "epoch": 7.75604248046875e-06, "model_forward_time": 0.025683879852294922, "step": 5083 }, { "epoch": 7.75604248046875e-06, "step": 5083, "training_step_time": 0.1112060546875 }, { "epoch": 7.757568359375e-06, "model_forward_time": 0.025236845016479492, "step": 5084 }, { "epoch": 7.757568359375e-06, "step": 5084, "training_step_time": 0.11798906326293945 }, { "epoch": 7.75909423828125e-06, "model_forward_time": 0.025272846221923828, "step": 5085 }, { "epoch": 7.75909423828125e-06, "step": 5085, "training_step_time": 0.10834503173828125 }, { "epoch": 7.7606201171875e-06, "model_forward_time": 0.025069475173950195, "step": 5086 }, { "epoch": 7.7606201171875e-06, "step": 5086, "training_step_time": 0.10696101188659668 }, { "epoch": 7.76214599609375e-06, "model_forward_time": 0.025365591049194336, "step": 5087 }, { "epoch": 7.76214599609375e-06, "step": 5087, "training_step_time": 0.10917925834655762 }, { "epoch": 7.763671875e-06, "model_forward_time": 0.02533578872680664, "step": 5088 }, { "epoch": 7.763671875e-06, "step": 5088, "training_step_time": 0.11208510398864746 }, { "epoch": 7.76519775390625e-06, "model_forward_time": 0.02526998519897461, "step": 5089 }, { "epoch": 7.76519775390625e-06, "step": 5089, "training_step_time": 0.10644888877868652 }, { "epoch": 7.7667236328125e-06, "grad_norm": 0.4785032272338867, "learning_rate": 9.613576188486253e-05, "loss": 0.08, "step": 5090 }, { "epoch": 7.7667236328125e-06, "model_forward_time": 0.025149822235107422, "step": 5090 }, { "epoch": 7.7667236328125e-06, "step": 5090, "training_step_time": 0.11103463172912598 }, { "epoch": 7.76824951171875e-06, "model_forward_time": 0.025137662887573242, "step": 5091 }, { "epoch": 7.76824951171875e-06, "step": 5091, "training_step_time": 0.1049797534942627 }, { "epoch": 7.769775390625e-06, "model_forward_time": 0.025326967239379883, "step": 5092 }, { "epoch": 7.769775390625e-06, "step": 5092, "training_step_time": 0.11314988136291504 }, { "epoch": 7.77130126953125e-06, "model_forward_time": 0.024926424026489258, "step": 5093 }, { "epoch": 7.77130126953125e-06, "step": 5093, "training_step_time": 0.1066884994506836 }, { "epoch": 7.7728271484375e-06, "model_forward_time": 0.025122404098510742, "step": 5094 }, { "epoch": 7.7728271484375e-06, "step": 5094, "training_step_time": 0.10620594024658203 }, { "epoch": 7.77435302734375e-06, "model_forward_time": 0.02527022361755371, "step": 5095 }, { "epoch": 7.77435302734375e-06, "step": 5095, "training_step_time": 0.1057744026184082 }, { "epoch": 7.77587890625e-06, "model_forward_time": 0.02557539939880371, "step": 5096 }, { "epoch": 7.77587890625e-06, "step": 5096, "training_step_time": 0.10718941688537598 }, { "epoch": 7.77740478515625e-06, "model_forward_time": 0.02547931671142578, "step": 5097 }, { "epoch": 7.77740478515625e-06, "step": 5097, "training_step_time": 0.1082770824432373 }, { "epoch": 7.7789306640625e-06, "model_forward_time": 0.02510833740234375, "step": 5098 }, { "epoch": 7.7789306640625e-06, "step": 5098, "training_step_time": 0.1084136962890625 }, { "epoch": 7.78045654296875e-06, "model_forward_time": 0.02668619155883789, "step": 5099 }, { "epoch": 7.78045654296875e-06, "step": 5099, "training_step_time": 0.11053085327148438 }, { "epoch": 7.781982421875e-06, "grad_norm": 0.4167155921459198, "learning_rate": 9.611448774886924e-05, "loss": 0.0953, "step": 5100 }, { "epoch": 7.781982421875e-06, "model_forward_time": 0.025283098220825195, "step": 5100 }, { "epoch": 7.781982421875e-06, "step": 5100, "training_step_time": 0.14626860618591309 }, { "epoch": 7.78350830078125e-06, "model_forward_time": 0.025179147720336914, "step": 5101 }, { "epoch": 7.78350830078125e-06, "step": 5101, "training_step_time": 0.17770838737487793 }, { "epoch": 7.7850341796875e-06, "model_forward_time": 0.026099681854248047, "step": 5102 }, { "epoch": 7.7850341796875e-06, "step": 5102, "training_step_time": 0.17916131019592285 }, { "epoch": 7.78656005859375e-06, "model_forward_time": 0.02507758140563965, "step": 5103 }, { "epoch": 7.78656005859375e-06, "step": 5103, "training_step_time": 0.16151666641235352 }, { "epoch": 7.7880859375e-06, "model_forward_time": 0.02428889274597168, "step": 5104 }, { "epoch": 7.7880859375e-06, "step": 5104, "training_step_time": 0.15611696243286133 }, { "epoch": 7.78961181640625e-06, "model_forward_time": 0.026854991912841797, "step": 5105 }, { "epoch": 7.78961181640625e-06, "step": 5105, "training_step_time": 0.14175748825073242 }, { "epoch": 7.7911376953125e-06, "model_forward_time": 0.024842500686645508, "step": 5106 }, { "epoch": 7.7911376953125e-06, "step": 5106, "training_step_time": 0.14162063598632812 }, { "epoch": 7.79266357421875e-06, "model_forward_time": 0.026187419891357422, "step": 5107 }, { "epoch": 7.79266357421875e-06, "step": 5107, "training_step_time": 0.1306629180908203 }, { "epoch": 7.794189453125e-06, "model_forward_time": 0.02481532096862793, "step": 5108 }, { "epoch": 7.794189453125e-06, "step": 5108, "training_step_time": 0.11838006973266602 }, { "epoch": 7.79571533203125e-06, "model_forward_time": 0.025243282318115234, "step": 5109 }, { "epoch": 7.79571533203125e-06, "step": 5109, "training_step_time": 0.11831212043762207 }, { "epoch": 7.7972412109375e-06, "grad_norm": 0.30426469445228577, "learning_rate": 9.609315757942503e-05, "loss": 0.0968, "step": 5110 }, { "epoch": 7.7972412109375e-06, "model_forward_time": 0.02549910545349121, "step": 5110 }, { "epoch": 7.7972412109375e-06, "step": 5110, "training_step_time": 0.1936659812927246 }, { "epoch": 7.79876708984375e-06, "model_forward_time": 0.024448871612548828, "step": 5111 }, { "epoch": 7.79876708984375e-06, "step": 5111, "training_step_time": 0.10869193077087402 }, { "epoch": 7.80029296875e-06, "model_forward_time": 0.024936676025390625, "step": 5112 }, { "epoch": 7.80029296875e-06, "step": 5112, "training_step_time": 0.11344146728515625 }, { "epoch": 7.80181884765625e-06, "model_forward_time": 0.025222063064575195, "step": 5113 }, { "epoch": 7.80181884765625e-06, "step": 5113, "training_step_time": 0.11933016777038574 }, { "epoch": 7.8033447265625e-06, "model_forward_time": 0.02550339698791504, "step": 5114 }, { "epoch": 7.8033447265625e-06, "step": 5114, "training_step_time": 0.11156916618347168 }, { "epoch": 7.80487060546875e-06, "model_forward_time": 0.025215625762939453, "step": 5115 }, { "epoch": 7.80487060546875e-06, "step": 5115, "training_step_time": 0.11398673057556152 }, { "epoch": 7.806396484375e-06, "model_forward_time": 0.02557086944580078, "step": 5116 }, { "epoch": 7.806396484375e-06, "step": 5116, "training_step_time": 0.11448311805725098 }, { "epoch": 7.80792236328125e-06, "model_forward_time": 0.02543020248413086, "step": 5117 }, { "epoch": 7.80792236328125e-06, "step": 5117, "training_step_time": 0.13334393501281738 }, { "epoch": 7.8094482421875e-06, "model_forward_time": 0.024976730346679688, "step": 5118 }, { "epoch": 7.8094482421875e-06, "step": 5118, "training_step_time": 0.11498093605041504 }, { "epoch": 7.81097412109375e-06, "model_forward_time": 0.025333881378173828, "step": 5119 }, { "epoch": 7.81097412109375e-06, "step": 5119, "training_step_time": 0.11364006996154785 }, { "epoch": 7.8125e-06, "grad_norm": 0.47002390027046204, "learning_rate": 9.607177140244806e-05, "loss": 0.0971, "step": 5120 }, { "epoch": 7.8125e-06, "model_forward_time": 0.02544999122619629, "step": 5120 }, { "epoch": 7.8125e-06, "step": 5120, "training_step_time": 0.11374187469482422 }, { "epoch": 7.81402587890625e-06, "model_forward_time": 0.0249786376953125, "step": 5121 }, { "epoch": 7.81402587890625e-06, "step": 5121, "training_step_time": 0.11122488975524902 }, { "epoch": 7.8155517578125e-06, "model_forward_time": 0.025346994400024414, "step": 5122 }, { "epoch": 7.8155517578125e-06, "step": 5122, "training_step_time": 0.10609054565429688 }, { "epoch": 7.81707763671875e-06, "model_forward_time": 0.025271177291870117, "step": 5123 }, { "epoch": 7.81707763671875e-06, "step": 5123, "training_step_time": 0.10923027992248535 }, { "epoch": 7.818603515625e-06, "model_forward_time": 0.026297569274902344, "step": 5124 }, { "epoch": 7.818603515625e-06, "step": 5124, "training_step_time": 0.1091604232788086 }, { "epoch": 7.82012939453125e-06, "model_forward_time": 0.025348186492919922, "step": 5125 }, { "epoch": 7.82012939453125e-06, "step": 5125, "training_step_time": 0.17068076133728027 }, { "epoch": 7.8216552734375e-06, "model_forward_time": 0.024475574493408203, "step": 5126 }, { "epoch": 7.8216552734375e-06, "step": 5126, "training_step_time": 0.16425800323486328 }, { "epoch": 7.82318115234375e-06, "model_forward_time": 0.025218486785888672, "step": 5127 }, { "epoch": 7.82318115234375e-06, "step": 5127, "training_step_time": 0.10821413993835449 }, { "epoch": 7.82470703125e-06, "model_forward_time": 0.024888992309570312, "step": 5128 }, { "epoch": 7.82470703125e-06, "step": 5128, "training_step_time": 0.10391950607299805 }, { "epoch": 7.82623291015625e-06, "model_forward_time": 0.025673866271972656, "step": 5129 }, { "epoch": 7.82623291015625e-06, "step": 5129, "training_step_time": 0.1188809871673584 }, { "epoch": 7.8277587890625e-06, "grad_norm": 0.5986289381980896, "learning_rate": 9.605032924392457e-05, "loss": 0.0978, "step": 5130 }, { "epoch": 7.8277587890625e-06, "model_forward_time": 0.025788307189941406, "step": 5130 }, { "epoch": 7.8277587890625e-06, "step": 5130, "training_step_time": 0.11152005195617676 }, { "epoch": 7.82928466796875e-06, "model_forward_time": 0.025192975997924805, "step": 5131 }, { "epoch": 7.82928466796875e-06, "step": 5131, "training_step_time": 0.10466122627258301 }, { "epoch": 7.830810546875e-06, "model_forward_time": 0.02535700798034668, "step": 5132 }, { "epoch": 7.830810546875e-06, "step": 5132, "training_step_time": 0.10646581649780273 }, { "epoch": 7.83233642578125e-06, "model_forward_time": 0.025621652603149414, "step": 5133 }, { "epoch": 7.83233642578125e-06, "step": 5133, "training_step_time": 0.13620877265930176 }, { "epoch": 7.8338623046875e-06, "model_forward_time": 0.02577829360961914, "step": 5134 }, { "epoch": 7.8338623046875e-06, "step": 5134, "training_step_time": 0.1522979736328125 }, { "epoch": 7.83538818359375e-06, "model_forward_time": 0.025319337844848633, "step": 5135 }, { "epoch": 7.83538818359375e-06, "step": 5135, "training_step_time": 0.1520693302154541 }, { "epoch": 7.8369140625e-06, "model_forward_time": 0.024809598922729492, "step": 5136 }, { "epoch": 7.8369140625e-06, "step": 5136, "training_step_time": 0.13495230674743652 }, { "epoch": 7.83843994140625e-06, "model_forward_time": 0.02466726303100586, "step": 5137 }, { "epoch": 7.83843994140625e-06, "step": 5137, "training_step_time": 0.12909626960754395 }, { "epoch": 7.8399658203125e-06, "model_forward_time": 0.024352073669433594, "step": 5138 }, { "epoch": 7.8399658203125e-06, "step": 5138, "training_step_time": 0.12425851821899414 }, { "epoch": 7.84149169921875e-06, "model_forward_time": 0.025299549102783203, "step": 5139 }, { "epoch": 7.84149169921875e-06, "step": 5139, "training_step_time": 0.12037348747253418 }, { "epoch": 7.843017578125e-06, "grad_norm": 0.3774958550930023, "learning_rate": 9.602883112990875e-05, "loss": 0.0798, "step": 5140 }, { "epoch": 7.843017578125e-06, "model_forward_time": 0.02543473243713379, "step": 5140 }, { "epoch": 7.843017578125e-06, "step": 5140, "training_step_time": 0.10543036460876465 }, { "epoch": 7.84454345703125e-06, "model_forward_time": 0.025210857391357422, "step": 5141 }, { "epoch": 7.84454345703125e-06, "step": 5141, "training_step_time": 0.1053779125213623 }, { "epoch": 7.8460693359375e-06, "model_forward_time": 0.025737524032592773, "step": 5142 }, { "epoch": 7.8460693359375e-06, "step": 5142, "training_step_time": 0.11282730102539062 }, { "epoch": 7.84759521484375e-06, "model_forward_time": 0.025147438049316406, "step": 5143 }, { "epoch": 7.84759521484375e-06, "step": 5143, "training_step_time": 0.10622644424438477 }, { "epoch": 7.84912109375e-06, "model_forward_time": 0.024768829345703125, "step": 5144 }, { "epoch": 7.84912109375e-06, "step": 5144, "training_step_time": 0.1564195156097412 }, { "epoch": 7.85064697265625e-06, "model_forward_time": 0.024646520614624023, "step": 5145 }, { "epoch": 7.85064697265625e-06, "step": 5145, "training_step_time": 0.10473775863647461 }, { "epoch": 7.8521728515625e-06, "model_forward_time": 0.024808645248413086, "step": 5146 }, { "epoch": 7.8521728515625e-06, "step": 5146, "training_step_time": 0.11070871353149414 }, { "epoch": 7.85369873046875e-06, "model_forward_time": 0.027230501174926758, "step": 5147 }, { "epoch": 7.85369873046875e-06, "step": 5147, "training_step_time": 0.1368732452392578 }, { "epoch": 7.855224609375e-06, "model_forward_time": 0.025020599365234375, "step": 5148 }, { "epoch": 7.855224609375e-06, "step": 5148, "training_step_time": 0.18535470962524414 }, { "epoch": 7.85675048828125e-06, "model_forward_time": 0.026046276092529297, "step": 5149 }, { "epoch": 7.85675048828125e-06, "step": 5149, "training_step_time": 0.10913944244384766 }, { "epoch": 7.8582763671875e-06, "grad_norm": 0.3100931942462921, "learning_rate": 9.600727708652289e-05, "loss": 0.0715, "step": 5150 }, { "epoch": 7.8582763671875e-06, "model_forward_time": 0.02467179298400879, "step": 5150 }, { "epoch": 7.8582763671875e-06, "step": 5150, "training_step_time": 0.10547018051147461 }, { "epoch": 7.85980224609375e-06, "model_forward_time": 0.025094032287597656, "step": 5151 }, { "epoch": 7.85980224609375e-06, "step": 5151, "training_step_time": 0.1842031478881836 }, { "epoch": 7.861328125e-06, "model_forward_time": 0.024509906768798828, "step": 5152 }, { "epoch": 7.861328125e-06, "step": 5152, "training_step_time": 0.14297962188720703 }, { "epoch": 7.86285400390625e-06, "model_forward_time": 0.024283170700073242, "step": 5153 }, { "epoch": 7.86285400390625e-06, "step": 5153, "training_step_time": 0.10908865928649902 }, { "epoch": 7.8643798828125e-06, "model_forward_time": 0.024906396865844727, "step": 5154 }, { "epoch": 7.8643798828125e-06, "step": 5154, "training_step_time": 0.11689114570617676 }, { "epoch": 7.86590576171875e-06, "model_forward_time": 0.025175809860229492, "step": 5155 }, { "epoch": 7.86590576171875e-06, "step": 5155, "training_step_time": 0.11237907409667969 }, { "epoch": 7.867431640625e-06, "model_forward_time": 0.025460481643676758, "step": 5156 }, { "epoch": 7.867431640625e-06, "step": 5156, "training_step_time": 0.10687518119812012 }, { "epoch": 7.86895751953125e-06, "model_forward_time": 0.026054859161376953, "step": 5157 }, { "epoch": 7.86895751953125e-06, "step": 5157, "training_step_time": 0.1987161636352539 }, { "epoch": 7.8704833984375e-06, "model_forward_time": 0.024463891983032227, "step": 5158 }, { "epoch": 7.8704833984375e-06, "step": 5158, "training_step_time": 0.20881032943725586 }, { "epoch": 7.87200927734375e-06, "model_forward_time": 0.024082422256469727, "step": 5159 }, { "epoch": 7.87200927734375e-06, "step": 5159, "training_step_time": 0.11146974563598633 }, { "epoch": 7.87353515625e-06, "grad_norm": 0.48995721340179443, "learning_rate": 9.598566713995718e-05, "loss": 0.091, "step": 5160 }, { "epoch": 7.87353515625e-06, "model_forward_time": 0.024123430252075195, "step": 5160 }, { "epoch": 7.87353515625e-06, "step": 5160, "training_step_time": 0.17784380912780762 }, { "epoch": 7.87506103515625e-06, "model_forward_time": 0.024363279342651367, "step": 5161 }, { "epoch": 7.87506103515625e-06, "step": 5161, "training_step_time": 0.17849969863891602 }, { "epoch": 7.8765869140625e-06, "model_forward_time": 0.023779630661010742, "step": 5162 }, { "epoch": 7.8765869140625e-06, "step": 5162, "training_step_time": 0.1200704574584961 }, { "epoch": 7.87811279296875e-06, "model_forward_time": 0.023793458938598633, "step": 5163 }, { "epoch": 7.87811279296875e-06, "step": 5163, "training_step_time": 0.12340831756591797 }, { "epoch": 7.879638671875e-06, "model_forward_time": 0.025189876556396484, "step": 5164 }, { "epoch": 7.879638671875e-06, "step": 5164, "training_step_time": 0.11617374420166016 }, { "epoch": 7.88116455078125e-06, "model_forward_time": 0.02508544921875, "step": 5165 }, { "epoch": 7.88116455078125e-06, "step": 5165, "training_step_time": 0.11361527442932129 }, { "epoch": 7.8826904296875e-06, "model_forward_time": 0.025226116180419922, "step": 5166 }, { "epoch": 7.8826904296875e-06, "step": 5166, "training_step_time": 0.12002849578857422 }, { "epoch": 7.88421630859375e-06, "model_forward_time": 0.025399446487426758, "step": 5167 }, { "epoch": 7.88421630859375e-06, "step": 5167, "training_step_time": 0.11835718154907227 }, { "epoch": 7.8857421875e-06, "model_forward_time": 0.025426387786865234, "step": 5168 }, { "epoch": 7.8857421875e-06, "step": 5168, "training_step_time": 0.11257052421569824 }, { "epoch": 7.88726806640625e-06, "model_forward_time": 0.025269746780395508, "step": 5169 }, { "epoch": 7.88726806640625e-06, "step": 5169, "training_step_time": 0.10788369178771973 }, { "epoch": 7.8887939453125e-06, "grad_norm": 0.33801034092903137, "learning_rate": 9.596400131646972e-05, "loss": 0.0808, "step": 5170 }, { "epoch": 7.8887939453125e-06, "model_forward_time": 0.025659561157226562, "step": 5170 }, { "epoch": 7.8887939453125e-06, "step": 5170, "training_step_time": 0.21669483184814453 }, { "epoch": 7.89031982421875e-06, "model_forward_time": 0.024983882904052734, "step": 5171 }, { "epoch": 7.89031982421875e-06, "step": 5171, "training_step_time": 0.1127464771270752 }, { "epoch": 7.891845703125e-06, "model_forward_time": 0.025427818298339844, "step": 5172 }, { "epoch": 7.891845703125e-06, "step": 5172, "training_step_time": 0.10818171501159668 }, { "epoch": 7.89337158203125e-06, "model_forward_time": 0.025274276733398438, "step": 5173 }, { "epoch": 7.89337158203125e-06, "step": 5173, "training_step_time": 0.17029953002929688 }, { "epoch": 7.8948974609375e-06, "model_forward_time": 0.025063514709472656, "step": 5174 }, { "epoch": 7.8948974609375e-06, "step": 5174, "training_step_time": 0.1663830280303955 }, { "epoch": 7.89642333984375e-06, "model_forward_time": 0.0246732234954834, "step": 5175 }, { "epoch": 7.89642333984375e-06, "step": 5175, "training_step_time": 0.11016058921813965 }, { "epoch": 7.89794921875e-06, "model_forward_time": 0.025157451629638672, "step": 5176 }, { "epoch": 7.89794921875e-06, "step": 5176, "training_step_time": 0.11135053634643555 }, { "epoch": 7.89947509765625e-06, "model_forward_time": 0.025400400161743164, "step": 5177 }, { "epoch": 7.89947509765625e-06, "step": 5177, "training_step_time": 0.11173701286315918 }, { "epoch": 7.9010009765625e-06, "model_forward_time": 0.025698423385620117, "step": 5178 }, { "epoch": 7.9010009765625e-06, "step": 5178, "training_step_time": 0.10787200927734375 }, { "epoch": 7.90252685546875e-06, "model_forward_time": 0.025220870971679688, "step": 5179 }, { "epoch": 7.90252685546875e-06, "step": 5179, "training_step_time": 0.10729575157165527 }, { "epoch": 7.904052734375e-06, "grad_norm": 0.3467779755592346, "learning_rate": 9.594227964238653e-05, "loss": 0.0726, "step": 5180 }, { "epoch": 7.904052734375e-06, "model_forward_time": 0.02515554428100586, "step": 5180 }, { "epoch": 7.904052734375e-06, "step": 5180, "training_step_time": 0.11008381843566895 }, { "epoch": 7.90557861328125e-06, "model_forward_time": 0.02536320686340332, "step": 5181 }, { "epoch": 7.90557861328125e-06, "step": 5181, "training_step_time": 0.10834002494812012 }, { "epoch": 7.9071044921875e-06, "model_forward_time": 0.02501201629638672, "step": 5182 }, { "epoch": 7.9071044921875e-06, "step": 5182, "training_step_time": 0.10660028457641602 }, { "epoch": 7.90863037109375e-06, "model_forward_time": 0.025293588638305664, "step": 5183 }, { "epoch": 7.90863037109375e-06, "step": 5183, "training_step_time": 0.10933327674865723 }, { "epoch": 7.91015625e-06, "model_forward_time": 0.02571725845336914, "step": 5184 }, { "epoch": 7.91015625e-06, "step": 5184, "training_step_time": 0.10790586471557617 }, { "epoch": 7.91168212890625e-06, "model_forward_time": 0.02548980712890625, "step": 5185 }, { "epoch": 7.91168212890625e-06, "step": 5185, "training_step_time": 0.12157034873962402 }, { "epoch": 7.9132080078125e-06, "model_forward_time": 0.02502894401550293, "step": 5186 }, { "epoch": 7.9132080078125e-06, "step": 5186, "training_step_time": 0.11047649383544922 }, { "epoch": 7.91473388671875e-06, "model_forward_time": 0.02510666847229004, "step": 5187 }, { "epoch": 7.91473388671875e-06, "step": 5187, "training_step_time": 0.130279541015625 }, { "epoch": 7.916259765625e-06, "model_forward_time": 0.02514505386352539, "step": 5188 }, { "epoch": 7.916259765625e-06, "step": 5188, "training_step_time": 0.1640608310699463 }, { "epoch": 7.91778564453125e-06, "model_forward_time": 0.02451491355895996, "step": 5189 }, { "epoch": 7.91778564453125e-06, "step": 5189, "training_step_time": 0.125579833984375 }, { "epoch": 7.9193115234375e-06, "grad_norm": 0.44719254970550537, "learning_rate": 9.59205021441015e-05, "loss": 0.1019, "step": 5190 }, { "epoch": 7.9193115234375e-06, "model_forward_time": 0.024780988693237305, "step": 5190 }, { "epoch": 7.9193115234375e-06, "step": 5190, "training_step_time": 0.13920855522155762 }, { "epoch": 7.92083740234375e-06, "model_forward_time": 0.025107145309448242, "step": 5191 }, { "epoch": 7.92083740234375e-06, "step": 5191, "training_step_time": 0.10826945304870605 }, { "epoch": 7.92236328125e-06, "model_forward_time": 0.024735450744628906, "step": 5192 }, { "epoch": 7.92236328125e-06, "step": 5192, "training_step_time": 0.10696792602539062 }, { "epoch": 7.92388916015625e-06, "model_forward_time": 0.02563309669494629, "step": 5193 }, { "epoch": 7.92388916015625e-06, "step": 5193, "training_step_time": 0.10863161087036133 }, { "epoch": 7.9254150390625e-06, "model_forward_time": 0.025914430618286133, "step": 5194 }, { "epoch": 7.9254150390625e-06, "step": 5194, "training_step_time": 0.10904383659362793 }, { "epoch": 7.92694091796875e-06, "model_forward_time": 0.025487184524536133, "step": 5195 }, { "epoch": 7.92694091796875e-06, "step": 5195, "training_step_time": 0.20251774787902832 }, { "epoch": 7.928466796875e-06, "model_forward_time": 0.024526596069335938, "step": 5196 }, { "epoch": 7.928466796875e-06, "step": 5196, "training_step_time": 0.13445353507995605 }, { "epoch": 7.92999267578125e-06, "model_forward_time": 0.0247802734375, "step": 5197 }, { "epoch": 7.92999267578125e-06, "step": 5197, "training_step_time": 0.11343836784362793 }, { "epoch": 7.9315185546875e-06, "model_forward_time": 0.02491021156311035, "step": 5198 }, { "epoch": 7.9315185546875e-06, "step": 5198, "training_step_time": 0.11269283294677734 }, { "epoch": 7.93304443359375e-06, "model_forward_time": 0.025566577911376953, "step": 5199 }, { "epoch": 7.93304443359375e-06, "step": 5199, "training_step_time": 0.11288022994995117 }, { "epoch": 7.9345703125e-06, "grad_norm": 0.23744435608386993, "learning_rate": 9.589866884807635e-05, "loss": 0.0593, "step": 5200 }, { "epoch": 7.9345703125e-06, "model_forward_time": 0.025332927703857422, "step": 5200 }, { "epoch": 7.9345703125e-06, "step": 5200, "training_step_time": 0.1583256721496582 }, { "epoch": 7.93609619140625e-06, "model_forward_time": 0.024845600128173828, "step": 5201 }, { "epoch": 7.93609619140625e-06, "step": 5201, "training_step_time": 0.14405035972595215 }, { "epoch": 7.9376220703125e-06, "model_forward_time": 0.024580001831054688, "step": 5202 }, { "epoch": 7.9376220703125e-06, "step": 5202, "training_step_time": 0.10844230651855469 }, { "epoch": 7.93914794921875e-06, "model_forward_time": 0.0248720645904541, "step": 5203 }, { "epoch": 7.93914794921875e-06, "step": 5203, "training_step_time": 0.13251066207885742 }, { "epoch": 7.940673828125e-06, "model_forward_time": 0.026002168655395508, "step": 5204 }, { "epoch": 7.940673828125e-06, "step": 5204, "training_step_time": 0.10731244087219238 }, { "epoch": 7.94219970703125e-06, "model_forward_time": 0.025651216506958008, "step": 5205 }, { "epoch": 7.94219970703125e-06, "step": 5205, "training_step_time": 0.11096382141113281 }, { "epoch": 7.9437255859375e-06, "model_forward_time": 0.02507925033569336, "step": 5206 }, { "epoch": 7.9437255859375e-06, "step": 5206, "training_step_time": 0.13328027725219727 }, { "epoch": 7.94525146484375e-06, "model_forward_time": 0.025351285934448242, "step": 5207 }, { "epoch": 7.94525146484375e-06, "step": 5207, "training_step_time": 0.12077665328979492 }, { "epoch": 7.94677734375e-06, "model_forward_time": 0.025285959243774414, "step": 5208 }, { "epoch": 7.94677734375e-06, "step": 5208, "training_step_time": 0.11151623725891113 }, { "epoch": 7.94830322265625e-06, "model_forward_time": 0.024895191192626953, "step": 5209 }, { "epoch": 7.94830322265625e-06, "step": 5209, "training_step_time": 0.12568116188049316 }, { "epoch": 7.9498291015625e-06, "grad_norm": 0.38819506764411926, "learning_rate": 9.58767797808406e-05, "loss": 0.0832, "step": 5210 }, { "epoch": 7.9498291015625e-06, "model_forward_time": 0.02763080596923828, "step": 5210 }, { "epoch": 7.9498291015625e-06, "step": 5210, "training_step_time": 0.1424870491027832 }, { "epoch": 7.95135498046875e-06, "model_forward_time": 0.025747060775756836, "step": 5211 }, { "epoch": 7.95135498046875e-06, "step": 5211, "training_step_time": 0.16529154777526855 }, { "epoch": 7.952880859375e-06, "model_forward_time": 0.024414539337158203, "step": 5212 }, { "epoch": 7.952880859375e-06, "step": 5212, "training_step_time": 0.1477675437927246 }, { "epoch": 7.95440673828125e-06, "model_forward_time": 0.02502131462097168, "step": 5213 }, { "epoch": 7.95440673828125e-06, "step": 5213, "training_step_time": 0.10902667045593262 }, { "epoch": 7.9559326171875e-06, "model_forward_time": 0.0247499942779541, "step": 5214 }, { "epoch": 7.9559326171875e-06, "step": 5214, "training_step_time": 0.13261079788208008 }, { "epoch": 7.95745849609375e-06, "model_forward_time": 0.024192094802856445, "step": 5215 }, { "epoch": 7.95745849609375e-06, "step": 5215, "training_step_time": 0.1263728141784668 }, { "epoch": 7.958984375e-06, "model_forward_time": 0.02495098114013672, "step": 5216 }, { "epoch": 7.958984375e-06, "step": 5216, "training_step_time": 0.18819475173950195 }, { "epoch": 7.96051025390625e-06, "model_forward_time": 0.02496051788330078, "step": 5217 }, { "epoch": 7.96051025390625e-06, "step": 5217, "training_step_time": 0.11850810050964355 }, { "epoch": 7.9620361328125e-06, "model_forward_time": 0.026095151901245117, "step": 5218 }, { "epoch": 7.9620361328125e-06, "step": 5218, "training_step_time": 0.11518359184265137 }, { "epoch": 7.96356201171875e-06, "model_forward_time": 0.02638387680053711, "step": 5219 }, { "epoch": 7.96356201171875e-06, "step": 5219, "training_step_time": 0.11182236671447754 }, { "epoch": 7.965087890625e-06, "grad_norm": 0.6575888991355896, "learning_rate": 9.58548349689915e-05, "loss": 0.0811, "step": 5220 }, { "epoch": 7.965087890625e-06, "model_forward_time": 0.025304555892944336, "step": 5220 }, { "epoch": 7.965087890625e-06, "step": 5220, "training_step_time": 0.10832977294921875 }, { "epoch": 7.96661376953125e-06, "model_forward_time": 0.029742956161499023, "step": 5221 }, { "epoch": 7.96661376953125e-06, "step": 5221, "training_step_time": 0.1139075756072998 }, { "epoch": 7.9681396484375e-06, "model_forward_time": 0.02559828758239746, "step": 5222 }, { "epoch": 7.9681396484375e-06, "step": 5222, "training_step_time": 0.10848140716552734 }, { "epoch": 7.96966552734375e-06, "model_forward_time": 0.023992061614990234, "step": 5223 }, { "epoch": 7.96966552734375e-06, "step": 5223, "training_step_time": 0.10816287994384766 }, { "epoch": 7.97119140625e-06, "model_forward_time": 0.02505040168762207, "step": 5224 }, { "epoch": 7.97119140625e-06, "step": 5224, "training_step_time": 0.11077737808227539 }, { "epoch": 7.97271728515625e-06, "model_forward_time": 0.02503061294555664, "step": 5225 }, { "epoch": 7.97271728515625e-06, "step": 5225, "training_step_time": 0.10807394981384277 }, { "epoch": 7.9742431640625e-06, "model_forward_time": 0.02598428726196289, "step": 5226 }, { "epoch": 7.9742431640625e-06, "step": 5226, "training_step_time": 0.10855960845947266 }, { "epoch": 7.97576904296875e-06, "model_forward_time": 0.025916099548339844, "step": 5227 }, { "epoch": 7.97576904296875e-06, "step": 5227, "training_step_time": 0.11346054077148438 }, { "epoch": 7.977294921875e-06, "model_forward_time": 0.027048110961914062, "step": 5228 }, { "epoch": 7.977294921875e-06, "step": 5228, "training_step_time": 0.11012005805969238 }, { "epoch": 7.97882080078125e-06, "model_forward_time": 0.025899648666381836, "step": 5229 }, { "epoch": 7.97882080078125e-06, "step": 5229, "training_step_time": 0.11141538619995117 }, { "epoch": 7.9803466796875e-06, "grad_norm": 0.48782312870025635, "learning_rate": 9.583283443919409e-05, "loss": 0.0736, "step": 5230 }, { "epoch": 7.9803466796875e-06, "model_forward_time": 0.024985313415527344, "step": 5230 }, { "epoch": 7.9803466796875e-06, "step": 5230, "training_step_time": 0.11250972747802734 }, { "epoch": 7.98187255859375e-06, "model_forward_time": 0.025539159774780273, "step": 5231 }, { "epoch": 7.98187255859375e-06, "step": 5231, "training_step_time": 0.11655855178833008 }, { "epoch": 7.9833984375e-06, "model_forward_time": 0.025284528732299805, "step": 5232 }, { "epoch": 7.9833984375e-06, "step": 5232, "training_step_time": 0.11226630210876465 }, { "epoch": 7.98492431640625e-06, "model_forward_time": 0.02663254737854004, "step": 5233 }, { "epoch": 7.98492431640625e-06, "step": 5233, "training_step_time": 0.11646366119384766 }, { "epoch": 7.9864501953125e-06, "model_forward_time": 0.025320768356323242, "step": 5234 }, { "epoch": 7.9864501953125e-06, "step": 5234, "training_step_time": 0.11366105079650879 }, { "epoch": 7.98797607421875e-06, "model_forward_time": 0.026799917221069336, "step": 5235 }, { "epoch": 7.98797607421875e-06, "step": 5235, "training_step_time": 0.11258387565612793 }, { "epoch": 7.989501953125e-06, "model_forward_time": 0.026203155517578125, "step": 5236 }, { "epoch": 7.989501953125e-06, "step": 5236, "training_step_time": 0.11353731155395508 }, { "epoch": 7.99102783203125e-06, "model_forward_time": 0.02542257308959961, "step": 5237 }, { "epoch": 7.99102783203125e-06, "step": 5237, "training_step_time": 0.11251950263977051 }, { "epoch": 7.9925537109375e-06, "model_forward_time": 0.025846481323242188, "step": 5238 }, { "epoch": 7.9925537109375e-06, "step": 5238, "training_step_time": 0.10818099975585938 }, { "epoch": 7.99407958984375e-06, "model_forward_time": 0.024787187576293945, "step": 5239 }, { "epoch": 7.99407958984375e-06, "step": 5239, "training_step_time": 0.11342144012451172 }, { "epoch": 7.99560546875e-06, "grad_norm": 0.3838462829589844, "learning_rate": 9.581077821818109e-05, "loss": 0.0776, "step": 5240 }, { "epoch": 7.99560546875e-06, "model_forward_time": 0.025307893753051758, "step": 5240 }, { "epoch": 7.99560546875e-06, "step": 5240, "training_step_time": 0.11136531829833984 }, { "epoch": 7.99713134765625e-06, "model_forward_time": 0.025649309158325195, "step": 5241 }, { "epoch": 7.99713134765625e-06, "step": 5241, "training_step_time": 0.15327787399291992 }, { "epoch": 7.9986572265625e-06, "model_forward_time": 0.025033235549926758, "step": 5242 }, { "epoch": 7.9986572265625e-06, "step": 5242, "training_step_time": 0.12426638603210449 }, { "epoch": 8.00018310546875e-06, "model_forward_time": 0.024860858917236328, "step": 5243 }, { "epoch": 8.00018310546875e-06, "step": 5243, "training_step_time": 0.177994966506958 }, { "epoch": 8.001708984375e-06, "model_forward_time": 0.025020837783813477, "step": 5244 }, { "epoch": 8.001708984375e-06, "step": 5244, "training_step_time": 0.22289562225341797 }, { "epoch": 8.00323486328125e-06, "model_forward_time": 0.024318695068359375, "step": 5245 }, { "epoch": 8.00323486328125e-06, "step": 5245, "training_step_time": 0.20108366012573242 }, { "epoch": 8.0047607421875e-06, "model_forward_time": 0.02483081817626953, "step": 5246 }, { "epoch": 8.0047607421875e-06, "step": 5246, "training_step_time": 0.17573904991149902 }, { "epoch": 8.00628662109375e-06, "model_forward_time": 0.024463415145874023, "step": 5247 }, { "epoch": 8.00628662109375e-06, "step": 5247, "training_step_time": 0.21791696548461914 }, { "epoch": 8.0078125e-06, "model_forward_time": 0.024885892868041992, "step": 5248 }, { "epoch": 8.0078125e-06, "step": 5248, "training_step_time": 0.2028646469116211 }, { "epoch": 8.00933837890625e-06, "model_forward_time": 0.02528524398803711, "step": 5249 }, { "epoch": 8.00933837890625e-06, "step": 5249, "training_step_time": 0.1425306797027588 }, { "epoch": 8.0108642578125e-06, "grad_norm": 0.4634803235530853, "learning_rate": 9.578866633275288e-05, "loss": 0.0977, "step": 5250 }, { "epoch": 8.0108642578125e-06, "model_forward_time": 0.025180339813232422, "step": 5250 }, { "epoch": 8.0108642578125e-06, "step": 5250, "training_step_time": 0.13477802276611328 }, { "epoch": 8.01239013671875e-06, "model_forward_time": 0.024420499801635742, "step": 5251 }, { "epoch": 8.01239013671875e-06, "step": 5251, "training_step_time": 0.20290207862854004 }, { "epoch": 8.013916015625e-06, "model_forward_time": 0.024542808532714844, "step": 5252 }, { "epoch": 8.013916015625e-06, "step": 5252, "training_step_time": 0.10658621788024902 }, { "epoch": 8.01544189453125e-06, "model_forward_time": 0.026471376419067383, "step": 5253 }, { "epoch": 8.01544189453125e-06, "step": 5253, "training_step_time": 0.10831499099731445 }, { "epoch": 8.0169677734375e-06, "model_forward_time": 0.025778770446777344, "step": 5254 }, { "epoch": 8.0169677734375e-06, "step": 5254, "training_step_time": 0.10617899894714355 }, { "epoch": 8.01849365234375e-06, "model_forward_time": 0.02537679672241211, "step": 5255 }, { "epoch": 8.01849365234375e-06, "step": 5255, "training_step_time": 0.10796642303466797 }, { "epoch": 8.02001953125e-06, "model_forward_time": 0.029983043670654297, "step": 5256 }, { "epoch": 8.02001953125e-06, "step": 5256, "training_step_time": 0.11199426651000977 }, { "epoch": 8.02154541015625e-06, "model_forward_time": 0.025526046752929688, "step": 5257 }, { "epoch": 8.02154541015625e-06, "step": 5257, "training_step_time": 0.10637068748474121 }, { "epoch": 8.0230712890625e-06, "model_forward_time": 0.02505326271057129, "step": 5258 }, { "epoch": 8.0230712890625e-06, "step": 5258, "training_step_time": 0.1094517707824707 }, { "epoch": 8.02459716796875e-06, "model_forward_time": 0.025861501693725586, "step": 5259 }, { "epoch": 8.02459716796875e-06, "step": 5259, "training_step_time": 0.1114358901977539 }, { "epoch": 8.026123046875e-06, "grad_norm": 0.5310544371604919, "learning_rate": 9.576649880977748e-05, "loss": 0.0694, "step": 5260 }, { "epoch": 8.026123046875e-06, "model_forward_time": 0.025437355041503906, "step": 5260 }, { "epoch": 8.026123046875e-06, "step": 5260, "training_step_time": 0.11064004898071289 }, { "epoch": 8.02764892578125e-06, "model_forward_time": 0.026332378387451172, "step": 5261 }, { "epoch": 8.02764892578125e-06, "step": 5261, "training_step_time": 0.2164137363433838 }, { "epoch": 8.0291748046875e-06, "model_forward_time": 0.026065587997436523, "step": 5262 }, { "epoch": 8.0291748046875e-06, "step": 5262, "training_step_time": 0.13115239143371582 }, { "epoch": 8.03070068359375e-06, "model_forward_time": 0.024517297744750977, "step": 5263 }, { "epoch": 8.03070068359375e-06, "step": 5263, "training_step_time": 0.11920166015625 }, { "epoch": 8.0322265625e-06, "model_forward_time": 0.025888442993164062, "step": 5264 }, { "epoch": 8.0322265625e-06, "step": 5264, "training_step_time": 0.11392378807067871 }, { "epoch": 8.03375244140625e-06, "model_forward_time": 0.025484323501586914, "step": 5265 }, { "epoch": 8.03375244140625e-06, "step": 5265, "training_step_time": 0.11995553970336914 }, { "epoch": 8.0352783203125e-06, "model_forward_time": 0.025064706802368164, "step": 5266 }, { "epoch": 8.0352783203125e-06, "step": 5266, "training_step_time": 0.11385583877563477 }, { "epoch": 8.03680419921875e-06, "model_forward_time": 0.02621316909790039, "step": 5267 }, { "epoch": 8.03680419921875e-06, "step": 5267, "training_step_time": 0.11371660232543945 }, { "epoch": 8.038330078125e-06, "model_forward_time": 0.026324987411499023, "step": 5268 }, { "epoch": 8.038330078125e-06, "step": 5268, "training_step_time": 0.11027073860168457 }, { "epoch": 8.03985595703125e-06, "model_forward_time": 0.025281906127929688, "step": 5269 }, { "epoch": 8.03985595703125e-06, "step": 5269, "training_step_time": 0.11310696601867676 }, { "epoch": 8.0413818359375e-06, "grad_norm": 0.3431408107280731, "learning_rate": 9.574427567619053e-05, "loss": 0.0789, "step": 5270 }, { "epoch": 8.0413818359375e-06, "model_forward_time": 0.025124073028564453, "step": 5270 }, { "epoch": 8.0413818359375e-06, "step": 5270, "training_step_time": 0.1110532283782959 }, { "epoch": 8.04290771484375e-06, "model_forward_time": 0.02528524398803711, "step": 5271 }, { "epoch": 8.04290771484375e-06, "step": 5271, "training_step_time": 0.11463022232055664 }, { "epoch": 8.04443359375e-06, "model_forward_time": 0.02519965171813965, "step": 5272 }, { "epoch": 8.04443359375e-06, "step": 5272, "training_step_time": 0.11262750625610352 }, { "epoch": 8.04595947265625e-06, "model_forward_time": 0.025567054748535156, "step": 5273 }, { "epoch": 8.04595947265625e-06, "step": 5273, "training_step_time": 0.11079263687133789 }, { "epoch": 8.0474853515625e-06, "model_forward_time": 0.02654719352722168, "step": 5274 }, { "epoch": 8.0474853515625e-06, "step": 5274, "training_step_time": 0.12954401969909668 }, { "epoch": 8.04901123046875e-06, "model_forward_time": 0.02549910545349121, "step": 5275 }, { "epoch": 8.04901123046875e-06, "step": 5275, "training_step_time": 0.12763690948486328 }, { "epoch": 8.050537109375e-06, "model_forward_time": 0.02565908432006836, "step": 5276 }, { "epoch": 8.050537109375e-06, "step": 5276, "training_step_time": 0.10661649703979492 }, { "epoch": 8.05206298828125e-06, "model_forward_time": 0.02559041976928711, "step": 5277 }, { "epoch": 8.05206298828125e-06, "step": 5277, "training_step_time": 0.11897683143615723 }, { "epoch": 8.0535888671875e-06, "model_forward_time": 0.02492809295654297, "step": 5278 }, { "epoch": 8.0535888671875e-06, "step": 5278, "training_step_time": 0.11183881759643555 }, { "epoch": 8.05511474609375e-06, "model_forward_time": 0.02506256103515625, "step": 5279 }, { "epoch": 8.05511474609375e-06, "step": 5279, "training_step_time": 0.11780691146850586 }, { "epoch": 8.056640625e-06, "grad_norm": 0.4310734272003174, "learning_rate": 9.572199695899522e-05, "loss": 0.0882, "step": 5280 }, { "epoch": 8.056640625e-06, "model_forward_time": 0.026833295822143555, "step": 5280 }, { "epoch": 8.056640625e-06, "step": 5280, "training_step_time": 0.1896214485168457 }, { "epoch": 8.05816650390625e-06, "model_forward_time": 0.025113344192504883, "step": 5281 }, { "epoch": 8.05816650390625e-06, "step": 5281, "training_step_time": 0.10593342781066895 }, { "epoch": 8.0596923828125e-06, "model_forward_time": 0.024857282638549805, "step": 5282 }, { "epoch": 8.0596923828125e-06, "step": 5282, "training_step_time": 0.1719036102294922 }, { "epoch": 8.06121826171875e-06, "model_forward_time": 0.025017738342285156, "step": 5283 }, { "epoch": 8.06121826171875e-06, "step": 5283, "training_step_time": 0.16849613189697266 }, { "epoch": 8.062744140625e-06, "model_forward_time": 0.024463653564453125, "step": 5284 }, { "epoch": 8.062744140625e-06, "step": 5284, "training_step_time": 0.10752320289611816 }, { "epoch": 8.06427001953125e-06, "model_forward_time": 0.02465677261352539, "step": 5285 }, { "epoch": 8.06427001953125e-06, "step": 5285, "training_step_time": 0.11778140068054199 }, { "epoch": 8.0657958984375e-06, "model_forward_time": 0.025216341018676758, "step": 5286 }, { "epoch": 8.0657958984375e-06, "step": 5286, "training_step_time": 0.11623454093933105 }, { "epoch": 8.06732177734375e-06, "model_forward_time": 0.025198698043823242, "step": 5287 }, { "epoch": 8.06732177734375e-06, "step": 5287, "training_step_time": 0.11030411720275879 }, { "epoch": 8.06884765625e-06, "model_forward_time": 0.02516913414001465, "step": 5288 }, { "epoch": 8.06884765625e-06, "step": 5288, "training_step_time": 0.19890332221984863 }, { "epoch": 8.07037353515625e-06, "model_forward_time": 0.024277687072753906, "step": 5289 }, { "epoch": 8.07037353515625e-06, "step": 5289, "training_step_time": 0.11043524742126465 }, { "epoch": 8.0718994140625e-06, "grad_norm": 0.43320193886756897, "learning_rate": 9.569966268526232e-05, "loss": 0.0858, "step": 5290 }, { "epoch": 8.0718994140625e-06, "model_forward_time": 0.023698091506958008, "step": 5290 }, { "epoch": 8.0718994140625e-06, "step": 5290, "training_step_time": 0.10822796821594238 }, { "epoch": 8.07342529296875e-06, "model_forward_time": 0.024959564208984375, "step": 5291 }, { "epoch": 8.07342529296875e-06, "step": 5291, "training_step_time": 0.1333456039428711 }, { "epoch": 8.074951171875e-06, "model_forward_time": 0.025366783142089844, "step": 5292 }, { "epoch": 8.074951171875e-06, "step": 5292, "training_step_time": 0.11459040641784668 }, { "epoch": 8.07647705078125e-06, "model_forward_time": 0.024825572967529297, "step": 5293 }, { "epoch": 8.07647705078125e-06, "step": 5293, "training_step_time": 0.11894011497497559 }, { "epoch": 8.0780029296875e-06, "model_forward_time": 0.025238513946533203, "step": 5294 }, { "epoch": 8.0780029296875e-06, "step": 5294, "training_step_time": 0.12187957763671875 }, { "epoch": 8.07952880859375e-06, "model_forward_time": 0.025052547454833984, "step": 5295 }, { "epoch": 8.07952880859375e-06, "step": 5295, "training_step_time": 0.11922979354858398 }, { "epoch": 8.0810546875e-06, "model_forward_time": 0.025362491607666016, "step": 5296 }, { "epoch": 8.0810546875e-06, "step": 5296, "training_step_time": 0.1137394905090332 }, { "epoch": 8.08258056640625e-06, "model_forward_time": 0.024750471115112305, "step": 5297 }, { "epoch": 8.08258056640625e-06, "step": 5297, "training_step_time": 0.1154181957244873 }, { "epoch": 8.0841064453125e-06, "model_forward_time": 0.025093793869018555, "step": 5298 }, { "epoch": 8.0841064453125e-06, "step": 5298, "training_step_time": 0.11117410659790039 }, { "epoch": 8.08563232421875e-06, "model_forward_time": 0.02474522590637207, "step": 5299 }, { "epoch": 8.08563232421875e-06, "step": 5299, "training_step_time": 0.1108696460723877 }, { "epoch": 8.087158203125e-06, "grad_norm": 0.3055928647518158, "learning_rate": 9.567727288213005e-05, "loss": 0.0681, "step": 5300 }, { "epoch": 8.087158203125e-06, "model_forward_time": 0.025014638900756836, "step": 5300 }, { "epoch": 8.087158203125e-06, "step": 5300, "training_step_time": 0.11122751235961914 }, { "epoch": 8.08868408203125e-06, "model_forward_time": 0.02488851547241211, "step": 5301 }, { "epoch": 8.08868408203125e-06, "step": 5301, "training_step_time": 0.11302781105041504 }, { "epoch": 8.0902099609375e-06, "model_forward_time": 0.026043415069580078, "step": 5302 }, { "epoch": 8.0902099609375e-06, "step": 5302, "training_step_time": 0.10748934745788574 }, { "epoch": 8.09173583984375e-06, "model_forward_time": 0.025462627410888672, "step": 5303 }, { "epoch": 8.09173583984375e-06, "step": 5303, "training_step_time": 0.10917472839355469 }, { "epoch": 8.09326171875e-06, "model_forward_time": 0.025010347366333008, "step": 5304 }, { "epoch": 8.09326171875e-06, "step": 5304, "training_step_time": 0.10982155799865723 }, { "epoch": 8.09478759765625e-06, "model_forward_time": 0.025212764739990234, "step": 5305 }, { "epoch": 8.09478759765625e-06, "step": 5305, "training_step_time": 0.1089167594909668 }, { "epoch": 8.0963134765625e-06, "model_forward_time": 0.025351524353027344, "step": 5306 }, { "epoch": 8.0963134765625e-06, "step": 5306, "training_step_time": 0.11288189888000488 }, { "epoch": 8.09783935546875e-06, "model_forward_time": 0.024886369705200195, "step": 5307 }, { "epoch": 8.09783935546875e-06, "step": 5307, "training_step_time": 0.224470853805542 }, { "epoch": 8.099365234375e-06, "model_forward_time": 0.024619579315185547, "step": 5308 }, { "epoch": 8.099365234375e-06, "step": 5308, "training_step_time": 0.12902545928955078 }, { "epoch": 8.10089111328125e-06, "model_forward_time": 0.025023698806762695, "step": 5309 }, { "epoch": 8.10089111328125e-06, "step": 5309, "training_step_time": 0.12845373153686523 }, { "epoch": 8.1024169921875e-06, "grad_norm": 0.5125011205673218, "learning_rate": 9.565482757680415e-05, "loss": 0.0722, "step": 5310 }, { "epoch": 8.1024169921875e-06, "model_forward_time": 0.02520895004272461, "step": 5310 }, { "epoch": 8.1024169921875e-06, "step": 5310, "training_step_time": 0.12505292892456055 }, { "epoch": 8.10394287109375e-06, "model_forward_time": 0.02446269989013672, "step": 5311 }, { "epoch": 8.10394287109375e-06, "step": 5311, "training_step_time": 0.11622929573059082 }, { "epoch": 8.10546875e-06, "model_forward_time": 0.02521371841430664, "step": 5312 }, { "epoch": 8.10546875e-06, "step": 5312, "training_step_time": 0.11935734748840332 }, { "epoch": 8.10699462890625e-06, "model_forward_time": 0.025005102157592773, "step": 5313 }, { "epoch": 8.10699462890625e-06, "step": 5313, "training_step_time": 0.11413335800170898 }, { "epoch": 8.1085205078125e-06, "model_forward_time": 0.025180816650390625, "step": 5314 }, { "epoch": 8.1085205078125e-06, "step": 5314, "training_step_time": 0.11236166954040527 }, { "epoch": 8.11004638671875e-06, "model_forward_time": 0.02509593963623047, "step": 5315 }, { "epoch": 8.11004638671875e-06, "step": 5315, "training_step_time": 0.11250638961791992 }, { "epoch": 8.111572265625e-06, "model_forward_time": 0.02527141571044922, "step": 5316 }, { "epoch": 8.111572265625e-06, "step": 5316, "training_step_time": 0.11560392379760742 }, { "epoch": 8.11309814453125e-06, "model_forward_time": 0.025257587432861328, "step": 5317 }, { "epoch": 8.11309814453125e-06, "step": 5317, "training_step_time": 0.1089942455291748 }, { "epoch": 8.1146240234375e-06, "model_forward_time": 0.0250852108001709, "step": 5318 }, { "epoch": 8.1146240234375e-06, "step": 5318, "training_step_time": 0.11086845397949219 }, { "epoch": 8.11614990234375e-06, "model_forward_time": 0.025179386138916016, "step": 5319 }, { "epoch": 8.11614990234375e-06, "step": 5319, "training_step_time": 0.10838532447814941 }, { "epoch": 8.11767578125e-06, "grad_norm": 0.49064013361930847, "learning_rate": 9.563232679655776e-05, "loss": 0.0706, "step": 5320 }, { "epoch": 8.11767578125e-06, "model_forward_time": 0.024863004684448242, "step": 5320 }, { "epoch": 8.11767578125e-06, "step": 5320, "training_step_time": 0.1320352554321289 }, { "epoch": 8.11920166015625e-06, "model_forward_time": 0.02538919448852539, "step": 5321 }, { "epoch": 8.11920166015625e-06, "step": 5321, "training_step_time": 0.12091398239135742 }, { "epoch": 8.1207275390625e-06, "model_forward_time": 0.025599956512451172, "step": 5322 }, { "epoch": 8.1207275390625e-06, "step": 5322, "training_step_time": 0.12876605987548828 }, { "epoch": 8.12225341796875e-06, "model_forward_time": 0.025117158889770508, "step": 5323 }, { "epoch": 8.12225341796875e-06, "step": 5323, "training_step_time": 0.11061525344848633 }, { "epoch": 8.123779296875e-06, "model_forward_time": 0.02523040771484375, "step": 5324 }, { "epoch": 8.123779296875e-06, "step": 5324, "training_step_time": 0.12949824333190918 }, { "epoch": 8.12530517578125e-06, "model_forward_time": 0.024835586547851562, "step": 5325 }, { "epoch": 8.12530517578125e-06, "step": 5325, "training_step_time": 0.2041008472442627 }, { "epoch": 8.1268310546875e-06, "model_forward_time": 0.024351119995117188, "step": 5326 }, { "epoch": 8.1268310546875e-06, "step": 5326, "training_step_time": 0.11992979049682617 }, { "epoch": 8.12835693359375e-06, "model_forward_time": 0.024280309677124023, "step": 5327 }, { "epoch": 8.12835693359375e-06, "step": 5327, "training_step_time": 0.1040639877319336 }, { "epoch": 8.1298828125e-06, "model_forward_time": 0.025501728057861328, "step": 5328 }, { "epoch": 8.1298828125e-06, "step": 5328, "training_step_time": 0.1516432762145996 }, { "epoch": 8.13140869140625e-06, "model_forward_time": 0.02527022361755371, "step": 5329 }, { "epoch": 8.13140869140625e-06, "step": 5329, "training_step_time": 0.11720442771911621 }, { "epoch": 8.1329345703125e-06, "grad_norm": 0.43517252802848816, "learning_rate": 9.560977056873149e-05, "loss": 0.0971, "step": 5330 }, { "epoch": 8.1329345703125e-06, "model_forward_time": 0.024717092514038086, "step": 5330 }, { "epoch": 8.1329345703125e-06, "step": 5330, "training_step_time": 0.1860368251800537 }, { "epoch": 8.13446044921875e-06, "model_forward_time": 0.02491474151611328, "step": 5331 }, { "epoch": 8.13446044921875e-06, "step": 5331, "training_step_time": 0.17110538482666016 }, { "epoch": 8.135986328125e-06, "model_forward_time": 0.024711132049560547, "step": 5332 }, { "epoch": 8.135986328125e-06, "step": 5332, "training_step_time": 0.17906785011291504 }, { "epoch": 8.13751220703125e-06, "model_forward_time": 0.02483224868774414, "step": 5333 }, { "epoch": 8.13751220703125e-06, "step": 5333, "training_step_time": 0.1056206226348877 }, { "epoch": 8.1390380859375e-06, "model_forward_time": 0.02440476417541504, "step": 5334 }, { "epoch": 8.1390380859375e-06, "step": 5334, "training_step_time": 0.10475826263427734 }, { "epoch": 8.14056396484375e-06, "model_forward_time": 0.02509284019470215, "step": 5335 }, { "epoch": 8.14056396484375e-06, "step": 5335, "training_step_time": 0.10658097267150879 }, { "epoch": 8.14208984375e-06, "model_forward_time": 0.025140047073364258, "step": 5336 }, { "epoch": 8.14208984375e-06, "step": 5336, "training_step_time": 0.17899727821350098 }, { "epoch": 8.14361572265625e-06, "model_forward_time": 0.024337053298950195, "step": 5337 }, { "epoch": 8.14361572265625e-06, "step": 5337, "training_step_time": 0.14462876319885254 }, { "epoch": 8.1451416015625e-06, "model_forward_time": 0.02455282211303711, "step": 5338 }, { "epoch": 8.1451416015625e-06, "step": 5338, "training_step_time": 0.11198210716247559 }, { "epoch": 8.14666748046875e-06, "model_forward_time": 0.024847030639648438, "step": 5339 }, { "epoch": 8.14666748046875e-06, "step": 5339, "training_step_time": 0.12465620040893555 }, { "epoch": 8.148193359375e-06, "grad_norm": 0.5305927991867065, "learning_rate": 9.558715892073323e-05, "loss": 0.0793, "step": 5340 }, { "epoch": 8.148193359375e-06, "model_forward_time": 0.025543689727783203, "step": 5340 }, { "epoch": 8.148193359375e-06, "step": 5340, "training_step_time": 0.12249422073364258 }, { "epoch": 8.14971923828125e-06, "model_forward_time": 0.024969100952148438, "step": 5341 }, { "epoch": 8.14971923828125e-06, "step": 5341, "training_step_time": 0.11046671867370605 }, { "epoch": 8.1512451171875e-06, "model_forward_time": 0.026295900344848633, "step": 5342 }, { "epoch": 8.1512451171875e-06, "step": 5342, "training_step_time": 0.12152767181396484 }, { "epoch": 8.15277099609375e-06, "model_forward_time": 0.025275468826293945, "step": 5343 }, { "epoch": 8.15277099609375e-06, "step": 5343, "training_step_time": 0.11175131797790527 }, { "epoch": 8.154296875e-06, "model_forward_time": 0.026798248291015625, "step": 5344 }, { "epoch": 8.154296875e-06, "step": 5344, "training_step_time": 0.10860991477966309 }, { "epoch": 8.15582275390625e-06, "model_forward_time": 0.028039932250976562, "step": 5345 }, { "epoch": 8.15582275390625e-06, "step": 5345, "training_step_time": 0.1104886531829834 }, { "epoch": 8.1573486328125e-06, "model_forward_time": 0.025432109832763672, "step": 5346 }, { "epoch": 8.1573486328125e-06, "step": 5346, "training_step_time": 0.11380195617675781 }, { "epoch": 8.15887451171875e-06, "model_forward_time": 0.024781465530395508, "step": 5347 }, { "epoch": 8.15887451171875e-06, "step": 5347, "training_step_time": 0.12786364555358887 }, { "epoch": 8.160400390625e-06, "model_forward_time": 0.025107383728027344, "step": 5348 }, { "epoch": 8.160400390625e-06, "step": 5348, "training_step_time": 0.17631149291992188 }, { "epoch": 8.16192626953125e-06, "model_forward_time": 0.025258541107177734, "step": 5349 }, { "epoch": 8.16192626953125e-06, "step": 5349, "training_step_time": 0.14897370338439941 }, { "epoch": 8.1634521484375e-06, "grad_norm": 0.8434357643127441, "learning_rate": 9.556449188003831e-05, "loss": 0.0726, "step": 5350 }, { "epoch": 8.1634521484375e-06, "model_forward_time": 0.024253368377685547, "step": 5350 }, { "epoch": 8.1634521484375e-06, "step": 5350, "training_step_time": 0.21562600135803223 }, { "epoch": 8.16497802734375e-06, "model_forward_time": 0.024890899658203125, "step": 5351 }, { "epoch": 8.16497802734375e-06, "step": 5351, "training_step_time": 0.12376523017883301 }, { "epoch": 8.16650390625e-06, "model_forward_time": 0.02453136444091797, "step": 5352 }, { "epoch": 8.16650390625e-06, "step": 5352, "training_step_time": 0.11982059478759766 }, { "epoch": 8.16802978515625e-06, "model_forward_time": 0.024935245513916016, "step": 5353 }, { "epoch": 8.16802978515625e-06, "step": 5353, "training_step_time": 0.11817359924316406 }, { "epoch": 8.1695556640625e-06, "model_forward_time": 0.025550127029418945, "step": 5354 }, { "epoch": 8.1695556640625e-06, "step": 5354, "training_step_time": 0.11632728576660156 }, { "epoch": 8.17108154296875e-06, "model_forward_time": 0.02511143684387207, "step": 5355 }, { "epoch": 8.17108154296875e-06, "step": 5355, "training_step_time": 0.11466741561889648 }, { "epoch": 8.172607421875e-06, "model_forward_time": 0.02514052391052246, "step": 5356 }, { "epoch": 8.172607421875e-06, "step": 5356, "training_step_time": 0.11188411712646484 }, { "epoch": 8.17413330078125e-06, "model_forward_time": 0.024961233139038086, "step": 5357 }, { "epoch": 8.17413330078125e-06, "step": 5357, "training_step_time": 0.11230134963989258 }, { "epoch": 8.1756591796875e-06, "model_forward_time": 0.025269269943237305, "step": 5358 }, { "epoch": 8.1756591796875e-06, "step": 5358, "training_step_time": 0.1098630428314209 }, { "epoch": 8.17718505859375e-06, "model_forward_time": 0.025458574295043945, "step": 5359 }, { "epoch": 8.17718505859375e-06, "step": 5359, "training_step_time": 0.11239409446716309 }, { "epoch": 8.1787109375e-06, "grad_norm": 0.4748266637325287, "learning_rate": 9.554176947418931e-05, "loss": 0.0804, "step": 5360 }, { "epoch": 8.1787109375e-06, "model_forward_time": 0.025731801986694336, "step": 5360 }, { "epoch": 8.1787109375e-06, "step": 5360, "training_step_time": 0.11655783653259277 }, { "epoch": 8.18023681640625e-06, "model_forward_time": 0.025305986404418945, "step": 5361 }, { "epoch": 8.18023681640625e-06, "step": 5361, "training_step_time": 0.11531305313110352 }, { "epoch": 8.1817626953125e-06, "model_forward_time": 0.02523660659790039, "step": 5362 }, { "epoch": 8.1817626953125e-06, "step": 5362, "training_step_time": 0.11222195625305176 }, { "epoch": 8.18328857421875e-06, "model_forward_time": 0.02534031867980957, "step": 5363 }, { "epoch": 8.18328857421875e-06, "step": 5363, "training_step_time": 0.22997641563415527 }, { "epoch": 8.184814453125e-06, "model_forward_time": 0.024592161178588867, "step": 5364 }, { "epoch": 8.184814453125e-06, "step": 5364, "training_step_time": 0.10897541046142578 }, { "epoch": 8.18634033203125e-06, "model_forward_time": 0.024296283721923828, "step": 5365 }, { "epoch": 8.18634033203125e-06, "step": 5365, "training_step_time": 0.14478683471679688 }, { "epoch": 8.1878662109375e-06, "model_forward_time": 0.025159597396850586, "step": 5366 }, { "epoch": 8.1878662109375e-06, "step": 5366, "training_step_time": 0.16909313201904297 }, { "epoch": 8.18939208984375e-06, "model_forward_time": 0.024129390716552734, "step": 5367 }, { "epoch": 8.18939208984375e-06, "step": 5367, "training_step_time": 0.17233514785766602 }, { "epoch": 8.19091796875e-06, "model_forward_time": 0.024161577224731445, "step": 5368 }, { "epoch": 8.19091796875e-06, "step": 5368, "training_step_time": 0.17670559883117676 }, { "epoch": 8.19244384765625e-06, "model_forward_time": 0.024163007736206055, "step": 5369 }, { "epoch": 8.19244384765625e-06, "step": 5369, "training_step_time": 0.11319446563720703 }, { "epoch": 8.1939697265625e-06, "grad_norm": 0.3221302330493927, "learning_rate": 9.551899173079607e-05, "loss": 0.0664, "step": 5370 }, { "epoch": 8.1939697265625e-06, "model_forward_time": 0.024658203125, "step": 5370 }, { "epoch": 8.1939697265625e-06, "step": 5370, "training_step_time": 0.13960003852844238 }, { "epoch": 8.19549560546875e-06, "model_forward_time": 0.025228023529052734, "step": 5371 }, { "epoch": 8.19549560546875e-06, "step": 5371, "training_step_time": 0.10997891426086426 }, { "epoch": 8.197021484375e-06, "model_forward_time": 0.025026321411132812, "step": 5372 }, { "epoch": 8.197021484375e-06, "step": 5372, "training_step_time": 0.12163496017456055 }, { "epoch": 8.19854736328125e-06, "model_forward_time": 0.024892330169677734, "step": 5373 }, { "epoch": 8.19854736328125e-06, "step": 5373, "training_step_time": 0.1429598331451416 }, { "epoch": 8.2000732421875e-06, "model_forward_time": 0.02488541603088379, "step": 5374 }, { "epoch": 8.2000732421875e-06, "step": 5374, "training_step_time": 0.17635488510131836 }, { "epoch": 8.20159912109375e-06, "model_forward_time": 0.02484607696533203, "step": 5375 }, { "epoch": 8.20159912109375e-06, "step": 5375, "training_step_time": 0.14194035530090332 }, { "epoch": 8.203125e-06, "model_forward_time": 0.023650169372558594, "step": 5376 }, { "epoch": 8.203125e-06, "step": 5376, "training_step_time": 0.12967753410339355 }, { "epoch": 8.20465087890625e-06, "model_forward_time": 0.023450136184692383, "step": 5377 }, { "epoch": 8.20465087890625e-06, "step": 5377, "training_step_time": 0.12477755546569824 }, { "epoch": 8.2061767578125e-06, "model_forward_time": 0.023659944534301758, "step": 5378 }, { "epoch": 8.2061767578125e-06, "step": 5378, "training_step_time": 0.1084284782409668 }, { "epoch": 8.20770263671875e-06, "model_forward_time": 0.024930715560913086, "step": 5379 }, { "epoch": 8.20770263671875e-06, "step": 5379, "training_step_time": 0.18893170356750488 }, { "epoch": 8.209228515625e-06, "grad_norm": 0.34595009684562683, "learning_rate": 9.549615867753573e-05, "loss": 0.0756, "step": 5380 }, { "epoch": 8.209228515625e-06, "model_forward_time": 0.02407526969909668, "step": 5380 }, { "epoch": 8.209228515625e-06, "step": 5380, "training_step_time": 0.11600518226623535 }, { "epoch": 8.21075439453125e-06, "model_forward_time": 0.02679133415222168, "step": 5381 }, { "epoch": 8.21075439453125e-06, "step": 5381, "training_step_time": 0.15187382698059082 }, { "epoch": 8.2122802734375e-06, "model_forward_time": 0.02459883689880371, "step": 5382 }, { "epoch": 8.2122802734375e-06, "step": 5382, "training_step_time": 0.206329345703125 }, { "epoch": 8.21380615234375e-06, "model_forward_time": 0.02478647232055664, "step": 5383 }, { "epoch": 8.21380615234375e-06, "step": 5383, "training_step_time": 0.10579752922058105 }, { "epoch": 8.21533203125e-06, "model_forward_time": 0.02439594268798828, "step": 5384 }, { "epoch": 8.21533203125e-06, "step": 5384, "training_step_time": 0.12673449516296387 }, { "epoch": 8.21685791015625e-06, "model_forward_time": 0.025148391723632812, "step": 5385 }, { "epoch": 8.21685791015625e-06, "step": 5385, "training_step_time": 0.10904955863952637 }, { "epoch": 8.2183837890625e-06, "model_forward_time": 0.02585768699645996, "step": 5386 }, { "epoch": 8.2183837890625e-06, "step": 5386, "training_step_time": 0.11176323890686035 }, { "epoch": 8.21990966796875e-06, "model_forward_time": 0.024792909622192383, "step": 5387 }, { "epoch": 8.21990966796875e-06, "step": 5387, "training_step_time": 0.11025881767272949 }, { "epoch": 8.221435546875e-06, "model_forward_time": 0.02603769302368164, "step": 5388 }, { "epoch": 8.221435546875e-06, "step": 5388, "training_step_time": 0.10681033134460449 }, { "epoch": 8.22296142578125e-06, "model_forward_time": 0.025087356567382812, "step": 5389 }, { "epoch": 8.22296142578125e-06, "step": 5389, "training_step_time": 0.10564279556274414 }, { "epoch": 8.2244873046875e-06, "grad_norm": 0.4957561194896698, "learning_rate": 9.54732703421526e-05, "loss": 0.0693, "step": 5390 }, { "epoch": 8.2244873046875e-06, "model_forward_time": 0.025244474411010742, "step": 5390 }, { "epoch": 8.2244873046875e-06, "step": 5390, "training_step_time": 0.10712337493896484 }, { "epoch": 8.22601318359375e-06, "model_forward_time": 0.02538919448852539, "step": 5391 }, { "epoch": 8.22601318359375e-06, "step": 5391, "training_step_time": 0.11452078819274902 }, { "epoch": 8.2275390625e-06, "model_forward_time": 0.025253772735595703, "step": 5392 }, { "epoch": 8.2275390625e-06, "step": 5392, "training_step_time": 0.11937642097473145 }, { "epoch": 8.22906494140625e-06, "model_forward_time": 0.02517557144165039, "step": 5393 }, { "epoch": 8.22906494140625e-06, "step": 5393, "training_step_time": 0.25300049781799316 }, { "epoch": 8.2305908203125e-06, "model_forward_time": 0.0240328311920166, "step": 5394 }, { "epoch": 8.2305908203125e-06, "step": 5394, "training_step_time": 0.2064368724822998 }, { "epoch": 8.23211669921875e-06, "model_forward_time": 0.023857593536376953, "step": 5395 }, { "epoch": 8.23211669921875e-06, "step": 5395, "training_step_time": 0.19422578811645508 }, { "epoch": 8.233642578125e-06, "model_forward_time": 0.02392411231994629, "step": 5396 }, { "epoch": 8.233642578125e-06, "step": 5396, "training_step_time": 0.1833946704864502 }, { "epoch": 8.23516845703125e-06, "model_forward_time": 0.02422046661376953, "step": 5397 }, { "epoch": 8.23516845703125e-06, "step": 5397, "training_step_time": 0.17303013801574707 }, { "epoch": 8.2366943359375e-06, "model_forward_time": 0.02458477020263672, "step": 5398 }, { "epoch": 8.2366943359375e-06, "step": 5398, "training_step_time": 0.16502642631530762 }, { "epoch": 8.23822021484375e-06, "model_forward_time": 0.02414679527282715, "step": 5399 }, { "epoch": 8.23822021484375e-06, "step": 5399, "training_step_time": 0.10382938385009766 }, { "epoch": 8.23974609375e-06, "grad_norm": 0.2264564335346222, "learning_rate": 9.545032675245813e-05, "loss": 0.0814, "step": 5400 }, { "epoch": 8.23974609375e-06, "model_forward_time": 0.024617433547973633, "step": 5400 }, { "epoch": 8.23974609375e-06, "step": 5400, "training_step_time": 0.10681724548339844 }, { "epoch": 8.24127197265625e-06, "model_forward_time": 0.025135040283203125, "step": 5401 }, { "epoch": 8.24127197265625e-06, "step": 5401, "training_step_time": 0.11032557487487793 }, { "epoch": 8.2427978515625e-06, "model_forward_time": 0.025371789932250977, "step": 5402 }, { "epoch": 8.2427978515625e-06, "step": 5402, "training_step_time": 0.10884857177734375 }, { "epoch": 8.24432373046875e-06, "model_forward_time": 0.0252377986907959, "step": 5403 }, { "epoch": 8.24432373046875e-06, "step": 5403, "training_step_time": 0.1079401969909668 }, { "epoch": 8.245849609375e-06, "model_forward_time": 0.02488398551940918, "step": 5404 }, { "epoch": 8.245849609375e-06, "step": 5404, "training_step_time": 0.15450048446655273 }, { "epoch": 8.24737548828125e-06, "model_forward_time": 0.024698495864868164, "step": 5405 }, { "epoch": 8.24737548828125e-06, "step": 5405, "training_step_time": 0.11373424530029297 }, { "epoch": 8.2489013671875e-06, "model_forward_time": 0.02431511878967285, "step": 5406 }, { "epoch": 8.2489013671875e-06, "step": 5406, "training_step_time": 0.12069821357727051 }, { "epoch": 8.25042724609375e-06, "model_forward_time": 0.02542257308959961, "step": 5407 }, { "epoch": 8.25042724609375e-06, "step": 5407, "training_step_time": 0.1241157054901123 }, { "epoch": 8.251953125e-06, "model_forward_time": 0.026100873947143555, "step": 5408 }, { "epoch": 8.251953125e-06, "step": 5408, "training_step_time": 0.16297125816345215 }, { "epoch": 8.25347900390625e-06, "model_forward_time": 0.02475762367248535, "step": 5409 }, { "epoch": 8.25347900390625e-06, "step": 5409, "training_step_time": 0.13051819801330566 }, { "epoch": 8.2550048828125e-06, "grad_norm": 0.3755182921886444, "learning_rate": 9.542732793633098e-05, "loss": 0.074, "step": 5410 }, { "epoch": 8.2550048828125e-06, "model_forward_time": 0.024434328079223633, "step": 5410 }, { "epoch": 8.2550048828125e-06, "step": 5410, "training_step_time": 0.19635677337646484 }, { "epoch": 8.25653076171875e-06, "model_forward_time": 0.024559736251831055, "step": 5411 }, { "epoch": 8.25653076171875e-06, "step": 5411, "training_step_time": 0.14107203483581543 }, { "epoch": 8.258056640625e-06, "model_forward_time": 0.024749279022216797, "step": 5412 }, { "epoch": 8.258056640625e-06, "step": 5412, "training_step_time": 0.10939240455627441 }, { "epoch": 8.25958251953125e-06, "model_forward_time": 0.02518486976623535, "step": 5413 }, { "epoch": 8.25958251953125e-06, "step": 5413, "training_step_time": 0.11303281784057617 }, { "epoch": 8.2611083984375e-06, "model_forward_time": 0.025122880935668945, "step": 5414 }, { "epoch": 8.2611083984375e-06, "step": 5414, "training_step_time": 0.11872005462646484 }, { "epoch": 8.26263427734375e-06, "model_forward_time": 0.02526712417602539, "step": 5415 }, { "epoch": 8.26263427734375e-06, "step": 5415, "training_step_time": 0.10767006874084473 }, { "epoch": 8.26416015625e-06, "model_forward_time": 0.02498149871826172, "step": 5416 }, { "epoch": 8.26416015625e-06, "step": 5416, "training_step_time": 0.19707155227661133 }, { "epoch": 8.26568603515625e-06, "model_forward_time": 0.023816347122192383, "step": 5417 }, { "epoch": 8.26568603515625e-06, "step": 5417, "training_step_time": 0.10553479194641113 }, { "epoch": 8.2672119140625e-06, "model_forward_time": 0.024667024612426758, "step": 5418 }, { "epoch": 8.2672119140625e-06, "step": 5418, "training_step_time": 0.10673952102661133 }, { "epoch": 8.26873779296875e-06, "model_forward_time": 0.025097370147705078, "step": 5419 }, { "epoch": 8.26873779296875e-06, "step": 5419, "training_step_time": 0.11037850379943848 }, { "epoch": 8.270263671875e-06, "grad_norm": 0.3703526258468628, "learning_rate": 9.540427392171688e-05, "loss": 0.0823, "step": 5420 }, { "epoch": 8.270263671875e-06, "model_forward_time": 0.025620222091674805, "step": 5420 }, { "epoch": 8.270263671875e-06, "step": 5420, "training_step_time": 0.11850118637084961 }, { "epoch": 8.27178955078125e-06, "model_forward_time": 0.025363683700561523, "step": 5421 }, { "epoch": 8.27178955078125e-06, "step": 5421, "training_step_time": 0.11427664756774902 }, { "epoch": 8.2733154296875e-06, "model_forward_time": 0.02570056915283203, "step": 5422 }, { "epoch": 8.2733154296875e-06, "step": 5422, "training_step_time": 0.11615395545959473 }, { "epoch": 8.27484130859375e-06, "model_forward_time": 0.02510213851928711, "step": 5423 }, { "epoch": 8.27484130859375e-06, "step": 5423, "training_step_time": 0.11487960815429688 }, { "epoch": 8.2763671875e-06, "model_forward_time": 0.024814844131469727, "step": 5424 }, { "epoch": 8.2763671875e-06, "step": 5424, "training_step_time": 0.2116868495941162 }, { "epoch": 8.27789306640625e-06, "model_forward_time": 0.023987293243408203, "step": 5425 }, { "epoch": 8.27789306640625e-06, "step": 5425, "training_step_time": 0.13030052185058594 }, { "epoch": 8.2794189453125e-06, "model_forward_time": 0.023725509643554688, "step": 5426 }, { "epoch": 8.2794189453125e-06, "step": 5426, "training_step_time": 0.12003946304321289 }, { "epoch": 8.28094482421875e-06, "model_forward_time": 0.024985313415527344, "step": 5427 }, { "epoch": 8.28094482421875e-06, "step": 5427, "training_step_time": 0.11780261993408203 }, { "epoch": 8.282470703125e-06, "model_forward_time": 0.02527785301208496, "step": 5428 }, { "epoch": 8.282470703125e-06, "step": 5428, "training_step_time": 0.10997509956359863 }, { "epoch": 8.28399658203125e-06, "model_forward_time": 0.025172948837280273, "step": 5429 }, { "epoch": 8.28399658203125e-06, "step": 5429, "training_step_time": 0.11288118362426758 }, { "epoch": 8.2855224609375e-06, "grad_norm": 0.38096684217453003, "learning_rate": 9.538116473662861e-05, "loss": 0.0823, "step": 5430 }, { "epoch": 8.2855224609375e-06, "model_forward_time": 0.02511882781982422, "step": 5430 }, { "epoch": 8.2855224609375e-06, "step": 5430, "training_step_time": 0.10942912101745605 }, { "epoch": 8.28704833984375e-06, "model_forward_time": 0.024660110473632812, "step": 5431 }, { "epoch": 8.28704833984375e-06, "step": 5431, "training_step_time": 0.10489344596862793 }, { "epoch": 8.28857421875e-06, "model_forward_time": 0.024590253829956055, "step": 5432 }, { "epoch": 8.28857421875e-06, "step": 5432, "training_step_time": 0.11569452285766602 }, { "epoch": 8.29010009765625e-06, "model_forward_time": 0.02505016326904297, "step": 5433 }, { "epoch": 8.29010009765625e-06, "step": 5433, "training_step_time": 0.1094369888305664 }, { "epoch": 8.2916259765625e-06, "model_forward_time": 0.025224685668945312, "step": 5434 }, { "epoch": 8.2916259765625e-06, "step": 5434, "training_step_time": 0.10945534706115723 }, { "epoch": 8.29315185546875e-06, "model_forward_time": 0.025134563446044922, "step": 5435 }, { "epoch": 8.29315185546875e-06, "step": 5435, "training_step_time": 0.17635011672973633 }, { "epoch": 8.294677734375e-06, "model_forward_time": 0.02438831329345703, "step": 5436 }, { "epoch": 8.294677734375e-06, "step": 5436, "training_step_time": 0.15650248527526855 }, { "epoch": 8.29620361328125e-06, "model_forward_time": 0.024444103240966797, "step": 5437 }, { "epoch": 8.29620361328125e-06, "step": 5437, "training_step_time": 0.10532855987548828 }, { "epoch": 8.2977294921875e-06, "model_forward_time": 0.024596452713012695, "step": 5438 }, { "epoch": 8.2977294921875e-06, "step": 5438, "training_step_time": 0.1057741641998291 }, { "epoch": 8.29925537109375e-06, "model_forward_time": 0.025053977966308594, "step": 5439 }, { "epoch": 8.29925537109375e-06, "step": 5439, "training_step_time": 0.11162543296813965 }, { "epoch": 8.30078125e-06, "grad_norm": 0.45108526945114136, "learning_rate": 9.535800040914601e-05, "loss": 0.0933, "step": 5440 }, { "epoch": 8.30078125e-06, "model_forward_time": 0.025286436080932617, "step": 5440 }, { "epoch": 8.30078125e-06, "step": 5440, "training_step_time": 0.10928082466125488 }, { "epoch": 8.30230712890625e-06, "model_forward_time": 0.025066852569580078, "step": 5441 }, { "epoch": 8.30230712890625e-06, "step": 5441, "training_step_time": 0.10747909545898438 }, { "epoch": 8.3038330078125e-06, "model_forward_time": 0.02475118637084961, "step": 5442 }, { "epoch": 8.3038330078125e-06, "step": 5442, "training_step_time": 0.10696268081665039 }, { "epoch": 8.30535888671875e-06, "model_forward_time": 0.025704145431518555, "step": 5443 }, { "epoch": 8.30535888671875e-06, "step": 5443, "training_step_time": 0.1087336540222168 }, { "epoch": 8.306884765625e-06, "model_forward_time": 0.024983882904052734, "step": 5444 }, { "epoch": 8.306884765625e-06, "step": 5444, "training_step_time": 0.10955119132995605 }, { "epoch": 8.30841064453125e-06, "model_forward_time": 0.024893760681152344, "step": 5445 }, { "epoch": 8.30841064453125e-06, "step": 5445, "training_step_time": 0.11089968681335449 }, { "epoch": 8.3099365234375e-06, "model_forward_time": 0.02477264404296875, "step": 5446 }, { "epoch": 8.3099365234375e-06, "step": 5446, "training_step_time": 0.11092591285705566 }, { "epoch": 8.31146240234375e-06, "model_forward_time": 0.025128602981567383, "step": 5447 }, { "epoch": 8.31146240234375e-06, "step": 5447, "training_step_time": 0.11223387718200684 }, { "epoch": 8.31298828125e-06, "model_forward_time": 0.025223493576049805, "step": 5448 }, { "epoch": 8.31298828125e-06, "step": 5448, "training_step_time": 0.11142301559448242 }, { "epoch": 8.31451416015625e-06, "model_forward_time": 0.024811267852783203, "step": 5449 }, { "epoch": 8.31451416015625e-06, "step": 5449, "training_step_time": 0.17998123168945312 }, { "epoch": 8.3160400390625e-06, "grad_norm": 0.5918139219284058, "learning_rate": 9.533478096741597e-05, "loss": 0.0578, "step": 5450 }, { "epoch": 8.3160400390625e-06, "model_forward_time": 0.024448156356811523, "step": 5450 }, { "epoch": 8.3160400390625e-06, "step": 5450, "training_step_time": 0.1714034080505371 }, { "epoch": 8.31756591796875e-06, "model_forward_time": 0.024703025817871094, "step": 5451 }, { "epoch": 8.31756591796875e-06, "step": 5451, "training_step_time": 0.10742902755737305 }, { "epoch": 8.319091796875e-06, "model_forward_time": 0.024678945541381836, "step": 5452 }, { "epoch": 8.319091796875e-06, "step": 5452, "training_step_time": 0.11219930648803711 }, { "epoch": 8.32061767578125e-06, "model_forward_time": 0.024983644485473633, "step": 5453 }, { "epoch": 8.32061767578125e-06, "step": 5453, "training_step_time": 0.10629034042358398 }, { "epoch": 8.3221435546875e-06, "model_forward_time": 0.025378942489624023, "step": 5454 }, { "epoch": 8.3221435546875e-06, "step": 5454, "training_step_time": 0.11167120933532715 }, { "epoch": 8.32366943359375e-06, "model_forward_time": 0.02494645118713379, "step": 5455 }, { "epoch": 8.32366943359375e-06, "step": 5455, "training_step_time": 0.1153714656829834 }, { "epoch": 8.3251953125e-06, "model_forward_time": 0.02495431900024414, "step": 5456 }, { "epoch": 8.3251953125e-06, "step": 5456, "training_step_time": 0.16915011405944824 }, { "epoch": 8.32672119140625e-06, "model_forward_time": 0.024498462677001953, "step": 5457 }, { "epoch": 8.32672119140625e-06, "step": 5457, "training_step_time": 0.1372990608215332 }, { "epoch": 8.3282470703125e-06, "model_forward_time": 0.024845600128173828, "step": 5458 }, { "epoch": 8.3282470703125e-06, "step": 5458, "training_step_time": 0.11327838897705078 }, { "epoch": 8.32977294921875e-06, "model_forward_time": 0.025232791900634766, "step": 5459 }, { "epoch": 8.32977294921875e-06, "step": 5459, "training_step_time": 0.1132197380065918 }, { "epoch": 8.331298828125e-06, "grad_norm": 0.5240521430969238, "learning_rate": 9.531150643965223e-05, "loss": 0.0678, "step": 5460 }, { "epoch": 8.331298828125e-06, "model_forward_time": 0.024753093719482422, "step": 5460 }, { "epoch": 8.331298828125e-06, "step": 5460, "training_step_time": 0.11138176918029785 }, { "epoch": 8.33282470703125e-06, "model_forward_time": 0.02484750747680664, "step": 5461 }, { "epoch": 8.33282470703125e-06, "step": 5461, "training_step_time": 0.1871335506439209 }, { "epoch": 8.3343505859375e-06, "model_forward_time": 0.024805307388305664, "step": 5462 }, { "epoch": 8.3343505859375e-06, "step": 5462, "training_step_time": 0.11164474487304688 }, { "epoch": 8.33587646484375e-06, "model_forward_time": 0.024579286575317383, "step": 5463 }, { "epoch": 8.33587646484375e-06, "step": 5463, "training_step_time": 0.10885429382324219 }, { "epoch": 8.33740234375e-06, "model_forward_time": 0.02498483657836914, "step": 5464 }, { "epoch": 8.33740234375e-06, "step": 5464, "training_step_time": 0.10873198509216309 }, { "epoch": 8.33892822265625e-06, "model_forward_time": 0.025329113006591797, "step": 5465 }, { "epoch": 8.33892822265625e-06, "step": 5465, "training_step_time": 0.11266231536865234 }, { "epoch": 8.3404541015625e-06, "model_forward_time": 0.026561260223388672, "step": 5466 }, { "epoch": 8.3404541015625e-06, "step": 5466, "training_step_time": 0.10968375205993652 }, { "epoch": 8.34197998046875e-06, "model_forward_time": 0.025075197219848633, "step": 5467 }, { "epoch": 8.34197998046875e-06, "step": 5467, "training_step_time": 0.10701274871826172 }, { "epoch": 8.343505859375e-06, "model_forward_time": 0.0249021053314209, "step": 5468 }, { "epoch": 8.343505859375e-06, "step": 5468, "training_step_time": 0.14616847038269043 }, { "epoch": 8.34503173828125e-06, "model_forward_time": 0.02481365203857422, "step": 5469 }, { "epoch": 8.34503173828125e-06, "step": 5469, "training_step_time": 0.11165738105773926 }, { "epoch": 8.3465576171875e-06, "grad_norm": 0.28891709446907043, "learning_rate": 9.528817685413558e-05, "loss": 0.0659, "step": 5470 }, { "epoch": 8.3465576171875e-06, "model_forward_time": 0.024705171585083008, "step": 5470 }, { "epoch": 8.3465576171875e-06, "step": 5470, "training_step_time": 0.22222423553466797 }, { "epoch": 8.34808349609375e-06, "model_forward_time": 0.024585723876953125, "step": 5471 }, { "epoch": 8.34808349609375e-06, "step": 5471, "training_step_time": 0.13445329666137695 }, { "epoch": 8.349609375e-06, "model_forward_time": 0.02397942543029785, "step": 5472 }, { "epoch": 8.349609375e-06, "step": 5472, "training_step_time": 0.11018991470336914 }, { "epoch": 8.35113525390625e-06, "model_forward_time": 0.025214433670043945, "step": 5473 }, { "epoch": 8.35113525390625e-06, "step": 5473, "training_step_time": 0.1356046199798584 }, { "epoch": 8.3526611328125e-06, "model_forward_time": 0.025006771087646484, "step": 5474 }, { "epoch": 8.3526611328125e-06, "step": 5474, "training_step_time": 0.15095186233520508 }, { "epoch": 8.35418701171875e-06, "model_forward_time": 0.024355173110961914, "step": 5475 }, { "epoch": 8.35418701171875e-06, "step": 5475, "training_step_time": 0.10936498641967773 }, { "epoch": 8.355712890625e-06, "model_forward_time": 0.024937152862548828, "step": 5476 }, { "epoch": 8.355712890625e-06, "step": 5476, "training_step_time": 0.13046550750732422 }, { "epoch": 8.35723876953125e-06, "model_forward_time": 0.024749755859375, "step": 5477 }, { "epoch": 8.35723876953125e-06, "step": 5477, "training_step_time": 0.19885826110839844 }, { "epoch": 8.3587646484375e-06, "model_forward_time": 0.024549245834350586, "step": 5478 }, { "epoch": 8.3587646484375e-06, "step": 5478, "training_step_time": 0.12249517440795898 }, { "epoch": 8.36029052734375e-06, "model_forward_time": 0.024651050567626953, "step": 5479 }, { "epoch": 8.36029052734375e-06, "step": 5479, "training_step_time": 0.21123838424682617 }, { "epoch": 8.36181640625e-06, "grad_norm": 0.5504735112190247, "learning_rate": 9.526479223921366e-05, "loss": 0.0731, "step": 5480 }, { "epoch": 8.36181640625e-06, "model_forward_time": 0.024690628051757812, "step": 5480 }, { "epoch": 8.36181640625e-06, "step": 5480, "training_step_time": 0.11457467079162598 }, { "epoch": 8.36334228515625e-06, "model_forward_time": 0.024468660354614258, "step": 5481 }, { "epoch": 8.36334228515625e-06, "step": 5481, "training_step_time": 0.10823631286621094 }, { "epoch": 8.3648681640625e-06, "model_forward_time": 0.02501821517944336, "step": 5482 }, { "epoch": 8.3648681640625e-06, "step": 5482, "training_step_time": 0.10824394226074219 }, { "epoch": 8.36639404296875e-06, "model_forward_time": 0.025504112243652344, "step": 5483 }, { "epoch": 8.36639404296875e-06, "step": 5483, "training_step_time": 0.1091454029083252 }, { "epoch": 8.367919921875e-06, "model_forward_time": 0.025091886520385742, "step": 5484 }, { "epoch": 8.367919921875e-06, "step": 5484, "training_step_time": 0.10916900634765625 }, { "epoch": 8.36944580078125e-06, "model_forward_time": 0.025087833404541016, "step": 5485 }, { "epoch": 8.36944580078125e-06, "step": 5485, "training_step_time": 0.10770893096923828 }, { "epoch": 8.3709716796875e-06, "model_forward_time": 0.02560257911682129, "step": 5486 }, { "epoch": 8.3709716796875e-06, "step": 5486, "training_step_time": 0.11017322540283203 }, { "epoch": 8.37249755859375e-06, "model_forward_time": 0.02578258514404297, "step": 5487 }, { "epoch": 8.37249755859375e-06, "step": 5487, "training_step_time": 0.11110877990722656 }, { "epoch": 8.3740234375e-06, "model_forward_time": 0.024693965911865234, "step": 5488 }, { "epoch": 8.3740234375e-06, "step": 5488, "training_step_time": 0.11010169982910156 }, { "epoch": 8.37554931640625e-06, "model_forward_time": 0.025443553924560547, "step": 5489 }, { "epoch": 8.37554931640625e-06, "step": 5489, "training_step_time": 0.11905550956726074 }, { "epoch": 8.3770751953125e-06, "grad_norm": 0.4239727258682251, "learning_rate": 9.524135262330098e-05, "loss": 0.071, "step": 5490 }, { "epoch": 8.3770751953125e-06, "model_forward_time": 0.025124549865722656, "step": 5490 }, { "epoch": 8.3770751953125e-06, "step": 5490, "training_step_time": 0.10994172096252441 }, { "epoch": 8.37860107421875e-06, "model_forward_time": 0.02520608901977539, "step": 5491 }, { "epoch": 8.37860107421875e-06, "step": 5491, "training_step_time": 0.1077728271484375 }, { "epoch": 8.380126953125e-06, "model_forward_time": 0.025278568267822266, "step": 5492 }, { "epoch": 8.380126953125e-06, "step": 5492, "training_step_time": 0.1076817512512207 }, { "epoch": 8.38165283203125e-06, "model_forward_time": 0.025472640991210938, "step": 5493 }, { "epoch": 8.38165283203125e-06, "step": 5493, "training_step_time": 0.10670733451843262 }, { "epoch": 8.3831787109375e-06, "model_forward_time": 0.025128602981567383, "step": 5494 }, { "epoch": 8.3831787109375e-06, "step": 5494, "training_step_time": 0.14341998100280762 }, { "epoch": 8.38470458984375e-06, "model_forward_time": 0.02534651756286621, "step": 5495 }, { "epoch": 8.38470458984375e-06, "step": 5495, "training_step_time": 0.1097710132598877 }, { "epoch": 8.38623046875e-06, "model_forward_time": 0.0252532958984375, "step": 5496 }, { "epoch": 8.38623046875e-06, "step": 5496, "training_step_time": 0.13894319534301758 }, { "epoch": 8.38775634765625e-06, "model_forward_time": 0.02528667449951172, "step": 5497 }, { "epoch": 8.38775634765625e-06, "step": 5497, "training_step_time": 0.11038494110107422 }, { "epoch": 8.3892822265625e-06, "model_forward_time": 0.025043487548828125, "step": 5498 }, { "epoch": 8.3892822265625e-06, "step": 5498, "training_step_time": 0.17298007011413574 }, { "epoch": 8.39080810546875e-06, "model_forward_time": 0.024107933044433594, "step": 5499 }, { "epoch": 8.39080810546875e-06, "step": 5499, "training_step_time": 0.13412261009216309 }, { "epoch": 8.392333984375e-06, "grad_norm": 0.356250137090683, "learning_rate": 9.521785803487889e-05, "loss": 0.1008, "step": 5500 }, { "epoch": 8.392333984375e-06, "model_forward_time": 0.024596214294433594, "step": 5500 }, { "epoch": 8.392333984375e-06, "step": 5500, "training_step_time": 0.13425779342651367 }, { "epoch": 8.39385986328125e-06, "model_forward_time": 0.024463891983032227, "step": 5501 }, { "epoch": 8.39385986328125e-06, "step": 5501, "training_step_time": 0.1372542381286621 }, { "epoch": 8.3953857421875e-06, "model_forward_time": 0.02429485321044922, "step": 5502 }, { "epoch": 8.3953857421875e-06, "step": 5502, "training_step_time": 0.11211919784545898 }, { "epoch": 8.39691162109375e-06, "model_forward_time": 0.024601221084594727, "step": 5503 }, { "epoch": 8.39691162109375e-06, "step": 5503, "training_step_time": 0.11571311950683594 }, { "epoch": 8.3984375e-06, "model_forward_time": 0.025115251541137695, "step": 5504 }, { "epoch": 8.3984375e-06, "step": 5504, "training_step_time": 0.11552047729492188 }, { "epoch": 8.39996337890625e-06, "model_forward_time": 0.025099992752075195, "step": 5505 }, { "epoch": 8.39996337890625e-06, "step": 5505, "training_step_time": 0.15906310081481934 }, { "epoch": 8.4014892578125e-06, "model_forward_time": 0.024593830108642578, "step": 5506 }, { "epoch": 8.4014892578125e-06, "step": 5506, "training_step_time": 0.14723944664001465 }, { "epoch": 8.40301513671875e-06, "model_forward_time": 0.024686098098754883, "step": 5507 }, { "epoch": 8.40301513671875e-06, "step": 5507, "training_step_time": 0.10294818878173828 }, { "epoch": 8.404541015625e-06, "model_forward_time": 0.024959564208984375, "step": 5508 }, { "epoch": 8.404541015625e-06, "step": 5508, "training_step_time": 0.10719561576843262 }, { "epoch": 8.40606689453125e-06, "model_forward_time": 0.025610923767089844, "step": 5509 }, { "epoch": 8.40606689453125e-06, "step": 5509, "training_step_time": 0.10536742210388184 }, { "epoch": 8.4075927734375e-06, "grad_norm": 0.6870419979095459, "learning_rate": 9.51943085024955e-05, "loss": 0.0899, "step": 5510 }, { "epoch": 8.4075927734375e-06, "model_forward_time": 0.024843692779541016, "step": 5510 }, { "epoch": 8.4075927734375e-06, "step": 5510, "training_step_time": 0.11047911643981934 }, { "epoch": 8.40911865234375e-06, "model_forward_time": 0.0254213809967041, "step": 5511 }, { "epoch": 8.40911865234375e-06, "step": 5511, "training_step_time": 0.11006999015808105 }, { "epoch": 8.41064453125e-06, "model_forward_time": 0.025430679321289062, "step": 5512 }, { "epoch": 8.41064453125e-06, "step": 5512, "training_step_time": 0.18528962135314941 }, { "epoch": 8.41217041015625e-06, "model_forward_time": 0.025561094284057617, "step": 5513 }, { "epoch": 8.41217041015625e-06, "step": 5513, "training_step_time": 0.11009383201599121 }, { "epoch": 8.4136962890625e-06, "model_forward_time": 0.02498793601989746, "step": 5514 }, { "epoch": 8.4136962890625e-06, "step": 5514, "training_step_time": 0.10881209373474121 }, { "epoch": 8.41522216796875e-06, "model_forward_time": 0.02525949478149414, "step": 5515 }, { "epoch": 8.41522216796875e-06, "step": 5515, "training_step_time": 0.11917877197265625 }, { "epoch": 8.416748046875e-06, "model_forward_time": 0.025124073028564453, "step": 5516 }, { "epoch": 8.416748046875e-06, "step": 5516, "training_step_time": 0.1311483383178711 }, { "epoch": 8.41827392578125e-06, "model_forward_time": 0.025047779083251953, "step": 5517 }, { "epoch": 8.41827392578125e-06, "step": 5517, "training_step_time": 0.1192178726196289 }, { "epoch": 8.4197998046875e-06, "model_forward_time": 0.02488422393798828, "step": 5518 }, { "epoch": 8.4197998046875e-06, "step": 5518, "training_step_time": 0.1235342025756836 }, { "epoch": 8.42132568359375e-06, "model_forward_time": 0.02543044090270996, "step": 5519 }, { "epoch": 8.42132568359375e-06, "step": 5519, "training_step_time": 0.11320638656616211 }, { "epoch": 8.4228515625e-06, "grad_norm": 0.36433157324790955, "learning_rate": 9.517070405476575e-05, "loss": 0.0688, "step": 5520 }, { "epoch": 8.4228515625e-06, "model_forward_time": 0.025127410888671875, "step": 5520 }, { "epoch": 8.4228515625e-06, "step": 5520, "training_step_time": 0.1095728874206543 }, { "epoch": 8.42437744140625e-06, "model_forward_time": 0.024392366409301758, "step": 5521 }, { "epoch": 8.42437744140625e-06, "step": 5521, "training_step_time": 0.16790127754211426 }, { "epoch": 8.4259033203125e-06, "model_forward_time": 0.02427506446838379, "step": 5522 }, { "epoch": 8.4259033203125e-06, "step": 5522, "training_step_time": 0.16735625267028809 }, { "epoch": 8.42742919921875e-06, "model_forward_time": 0.024598121643066406, "step": 5523 }, { "epoch": 8.42742919921875e-06, "step": 5523, "training_step_time": 0.10605931282043457 }, { "epoch": 8.428955078125e-06, "model_forward_time": 0.024770498275756836, "step": 5524 }, { "epoch": 8.428955078125e-06, "step": 5524, "training_step_time": 0.172194242477417 }, { "epoch": 8.43048095703125e-06, "model_forward_time": 0.024447202682495117, "step": 5525 }, { "epoch": 8.43048095703125e-06, "step": 5525, "training_step_time": 0.16829872131347656 }, { "epoch": 8.4320068359375e-06, "model_forward_time": 0.02463245391845703, "step": 5526 }, { "epoch": 8.4320068359375e-06, "step": 5526, "training_step_time": 0.10637164115905762 }, { "epoch": 8.43353271484375e-06, "model_forward_time": 0.02500438690185547, "step": 5527 }, { "epoch": 8.43353271484375e-06, "step": 5527, "training_step_time": 0.10713505744934082 }, { "epoch": 8.43505859375e-06, "model_forward_time": 0.02534627914428711, "step": 5528 }, { "epoch": 8.43505859375e-06, "step": 5528, "training_step_time": 0.10852265357971191 }, { "epoch": 8.43658447265625e-06, "model_forward_time": 0.027458667755126953, "step": 5529 }, { "epoch": 8.43658447265625e-06, "step": 5529, "training_step_time": 0.1109466552734375 }, { "epoch": 8.4381103515625e-06, "grad_norm": 0.5929972529411316, "learning_rate": 9.514704472037123e-05, "loss": 0.0789, "step": 5530 }, { "epoch": 8.4381103515625e-06, "model_forward_time": 0.024906635284423828, "step": 5530 }, { "epoch": 8.4381103515625e-06, "step": 5530, "training_step_time": 0.11322665214538574 }, { "epoch": 8.43963623046875e-06, "model_forward_time": 0.026383638381958008, "step": 5531 }, { "epoch": 8.43963623046875e-06, "step": 5531, "training_step_time": 0.1084601879119873 }, { "epoch": 8.441162109375e-06, "model_forward_time": 0.025345325469970703, "step": 5532 }, { "epoch": 8.441162109375e-06, "step": 5532, "training_step_time": 0.1101071834564209 }, { "epoch": 8.44268798828125e-06, "model_forward_time": 0.024779319763183594, "step": 5533 }, { "epoch": 8.44268798828125e-06, "step": 5533, "training_step_time": 0.10842418670654297 }, { "epoch": 8.4442138671875e-06, "model_forward_time": 0.02519989013671875, "step": 5534 }, { "epoch": 8.4442138671875e-06, "step": 5534, "training_step_time": 0.10825228691101074 }, { "epoch": 8.44573974609375e-06, "model_forward_time": 0.02506279945373535, "step": 5535 }, { "epoch": 8.44573974609375e-06, "step": 5535, "training_step_time": 0.1068274974822998 }, { "epoch": 8.447265625e-06, "model_forward_time": 0.024892091751098633, "step": 5536 }, { "epoch": 8.447265625e-06, "step": 5536, "training_step_time": 0.11709976196289062 }, { "epoch": 8.44879150390625e-06, "model_forward_time": 0.025391101837158203, "step": 5537 }, { "epoch": 8.44879150390625e-06, "step": 5537, "training_step_time": 0.10761404037475586 }, { "epoch": 8.4503173828125e-06, "model_forward_time": 0.025211334228515625, "step": 5538 }, { "epoch": 8.4503173828125e-06, "step": 5538, "training_step_time": 0.10755133628845215 }, { "epoch": 8.45184326171875e-06, "model_forward_time": 0.025348663330078125, "step": 5539 }, { "epoch": 8.45184326171875e-06, "step": 5539, "training_step_time": 0.11601042747497559 }, { "epoch": 8.453369140625e-06, "grad_norm": 0.5573577284812927, "learning_rate": 9.512333052806033e-05, "loss": 0.0686, "step": 5540 }, { "epoch": 8.453369140625e-06, "model_forward_time": 0.025214195251464844, "step": 5540 }, { "epoch": 8.453369140625e-06, "step": 5540, "training_step_time": 0.11416077613830566 }, { "epoch": 8.45489501953125e-06, "model_forward_time": 0.025387048721313477, "step": 5541 }, { "epoch": 8.45489501953125e-06, "step": 5541, "training_step_time": 0.11627912521362305 }, { "epoch": 8.4564208984375e-06, "model_forward_time": 0.024802207946777344, "step": 5542 }, { "epoch": 8.4564208984375e-06, "step": 5542, "training_step_time": 0.13913702964782715 }, { "epoch": 8.45794677734375e-06, "model_forward_time": 0.025142431259155273, "step": 5543 }, { "epoch": 8.45794677734375e-06, "step": 5543, "training_step_time": 0.15565252304077148 }, { "epoch": 8.45947265625e-06, "model_forward_time": 0.02472662925720215, "step": 5544 }, { "epoch": 8.45947265625e-06, "step": 5544, "training_step_time": 0.20890450477600098 }, { "epoch": 8.46099853515625e-06, "model_forward_time": 0.024294376373291016, "step": 5545 }, { "epoch": 8.46099853515625e-06, "step": 5545, "training_step_time": 0.14517688751220703 }, { "epoch": 8.4625244140625e-06, "model_forward_time": 0.02404952049255371, "step": 5546 }, { "epoch": 8.4625244140625e-06, "step": 5546, "training_step_time": 0.134260892868042 }, { "epoch": 8.46405029296875e-06, "model_forward_time": 0.02417159080505371, "step": 5547 }, { "epoch": 8.46405029296875e-06, "step": 5547, "training_step_time": 0.11555767059326172 }, { "epoch": 8.465576171875e-06, "model_forward_time": 0.02479243278503418, "step": 5548 }, { "epoch": 8.465576171875e-06, "step": 5548, "training_step_time": 0.12091636657714844 }, { "epoch": 8.46710205078125e-06, "model_forward_time": 0.025285959243774414, "step": 5549 }, { "epoch": 8.46710205078125e-06, "step": 5549, "training_step_time": 0.11031556129455566 }, { "epoch": 8.4686279296875e-06, "grad_norm": 0.539337158203125, "learning_rate": 9.509956150664796e-05, "loss": 0.0709, "step": 5550 }, { "epoch": 8.4686279296875e-06, "model_forward_time": 0.025218486785888672, "step": 5550 }, { "epoch": 8.4686279296875e-06, "step": 5550, "training_step_time": 0.15806031227111816 }, { "epoch": 8.47015380859375e-06, "model_forward_time": 0.024510622024536133, "step": 5551 }, { "epoch": 8.47015380859375e-06, "step": 5551, "training_step_time": 0.15035152435302734 }, { "epoch": 8.4716796875e-06, "model_forward_time": 0.025373220443725586, "step": 5552 }, { "epoch": 8.4716796875e-06, "step": 5552, "training_step_time": 0.11540603637695312 }, { "epoch": 8.47320556640625e-06, "model_forward_time": 0.024956703186035156, "step": 5553 }, { "epoch": 8.47320556640625e-06, "step": 5553, "training_step_time": 0.10674762725830078 }, { "epoch": 8.4747314453125e-06, "model_forward_time": 0.02509284019470215, "step": 5554 }, { "epoch": 8.4747314453125e-06, "step": 5554, "training_step_time": 0.11182975769042969 }, { "epoch": 8.47625732421875e-06, "model_forward_time": 0.025166988372802734, "step": 5555 }, { "epoch": 8.47625732421875e-06, "step": 5555, "training_step_time": 0.10920238494873047 }, { "epoch": 8.477783203125e-06, "model_forward_time": 0.024697065353393555, "step": 5556 }, { "epoch": 8.477783203125e-06, "step": 5556, "training_step_time": 0.10861468315124512 }, { "epoch": 8.47930908203125e-06, "model_forward_time": 0.025240659713745117, "step": 5557 }, { "epoch": 8.47930908203125e-06, "step": 5557, "training_step_time": 0.1769580841064453 }, { "epoch": 8.4808349609375e-06, "model_forward_time": 0.02511906623840332, "step": 5558 }, { "epoch": 8.4808349609375e-06, "step": 5558, "training_step_time": 0.14783430099487305 }, { "epoch": 8.48236083984375e-06, "model_forward_time": 0.02468705177307129, "step": 5559 }, { "epoch": 8.48236083984375e-06, "step": 5559, "training_step_time": 0.10929179191589355 }, { "epoch": 8.48388671875e-06, "grad_norm": 0.40480631589889526, "learning_rate": 9.507573768501574e-05, "loss": 0.0729, "step": 5560 }, { "epoch": 8.48388671875e-06, "model_forward_time": 0.028118371963500977, "step": 5560 }, { "epoch": 8.48388671875e-06, "step": 5560, "training_step_time": 0.11645936965942383 }, { "epoch": 8.48541259765625e-06, "model_forward_time": 0.025391101837158203, "step": 5561 }, { "epoch": 8.48541259765625e-06, "step": 5561, "training_step_time": 0.13700103759765625 }, { "epoch": 8.4869384765625e-06, "model_forward_time": 0.025828838348388672, "step": 5562 }, { "epoch": 8.4869384765625e-06, "step": 5562, "training_step_time": 0.10600495338439941 }, { "epoch": 8.48846435546875e-06, "model_forward_time": 0.025227069854736328, "step": 5563 }, { "epoch": 8.48846435546875e-06, "step": 5563, "training_step_time": 0.1769402027130127 }, { "epoch": 8.489990234375e-06, "model_forward_time": 0.02561020851135254, "step": 5564 }, { "epoch": 8.489990234375e-06, "step": 5564, "training_step_time": 0.10607075691223145 }, { "epoch": 8.49151611328125e-06, "model_forward_time": 0.02473282814025879, "step": 5565 }, { "epoch": 8.49151611328125e-06, "step": 5565, "training_step_time": 0.10555696487426758 }, { "epoch": 8.4930419921875e-06, "model_forward_time": 0.02570319175720215, "step": 5566 }, { "epoch": 8.4930419921875e-06, "step": 5566, "training_step_time": 0.1150972843170166 }, { "epoch": 8.49456787109375e-06, "model_forward_time": 0.028559446334838867, "step": 5567 }, { "epoch": 8.49456787109375e-06, "step": 5567, "training_step_time": 0.11337924003601074 }, { "epoch": 8.49609375e-06, "model_forward_time": 0.025995731353759766, "step": 5568 }, { "epoch": 8.49609375e-06, "step": 5568, "training_step_time": 0.10977768898010254 }, { "epoch": 8.49761962890625e-06, "model_forward_time": 0.025331497192382812, "step": 5569 }, { "epoch": 8.49761962890625e-06, "step": 5569, "training_step_time": 0.21548223495483398 }, { "epoch": 8.4991455078125e-06, "grad_norm": 0.43420183658599854, "learning_rate": 9.505185909211188e-05, "loss": 0.0852, "step": 5570 }, { "epoch": 8.4991455078125e-06, "model_forward_time": 0.02463364601135254, "step": 5570 }, { "epoch": 8.4991455078125e-06, "step": 5570, "training_step_time": 0.1160881519317627 }, { "epoch": 8.50067138671875e-06, "model_forward_time": 0.024601459503173828, "step": 5571 }, { "epoch": 8.50067138671875e-06, "step": 5571, "training_step_time": 0.10824370384216309 }, { "epoch": 8.502197265625e-06, "model_forward_time": 0.02516961097717285, "step": 5572 }, { "epoch": 8.502197265625e-06, "step": 5572, "training_step_time": 0.11001777648925781 }, { "epoch": 8.50372314453125e-06, "model_forward_time": 0.025398969650268555, "step": 5573 }, { "epoch": 8.50372314453125e-06, "step": 5573, "training_step_time": 0.10730361938476562 }, { "epoch": 8.5052490234375e-06, "model_forward_time": 0.025559663772583008, "step": 5574 }, { "epoch": 8.5052490234375e-06, "step": 5574, "training_step_time": 0.11347579956054688 }, { "epoch": 8.50677490234375e-06, "model_forward_time": 0.025579452514648438, "step": 5575 }, { "epoch": 8.50677490234375e-06, "step": 5575, "training_step_time": 0.10850358009338379 }, { "epoch": 8.50830078125e-06, "model_forward_time": 0.025150060653686523, "step": 5576 }, { "epoch": 8.50830078125e-06, "step": 5576, "training_step_time": 0.11190223693847656 }, { "epoch": 8.50982666015625e-06, "model_forward_time": 0.025427579879760742, "step": 5577 }, { "epoch": 8.50982666015625e-06, "step": 5577, "training_step_time": 0.10918688774108887 }, { "epoch": 8.5113525390625e-06, "model_forward_time": 0.02529621124267578, "step": 5578 }, { "epoch": 8.5113525390625e-06, "step": 5578, "training_step_time": 0.11261510848999023 }, { "epoch": 8.51287841796875e-06, "model_forward_time": 0.025478839874267578, "step": 5579 }, { "epoch": 8.51287841796875e-06, "step": 5579, "training_step_time": 0.1089320182800293 }, { "epoch": 8.514404296875e-06, "grad_norm": 0.5152722001075745, "learning_rate": 9.502792575695112e-05, "loss": 0.096, "step": 5580 }, { "epoch": 8.514404296875e-06, "model_forward_time": 0.02508234977722168, "step": 5580 }, { "epoch": 8.514404296875e-06, "step": 5580, "training_step_time": 0.11014628410339355 }, { "epoch": 8.51593017578125e-06, "model_forward_time": 0.02529740333557129, "step": 5581 }, { "epoch": 8.51593017578125e-06, "step": 5581, "training_step_time": 0.11190176010131836 }, { "epoch": 8.5174560546875e-06, "model_forward_time": 0.025408267974853516, "step": 5582 }, { "epoch": 8.5174560546875e-06, "step": 5582, "training_step_time": 0.10768556594848633 }, { "epoch": 8.51898193359375e-06, "model_forward_time": 0.02550339698791504, "step": 5583 }, { "epoch": 8.51898193359375e-06, "step": 5583, "training_step_time": 0.10976409912109375 }, { "epoch": 8.5205078125e-06, "model_forward_time": 0.025527000427246094, "step": 5584 }, { "epoch": 8.5205078125e-06, "step": 5584, "training_step_time": 0.10895490646362305 }, { "epoch": 8.52203369140625e-06, "model_forward_time": 0.025280237197875977, "step": 5585 }, { "epoch": 8.52203369140625e-06, "step": 5585, "training_step_time": 0.13655710220336914 }, { "epoch": 8.5235595703125e-06, "model_forward_time": 0.025352001190185547, "step": 5586 }, { "epoch": 8.5235595703125e-06, "step": 5586, "training_step_time": 0.12762689590454102 }, { "epoch": 8.52508544921875e-06, "model_forward_time": 0.024984359741210938, "step": 5587 }, { "epoch": 8.52508544921875e-06, "step": 5587, "training_step_time": 0.11026358604431152 }, { "epoch": 8.526611328125e-06, "model_forward_time": 0.024921655654907227, "step": 5588 }, { "epoch": 8.526611328125e-06, "step": 5588, "training_step_time": 0.11644244194030762 }, { "epoch": 8.52813720703125e-06, "model_forward_time": 0.025429248809814453, "step": 5589 }, { "epoch": 8.52813720703125e-06, "step": 5589, "training_step_time": 0.10667204856872559 }, { "epoch": 8.5296630859375e-06, "grad_norm": 0.4931207001209259, "learning_rate": 9.50039377086147e-05, "loss": 0.0733, "step": 5590 }, { "epoch": 8.5296630859375e-06, "model_forward_time": 0.02527475357055664, "step": 5590 }, { "epoch": 8.5296630859375e-06, "step": 5590, "training_step_time": 0.1112833023071289 }, { "epoch": 8.53118896484375e-06, "model_forward_time": 0.027823209762573242, "step": 5591 }, { "epoch": 8.53118896484375e-06, "step": 5591, "training_step_time": 0.11296486854553223 }, { "epoch": 8.53271484375e-06, "model_forward_time": 0.025278568267822266, "step": 5592 }, { "epoch": 8.53271484375e-06, "step": 5592, "training_step_time": 0.11172080039978027 }, { "epoch": 8.53424072265625e-06, "model_forward_time": 0.02546834945678711, "step": 5593 }, { "epoch": 8.53424072265625e-06, "step": 5593, "training_step_time": 0.12639856338500977 }, { "epoch": 8.5357666015625e-06, "model_forward_time": 0.02545332908630371, "step": 5594 }, { "epoch": 8.5357666015625e-06, "step": 5594, "training_step_time": 0.11121511459350586 }, { "epoch": 8.53729248046875e-06, "model_forward_time": 0.02931523323059082, "step": 5595 }, { "epoch": 8.53729248046875e-06, "step": 5595, "training_step_time": 0.11070609092712402 }, { "epoch": 8.538818359375e-06, "model_forward_time": 0.025269031524658203, "step": 5596 }, { "epoch": 8.538818359375e-06, "step": 5596, "training_step_time": 0.10912632942199707 }, { "epoch": 8.54034423828125e-06, "model_forward_time": 0.025653600692749023, "step": 5597 }, { "epoch": 8.54034423828125e-06, "step": 5597, "training_step_time": 0.12185859680175781 }, { "epoch": 8.5418701171875e-06, "model_forward_time": 0.025304317474365234, "step": 5598 }, { "epoch": 8.5418701171875e-06, "step": 5598, "training_step_time": 0.11203265190124512 }, { "epoch": 8.54339599609375e-06, "model_forward_time": 0.025623321533203125, "step": 5599 }, { "epoch": 8.54339599609375e-06, "step": 5599, "training_step_time": 0.19733309745788574 }, { "epoch": 8.544921875e-06, "grad_norm": 0.6653024554252625, "learning_rate": 9.497989497625035e-05, "loss": 0.0807, "step": 5600 }, { "epoch": 8.544921875e-06, "model_forward_time": 0.02411341667175293, "step": 5600 }, { "epoch": 8.544921875e-06, "step": 5600, "training_step_time": 0.1137847900390625 }, { "epoch": 8.54644775390625e-06, "model_forward_time": 0.02620530128479004, "step": 5601 }, { "epoch": 8.54644775390625e-06, "step": 5601, "training_step_time": 0.11612081527709961 }, { "epoch": 8.5479736328125e-06, "model_forward_time": 0.025931119918823242, "step": 5602 }, { "epoch": 8.5479736328125e-06, "step": 5602, "training_step_time": 0.11466312408447266 }, { "epoch": 8.54949951171875e-06, "model_forward_time": 0.025912046432495117, "step": 5603 }, { "epoch": 8.54949951171875e-06, "step": 5603, "training_step_time": 0.11445784568786621 }, { "epoch": 8.551025390625e-06, "model_forward_time": 0.02537846565246582, "step": 5604 }, { "epoch": 8.551025390625e-06, "step": 5604, "training_step_time": 0.14040780067443848 }, { "epoch": 8.55255126953125e-06, "model_forward_time": 0.025027751922607422, "step": 5605 }, { "epoch": 8.55255126953125e-06, "step": 5605, "training_step_time": 0.1156473159790039 }, { "epoch": 8.5540771484375e-06, "model_forward_time": 0.02823472023010254, "step": 5606 }, { "epoch": 8.5540771484375e-06, "step": 5606, "training_step_time": 0.21980500221252441 }, { "epoch": 8.55560302734375e-06, "model_forward_time": 0.024739503860473633, "step": 5607 }, { "epoch": 8.55560302734375e-06, "step": 5607, "training_step_time": 0.1320197582244873 }, { "epoch": 8.55712890625e-06, "model_forward_time": 0.026064395904541016, "step": 5608 }, { "epoch": 8.55712890625e-06, "step": 5608, "training_step_time": 0.1114811897277832 }, { "epoch": 8.55865478515625e-06, "model_forward_time": 0.025989532470703125, "step": 5609 }, { "epoch": 8.55865478515625e-06, "step": 5609, "training_step_time": 0.11773967742919922 }, { "epoch": 8.5601806640625e-06, "grad_norm": 0.3101156949996948, "learning_rate": 9.49557975890723e-05, "loss": 0.0811, "step": 5610 }, { "epoch": 8.5601806640625e-06, "model_forward_time": 0.02553248405456543, "step": 5610 }, { "epoch": 8.5601806640625e-06, "step": 5610, "training_step_time": 0.11394095420837402 }, { "epoch": 8.56170654296875e-06, "model_forward_time": 0.027122020721435547, "step": 5611 }, { "epoch": 8.56170654296875e-06, "step": 5611, "training_step_time": 0.1120908260345459 }, { "epoch": 8.563232421875e-06, "model_forward_time": 0.025753498077392578, "step": 5612 }, { "epoch": 8.563232421875e-06, "step": 5612, "training_step_time": 0.21460676193237305 }, { "epoch": 8.56475830078125e-06, "model_forward_time": 0.024910449981689453, "step": 5613 }, { "epoch": 8.56475830078125e-06, "step": 5613, "training_step_time": 0.11077499389648438 }, { "epoch": 8.5662841796875e-06, "model_forward_time": 0.02472543716430664, "step": 5614 }, { "epoch": 8.5662841796875e-06, "step": 5614, "training_step_time": 0.10970449447631836 }, { "epoch": 8.56781005859375e-06, "model_forward_time": 0.025194644927978516, "step": 5615 }, { "epoch": 8.56781005859375e-06, "step": 5615, "training_step_time": 0.16884398460388184 }, { "epoch": 8.5693359375e-06, "model_forward_time": 0.02461838722229004, "step": 5616 }, { "epoch": 8.5693359375e-06, "step": 5616, "training_step_time": 0.16986918449401855 }, { "epoch": 8.57086181640625e-06, "model_forward_time": 0.024666786193847656, "step": 5617 }, { "epoch": 8.57086181640625e-06, "step": 5617, "training_step_time": 0.10829949378967285 }, { "epoch": 8.5723876953125e-06, "model_forward_time": 0.024996519088745117, "step": 5618 }, { "epoch": 8.5723876953125e-06, "step": 5618, "training_step_time": 0.10442185401916504 }, { "epoch": 8.57391357421875e-06, "model_forward_time": 0.025464296340942383, "step": 5619 }, { "epoch": 8.57391357421875e-06, "step": 5619, "training_step_time": 0.11213421821594238 }, { "epoch": 8.575439453125e-06, "grad_norm": 0.4704367518424988, "learning_rate": 9.493164557636112e-05, "loss": 0.0747, "step": 5620 }, { "epoch": 8.575439453125e-06, "model_forward_time": 0.025829076766967773, "step": 5620 }, { "epoch": 8.575439453125e-06, "step": 5620, "training_step_time": 0.10870933532714844 }, { "epoch": 8.57696533203125e-06, "model_forward_time": 0.025187969207763672, "step": 5621 }, { "epoch": 8.57696533203125e-06, "step": 5621, "training_step_time": 0.10694003105163574 }, { "epoch": 8.5784912109375e-06, "model_forward_time": 0.025395631790161133, "step": 5622 }, { "epoch": 8.5784912109375e-06, "step": 5622, "training_step_time": 0.10631585121154785 }, { "epoch": 8.58001708984375e-06, "model_forward_time": 0.0256650447845459, "step": 5623 }, { "epoch": 8.58001708984375e-06, "step": 5623, "training_step_time": 0.10675477981567383 }, { "epoch": 8.58154296875e-06, "model_forward_time": 0.026307344436645508, "step": 5624 }, { "epoch": 8.58154296875e-06, "step": 5624, "training_step_time": 0.10687804222106934 }, { "epoch": 8.58306884765625e-06, "model_forward_time": 0.025311946868896484, "step": 5625 }, { "epoch": 8.58306884765625e-06, "step": 5625, "training_step_time": 0.10824322700500488 }, { "epoch": 8.5845947265625e-06, "model_forward_time": 0.02512216567993164, "step": 5626 }, { "epoch": 8.5845947265625e-06, "step": 5626, "training_step_time": 0.1092989444732666 }, { "epoch": 8.58612060546875e-06, "model_forward_time": 0.025203943252563477, "step": 5627 }, { "epoch": 8.58612060546875e-06, "step": 5627, "training_step_time": 0.11082839965820312 }, { "epoch": 8.587646484375e-06, "model_forward_time": 0.02580547332763672, "step": 5628 }, { "epoch": 8.587646484375e-06, "step": 5628, "training_step_time": 0.11706662178039551 }, { "epoch": 8.58917236328125e-06, "model_forward_time": 0.0254366397857666, "step": 5629 }, { "epoch": 8.58917236328125e-06, "step": 5629, "training_step_time": 0.11065983772277832 }, { "epoch": 8.5906982421875e-06, "grad_norm": 0.3832702040672302, "learning_rate": 9.49074389674638e-05, "loss": 0.0623, "step": 5630 }, { "epoch": 8.5906982421875e-06, "model_forward_time": 0.0255584716796875, "step": 5630 }, { "epoch": 8.5906982421875e-06, "step": 5630, "training_step_time": 0.1160573959350586 }, { "epoch": 8.59222412109375e-06, "model_forward_time": 0.025298118591308594, "step": 5631 }, { "epoch": 8.59222412109375e-06, "step": 5631, "training_step_time": 0.11720824241638184 }, { "epoch": 8.59375e-06, "model_forward_time": 0.02544856071472168, "step": 5632 }, { "epoch": 8.59375e-06, "step": 5632, "training_step_time": 0.11803793907165527 }, { "epoch": 8.59527587890625e-06, "model_forward_time": 0.026454687118530273, "step": 5633 }, { "epoch": 8.59527587890625e-06, "step": 5633, "training_step_time": 0.12260031700134277 }, { "epoch": 8.5968017578125e-06, "model_forward_time": 0.024752378463745117, "step": 5634 }, { "epoch": 8.5968017578125e-06, "step": 5634, "training_step_time": 0.13315796852111816 }, { "epoch": 8.59832763671875e-06, "model_forward_time": 0.025404691696166992, "step": 5635 }, { "epoch": 8.59832763671875e-06, "step": 5635, "training_step_time": 0.11161470413208008 }, { "epoch": 8.599853515625e-06, "model_forward_time": 0.026782751083374023, "step": 5636 }, { "epoch": 8.599853515625e-06, "step": 5636, "training_step_time": 0.18033337593078613 }, { "epoch": 8.60137939453125e-06, "model_forward_time": 0.02684950828552246, "step": 5637 }, { "epoch": 8.60137939453125e-06, "step": 5637, "training_step_time": 0.1273043155670166 }, { "epoch": 8.6029052734375e-06, "model_forward_time": 0.02421855926513672, "step": 5638 }, { "epoch": 8.6029052734375e-06, "step": 5638, "training_step_time": 0.1807572841644287 }, { "epoch": 8.60443115234375e-06, "model_forward_time": 0.025712013244628906, "step": 5639 }, { "epoch": 8.60443115234375e-06, "step": 5639, "training_step_time": 0.14128756523132324 }, { "epoch": 8.60595703125e-06, "grad_norm": 0.6627846956253052, "learning_rate": 9.488317779179361e-05, "loss": 0.0708, "step": 5640 }, { "epoch": 8.60595703125e-06, "model_forward_time": 0.024538755416870117, "step": 5640 }, { "epoch": 8.60595703125e-06, "step": 5640, "training_step_time": 0.11018824577331543 }, { "epoch": 8.60748291015625e-06, "model_forward_time": 0.0245513916015625, "step": 5641 }, { "epoch": 8.60748291015625e-06, "step": 5641, "training_step_time": 0.10836195945739746 }, { "epoch": 8.6090087890625e-06, "model_forward_time": 0.025039196014404297, "step": 5642 }, { "epoch": 8.6090087890625e-06, "step": 5642, "training_step_time": 0.11410284042358398 }, { "epoch": 8.61053466796875e-06, "model_forward_time": 0.0258638858795166, "step": 5643 }, { "epoch": 8.61053466796875e-06, "step": 5643, "training_step_time": 0.1118018627166748 }, { "epoch": 8.612060546875e-06, "model_forward_time": 0.025200605392456055, "step": 5644 }, { "epoch": 8.612060546875e-06, "step": 5644, "training_step_time": 0.20527362823486328 }, { "epoch": 8.61358642578125e-06, "model_forward_time": 0.024642229080200195, "step": 5645 }, { "epoch": 8.61358642578125e-06, "step": 5645, "training_step_time": 0.11366033554077148 }, { "epoch": 8.6151123046875e-06, "model_forward_time": 0.024674177169799805, "step": 5646 }, { "epoch": 8.6151123046875e-06, "step": 5646, "training_step_time": 0.11691999435424805 }, { "epoch": 8.61663818359375e-06, "model_forward_time": 0.025403738021850586, "step": 5647 }, { "epoch": 8.61663818359375e-06, "step": 5647, "training_step_time": 0.1130514144897461 }, { "epoch": 8.6181640625e-06, "model_forward_time": 0.02541184425354004, "step": 5648 }, { "epoch": 8.6181640625e-06, "step": 5648, "training_step_time": 0.11801934242248535 }, { "epoch": 8.61968994140625e-06, "model_forward_time": 0.025005102157592773, "step": 5649 }, { "epoch": 8.61968994140625e-06, "step": 5649, "training_step_time": 0.1329195499420166 }, { "epoch": 8.6212158203125e-06, "grad_norm": 0.34214547276496887, "learning_rate": 9.485886207883022e-05, "loss": 0.089, "step": 5650 }, { "epoch": 8.6212158203125e-06, "model_forward_time": 0.025602340698242188, "step": 5650 }, { "epoch": 8.6212158203125e-06, "step": 5650, "training_step_time": 0.12142086029052734 }, { "epoch": 8.62274169921875e-06, "model_forward_time": 0.025153160095214844, "step": 5651 }, { "epoch": 8.62274169921875e-06, "step": 5651, "training_step_time": 0.21445465087890625 }, { "epoch": 8.624267578125e-06, "model_forward_time": 0.02443838119506836, "step": 5652 }, { "epoch": 8.624267578125e-06, "step": 5652, "training_step_time": 0.1307835578918457 }, { "epoch": 8.62579345703125e-06, "model_forward_time": 0.024609804153442383, "step": 5653 }, { "epoch": 8.62579345703125e-06, "step": 5653, "training_step_time": 0.11268973350524902 }, { "epoch": 8.6273193359375e-06, "model_forward_time": 0.02524113655090332, "step": 5654 }, { "epoch": 8.6273193359375e-06, "step": 5654, "training_step_time": 0.11936759948730469 }, { "epoch": 8.62884521484375e-06, "model_forward_time": 0.025458812713623047, "step": 5655 }, { "epoch": 8.62884521484375e-06, "step": 5655, "training_step_time": 0.10752582550048828 }, { "epoch": 8.63037109375e-06, "model_forward_time": 0.025011062622070312, "step": 5656 }, { "epoch": 8.63037109375e-06, "step": 5656, "training_step_time": 0.11252474784851074 }, { "epoch": 8.63189697265625e-06, "model_forward_time": 0.02499675750732422, "step": 5657 }, { "epoch": 8.63189697265625e-06, "step": 5657, "training_step_time": 0.1105186939239502 }, { "epoch": 8.6334228515625e-06, "model_forward_time": 0.025557756423950195, "step": 5658 }, { "epoch": 8.6334228515625e-06, "step": 5658, "training_step_time": 0.11146974563598633 }, { "epoch": 8.63494873046875e-06, "model_forward_time": 0.025761127471923828, "step": 5659 }, { "epoch": 8.63494873046875e-06, "step": 5659, "training_step_time": 0.11534976959228516 }, { "epoch": 8.636474609375e-06, "grad_norm": 0.6344464421272278, "learning_rate": 9.483449185811948e-05, "loss": 0.0694, "step": 5660 }, { "epoch": 8.636474609375e-06, "model_forward_time": 0.025650739669799805, "step": 5660 }, { "epoch": 8.636474609375e-06, "step": 5660, "training_step_time": 0.20753979682922363 }, { "epoch": 8.63800048828125e-06, "model_forward_time": 0.02511882781982422, "step": 5661 }, { "epoch": 8.63800048828125e-06, "step": 5661, "training_step_time": 0.12133455276489258 }, { "epoch": 8.6395263671875e-06, "model_forward_time": 0.024400711059570312, "step": 5662 }, { "epoch": 8.6395263671875e-06, "step": 5662, "training_step_time": 0.11008977890014648 }, { "epoch": 8.64105224609375e-06, "model_forward_time": 0.025488615036010742, "step": 5663 }, { "epoch": 8.64105224609375e-06, "step": 5663, "training_step_time": 0.10864996910095215 }, { "epoch": 8.642578125e-06, "model_forward_time": 0.0258331298828125, "step": 5664 }, { "epoch": 8.642578125e-06, "step": 5664, "training_step_time": 0.11136102676391602 }, { "epoch": 8.64410400390625e-06, "model_forward_time": 0.02547621726989746, "step": 5665 }, { "epoch": 8.64410400390625e-06, "step": 5665, "training_step_time": 0.11194777488708496 }, { "epoch": 8.6456298828125e-06, "model_forward_time": 0.025681257247924805, "step": 5666 }, { "epoch": 8.6456298828125e-06, "step": 5666, "training_step_time": 0.11451148986816406 }, { "epoch": 8.64715576171875e-06, "model_forward_time": 0.025636911392211914, "step": 5667 }, { "epoch": 8.64715576171875e-06, "step": 5667, "training_step_time": 0.11452174186706543 }, { "epoch": 8.648681640625e-06, "model_forward_time": 0.025289535522460938, "step": 5668 }, { "epoch": 8.648681640625e-06, "step": 5668, "training_step_time": 0.10977911949157715 }, { "epoch": 8.65020751953125e-06, "model_forward_time": 0.025005817413330078, "step": 5669 }, { "epoch": 8.65020751953125e-06, "step": 5669, "training_step_time": 0.10927271842956543 }, { "epoch": 8.6517333984375e-06, "grad_norm": 0.5223129987716675, "learning_rate": 9.481006715927351e-05, "loss": 0.081, "step": 5670 }, { "epoch": 8.6517333984375e-06, "model_forward_time": 0.02517533302307129, "step": 5670 }, { "epoch": 8.6517333984375e-06, "step": 5670, "training_step_time": 0.10946846008300781 }, { "epoch": 8.65325927734375e-06, "model_forward_time": 0.025597572326660156, "step": 5671 }, { "epoch": 8.65325927734375e-06, "step": 5671, "training_step_time": 0.11063480377197266 }, { "epoch": 8.65478515625e-06, "model_forward_time": 0.028668642044067383, "step": 5672 }, { "epoch": 8.65478515625e-06, "step": 5672, "training_step_time": 0.11253213882446289 }, { "epoch": 8.65631103515625e-06, "model_forward_time": 0.025168180465698242, "step": 5673 }, { "epoch": 8.65631103515625e-06, "step": 5673, "training_step_time": 0.10771870613098145 }, { "epoch": 8.6578369140625e-06, "model_forward_time": 0.02535867691040039, "step": 5674 }, { "epoch": 8.6578369140625e-06, "step": 5674, "training_step_time": 0.10741615295410156 }, { "epoch": 8.65936279296875e-06, "model_forward_time": 0.025580167770385742, "step": 5675 }, { "epoch": 8.65936279296875e-06, "step": 5675, "training_step_time": 0.1098787784576416 }, { "epoch": 8.660888671875e-06, "model_forward_time": 0.02544879913330078, "step": 5676 }, { "epoch": 8.660888671875e-06, "step": 5676, "training_step_time": 0.1420128345489502 }, { "epoch": 8.66241455078125e-06, "model_forward_time": 0.024190664291381836, "step": 5677 }, { "epoch": 8.66241455078125e-06, "step": 5677, "training_step_time": 0.17170953750610352 }, { "epoch": 8.6639404296875e-06, "model_forward_time": 0.023317813873291016, "step": 5678 }, { "epoch": 8.6639404296875e-06, "step": 5678, "training_step_time": 0.22379803657531738 }, { "epoch": 8.66546630859375e-06, "model_forward_time": 0.02490377426147461, "step": 5679 }, { "epoch": 8.66546630859375e-06, "step": 5679, "training_step_time": 0.16246676445007324 }, { "epoch": 8.6669921875e-06, "grad_norm": 0.5693213939666748, "learning_rate": 9.478558801197065e-05, "loss": 0.079, "step": 5680 }, { "epoch": 8.6669921875e-06, "model_forward_time": 0.024120807647705078, "step": 5680 }, { "epoch": 8.6669921875e-06, "step": 5680, "training_step_time": 0.20720553398132324 }, { "epoch": 8.66851806640625e-06, "model_forward_time": 0.024667024612426758, "step": 5681 }, { "epoch": 8.66851806640625e-06, "step": 5681, "training_step_time": 0.1292276382446289 }, { "epoch": 8.6700439453125e-06, "model_forward_time": 0.024073362350463867, "step": 5682 }, { "epoch": 8.6700439453125e-06, "step": 5682, "training_step_time": 0.12343025207519531 }, { "epoch": 8.67156982421875e-06, "model_forward_time": 0.024791955947875977, "step": 5683 }, { "epoch": 8.67156982421875e-06, "step": 5683, "training_step_time": 0.12181878089904785 }, { "epoch": 8.673095703125e-06, "model_forward_time": 0.02552652359008789, "step": 5684 }, { "epoch": 8.673095703125e-06, "step": 5684, "training_step_time": 0.16749215126037598 }, { "epoch": 8.67462158203125e-06, "model_forward_time": 0.02756333351135254, "step": 5685 }, { "epoch": 8.67462158203125e-06, "step": 5685, "training_step_time": 0.16584062576293945 }, { "epoch": 8.6761474609375e-06, "model_forward_time": 0.024524211883544922, "step": 5686 }, { "epoch": 8.6761474609375e-06, "step": 5686, "training_step_time": 0.11107349395751953 }, { "epoch": 8.67767333984375e-06, "model_forward_time": 0.024565696716308594, "step": 5687 }, { "epoch": 8.67767333984375e-06, "step": 5687, "training_step_time": 0.20196819305419922 }, { "epoch": 8.67919921875e-06, "model_forward_time": 0.024936437606811523, "step": 5688 }, { "epoch": 8.67919921875e-06, "step": 5688, "training_step_time": 0.10818862915039062 }, { "epoch": 8.68072509765625e-06, "model_forward_time": 0.024222612380981445, "step": 5689 }, { "epoch": 8.68072509765625e-06, "step": 5689, "training_step_time": 0.11050271987915039 }, { "epoch": 8.6822509765625e-06, "grad_norm": 0.3727593421936035, "learning_rate": 9.476105444595534e-05, "loss": 0.071, "step": 5690 }, { "epoch": 8.6822509765625e-06, "model_forward_time": 0.02541327476501465, "step": 5690 }, { "epoch": 8.6822509765625e-06, "step": 5690, "training_step_time": 0.11319899559020996 }, { "epoch": 8.68377685546875e-06, "model_forward_time": 0.02712249755859375, "step": 5691 }, { "epoch": 8.68377685546875e-06, "step": 5691, "training_step_time": 0.12575364112854004 }, { "epoch": 8.685302734375e-06, "model_forward_time": 0.027634859085083008, "step": 5692 }, { "epoch": 8.685302734375e-06, "step": 5692, "training_step_time": 0.1254730224609375 }, { "epoch": 8.68682861328125e-06, "model_forward_time": 0.025128602981567383, "step": 5693 }, { "epoch": 8.68682861328125e-06, "step": 5693, "training_step_time": 0.10976171493530273 }, { "epoch": 8.6883544921875e-06, "model_forward_time": 0.02542734146118164, "step": 5694 }, { "epoch": 8.6883544921875e-06, "step": 5694, "training_step_time": 0.21788573265075684 }, { "epoch": 8.68988037109375e-06, "model_forward_time": 0.024222135543823242, "step": 5695 }, { "epoch": 8.68988037109375e-06, "step": 5695, "training_step_time": 0.13775038719177246 }, { "epoch": 8.69140625e-06, "model_forward_time": 0.024397611618041992, "step": 5696 }, { "epoch": 8.69140625e-06, "step": 5696, "training_step_time": 0.1179811954498291 }, { "epoch": 8.69293212890625e-06, "model_forward_time": 0.024003028869628906, "step": 5697 }, { "epoch": 8.69293212890625e-06, "step": 5697, "training_step_time": 0.11922788619995117 }, { "epoch": 8.6944580078125e-06, "model_forward_time": 0.025225400924682617, "step": 5698 }, { "epoch": 8.6944580078125e-06, "step": 5698, "training_step_time": 0.11105680465698242 }, { "epoch": 8.69598388671875e-06, "model_forward_time": 0.024890899658203125, "step": 5699 }, { "epoch": 8.69598388671875e-06, "step": 5699, "training_step_time": 0.21383905410766602 }, { "epoch": 8.697509765625e-06, "grad_norm": 0.1906253546476364, "learning_rate": 9.473646649103818e-05, "loss": 0.0659, "step": 5700 }, { "epoch": 8.697509765625e-06, "model_forward_time": 0.02528548240661621, "step": 5700 }, { "epoch": 8.697509765625e-06, "step": 5700, "training_step_time": 0.10573935508728027 }, { "epoch": 8.69903564453125e-06, "model_forward_time": 0.02491307258605957, "step": 5701 }, { "epoch": 8.69903564453125e-06, "step": 5701, "training_step_time": 0.11289453506469727 }, { "epoch": 8.7005615234375e-06, "model_forward_time": 0.024271011352539062, "step": 5702 }, { "epoch": 8.7005615234375e-06, "step": 5702, "training_step_time": 0.2180488109588623 }, { "epoch": 8.70208740234375e-06, "model_forward_time": 0.02500176429748535, "step": 5703 }, { "epoch": 8.70208740234375e-06, "step": 5703, "training_step_time": 0.11678266525268555 }, { "epoch": 8.70361328125e-06, "model_forward_time": 0.025105953216552734, "step": 5704 }, { "epoch": 8.70361328125e-06, "step": 5704, "training_step_time": 0.10588240623474121 }, { "epoch": 8.70513916015625e-06, "model_forward_time": 0.02847003936767578, "step": 5705 }, { "epoch": 8.70513916015625e-06, "step": 5705, "training_step_time": 0.11067557334899902 }, { "epoch": 8.7066650390625e-06, "model_forward_time": 0.0252685546875, "step": 5706 }, { "epoch": 8.7066650390625e-06, "step": 5706, "training_step_time": 0.10599088668823242 }, { "epoch": 8.70819091796875e-06, "model_forward_time": 0.02571725845336914, "step": 5707 }, { "epoch": 8.70819091796875e-06, "step": 5707, "training_step_time": 0.11007094383239746 }, { "epoch": 8.709716796875e-06, "model_forward_time": 0.025586843490600586, "step": 5708 }, { "epoch": 8.709716796875e-06, "step": 5708, "training_step_time": 0.10913658142089844 }, { "epoch": 8.71124267578125e-06, "model_forward_time": 0.025495052337646484, "step": 5709 }, { "epoch": 8.71124267578125e-06, "step": 5709, "training_step_time": 0.10552549362182617 }, { "epoch": 8.7127685546875e-06, "grad_norm": 0.3626631200313568, "learning_rate": 9.471182417709587e-05, "loss": 0.0793, "step": 5710 }, { "epoch": 8.7127685546875e-06, "model_forward_time": 0.02524399757385254, "step": 5710 }, { "epoch": 8.7127685546875e-06, "step": 5710, "training_step_time": 0.11177921295166016 }, { "epoch": 8.71429443359375e-06, "model_forward_time": 0.025163888931274414, "step": 5711 }, { "epoch": 8.71429443359375e-06, "step": 5711, "training_step_time": 0.1053006649017334 }, { "epoch": 8.7158203125e-06, "model_forward_time": 0.025257349014282227, "step": 5712 }, { "epoch": 8.7158203125e-06, "step": 5712, "training_step_time": 0.10671877861022949 }, { "epoch": 8.71734619140625e-06, "model_forward_time": 0.025287866592407227, "step": 5713 }, { "epoch": 8.71734619140625e-06, "step": 5713, "training_step_time": 0.11122894287109375 }, { "epoch": 8.7188720703125e-06, "model_forward_time": 0.025355100631713867, "step": 5714 }, { "epoch": 8.7188720703125e-06, "step": 5714, "training_step_time": 0.10686850547790527 }, { "epoch": 8.72039794921875e-06, "model_forward_time": 0.02519392967224121, "step": 5715 }, { "epoch": 8.72039794921875e-06, "step": 5715, "training_step_time": 0.1063542366027832 }, { "epoch": 8.721923828125e-06, "model_forward_time": 0.025996685028076172, "step": 5716 }, { "epoch": 8.721923828125e-06, "step": 5716, "training_step_time": 0.10877251625061035 }, { "epoch": 8.72344970703125e-06, "model_forward_time": 0.026959896087646484, "step": 5717 }, { "epoch": 8.72344970703125e-06, "step": 5717, "training_step_time": 0.10953927040100098 }, { "epoch": 8.7249755859375e-06, "model_forward_time": 0.025590896606445312, "step": 5718 }, { "epoch": 8.7249755859375e-06, "step": 5718, "training_step_time": 0.11183333396911621 }, { "epoch": 8.72650146484375e-06, "model_forward_time": 0.025211095809936523, "step": 5719 }, { "epoch": 8.72650146484375e-06, "step": 5719, "training_step_time": 0.18321776390075684 }, { "epoch": 8.72802734375e-06, "grad_norm": 0.4794430136680603, "learning_rate": 9.468712753407112e-05, "loss": 0.0699, "step": 5720 }, { "epoch": 8.72802734375e-06, "model_forward_time": 0.025931596755981445, "step": 5720 }, { "epoch": 8.72802734375e-06, "step": 5720, "training_step_time": 0.10962438583374023 }, { "epoch": 8.72955322265625e-06, "model_forward_time": 0.024333477020263672, "step": 5721 }, { "epoch": 8.72955322265625e-06, "step": 5721, "training_step_time": 0.1341235637664795 }, { "epoch": 8.7310791015625e-06, "model_forward_time": 0.025473594665527344, "step": 5722 }, { "epoch": 8.7310791015625e-06, "step": 5722, "training_step_time": 0.16264986991882324 }, { "epoch": 8.73260498046875e-06, "model_forward_time": 0.024927616119384766, "step": 5723 }, { "epoch": 8.73260498046875e-06, "step": 5723, "training_step_time": 0.22059941291809082 }, { "epoch": 8.734130859375e-06, "model_forward_time": 0.024748563766479492, "step": 5724 }, { "epoch": 8.734130859375e-06, "step": 5724, "training_step_time": 0.11192679405212402 }, { "epoch": 8.73565673828125e-06, "model_forward_time": 0.024431228637695312, "step": 5725 }, { "epoch": 8.73565673828125e-06, "step": 5725, "training_step_time": 0.14302587509155273 }, { "epoch": 8.7371826171875e-06, "model_forward_time": 0.024878978729248047, "step": 5726 }, { "epoch": 8.7371826171875e-06, "step": 5726, "training_step_time": 0.14807486534118652 }, { "epoch": 8.73870849609375e-06, "model_forward_time": 0.024555683135986328, "step": 5727 }, { "epoch": 8.73870849609375e-06, "step": 5727, "training_step_time": 0.11493229866027832 }, { "epoch": 8.740234375e-06, "model_forward_time": 0.025223970413208008, "step": 5728 }, { "epoch": 8.740234375e-06, "step": 5728, "training_step_time": 0.11080574989318848 }, { "epoch": 8.74176025390625e-06, "model_forward_time": 0.02585911750793457, "step": 5729 }, { "epoch": 8.74176025390625e-06, "step": 5729, "training_step_time": 0.1092691421508789 }, { "epoch": 8.7432861328125e-06, "grad_norm": 0.3267877399921417, "learning_rate": 9.46623765919727e-05, "loss": 0.0777, "step": 5730 }, { "epoch": 8.7432861328125e-06, "model_forward_time": 0.02543020248413086, "step": 5730 }, { "epoch": 8.7432861328125e-06, "step": 5730, "training_step_time": 0.16281914710998535 }, { "epoch": 8.74481201171875e-06, "model_forward_time": 0.025120019912719727, "step": 5731 }, { "epoch": 8.74481201171875e-06, "step": 5731, "training_step_time": 0.14694476127624512 }, { "epoch": 8.746337890625e-06, "model_forward_time": 0.024566650390625, "step": 5732 }, { "epoch": 8.746337890625e-06, "step": 5732, "training_step_time": 0.1152794361114502 }, { "epoch": 8.74786376953125e-06, "model_forward_time": 0.02460002899169922, "step": 5733 }, { "epoch": 8.74786376953125e-06, "step": 5733, "training_step_time": 0.10911345481872559 }, { "epoch": 8.7493896484375e-06, "model_forward_time": 0.025265932083129883, "step": 5734 }, { "epoch": 8.7493896484375e-06, "step": 5734, "training_step_time": 0.17486906051635742 }, { "epoch": 8.75091552734375e-06, "model_forward_time": 0.024076223373413086, "step": 5735 }, { "epoch": 8.75091552734375e-06, "step": 5735, "training_step_time": 0.23529672622680664 }, { "epoch": 8.75244140625e-06, "model_forward_time": 0.024236679077148438, "step": 5736 }, { "epoch": 8.75244140625e-06, "step": 5736, "training_step_time": 0.2153477668762207 }, { "epoch": 8.75396728515625e-06, "model_forward_time": 0.024945497512817383, "step": 5737 }, { "epoch": 8.75396728515625e-06, "step": 5737, "training_step_time": 0.24265789985656738 }, { "epoch": 8.7554931640625e-06, "model_forward_time": 0.02516007423400879, "step": 5738 }, { "epoch": 8.7554931640625e-06, "step": 5738, "training_step_time": 0.20834684371948242 }, { "epoch": 8.75701904296875e-06, "model_forward_time": 0.028120756149291992, "step": 5739 }, { "epoch": 8.75701904296875e-06, "step": 5739, "training_step_time": 0.10858917236328125 }, { "epoch": 8.758544921875e-06, "grad_norm": 0.37637773156166077, "learning_rate": 9.463757138087535e-05, "loss": 0.0874, "step": 5740 }, { "epoch": 8.758544921875e-06, "model_forward_time": 0.025181055068969727, "step": 5740 }, { "epoch": 8.758544921875e-06, "step": 5740, "training_step_time": 0.21411871910095215 }, { "epoch": 8.76007080078125e-06, "model_forward_time": 0.024881839752197266, "step": 5741 }, { "epoch": 8.76007080078125e-06, "step": 5741, "training_step_time": 0.11036086082458496 }, { "epoch": 8.7615966796875e-06, "model_forward_time": 0.02480912208557129, "step": 5742 }, { "epoch": 8.7615966796875e-06, "step": 5742, "training_step_time": 0.10637235641479492 }, { "epoch": 8.76312255859375e-06, "model_forward_time": 0.025555849075317383, "step": 5743 }, { "epoch": 8.76312255859375e-06, "step": 5743, "training_step_time": 0.10836005210876465 }, { "epoch": 8.7646484375e-06, "model_forward_time": 0.02655792236328125, "step": 5744 }, { "epoch": 8.7646484375e-06, "step": 5744, "training_step_time": 0.11038470268249512 }, { "epoch": 8.76617431640625e-06, "model_forward_time": 0.025667667388916016, "step": 5745 }, { "epoch": 8.76617431640625e-06, "step": 5745, "training_step_time": 0.11467695236206055 }, { "epoch": 8.7677001953125e-06, "model_forward_time": 0.025905370712280273, "step": 5746 }, { "epoch": 8.7677001953125e-06, "step": 5746, "training_step_time": 0.11172795295715332 }, { "epoch": 8.76922607421875e-06, "model_forward_time": 0.025571107864379883, "step": 5747 }, { "epoch": 8.76922607421875e-06, "step": 5747, "training_step_time": 0.11497855186462402 }, { "epoch": 8.770751953125e-06, "model_forward_time": 0.025565385818481445, "step": 5748 }, { "epoch": 8.770751953125e-06, "step": 5748, "training_step_time": 0.11220669746398926 }, { "epoch": 8.77227783203125e-06, "model_forward_time": 0.025272607803344727, "step": 5749 }, { "epoch": 8.77227783203125e-06, "step": 5749, "training_step_time": 0.12531065940856934 }, { "epoch": 8.7738037109375e-06, "grad_norm": 0.38166099786758423, "learning_rate": 9.46127119309197e-05, "loss": 0.0651, "step": 5750 }, { "epoch": 8.7738037109375e-06, "model_forward_time": 0.024326324462890625, "step": 5750 }, { "epoch": 8.7738037109375e-06, "step": 5750, "training_step_time": 0.1891329288482666 }, { "epoch": 8.77532958984375e-06, "model_forward_time": 0.024070024490356445, "step": 5751 }, { "epoch": 8.77532958984375e-06, "step": 5751, "training_step_time": 0.21283817291259766 }, { "epoch": 8.77685546875e-06, "model_forward_time": 0.024276018142700195, "step": 5752 }, { "epoch": 8.77685546875e-06, "step": 5752, "training_step_time": 0.21341156959533691 }, { "epoch": 8.77838134765625e-06, "model_forward_time": 0.024446964263916016, "step": 5753 }, { "epoch": 8.77838134765625e-06, "step": 5753, "training_step_time": 0.21104049682617188 }, { "epoch": 8.7799072265625e-06, "model_forward_time": 0.024374723434448242, "step": 5754 }, { "epoch": 8.7799072265625e-06, "step": 5754, "training_step_time": 0.2086327075958252 }, { "epoch": 8.78143310546875e-06, "model_forward_time": 0.024730205535888672, "step": 5755 }, { "epoch": 8.78143310546875e-06, "step": 5755, "training_step_time": 0.20209670066833496 }, { "epoch": 8.782958984375e-06, "model_forward_time": 0.024913787841796875, "step": 5756 }, { "epoch": 8.782958984375e-06, "step": 5756, "training_step_time": 0.20732712745666504 }, { "epoch": 8.78448486328125e-06, "model_forward_time": 0.024971485137939453, "step": 5757 }, { "epoch": 8.78448486328125e-06, "step": 5757, "training_step_time": 0.11805987358093262 }, { "epoch": 8.7860107421875e-06, "model_forward_time": 0.024193525314331055, "step": 5758 }, { "epoch": 8.7860107421875e-06, "step": 5758, "training_step_time": 0.13003110885620117 }, { "epoch": 8.78753662109375e-06, "model_forward_time": 0.025430679321289062, "step": 5759 }, { "epoch": 8.78753662109375e-06, "step": 5759, "training_step_time": 0.15594959259033203 }, { "epoch": 8.7890625e-06, "grad_norm": 0.35037916898727417, "learning_rate": 9.458779827231237e-05, "loss": 0.0699, "step": 5760 }, { "epoch": 8.7890625e-06, "model_forward_time": 0.024963855743408203, "step": 5760 }, { "epoch": 8.7890625e-06, "step": 5760, "training_step_time": 0.21167707443237305 }, { "epoch": 8.79058837890625e-06, "model_forward_time": 0.02439093589782715, "step": 5761 }, { "epoch": 8.79058837890625e-06, "step": 5761, "training_step_time": 0.1234140396118164 }, { "epoch": 8.7921142578125e-06, "model_forward_time": 0.024369239807128906, "step": 5762 }, { "epoch": 8.7921142578125e-06, "step": 5762, "training_step_time": 0.13631796836853027 }, { "epoch": 8.79364013671875e-06, "model_forward_time": 0.02476978302001953, "step": 5763 }, { "epoch": 8.79364013671875e-06, "step": 5763, "training_step_time": 0.14091110229492188 }, { "epoch": 8.795166015625e-06, "model_forward_time": 0.025115966796875, "step": 5764 }, { "epoch": 8.795166015625e-06, "step": 5764, "training_step_time": 0.11105227470397949 }, { "epoch": 8.79669189453125e-06, "model_forward_time": 0.025090932846069336, "step": 5765 }, { "epoch": 8.79669189453125e-06, "step": 5765, "training_step_time": 0.11116409301757812 }, { "epoch": 8.7982177734375e-06, "model_forward_time": 0.025231122970581055, "step": 5766 }, { "epoch": 8.7982177734375e-06, "step": 5766, "training_step_time": 0.130354642868042 }, { "epoch": 8.79974365234375e-06, "model_forward_time": 0.025452136993408203, "step": 5767 }, { "epoch": 8.79974365234375e-06, "step": 5767, "training_step_time": 0.20346641540527344 }, { "epoch": 8.80126953125e-06, "model_forward_time": 0.024509668350219727, "step": 5768 }, { "epoch": 8.80126953125e-06, "step": 5768, "training_step_time": 0.12837600708007812 }, { "epoch": 8.80279541015625e-06, "model_forward_time": 0.0244295597076416, "step": 5769 }, { "epoch": 8.80279541015625e-06, "step": 5769, "training_step_time": 0.12457752227783203 }, { "epoch": 8.8043212890625e-06, "grad_norm": 0.4511484205722809, "learning_rate": 9.456283043532576e-05, "loss": 0.0775, "step": 5770 }, { "epoch": 8.8043212890625e-06, "model_forward_time": 0.02500605583190918, "step": 5770 }, { "epoch": 8.8043212890625e-06, "step": 5770, "training_step_time": 0.12194275856018066 }, { "epoch": 8.80584716796875e-06, "model_forward_time": 0.025419950485229492, "step": 5771 }, { "epoch": 8.80584716796875e-06, "step": 5771, "training_step_time": 0.11929988861083984 }, { "epoch": 8.807373046875e-06, "model_forward_time": 0.02536153793334961, "step": 5772 }, { "epoch": 8.807373046875e-06, "step": 5772, "training_step_time": 0.17539334297180176 }, { "epoch": 8.80889892578125e-06, "model_forward_time": 0.024729013442993164, "step": 5773 }, { "epoch": 8.80889892578125e-06, "step": 5773, "training_step_time": 0.11239981651306152 }, { "epoch": 8.8104248046875e-06, "model_forward_time": 0.024279356002807617, "step": 5774 }, { "epoch": 8.8104248046875e-06, "step": 5774, "training_step_time": 0.11043047904968262 }, { "epoch": 8.81195068359375e-06, "model_forward_time": 0.02524590492248535, "step": 5775 }, { "epoch": 8.81195068359375e-06, "step": 5775, "training_step_time": 0.12261509895324707 }, { "epoch": 8.8134765625e-06, "model_forward_time": 0.025134563446044922, "step": 5776 }, { "epoch": 8.8134765625e-06, "step": 5776, "training_step_time": 0.12656331062316895 }, { "epoch": 8.81500244140625e-06, "model_forward_time": 0.025783777236938477, "step": 5777 }, { "epoch": 8.81500244140625e-06, "step": 5777, "training_step_time": 0.1118016242980957 }, { "epoch": 8.8165283203125e-06, "model_forward_time": 0.026101350784301758, "step": 5778 }, { "epoch": 8.8165283203125e-06, "step": 5778, "training_step_time": 0.12193107604980469 }, { "epoch": 8.81805419921875e-06, "model_forward_time": 0.025826454162597656, "step": 5779 }, { "epoch": 8.81805419921875e-06, "step": 5779, "training_step_time": 0.10889267921447754 }, { "epoch": 8.819580078125e-06, "grad_norm": 0.3592827022075653, "learning_rate": 9.453780845029821e-05, "loss": 0.0823, "step": 5780 }, { "epoch": 8.819580078125e-06, "model_forward_time": 0.024648189544677734, "step": 5780 }, { "epoch": 8.819580078125e-06, "step": 5780, "training_step_time": 0.14451336860656738 }, { "epoch": 8.82110595703125e-06, "model_forward_time": 0.025223493576049805, "step": 5781 }, { "epoch": 8.82110595703125e-06, "step": 5781, "training_step_time": 0.10784506797790527 }, { "epoch": 8.8226318359375e-06, "model_forward_time": 0.025252342224121094, "step": 5782 }, { "epoch": 8.8226318359375e-06, "step": 5782, "training_step_time": 0.2090592384338379 }, { "epoch": 8.82415771484375e-06, "model_forward_time": 0.024483680725097656, "step": 5783 }, { "epoch": 8.82415771484375e-06, "step": 5783, "training_step_time": 0.13753032684326172 }, { "epoch": 8.82568359375e-06, "model_forward_time": 0.024658679962158203, "step": 5784 }, { "epoch": 8.82568359375e-06, "step": 5784, "training_step_time": 0.11122369766235352 }, { "epoch": 8.82720947265625e-06, "model_forward_time": 0.025246381759643555, "step": 5785 }, { "epoch": 8.82720947265625e-06, "step": 5785, "training_step_time": 0.11168313026428223 }, { "epoch": 8.8287353515625e-06, "model_forward_time": 0.02494335174560547, "step": 5786 }, { "epoch": 8.8287353515625e-06, "step": 5786, "training_step_time": 0.10713315010070801 }, { "epoch": 8.83026123046875e-06, "model_forward_time": 0.025080442428588867, "step": 5787 }, { "epoch": 8.83026123046875e-06, "step": 5787, "training_step_time": 0.10864138603210449 }, { "epoch": 8.831787109375e-06, "model_forward_time": 0.025205135345458984, "step": 5788 }, { "epoch": 8.831787109375e-06, "step": 5788, "training_step_time": 0.10888957977294922 }, { "epoch": 8.83331298828125e-06, "model_forward_time": 0.025120258331298828, "step": 5789 }, { "epoch": 8.83331298828125e-06, "step": 5789, "training_step_time": 0.1076960563659668 }, { "epoch": 8.8348388671875e-06, "grad_norm": 0.4227290153503418, "learning_rate": 9.451273234763371e-05, "loss": 0.0719, "step": 5790 }, { "epoch": 8.8348388671875e-06, "model_forward_time": 0.02511739730834961, "step": 5790 }, { "epoch": 8.8348388671875e-06, "step": 5790, "training_step_time": 0.10841917991638184 }, { "epoch": 8.83636474609375e-06, "model_forward_time": 0.024907827377319336, "step": 5791 }, { "epoch": 8.83636474609375e-06, "step": 5791, "training_step_time": 0.11310505867004395 }, { "epoch": 8.837890625e-06, "model_forward_time": 0.025264978408813477, "step": 5792 }, { "epoch": 8.837890625e-06, "step": 5792, "training_step_time": 0.10797905921936035 }, { "epoch": 8.83941650390625e-06, "model_forward_time": 0.026732206344604492, "step": 5793 }, { "epoch": 8.83941650390625e-06, "step": 5793, "training_step_time": 0.11080360412597656 }, { "epoch": 8.8409423828125e-06, "model_forward_time": 0.025563478469848633, "step": 5794 }, { "epoch": 8.8409423828125e-06, "step": 5794, "training_step_time": 0.11052417755126953 }, { "epoch": 8.84246826171875e-06, "model_forward_time": 0.025222063064575195, "step": 5795 }, { "epoch": 8.84246826171875e-06, "step": 5795, "training_step_time": 0.10729503631591797 }, { "epoch": 8.843994140625e-06, "model_forward_time": 0.025298595428466797, "step": 5796 }, { "epoch": 8.843994140625e-06, "step": 5796, "training_step_time": 0.10972142219543457 }, { "epoch": 8.84552001953125e-06, "model_forward_time": 0.0251924991607666, "step": 5797 }, { "epoch": 8.84552001953125e-06, "step": 5797, "training_step_time": 0.10793375968933105 }, { "epoch": 8.8470458984375e-06, "model_forward_time": 0.025262832641601562, "step": 5798 }, { "epoch": 8.8470458984375e-06, "step": 5798, "training_step_time": 0.11197161674499512 }, { "epoch": 8.84857177734375e-06, "model_forward_time": 0.025038480758666992, "step": 5799 }, { "epoch": 8.84857177734375e-06, "step": 5799, "training_step_time": 0.10808491706848145 }, { "epoch": 8.85009765625e-06, "grad_norm": 0.3732927739620209, "learning_rate": 9.448760215780217e-05, "loss": 0.0976, "step": 5800 }, { "epoch": 8.85009765625e-06, "model_forward_time": 0.025250911712646484, "step": 5800 }, { "epoch": 8.85009765625e-06, "step": 5800, "training_step_time": 0.21102237701416016 }, { "epoch": 8.85162353515625e-06, "model_forward_time": 0.024827003479003906, "step": 5801 }, { "epoch": 8.85162353515625e-06, "step": 5801, "training_step_time": 0.14337944984436035 }, { "epoch": 8.8531494140625e-06, "model_forward_time": 0.02445507049560547, "step": 5802 }, { "epoch": 8.8531494140625e-06, "step": 5802, "training_step_time": 0.1565699577331543 }, { "epoch": 8.85467529296875e-06, "model_forward_time": 0.024457454681396484, "step": 5803 }, { "epoch": 8.85467529296875e-06, "step": 5803, "training_step_time": 0.15153717994689941 }, { "epoch": 8.856201171875e-06, "model_forward_time": 0.024574995040893555, "step": 5804 }, { "epoch": 8.856201171875e-06, "step": 5804, "training_step_time": 0.18435430526733398 }, { "epoch": 8.85772705078125e-06, "model_forward_time": 0.02444624900817871, "step": 5805 }, { "epoch": 8.85772705078125e-06, "step": 5805, "training_step_time": 0.12979626655578613 }, { "epoch": 8.8592529296875e-06, "model_forward_time": 0.024531841278076172, "step": 5806 }, { "epoch": 8.8592529296875e-06, "step": 5806, "training_step_time": 0.14025068283081055 }, { "epoch": 8.86077880859375e-06, "model_forward_time": 0.025538206100463867, "step": 5807 }, { "epoch": 8.86077880859375e-06, "step": 5807, "training_step_time": 0.13111233711242676 }, { "epoch": 8.8623046875e-06, "model_forward_time": 0.02507638931274414, "step": 5808 }, { "epoch": 8.8623046875e-06, "step": 5808, "training_step_time": 0.1113440990447998 }, { "epoch": 8.86383056640625e-06, "model_forward_time": 0.025632381439208984, "step": 5809 }, { "epoch": 8.86383056640625e-06, "step": 5809, "training_step_time": 0.11174201965332031 }, { "epoch": 8.8653564453125e-06, "grad_norm": 0.3051137328147888, "learning_rate": 9.446241791133907e-05, "loss": 0.072, "step": 5810 }, { "epoch": 8.8653564453125e-06, "model_forward_time": 0.02534627914428711, "step": 5810 }, { "epoch": 8.8653564453125e-06, "step": 5810, "training_step_time": 0.11006307601928711 }, { "epoch": 8.86688232421875e-06, "model_forward_time": 0.025818347930908203, "step": 5811 }, { "epoch": 8.86688232421875e-06, "step": 5811, "training_step_time": 0.11287665367126465 }, { "epoch": 8.868408203125e-06, "model_forward_time": 0.025643587112426758, "step": 5812 }, { "epoch": 8.868408203125e-06, "step": 5812, "training_step_time": 0.189713716506958 }, { "epoch": 8.86993408203125e-06, "model_forward_time": 0.026393651962280273, "step": 5813 }, { "epoch": 8.86993408203125e-06, "step": 5813, "training_step_time": 0.10646700859069824 }, { "epoch": 8.8714599609375e-06, "model_forward_time": 0.024766206741333008, "step": 5814 }, { "epoch": 8.8714599609375e-06, "step": 5814, "training_step_time": 0.10984349250793457 }, { "epoch": 8.87298583984375e-06, "model_forward_time": 0.025118350982666016, "step": 5815 }, { "epoch": 8.87298583984375e-06, "step": 5815, "training_step_time": 0.10932445526123047 }, { "epoch": 8.87451171875e-06, "model_forward_time": 0.026819944381713867, "step": 5816 }, { "epoch": 8.87451171875e-06, "step": 5816, "training_step_time": 0.10876083374023438 }, { "epoch": 8.87603759765625e-06, "model_forward_time": 0.025341033935546875, "step": 5817 }, { "epoch": 8.87603759765625e-06, "step": 5817, "training_step_time": 0.15482735633850098 }, { "epoch": 8.8775634765625e-06, "model_forward_time": 0.02510356903076172, "step": 5818 }, { "epoch": 8.8775634765625e-06, "step": 5818, "training_step_time": 0.11521434783935547 }, { "epoch": 8.87908935546875e-06, "model_forward_time": 0.02530646324157715, "step": 5819 }, { "epoch": 8.87908935546875e-06, "step": 5819, "training_step_time": 0.18747901916503906 }, { "epoch": 8.880615234375e-06, "grad_norm": 0.5610553026199341, "learning_rate": 9.443717963884569e-05, "loss": 0.0839, "step": 5820 }, { "epoch": 8.880615234375e-06, "model_forward_time": 0.024465560913085938, "step": 5820 }, { "epoch": 8.880615234375e-06, "step": 5820, "training_step_time": 0.18909311294555664 }, { "epoch": 8.88214111328125e-06, "model_forward_time": 0.026720523834228516, "step": 5821 }, { "epoch": 8.88214111328125e-06, "step": 5821, "training_step_time": 0.17702102661132812 }, { "epoch": 8.8836669921875e-06, "model_forward_time": 0.024750709533691406, "step": 5822 }, { "epoch": 8.8836669921875e-06, "step": 5822, "training_step_time": 0.15208792686462402 }, { "epoch": 8.88519287109375e-06, "model_forward_time": 0.025708675384521484, "step": 5823 }, { "epoch": 8.88519287109375e-06, "step": 5823, "training_step_time": 0.11994004249572754 }, { "epoch": 8.88671875e-06, "model_forward_time": 0.02420353889465332, "step": 5824 }, { "epoch": 8.88671875e-06, "step": 5824, "training_step_time": 0.12740635871887207 }, { "epoch": 8.88824462890625e-06, "model_forward_time": 0.025762319564819336, "step": 5825 }, { "epoch": 8.88824462890625e-06, "step": 5825, "training_step_time": 0.11667156219482422 }, { "epoch": 8.8897705078125e-06, "model_forward_time": 0.025492429733276367, "step": 5826 }, { "epoch": 8.8897705078125e-06, "step": 5826, "training_step_time": 0.17093229293823242 }, { "epoch": 8.89129638671875e-06, "model_forward_time": 0.024620771408081055, "step": 5827 }, { "epoch": 8.89129638671875e-06, "step": 5827, "training_step_time": 0.16476893424987793 }, { "epoch": 8.892822265625e-06, "model_forward_time": 0.025287628173828125, "step": 5828 }, { "epoch": 8.892822265625e-06, "step": 5828, "training_step_time": 0.1212470531463623 }, { "epoch": 8.89434814453125e-06, "model_forward_time": 0.02477407455444336, "step": 5829 }, { "epoch": 8.89434814453125e-06, "step": 5829, "training_step_time": 0.11799836158752441 }, { "epoch": 8.8958740234375e-06, "grad_norm": 0.5057721138000488, "learning_rate": 9.441188737098889e-05, "loss": 0.0899, "step": 5830 }, { "epoch": 8.8958740234375e-06, "model_forward_time": 0.02567291259765625, "step": 5830 }, { "epoch": 8.8958740234375e-06, "step": 5830, "training_step_time": 0.11262321472167969 }, { "epoch": 8.89739990234375e-06, "model_forward_time": 0.02634739875793457, "step": 5831 }, { "epoch": 8.89739990234375e-06, "step": 5831, "training_step_time": 0.11351990699768066 }, { "epoch": 8.89892578125e-06, "model_forward_time": 0.0253145694732666, "step": 5832 }, { "epoch": 8.89892578125e-06, "step": 5832, "training_step_time": 0.1112065315246582 }, { "epoch": 8.90045166015625e-06, "model_forward_time": 0.02509927749633789, "step": 5833 }, { "epoch": 8.90045166015625e-06, "step": 5833, "training_step_time": 0.10861444473266602 }, { "epoch": 8.9019775390625e-06, "model_forward_time": 0.02516460418701172, "step": 5834 }, { "epoch": 8.9019775390625e-06, "step": 5834, "training_step_time": 0.10914993286132812 }, { "epoch": 8.90350341796875e-06, "model_forward_time": 0.0250394344329834, "step": 5835 }, { "epoch": 8.90350341796875e-06, "step": 5835, "training_step_time": 0.11852455139160156 }, { "epoch": 8.905029296875e-06, "model_forward_time": 0.02480459213256836, "step": 5836 }, { "epoch": 8.905029296875e-06, "step": 5836, "training_step_time": 0.11158275604248047 }, { "epoch": 8.90655517578125e-06, "model_forward_time": 0.02518296241760254, "step": 5837 }, { "epoch": 8.90655517578125e-06, "step": 5837, "training_step_time": 0.10818243026733398 }, { "epoch": 8.9080810546875e-06, "model_forward_time": 0.025329113006591797, "step": 5838 }, { "epoch": 8.9080810546875e-06, "step": 5838, "training_step_time": 0.11196494102478027 }, { "epoch": 8.90960693359375e-06, "model_forward_time": 0.02407217025756836, "step": 5839 }, { "epoch": 8.90960693359375e-06, "step": 5839, "training_step_time": 0.10945892333984375 }, { "epoch": 8.9111328125e-06, "grad_norm": 0.31711509823799133, "learning_rate": 9.438654113850118e-05, "loss": 0.0723, "step": 5840 }, { "epoch": 8.9111328125e-06, "model_forward_time": 0.024309158325195312, "step": 5840 }, { "epoch": 8.9111328125e-06, "step": 5840, "training_step_time": 0.11232709884643555 }, { "epoch": 8.91265869140625e-06, "model_forward_time": 0.025162458419799805, "step": 5841 }, { "epoch": 8.91265869140625e-06, "step": 5841, "training_step_time": 0.10844254493713379 }, { "epoch": 8.9141845703125e-06, "model_forward_time": 0.024079084396362305, "step": 5842 }, { "epoch": 8.9141845703125e-06, "step": 5842, "training_step_time": 0.10884809494018555 }, { "epoch": 8.91571044921875e-06, "model_forward_time": 0.025310516357421875, "step": 5843 }, { "epoch": 8.91571044921875e-06, "step": 5843, "training_step_time": 0.22782158851623535 }, { "epoch": 8.917236328125e-06, "model_forward_time": 0.024475574493408203, "step": 5844 }, { "epoch": 8.917236328125e-06, "step": 5844, "training_step_time": 0.11497926712036133 }, { "epoch": 8.91876220703125e-06, "model_forward_time": 0.023504972457885742, "step": 5845 }, { "epoch": 8.91876220703125e-06, "step": 5845, "training_step_time": 0.13498163223266602 }, { "epoch": 8.9202880859375e-06, "model_forward_time": 0.024995088577270508, "step": 5846 }, { "epoch": 8.9202880859375e-06, "step": 5846, "training_step_time": 0.1605982780456543 }, { "epoch": 8.92181396484375e-06, "model_forward_time": 0.024432897567749023, "step": 5847 }, { "epoch": 8.92181396484375e-06, "step": 5847, "training_step_time": 0.22236394882202148 }, { "epoch": 8.92333984375e-06, "model_forward_time": 0.024111270904541016, "step": 5848 }, { "epoch": 8.92333984375e-06, "step": 5848, "training_step_time": 0.11499524116516113 }, { "epoch": 8.92486572265625e-06, "model_forward_time": 0.024158716201782227, "step": 5849 }, { "epoch": 8.92486572265625e-06, "step": 5849, "training_step_time": 0.15272808074951172 }, { "epoch": 8.9263916015625e-06, "grad_norm": 0.5319364070892334, "learning_rate": 9.43611409721806e-05, "loss": 0.0633, "step": 5850 }, { "epoch": 8.9263916015625e-06, "model_forward_time": 0.0243685245513916, "step": 5850 }, { "epoch": 8.9263916015625e-06, "step": 5850, "training_step_time": 0.13464713096618652 }, { "epoch": 8.92791748046875e-06, "model_forward_time": 0.0245208740234375, "step": 5851 }, { "epoch": 8.92791748046875e-06, "step": 5851, "training_step_time": 0.10843944549560547 }, { "epoch": 8.929443359375e-06, "model_forward_time": 0.024949312210083008, "step": 5852 }, { "epoch": 8.929443359375e-06, "step": 5852, "training_step_time": 0.11367082595825195 }, { "epoch": 8.93096923828125e-06, "model_forward_time": 0.024866342544555664, "step": 5853 }, { "epoch": 8.93096923828125e-06, "step": 5853, "training_step_time": 0.12192869186401367 }, { "epoch": 8.9324951171875e-06, "model_forward_time": 0.02535223960876465, "step": 5854 }, { "epoch": 8.9324951171875e-06, "step": 5854, "training_step_time": 0.10739850997924805 }, { "epoch": 8.93402099609375e-06, "model_forward_time": 0.025511741638183594, "step": 5855 }, { "epoch": 8.93402099609375e-06, "step": 5855, "training_step_time": 0.21050477027893066 }, { "epoch": 8.935546875e-06, "model_forward_time": 0.03985714912414551, "step": 5856 }, { "epoch": 8.935546875e-06, "step": 5856, "training_step_time": 0.13285589218139648 }, { "epoch": 8.93707275390625e-06, "model_forward_time": 0.025583744049072266, "step": 5857 }, { "epoch": 8.93707275390625e-06, "step": 5857, "training_step_time": 0.10788893699645996 }, { "epoch": 8.9385986328125e-06, "model_forward_time": 0.026898860931396484, "step": 5858 }, { "epoch": 8.9385986328125e-06, "step": 5858, "training_step_time": 0.10962200164794922 }, { "epoch": 8.94012451171875e-06, "model_forward_time": 0.026428937911987305, "step": 5859 }, { "epoch": 8.94012451171875e-06, "step": 5859, "training_step_time": 0.11249017715454102 }, { "epoch": 8.941650390625e-06, "grad_norm": 0.5092139840126038, "learning_rate": 9.433568690289075e-05, "loss": 0.0842, "step": 5860 }, { "epoch": 8.941650390625e-06, "model_forward_time": 0.026331424713134766, "step": 5860 }, { "epoch": 8.941650390625e-06, "step": 5860, "training_step_time": 0.14335322380065918 }, { "epoch": 8.94317626953125e-06, "model_forward_time": 0.026547670364379883, "step": 5861 }, { "epoch": 8.94317626953125e-06, "step": 5861, "training_step_time": 0.11163830757141113 }, { "epoch": 8.9447021484375e-06, "model_forward_time": 0.025946378707885742, "step": 5862 }, { "epoch": 8.9447021484375e-06, "step": 5862, "training_step_time": 0.11416816711425781 }, { "epoch": 8.94622802734375e-06, "model_forward_time": 0.02638864517211914, "step": 5863 }, { "epoch": 8.94622802734375e-06, "step": 5863, "training_step_time": 0.11352372169494629 }, { "epoch": 8.94775390625e-06, "model_forward_time": 0.02850794792175293, "step": 5864 }, { "epoch": 8.94775390625e-06, "step": 5864, "training_step_time": 0.1380002498626709 }, { "epoch": 8.94927978515625e-06, "model_forward_time": 0.026858806610107422, "step": 5865 }, { "epoch": 8.94927978515625e-06, "step": 5865, "training_step_time": 0.11621904373168945 }, { "epoch": 8.9508056640625e-06, "model_forward_time": 0.025970935821533203, "step": 5866 }, { "epoch": 8.9508056640625e-06, "step": 5866, "training_step_time": 0.11582231521606445 }, { "epoch": 8.95233154296875e-06, "model_forward_time": 0.026712894439697266, "step": 5867 }, { "epoch": 8.95233154296875e-06, "step": 5867, "training_step_time": 0.11066222190856934 }, { "epoch": 8.953857421875e-06, "model_forward_time": 0.02659916877746582, "step": 5868 }, { "epoch": 8.953857421875e-06, "step": 5868, "training_step_time": 0.11015057563781738 }, { "epoch": 8.95538330078125e-06, "model_forward_time": 0.026492595672607422, "step": 5869 }, { "epoch": 8.95538330078125e-06, "step": 5869, "training_step_time": 0.21688055992126465 }, { "epoch": 8.9569091796875e-06, "grad_norm": 0.36725756525993347, "learning_rate": 9.431017896156074e-05, "loss": 0.0792, "step": 5870 }, { "epoch": 8.9569091796875e-06, "model_forward_time": 0.02571249008178711, "step": 5870 }, { "epoch": 8.9569091796875e-06, "step": 5870, "training_step_time": 0.11063218116760254 }, { "epoch": 8.95843505859375e-06, "model_forward_time": 0.025835752487182617, "step": 5871 }, { "epoch": 8.95843505859375e-06, "step": 5871, "training_step_time": 0.10873532295227051 }, { "epoch": 8.9599609375e-06, "model_forward_time": 0.026432514190673828, "step": 5872 }, { "epoch": 8.9599609375e-06, "step": 5872, "training_step_time": 0.1101069450378418 }, { "epoch": 8.96148681640625e-06, "model_forward_time": 0.026633501052856445, "step": 5873 }, { "epoch": 8.96148681640625e-06, "step": 5873, "training_step_time": 0.12423205375671387 }, { "epoch": 8.9630126953125e-06, "model_forward_time": 0.026635169982910156, "step": 5874 }, { "epoch": 8.9630126953125e-06, "step": 5874, "training_step_time": 0.13721823692321777 }, { "epoch": 8.96453857421875e-06, "model_forward_time": 0.02652716636657715, "step": 5875 }, { "epoch": 8.96453857421875e-06, "step": 5875, "training_step_time": 0.13985204696655273 }, { "epoch": 8.966064453125e-06, "model_forward_time": 0.025879383087158203, "step": 5876 }, { "epoch": 8.966064453125e-06, "step": 5876, "training_step_time": 0.12676477432250977 }, { "epoch": 8.96759033203125e-06, "model_forward_time": 0.025313615798950195, "step": 5877 }, { "epoch": 8.96759033203125e-06, "step": 5877, "training_step_time": 0.12378740310668945 }, { "epoch": 8.9691162109375e-06, "model_forward_time": 0.0267791748046875, "step": 5878 }, { "epoch": 8.9691162109375e-06, "step": 5878, "training_step_time": 0.12459158897399902 }, { "epoch": 8.97064208984375e-06, "model_forward_time": 0.0261538028717041, "step": 5879 }, { "epoch": 8.97064208984375e-06, "step": 5879, "training_step_time": 0.12115287780761719 }, { "epoch": 8.97216796875e-06, "grad_norm": 0.27330896258354187, "learning_rate": 9.428461717918511e-05, "loss": 0.0756, "step": 5880 }, { "epoch": 8.97216796875e-06, "model_forward_time": 0.026023149490356445, "step": 5880 }, { "epoch": 8.97216796875e-06, "step": 5880, "training_step_time": 0.11267638206481934 }, { "epoch": 8.97369384765625e-06, "model_forward_time": 0.028307437896728516, "step": 5881 }, { "epoch": 8.97369384765625e-06, "step": 5881, "training_step_time": 0.11522889137268066 }, { "epoch": 8.9752197265625e-06, "model_forward_time": 0.026381254196166992, "step": 5882 }, { "epoch": 8.9752197265625e-06, "step": 5882, "training_step_time": 0.11294960975646973 }, { "epoch": 8.97674560546875e-06, "model_forward_time": 0.025607824325561523, "step": 5883 }, { "epoch": 8.97674560546875e-06, "step": 5883, "training_step_time": 0.1104576587677002 }, { "epoch": 8.978271484375e-06, "model_forward_time": 0.026769638061523438, "step": 5884 }, { "epoch": 8.978271484375e-06, "step": 5884, "training_step_time": 0.11121988296508789 }, { "epoch": 8.97979736328125e-06, "model_forward_time": 0.026705503463745117, "step": 5885 }, { "epoch": 8.97979736328125e-06, "step": 5885, "training_step_time": 0.11024928092956543 }, { "epoch": 8.9813232421875e-06, "model_forward_time": 0.026415586471557617, "step": 5886 }, { "epoch": 8.9813232421875e-06, "step": 5886, "training_step_time": 0.11356520652770996 }, { "epoch": 8.98284912109375e-06, "model_forward_time": 0.025795936584472656, "step": 5887 }, { "epoch": 8.98284912109375e-06, "step": 5887, "training_step_time": 0.21764588356018066 }, { "epoch": 8.984375e-06, "model_forward_time": 0.025852441787719727, "step": 5888 }, { "epoch": 8.984375e-06, "step": 5888, "training_step_time": 0.11114215850830078 }, { "epoch": 8.98590087890625e-06, "model_forward_time": 0.025300025939941406, "step": 5889 }, { "epoch": 8.98590087890625e-06, "step": 5889, "training_step_time": 0.13242411613464355 }, { "epoch": 8.9874267578125e-06, "grad_norm": 0.3354921042919159, "learning_rate": 9.425900158682385e-05, "loss": 0.0779, "step": 5890 }, { "epoch": 8.9874267578125e-06, "model_forward_time": 0.025885820388793945, "step": 5890 }, { "epoch": 8.9874267578125e-06, "step": 5890, "training_step_time": 0.1098470687866211 }, { "epoch": 8.98895263671875e-06, "model_forward_time": 0.02617645263671875, "step": 5891 }, { "epoch": 8.98895263671875e-06, "step": 5891, "training_step_time": 0.16656708717346191 }, { "epoch": 8.990478515625e-06, "model_forward_time": 0.02542567253112793, "step": 5892 }, { "epoch": 8.990478515625e-06, "step": 5892, "training_step_time": 0.16001534461975098 }, { "epoch": 8.99200439453125e-06, "model_forward_time": 0.02576136589050293, "step": 5893 }, { "epoch": 8.99200439453125e-06, "step": 5893, "training_step_time": 0.11061358451843262 }, { "epoch": 8.9935302734375e-06, "model_forward_time": 0.025977611541748047, "step": 5894 }, { "epoch": 8.9935302734375e-06, "step": 5894, "training_step_time": 0.10731363296508789 }, { "epoch": 8.99505615234375e-06, "model_forward_time": 0.026017427444458008, "step": 5895 }, { "epoch": 8.99505615234375e-06, "step": 5895, "training_step_time": 0.14051508903503418 }, { "epoch": 8.99658203125e-06, "model_forward_time": 0.026168107986450195, "step": 5896 }, { "epoch": 8.99658203125e-06, "step": 5896, "training_step_time": 0.11793398857116699 }, { "epoch": 8.99810791015625e-06, "model_forward_time": 0.026320695877075195, "step": 5897 }, { "epoch": 8.99810791015625e-06, "step": 5897, "training_step_time": 0.11550617218017578 }, { "epoch": 8.9996337890625e-06, "model_forward_time": 0.02654266357421875, "step": 5898 }, { "epoch": 8.9996337890625e-06, "step": 5898, "training_step_time": 0.10700750350952148 }, { "epoch": 9.00115966796875e-06, "model_forward_time": 0.02664351463317871, "step": 5899 }, { "epoch": 9.00115966796875e-06, "step": 5899, "training_step_time": 0.10949516296386719 }, { "epoch": 9.002685546875e-06, "grad_norm": 0.5841869115829468, "learning_rate": 9.42333322156023e-05, "loss": 0.0963, "step": 5900 }, { "epoch": 9.002685546875e-06, "model_forward_time": 0.026638031005859375, "step": 5900 }, { "epoch": 9.002685546875e-06, "step": 5900, "training_step_time": 0.205946683883667 }, { "epoch": 9.00421142578125e-06, "model_forward_time": 0.025427579879760742, "step": 5901 }, { "epoch": 9.00421142578125e-06, "step": 5901, "training_step_time": 0.11169576644897461 }, { "epoch": 9.0057373046875e-06, "model_forward_time": 0.02533698081970215, "step": 5902 }, { "epoch": 9.0057373046875e-06, "step": 5902, "training_step_time": 0.10987305641174316 }, { "epoch": 9.00726318359375e-06, "model_forward_time": 0.026617050170898438, "step": 5903 }, { "epoch": 9.00726318359375e-06, "step": 5903, "training_step_time": 0.10966324806213379 }, { "epoch": 9.0087890625e-06, "model_forward_time": 0.026145458221435547, "step": 5904 }, { "epoch": 9.0087890625e-06, "step": 5904, "training_step_time": 0.10892581939697266 }, { "epoch": 9.01031494140625e-06, "model_forward_time": 0.026487112045288086, "step": 5905 }, { "epoch": 9.01031494140625e-06, "step": 5905, "training_step_time": 0.2060689926147461 }, { "epoch": 9.0118408203125e-06, "model_forward_time": 0.02399754524230957, "step": 5906 }, { "epoch": 9.0118408203125e-06, "step": 5906, "training_step_time": 0.10992431640625 }, { "epoch": 9.01336669921875e-06, "model_forward_time": 0.025414705276489258, "step": 5907 }, { "epoch": 9.01336669921875e-06, "step": 5907, "training_step_time": 0.10914397239685059 }, { "epoch": 9.014892578125e-06, "model_forward_time": 0.026842832565307617, "step": 5908 }, { "epoch": 9.014892578125e-06, "step": 5908, "training_step_time": 0.11783885955810547 }, { "epoch": 9.01641845703125e-06, "model_forward_time": 0.028972864151000977, "step": 5909 }, { "epoch": 9.01641845703125e-06, "step": 5909, "training_step_time": 0.13008832931518555 }, { "epoch": 9.0179443359375e-06, "grad_norm": 0.42277806997299194, "learning_rate": 9.420760909671118e-05, "loss": 0.0828, "step": 5910 }, { "epoch": 9.0179443359375e-06, "model_forward_time": 0.02595686912536621, "step": 5910 }, { "epoch": 9.0179443359375e-06, "step": 5910, "training_step_time": 0.11119413375854492 }, { "epoch": 9.01947021484375e-06, "model_forward_time": 0.026522159576416016, "step": 5911 }, { "epoch": 9.01947021484375e-06, "step": 5911, "training_step_time": 0.11870765686035156 }, { "epoch": 9.02099609375e-06, "model_forward_time": 0.02681422233581543, "step": 5912 }, { "epoch": 9.02099609375e-06, "step": 5912, "training_step_time": 0.10959482192993164 }, { "epoch": 9.02252197265625e-06, "model_forward_time": 0.025684595108032227, "step": 5913 }, { "epoch": 9.02252197265625e-06, "step": 5913, "training_step_time": 0.10828876495361328 }, { "epoch": 9.0240478515625e-06, "model_forward_time": 0.02603745460510254, "step": 5914 }, { "epoch": 9.0240478515625e-06, "step": 5914, "training_step_time": 0.18803858757019043 }, { "epoch": 9.02557373046875e-06, "model_forward_time": 0.025936126708984375, "step": 5915 }, { "epoch": 9.02557373046875e-06, "step": 5915, "training_step_time": 0.2149946689605713 }, { "epoch": 9.027099609375e-06, "model_forward_time": 0.025760650634765625, "step": 5916 }, { "epoch": 9.027099609375e-06, "step": 5916, "training_step_time": 0.2101743221282959 }, { "epoch": 9.02862548828125e-06, "model_forward_time": 0.025112152099609375, "step": 5917 }, { "epoch": 9.02862548828125e-06, "step": 5917, "training_step_time": 0.1929612159729004 }, { "epoch": 9.0301513671875e-06, "model_forward_time": 0.025174856185913086, "step": 5918 }, { "epoch": 9.0301513671875e-06, "step": 5918, "training_step_time": 0.1795511245727539 }, { "epoch": 9.03167724609375e-06, "model_forward_time": 0.025325298309326172, "step": 5919 }, { "epoch": 9.03167724609375e-06, "step": 5919, "training_step_time": 0.10358929634094238 }, { "epoch": 9.033203125e-06, "grad_norm": 0.571058452129364, "learning_rate": 9.41818322614065e-05, "loss": 0.0717, "step": 5920 }, { "epoch": 9.033203125e-06, "model_forward_time": 0.0256803035736084, "step": 5920 }, { "epoch": 9.033203125e-06, "step": 5920, "training_step_time": 0.11001372337341309 }, { "epoch": 9.03472900390625e-06, "model_forward_time": 0.026933908462524414, "step": 5921 }, { "epoch": 9.03472900390625e-06, "step": 5921, "training_step_time": 0.1099863052368164 }, { "epoch": 9.0362548828125e-06, "model_forward_time": 0.02663588523864746, "step": 5922 }, { "epoch": 9.0362548828125e-06, "step": 5922, "training_step_time": 0.11111044883728027 }, { "epoch": 9.03778076171875e-06, "model_forward_time": 0.02638530731201172, "step": 5923 }, { "epoch": 9.03778076171875e-06, "step": 5923, "training_step_time": 0.109222412109375 }, { "epoch": 9.039306640625e-06, "model_forward_time": 0.026906967163085938, "step": 5924 }, { "epoch": 9.039306640625e-06, "step": 5924, "training_step_time": 0.10845232009887695 }, { "epoch": 9.04083251953125e-06, "model_forward_time": 0.026295900344848633, "step": 5925 }, { "epoch": 9.04083251953125e-06, "step": 5925, "training_step_time": 0.1078338623046875 }, { "epoch": 9.0423583984375e-06, "model_forward_time": 0.02643752098083496, "step": 5926 }, { "epoch": 9.0423583984375e-06, "step": 5926, "training_step_time": 0.11005520820617676 }, { "epoch": 9.04388427734375e-06, "model_forward_time": 0.026426076889038086, "step": 5927 }, { "epoch": 9.04388427734375e-06, "step": 5927, "training_step_time": 0.10824060440063477 }, { "epoch": 9.04541015625e-06, "model_forward_time": 0.02659749984741211, "step": 5928 }, { "epoch": 9.04541015625e-06, "step": 5928, "training_step_time": 0.12475919723510742 }, { "epoch": 9.04693603515625e-06, "model_forward_time": 0.026145458221435547, "step": 5929 }, { "epoch": 9.04693603515625e-06, "step": 5929, "training_step_time": 0.10899567604064941 }, { "epoch": 9.0484619140625e-06, "grad_norm": 0.5669389963150024, "learning_rate": 9.415600174100956e-05, "loss": 0.077, "step": 5930 }, { "epoch": 9.0484619140625e-06, "model_forward_time": 0.0261080265045166, "step": 5930 }, { "epoch": 9.0484619140625e-06, "step": 5930, "training_step_time": 0.13007378578186035 }, { "epoch": 9.04998779296875e-06, "model_forward_time": 0.026097774505615234, "step": 5931 }, { "epoch": 9.04998779296875e-06, "step": 5931, "training_step_time": 0.18753290176391602 }, { "epoch": 9.051513671875e-06, "model_forward_time": 0.025613069534301758, "step": 5932 }, { "epoch": 9.051513671875e-06, "step": 5932, "training_step_time": 0.19725966453552246 }, { "epoch": 9.05303955078125e-06, "model_forward_time": 0.026819229125976562, "step": 5933 }, { "epoch": 9.05303955078125e-06, "step": 5933, "training_step_time": 0.17894816398620605 }, { "epoch": 9.0545654296875e-06, "model_forward_time": 0.025414228439331055, "step": 5934 }, { "epoch": 9.0545654296875e-06, "step": 5934, "training_step_time": 0.15322375297546387 }, { "epoch": 9.05609130859375e-06, "model_forward_time": 0.026224851608276367, "step": 5935 }, { "epoch": 9.05609130859375e-06, "step": 5935, "training_step_time": 0.20427942276000977 }, { "epoch": 9.0576171875e-06, "model_forward_time": 0.024692296981811523, "step": 5936 }, { "epoch": 9.0576171875e-06, "step": 5936, "training_step_time": 0.17146587371826172 }, { "epoch": 9.05914306640625e-06, "model_forward_time": 0.025773286819458008, "step": 5937 }, { "epoch": 9.05914306640625e-06, "step": 5937, "training_step_time": 0.19130420684814453 }, { "epoch": 9.0606689453125e-06, "model_forward_time": 0.02550816535949707, "step": 5938 }, { "epoch": 9.0606689453125e-06, "step": 5938, "training_step_time": 0.1665341854095459 }, { "epoch": 9.06219482421875e-06, "model_forward_time": 0.0258636474609375, "step": 5939 }, { "epoch": 9.06219482421875e-06, "step": 5939, "training_step_time": 0.18404364585876465 }, { "epoch": 9.063720703125e-06, "grad_norm": 0.3362850844860077, "learning_rate": 9.413011756690685e-05, "loss": 0.0751, "step": 5940 }, { "epoch": 9.063720703125e-06, "model_forward_time": 0.025766611099243164, "step": 5940 }, { "epoch": 9.063720703125e-06, "step": 5940, "training_step_time": 0.10847282409667969 }, { "epoch": 9.06524658203125e-06, "model_forward_time": 0.025864839553833008, "step": 5941 }, { "epoch": 9.06524658203125e-06, "step": 5941, "training_step_time": 0.1095890998840332 }, { "epoch": 9.0667724609375e-06, "model_forward_time": 0.026218414306640625, "step": 5942 }, { "epoch": 9.0667724609375e-06, "step": 5942, "training_step_time": 0.1092383861541748 }, { "epoch": 9.06829833984375e-06, "model_forward_time": 0.02784132957458496, "step": 5943 }, { "epoch": 9.06829833984375e-06, "step": 5943, "training_step_time": 0.1113736629486084 }, { "epoch": 9.06982421875e-06, "model_forward_time": 0.026187658309936523, "step": 5944 }, { "epoch": 9.06982421875e-06, "step": 5944, "training_step_time": 0.1093747615814209 }, { "epoch": 9.07135009765625e-06, "model_forward_time": 0.02668285369873047, "step": 5945 }, { "epoch": 9.07135009765625e-06, "step": 5945, "training_step_time": 0.11614322662353516 }, { "epoch": 9.0728759765625e-06, "model_forward_time": 0.02617955207824707, "step": 5946 }, { "epoch": 9.0728759765625e-06, "step": 5946, "training_step_time": 0.14415621757507324 }, { "epoch": 9.07440185546875e-06, "model_forward_time": 0.02759552001953125, "step": 5947 }, { "epoch": 9.07440185546875e-06, "step": 5947, "training_step_time": 0.11200642585754395 }, { "epoch": 9.075927734375e-06, "model_forward_time": 0.02657628059387207, "step": 5948 }, { "epoch": 9.075927734375e-06, "step": 5948, "training_step_time": 0.11479640007019043 }, { "epoch": 9.07745361328125e-06, "model_forward_time": 0.026170015335083008, "step": 5949 }, { "epoch": 9.07745361328125e-06, "step": 5949, "training_step_time": 0.1187889575958252 }, { "epoch": 9.0789794921875e-06, "grad_norm": 0.4865238070487976, "learning_rate": 9.410417977055011e-05, "loss": 0.0747, "step": 5950 }, { "epoch": 9.0789794921875e-06, "model_forward_time": 0.02617192268371582, "step": 5950 }, { "epoch": 9.0789794921875e-06, "step": 5950, "training_step_time": 0.1213524341583252 }, { "epoch": 9.08050537109375e-06, "model_forward_time": 0.026523351669311523, "step": 5951 }, { "epoch": 9.08050537109375e-06, "step": 5951, "training_step_time": 0.11065912246704102 }, { "epoch": 9.08203125e-06, "model_forward_time": 0.02640986442565918, "step": 5952 }, { "epoch": 9.08203125e-06, "step": 5952, "training_step_time": 0.11548829078674316 }, { "epoch": 9.08355712890625e-06, "model_forward_time": 0.027084827423095703, "step": 5953 }, { "epoch": 9.08355712890625e-06, "step": 5953, "training_step_time": 0.11052417755126953 }, { "epoch": 9.0850830078125e-06, "model_forward_time": 0.026594161987304688, "step": 5954 }, { "epoch": 9.0850830078125e-06, "step": 5954, "training_step_time": 0.17098760604858398 }, { "epoch": 9.08660888671875e-06, "model_forward_time": 0.02605438232421875, "step": 5955 }, { "epoch": 9.08660888671875e-06, "step": 5955, "training_step_time": 0.16991281509399414 }, { "epoch": 9.088134765625e-06, "model_forward_time": 0.025337696075439453, "step": 5956 }, { "epoch": 9.088134765625e-06, "step": 5956, "training_step_time": 0.11057281494140625 }, { "epoch": 9.08966064453125e-06, "model_forward_time": 0.025789737701416016, "step": 5957 }, { "epoch": 9.08966064453125e-06, "step": 5957, "training_step_time": 0.21548056602478027 }, { "epoch": 9.0911865234375e-06, "model_forward_time": 0.02542424201965332, "step": 5958 }, { "epoch": 9.0911865234375e-06, "step": 5958, "training_step_time": 0.12054777145385742 }, { "epoch": 9.09271240234375e-06, "model_forward_time": 0.025702714920043945, "step": 5959 }, { "epoch": 9.09271240234375e-06, "step": 5959, "training_step_time": 0.11264419555664062 }, { "epoch": 9.09423828125e-06, "grad_norm": 0.5005446672439575, "learning_rate": 9.407818838345619e-05, "loss": 0.0752, "step": 5960 }, { "epoch": 9.09423828125e-06, "model_forward_time": 0.02633190155029297, "step": 5960 }, { "epoch": 9.09423828125e-06, "step": 5960, "training_step_time": 0.11399555206298828 }, { "epoch": 9.09576416015625e-06, "model_forward_time": 0.02656078338623047, "step": 5961 }, { "epoch": 9.09576416015625e-06, "step": 5961, "training_step_time": 0.11565780639648438 }, { "epoch": 9.0972900390625e-06, "model_forward_time": 0.027135848999023438, "step": 5962 }, { "epoch": 9.0972900390625e-06, "step": 5962, "training_step_time": 0.11146664619445801 }, { "epoch": 9.09881591796875e-06, "model_forward_time": 0.02624654769897461, "step": 5963 }, { "epoch": 9.09881591796875e-06, "step": 5963, "training_step_time": 0.10973644256591797 }, { "epoch": 9.100341796875e-06, "model_forward_time": 0.02635788917541504, "step": 5964 }, { "epoch": 9.100341796875e-06, "step": 5964, "training_step_time": 0.1099846363067627 }, { "epoch": 9.10186767578125e-06, "model_forward_time": 0.026107311248779297, "step": 5965 }, { "epoch": 9.10186767578125e-06, "step": 5965, "training_step_time": 0.10917329788208008 }, { "epoch": 9.1033935546875e-06, "model_forward_time": 0.026184558868408203, "step": 5966 }, { "epoch": 9.1033935546875e-06, "step": 5966, "training_step_time": 0.11481904983520508 }, { "epoch": 9.10491943359375e-06, "model_forward_time": 0.026482582092285156, "step": 5967 }, { "epoch": 9.10491943359375e-06, "step": 5967, "training_step_time": 0.10909175872802734 }, { "epoch": 9.1064453125e-06, "model_forward_time": 0.026105642318725586, "step": 5968 }, { "epoch": 9.1064453125e-06, "step": 5968, "training_step_time": 0.11098051071166992 }, { "epoch": 9.10797119140625e-06, "model_forward_time": 0.0263974666595459, "step": 5969 }, { "epoch": 9.10797119140625e-06, "step": 5969, "training_step_time": 0.11201834678649902 }, { "epoch": 9.1094970703125e-06, "grad_norm": 0.39406514167785645, "learning_rate": 9.405214343720707e-05, "loss": 0.0861, "step": 5970 }, { "epoch": 9.1094970703125e-06, "model_forward_time": 0.0286407470703125, "step": 5970 }, { "epoch": 9.1094970703125e-06, "step": 5970, "training_step_time": 0.11314868927001953 }, { "epoch": 9.11102294921875e-06, "model_forward_time": 0.026851177215576172, "step": 5971 }, { "epoch": 9.11102294921875e-06, "step": 5971, "training_step_time": 0.11033892631530762 }, { "epoch": 9.112548828125e-06, "model_forward_time": 0.026098251342773438, "step": 5972 }, { "epoch": 9.112548828125e-06, "step": 5972, "training_step_time": 0.11026859283447266 }, { "epoch": 9.11407470703125e-06, "model_forward_time": 0.02621006965637207, "step": 5973 }, { "epoch": 9.11407470703125e-06, "step": 5973, "training_step_time": 0.1108860969543457 }, { "epoch": 9.1156005859375e-06, "model_forward_time": 0.026166439056396484, "step": 5974 }, { "epoch": 9.1156005859375e-06, "step": 5974, "training_step_time": 0.1348400115966797 }, { "epoch": 9.11712646484375e-06, "model_forward_time": 0.026229143142700195, "step": 5975 }, { "epoch": 9.11712646484375e-06, "step": 5975, "training_step_time": 0.11576271057128906 }, { "epoch": 9.11865234375e-06, "model_forward_time": 0.026325702667236328, "step": 5976 }, { "epoch": 9.11865234375e-06, "step": 5976, "training_step_time": 0.13353204727172852 }, { "epoch": 9.12017822265625e-06, "model_forward_time": 0.026549816131591797, "step": 5977 }, { "epoch": 9.12017822265625e-06, "step": 5977, "training_step_time": 0.15739655494689941 }, { "epoch": 9.1217041015625e-06, "model_forward_time": 0.025455713272094727, "step": 5978 }, { "epoch": 9.1217041015625e-06, "step": 5978, "training_step_time": 0.21822071075439453 }, { "epoch": 9.12322998046875e-06, "model_forward_time": 0.03693079948425293, "step": 5979 }, { "epoch": 9.12322998046875e-06, "step": 5979, "training_step_time": 0.1488494873046875 }, { "epoch": 9.124755859375e-06, "grad_norm": 0.29772940278053284, "learning_rate": 9.402604496344984e-05, "loss": 0.087, "step": 5980 }, { "epoch": 9.124755859375e-06, "model_forward_time": 0.024530649185180664, "step": 5980 }, { "epoch": 9.124755859375e-06, "step": 5980, "training_step_time": 0.10814857482910156 }, { "epoch": 9.12628173828125e-06, "model_forward_time": 0.02520585060119629, "step": 5981 }, { "epoch": 9.12628173828125e-06, "step": 5981, "training_step_time": 0.11840581893920898 }, { "epoch": 9.1278076171875e-06, "model_forward_time": 0.025279760360717773, "step": 5982 }, { "epoch": 9.1278076171875e-06, "step": 5982, "training_step_time": 0.11430883407592773 }, { "epoch": 9.12933349609375e-06, "model_forward_time": 0.025073528289794922, "step": 5983 }, { "epoch": 9.12933349609375e-06, "step": 5983, "training_step_time": 0.1108250617980957 }, { "epoch": 9.130859375e-06, "model_forward_time": 0.025407075881958008, "step": 5984 }, { "epoch": 9.130859375e-06, "step": 5984, "training_step_time": 0.19687366485595703 }, { "epoch": 9.13238525390625e-06, "model_forward_time": 0.024477005004882812, "step": 5985 }, { "epoch": 9.13238525390625e-06, "step": 5985, "training_step_time": 0.10642647743225098 }, { "epoch": 9.1339111328125e-06, "model_forward_time": 0.024410486221313477, "step": 5986 }, { "epoch": 9.1339111328125e-06, "step": 5986, "training_step_time": 0.10787487030029297 }, { "epoch": 9.13543701171875e-06, "model_forward_time": 0.02472543716430664, "step": 5987 }, { "epoch": 9.13543701171875e-06, "step": 5987, "training_step_time": 0.11516404151916504 }, { "epoch": 9.136962890625e-06, "model_forward_time": 0.025516986846923828, "step": 5988 }, { "epoch": 9.136962890625e-06, "step": 5988, "training_step_time": 0.10868453979492188 }, { "epoch": 9.13848876953125e-06, "model_forward_time": 0.02478313446044922, "step": 5989 }, { "epoch": 9.13848876953125e-06, "step": 5989, "training_step_time": 0.11188912391662598 }, { "epoch": 9.1400146484375e-06, "grad_norm": 0.4380154311656952, "learning_rate": 9.399989299389661e-05, "loss": 0.089, "step": 5990 }, { "epoch": 9.1400146484375e-06, "model_forward_time": 0.025496244430541992, "step": 5990 }, { "epoch": 9.1400146484375e-06, "step": 5990, "training_step_time": 0.11003255844116211 }, { "epoch": 9.14154052734375e-06, "model_forward_time": 0.025023698806762695, "step": 5991 }, { "epoch": 9.14154052734375e-06, "step": 5991, "training_step_time": 0.19466519355773926 }, { "epoch": 9.14306640625e-06, "model_forward_time": 0.024448156356811523, "step": 5992 }, { "epoch": 9.14306640625e-06, "step": 5992, "training_step_time": 0.1272737979888916 }, { "epoch": 9.14459228515625e-06, "model_forward_time": 0.02480459213256836, "step": 5993 }, { "epoch": 9.14459228515625e-06, "step": 5993, "training_step_time": 0.21369647979736328 }, { "epoch": 9.1461181640625e-06, "model_forward_time": 0.024152755737304688, "step": 5994 }, { "epoch": 9.1461181640625e-06, "step": 5994, "training_step_time": 0.13400959968566895 }, { "epoch": 9.14764404296875e-06, "model_forward_time": 0.02422952651977539, "step": 5995 }, { "epoch": 9.14764404296875e-06, "step": 5995, "training_step_time": 0.11555933952331543 }, { "epoch": 9.149169921875e-06, "model_forward_time": 0.025062084197998047, "step": 5996 }, { "epoch": 9.149169921875e-06, "step": 5996, "training_step_time": 0.11402225494384766 }, { "epoch": 9.15069580078125e-06, "model_forward_time": 0.025150299072265625, "step": 5997 }, { "epoch": 9.15069580078125e-06, "step": 5997, "training_step_time": 0.11268353462219238 }, { "epoch": 9.1522216796875e-06, "model_forward_time": 0.025678634643554688, "step": 5998 }, { "epoch": 9.1522216796875e-06, "step": 5998, "training_step_time": 0.2115478515625 }, { "epoch": 9.15374755859375e-06, "model_forward_time": 0.024707794189453125, "step": 5999 }, { "epoch": 9.15374755859375e-06, "step": 5999, "training_step_time": 0.11007881164550781 }, { "epoch": 9.1552734375e-06, "grad_norm": 0.3040682077407837, "learning_rate": 9.397368756032445e-05, "loss": 0.0881, "step": 6000 }, { "epoch": 9.1552734375e-06, "model_forward_time": 0.026244163513183594, "step": 6000 }, { "epoch": 9.1552734375e-06, "step": 6000, "training_step_time": 0.11234521865844727 }, { "epoch": 9.15679931640625e-06, "model_forward_time": 0.02359604835510254, "step": 6001 }, { "epoch": 9.15679931640625e-06, "step": 6001, "training_step_time": 0.1065206527709961 }, { "epoch": 9.1583251953125e-06, "model_forward_time": 0.023903846740722656, "step": 6002 }, { "epoch": 9.1583251953125e-06, "step": 6002, "training_step_time": 0.10554099082946777 }, { "epoch": 9.15985107421875e-06, "model_forward_time": 0.024934053421020508, "step": 6003 }, { "epoch": 9.15985107421875e-06, "step": 6003, "training_step_time": 0.11041474342346191 }, { "epoch": 9.161376953125e-06, "model_forward_time": 0.025054454803466797, "step": 6004 }, { "epoch": 9.161376953125e-06, "step": 6004, "training_step_time": 0.12824249267578125 }, { "epoch": 9.16290283203125e-06, "model_forward_time": 0.02612471580505371, "step": 6005 }, { "epoch": 9.16290283203125e-06, "step": 6005, "training_step_time": 0.12433052062988281 }, { "epoch": 9.1644287109375e-06, "model_forward_time": 0.024792909622192383, "step": 6006 }, { "epoch": 9.1644287109375e-06, "step": 6006, "training_step_time": 0.11235356330871582 }, { "epoch": 9.16595458984375e-06, "model_forward_time": 0.025784969329833984, "step": 6007 }, { "epoch": 9.16595458984375e-06, "step": 6007, "training_step_time": 0.1082925796508789 }, { "epoch": 9.16748046875e-06, "model_forward_time": 0.030033111572265625, "step": 6008 }, { "epoch": 9.16748046875e-06, "step": 6008, "training_step_time": 0.17102766036987305 }, { "epoch": 9.16900634765625e-06, "model_forward_time": 0.025413990020751953, "step": 6009 }, { "epoch": 9.16900634765625e-06, "step": 6009, "training_step_time": 0.15809226036071777 }, { "epoch": 9.1705322265625e-06, "grad_norm": 0.5795313119888306, "learning_rate": 9.394742869457547e-05, "loss": 0.0773, "step": 6010 }, { "epoch": 9.1705322265625e-06, "model_forward_time": 0.02652120590209961, "step": 6010 }, { "epoch": 9.1705322265625e-06, "step": 6010, "training_step_time": 0.11453890800476074 }, { "epoch": 9.17205810546875e-06, "model_forward_time": 0.02512526512145996, "step": 6011 }, { "epoch": 9.17205810546875e-06, "step": 6011, "training_step_time": 0.2179718017578125 }, { "epoch": 9.173583984375e-06, "model_forward_time": 0.025237560272216797, "step": 6012 }, { "epoch": 9.173583984375e-06, "step": 6012, "training_step_time": 0.11551570892333984 }, { "epoch": 9.17510986328125e-06, "model_forward_time": 0.02501392364501953, "step": 6013 }, { "epoch": 9.17510986328125e-06, "step": 6013, "training_step_time": 0.10555505752563477 }, { "epoch": 9.1766357421875e-06, "model_forward_time": 0.025770187377929688, "step": 6014 }, { "epoch": 9.1766357421875e-06, "step": 6014, "training_step_time": 0.11014151573181152 }, { "epoch": 9.17816162109375e-06, "model_forward_time": 0.025701522827148438, "step": 6015 }, { "epoch": 9.17816162109375e-06, "step": 6015, "training_step_time": 0.10884952545166016 }, { "epoch": 9.1796875e-06, "model_forward_time": 0.02527022361755371, "step": 6016 }, { "epoch": 9.1796875e-06, "step": 6016, "training_step_time": 0.11094999313354492 }, { "epoch": 9.18121337890625e-06, "model_forward_time": 0.02512502670288086, "step": 6017 }, { "epoch": 9.18121337890625e-06, "step": 6017, "training_step_time": 0.11036252975463867 }, { "epoch": 9.1827392578125e-06, "model_forward_time": 0.02505016326904297, "step": 6018 }, { "epoch": 9.1827392578125e-06, "step": 6018, "training_step_time": 0.1149897575378418 }, { "epoch": 9.18426513671875e-06, "model_forward_time": 0.025131702423095703, "step": 6019 }, { "epoch": 9.18426513671875e-06, "step": 6019, "training_step_time": 0.11183929443359375 }, { "epoch": 9.185791015625e-06, "grad_norm": 0.3081001341342926, "learning_rate": 9.392111642855665e-05, "loss": 0.0758, "step": 6020 }, { "epoch": 9.185791015625e-06, "model_forward_time": 0.025185108184814453, "step": 6020 }, { "epoch": 9.185791015625e-06, "step": 6020, "training_step_time": 0.11016368865966797 }, { "epoch": 9.18731689453125e-06, "model_forward_time": 0.025304079055786133, "step": 6021 }, { "epoch": 9.18731689453125e-06, "step": 6021, "training_step_time": 0.11100363731384277 }, { "epoch": 9.1888427734375e-06, "model_forward_time": 0.02507615089416504, "step": 6022 }, { "epoch": 9.1888427734375e-06, "step": 6022, "training_step_time": 0.10936427116394043 }, { "epoch": 9.19036865234375e-06, "model_forward_time": 0.026483774185180664, "step": 6023 }, { "epoch": 9.19036865234375e-06, "step": 6023, "training_step_time": 0.11307716369628906 }, { "epoch": 9.19189453125e-06, "model_forward_time": 0.025040864944458008, "step": 6024 }, { "epoch": 9.19189453125e-06, "step": 6024, "training_step_time": 0.10874485969543457 }, { "epoch": 9.19342041015625e-06, "model_forward_time": 0.026123523712158203, "step": 6025 }, { "epoch": 9.19342041015625e-06, "step": 6025, "training_step_time": 0.10905599594116211 }, { "epoch": 9.1949462890625e-06, "model_forward_time": 0.025483369827270508, "step": 6026 }, { "epoch": 9.1949462890625e-06, "step": 6026, "training_step_time": 0.10969758033752441 }, { "epoch": 9.19647216796875e-06, "model_forward_time": 0.025699377059936523, "step": 6027 }, { "epoch": 9.19647216796875e-06, "step": 6027, "training_step_time": 0.21152830123901367 }, { "epoch": 9.197998046875e-06, "model_forward_time": 0.024741172790527344, "step": 6028 }, { "epoch": 9.197998046875e-06, "step": 6028, "training_step_time": 0.117645263671875 }, { "epoch": 9.19952392578125e-06, "model_forward_time": 0.024483203887939453, "step": 6029 }, { "epoch": 9.19952392578125e-06, "step": 6029, "training_step_time": 0.12608861923217773 }, { "epoch": 9.2010498046875e-06, "grad_norm": 0.2689782679080963, "learning_rate": 9.389475079423988e-05, "loss": 0.0849, "step": 6030 }, { "epoch": 9.2010498046875e-06, "model_forward_time": 0.02559518814086914, "step": 6030 }, { "epoch": 9.2010498046875e-06, "step": 6030, "training_step_time": 0.15968847274780273 }, { "epoch": 9.20257568359375e-06, "model_forward_time": 0.025335311889648438, "step": 6031 }, { "epoch": 9.20257568359375e-06, "step": 6031, "training_step_time": 0.17377400398254395 }, { "epoch": 9.2041015625e-06, "model_forward_time": 0.024378538131713867, "step": 6032 }, { "epoch": 9.2041015625e-06, "step": 6032, "training_step_time": 0.21812844276428223 }, { "epoch": 9.20562744140625e-06, "model_forward_time": 0.024892568588256836, "step": 6033 }, { "epoch": 9.20562744140625e-06, "step": 6033, "training_step_time": 0.11520123481750488 }, { "epoch": 9.2071533203125e-06, "model_forward_time": 0.02422475814819336, "step": 6034 }, { "epoch": 9.2071533203125e-06, "step": 6034, "training_step_time": 0.11438488960266113 }, { "epoch": 9.20867919921875e-06, "model_forward_time": 0.025313615798950195, "step": 6035 }, { "epoch": 9.20867919921875e-06, "step": 6035, "training_step_time": 0.1146860122680664 }, { "epoch": 9.210205078125e-06, "model_forward_time": 0.025505542755126953, "step": 6036 }, { "epoch": 9.210205078125e-06, "step": 6036, "training_step_time": 0.1875319480895996 }, { "epoch": 9.21173095703125e-06, "model_forward_time": 0.0250699520111084, "step": 6037 }, { "epoch": 9.21173095703125e-06, "step": 6037, "training_step_time": 0.11597681045532227 }, { "epoch": 9.2132568359375e-06, "model_forward_time": 0.024235010147094727, "step": 6038 }, { "epoch": 9.2132568359375e-06, "step": 6038, "training_step_time": 0.10536479949951172 }, { "epoch": 9.21478271484375e-06, "model_forward_time": 0.02575087547302246, "step": 6039 }, { "epoch": 9.21478271484375e-06, "step": 6039, "training_step_time": 0.11037302017211914 }, { "epoch": 9.21630859375e-06, "grad_norm": 0.48524272441864014, "learning_rate": 9.38683318236619e-05, "loss": 0.0829, "step": 6040 }, { "epoch": 9.21630859375e-06, "model_forward_time": 0.025158405303955078, "step": 6040 }, { "epoch": 9.21630859375e-06, "step": 6040, "training_step_time": 0.11300539970397949 }, { "epoch": 9.21783447265625e-06, "model_forward_time": 0.025410175323486328, "step": 6041 }, { "epoch": 9.21783447265625e-06, "step": 6041, "training_step_time": 0.11036491394042969 }, { "epoch": 9.2193603515625e-06, "model_forward_time": 0.025224685668945312, "step": 6042 }, { "epoch": 9.2193603515625e-06, "step": 6042, "training_step_time": 0.10720705986022949 }, { "epoch": 9.22088623046875e-06, "model_forward_time": 0.025285720825195312, "step": 6043 }, { "epoch": 9.22088623046875e-06, "step": 6043, "training_step_time": 0.11042618751525879 }, { "epoch": 9.222412109375e-06, "model_forward_time": 0.025348901748657227, "step": 6044 }, { "epoch": 9.222412109375e-06, "step": 6044, "training_step_time": 0.11624026298522949 }, { "epoch": 9.22393798828125e-06, "model_forward_time": 0.025226116180419922, "step": 6045 }, { "epoch": 9.22393798828125e-06, "step": 6045, "training_step_time": 0.11383605003356934 }, { "epoch": 9.2254638671875e-06, "model_forward_time": 0.0254974365234375, "step": 6046 }, { "epoch": 9.2254638671875e-06, "step": 6046, "training_step_time": 0.12353253364562988 }, { "epoch": 9.22698974609375e-06, "model_forward_time": 0.0258944034576416, "step": 6047 }, { "epoch": 9.22698974609375e-06, "step": 6047, "training_step_time": 0.11751556396484375 }, { "epoch": 9.228515625e-06, "model_forward_time": 0.02533435821533203, "step": 6048 }, { "epoch": 9.228515625e-06, "step": 6048, "training_step_time": 0.17653131484985352 }, { "epoch": 9.23004150390625e-06, "model_forward_time": 0.024591684341430664, "step": 6049 }, { "epoch": 9.23004150390625e-06, "step": 6049, "training_step_time": 0.1816096305847168 }, { "epoch": 9.2315673828125e-06, "grad_norm": 0.4617111086845398, "learning_rate": 9.384185954892422e-05, "loss": 0.0872, "step": 6050 }, { "epoch": 9.2315673828125e-06, "model_forward_time": 0.024418115615844727, "step": 6050 }, { "epoch": 9.2315673828125e-06, "step": 6050, "training_step_time": 0.11177706718444824 }, { "epoch": 9.23309326171875e-06, "model_forward_time": 0.029127836227416992, "step": 6051 }, { "epoch": 9.23309326171875e-06, "step": 6051, "training_step_time": 0.11348795890808105 }, { "epoch": 9.234619140625e-06, "model_forward_time": 0.025887012481689453, "step": 6052 }, { "epoch": 9.234619140625e-06, "step": 6052, "training_step_time": 0.216156005859375 }, { "epoch": 9.23614501953125e-06, "model_forward_time": 0.025029420852661133, "step": 6053 }, { "epoch": 9.23614501953125e-06, "step": 6053, "training_step_time": 0.11359930038452148 }, { "epoch": 9.2376708984375e-06, "model_forward_time": 0.025159835815429688, "step": 6054 }, { "epoch": 9.2376708984375e-06, "step": 6054, "training_step_time": 0.10709095001220703 }, { "epoch": 9.23919677734375e-06, "model_forward_time": 0.025363922119140625, "step": 6055 }, { "epoch": 9.23919677734375e-06, "step": 6055, "training_step_time": 0.183363676071167 }, { "epoch": 9.24072265625e-06, "model_forward_time": 0.024658918380737305, "step": 6056 }, { "epoch": 9.24072265625e-06, "step": 6056, "training_step_time": 0.13089203834533691 }, { "epoch": 9.24224853515625e-06, "model_forward_time": 0.02485799789428711, "step": 6057 }, { "epoch": 9.24224853515625e-06, "step": 6057, "training_step_time": 0.10773396492004395 }, { "epoch": 9.2437744140625e-06, "model_forward_time": 0.025780677795410156, "step": 6058 }, { "epoch": 9.2437744140625e-06, "step": 6058, "training_step_time": 0.10868597030639648 }, { "epoch": 9.24530029296875e-06, "model_forward_time": 0.025675296783447266, "step": 6059 }, { "epoch": 9.24530029296875e-06, "step": 6059, "training_step_time": 0.11011481285095215 }, { "epoch": 9.246826171875e-06, "grad_norm": 0.436614066362381, "learning_rate": 9.381533400219318e-05, "loss": 0.0644, "step": 6060 }, { "epoch": 9.246826171875e-06, "model_forward_time": 0.025234222412109375, "step": 6060 }, { "epoch": 9.246826171875e-06, "step": 6060, "training_step_time": 0.11311459541320801 }, { "epoch": 9.24835205078125e-06, "model_forward_time": 0.025025367736816406, "step": 6061 }, { "epoch": 9.24835205078125e-06, "step": 6061, "training_step_time": 0.10574030876159668 }, { "epoch": 9.2498779296875e-06, "model_forward_time": 0.025058269500732422, "step": 6062 }, { "epoch": 9.2498779296875e-06, "step": 6062, "training_step_time": 0.10557246208190918 }, { "epoch": 9.25140380859375e-06, "model_forward_time": 0.025112390518188477, "step": 6063 }, { "epoch": 9.25140380859375e-06, "step": 6063, "training_step_time": 0.10891342163085938 }, { "epoch": 9.2529296875e-06, "model_forward_time": 0.02516961097717285, "step": 6064 }, { "epoch": 9.2529296875e-06, "step": 6064, "training_step_time": 0.10945272445678711 }, { "epoch": 9.25445556640625e-06, "model_forward_time": 0.025258541107177734, "step": 6065 }, { "epoch": 9.25445556640625e-06, "step": 6065, "training_step_time": 0.11072397232055664 }, { "epoch": 9.2559814453125e-06, "model_forward_time": 0.026082754135131836, "step": 6066 }, { "epoch": 9.2559814453125e-06, "step": 6066, "training_step_time": 0.11956906318664551 }, { "epoch": 9.25750732421875e-06, "model_forward_time": 0.024955272674560547, "step": 6067 }, { "epoch": 9.25750732421875e-06, "step": 6067, "training_step_time": 0.10808420181274414 }, { "epoch": 9.259033203125e-06, "model_forward_time": 0.02546095848083496, "step": 6068 }, { "epoch": 9.259033203125e-06, "step": 6068, "training_step_time": 0.10856771469116211 }, { "epoch": 9.26055908203125e-06, "model_forward_time": 0.024938583374023438, "step": 6069 }, { "epoch": 9.26055908203125e-06, "step": 6069, "training_step_time": 0.1090707778930664 }, { "epoch": 9.2620849609375e-06, "grad_norm": 0.5905839204788208, "learning_rate": 9.378875521569981e-05, "loss": 0.0834, "step": 6070 }, { "epoch": 9.2620849609375e-06, "model_forward_time": 0.025115966796875, "step": 6070 }, { "epoch": 9.2620849609375e-06, "step": 6070, "training_step_time": 0.18505430221557617 }, { "epoch": 9.26361083984375e-06, "model_forward_time": 0.024486303329467773, "step": 6071 }, { "epoch": 9.26361083984375e-06, "step": 6071, "training_step_time": 0.20850515365600586 }, { "epoch": 9.26513671875e-06, "model_forward_time": 0.024213314056396484, "step": 6072 }, { "epoch": 9.26513671875e-06, "step": 6072, "training_step_time": 0.20346498489379883 }, { "epoch": 9.26666259765625e-06, "model_forward_time": 0.023839235305786133, "step": 6073 }, { "epoch": 9.26666259765625e-06, "step": 6073, "training_step_time": 0.20498895645141602 }, { "epoch": 9.2681884765625e-06, "model_forward_time": 0.02411675453186035, "step": 6074 }, { "epoch": 9.2681884765625e-06, "step": 6074, "training_step_time": 0.11251711845397949 }, { "epoch": 9.26971435546875e-06, "model_forward_time": 0.025182723999023438, "step": 6075 }, { "epoch": 9.26971435546875e-06, "step": 6075, "training_step_time": 0.10746002197265625 }, { "epoch": 9.271240234375e-06, "model_forward_time": 0.02534770965576172, "step": 6076 }, { "epoch": 9.271240234375e-06, "step": 6076, "training_step_time": 0.15038490295410156 }, { "epoch": 9.27276611328125e-06, "model_forward_time": 0.02530503273010254, "step": 6077 }, { "epoch": 9.27276611328125e-06, "step": 6077, "training_step_time": 0.10592985153198242 }, { "epoch": 9.2742919921875e-06, "model_forward_time": 0.025371551513671875, "step": 6078 }, { "epoch": 9.2742919921875e-06, "step": 6078, "training_step_time": 0.10715937614440918 }, { "epoch": 9.27581787109375e-06, "model_forward_time": 0.025108814239501953, "step": 6079 }, { "epoch": 9.27581787109375e-06, "step": 6079, "training_step_time": 0.11736893653869629 }, { "epoch": 9.27734375e-06, "grad_norm": 0.40063944458961487, "learning_rate": 9.376212322173985e-05, "loss": 0.0914, "step": 6080 }, { "epoch": 9.27734375e-06, "model_forward_time": 0.026326894760131836, "step": 6080 }, { "epoch": 9.27734375e-06, "step": 6080, "training_step_time": 0.10839056968688965 }, { "epoch": 9.27886962890625e-06, "model_forward_time": 0.02506279945373535, "step": 6081 }, { "epoch": 9.27886962890625e-06, "step": 6081, "training_step_time": 0.1984419822692871 }, { "epoch": 9.2803955078125e-06, "model_forward_time": 0.024677753448486328, "step": 6082 }, { "epoch": 9.2803955078125e-06, "step": 6082, "training_step_time": 0.1103506088256836 }, { "epoch": 9.28192138671875e-06, "model_forward_time": 0.0246737003326416, "step": 6083 }, { "epoch": 9.28192138671875e-06, "step": 6083, "training_step_time": 0.10547709465026855 }, { "epoch": 9.283447265625e-06, "model_forward_time": 0.02567601203918457, "step": 6084 }, { "epoch": 9.283447265625e-06, "step": 6084, "training_step_time": 0.11008095741271973 }, { "epoch": 9.28497314453125e-06, "model_forward_time": 0.025330543518066406, "step": 6085 }, { "epoch": 9.28497314453125e-06, "step": 6085, "training_step_time": 0.10984945297241211 }, { "epoch": 9.2864990234375e-06, "model_forward_time": 0.025125503540039062, "step": 6086 }, { "epoch": 9.2864990234375e-06, "step": 6086, "training_step_time": 0.11048316955566406 }, { "epoch": 9.28802490234375e-06, "model_forward_time": 0.024862289428710938, "step": 6087 }, { "epoch": 9.28802490234375e-06, "step": 6087, "training_step_time": 0.11214327812194824 }, { "epoch": 9.28955078125e-06, "model_forward_time": 0.025130271911621094, "step": 6088 }, { "epoch": 9.28955078125e-06, "step": 6088, "training_step_time": 0.11055469512939453 }, { "epoch": 9.29107666015625e-06, "model_forward_time": 0.025161027908325195, "step": 6089 }, { "epoch": 9.29107666015625e-06, "step": 6089, "training_step_time": 0.10976648330688477 }, { "epoch": 9.2926025390625e-06, "grad_norm": 0.5862994194030762, "learning_rate": 9.373543805267368e-05, "loss": 0.0908, "step": 6090 }, { "epoch": 9.2926025390625e-06, "model_forward_time": 0.02575206756591797, "step": 6090 }, { "epoch": 9.2926025390625e-06, "step": 6090, "training_step_time": 0.17469048500061035 }, { "epoch": 9.29412841796875e-06, "model_forward_time": 0.024547576904296875, "step": 6091 }, { "epoch": 9.29412841796875e-06, "step": 6091, "training_step_time": 0.10777735710144043 }, { "epoch": 9.295654296875e-06, "model_forward_time": 0.02457904815673828, "step": 6092 }, { "epoch": 9.295654296875e-06, "step": 6092, "training_step_time": 0.11010622978210449 }, { "epoch": 9.29718017578125e-06, "model_forward_time": 0.025714874267578125, "step": 6093 }, { "epoch": 9.29718017578125e-06, "step": 6093, "training_step_time": 0.11882877349853516 }, { "epoch": 9.2987060546875e-06, "model_forward_time": 0.026005029678344727, "step": 6094 }, { "epoch": 9.2987060546875e-06, "step": 6094, "training_step_time": 0.13192081451416016 }, { "epoch": 9.30023193359375e-06, "model_forward_time": 0.02557516098022461, "step": 6095 }, { "epoch": 9.30023193359375e-06, "step": 6095, "training_step_time": 0.1109921932220459 }, { "epoch": 9.3017578125e-06, "model_forward_time": 0.02533245086669922, "step": 6096 }, { "epoch": 9.3017578125e-06, "step": 6096, "training_step_time": 0.10853457450866699 }, { "epoch": 9.30328369140625e-06, "model_forward_time": 0.024608373641967773, "step": 6097 }, { "epoch": 9.30328369140625e-06, "step": 6097, "training_step_time": 0.10795736312866211 }, { "epoch": 9.3048095703125e-06, "model_forward_time": 0.025374174118041992, "step": 6098 }, { "epoch": 9.3048095703125e-06, "step": 6098, "training_step_time": 0.11507225036621094 }, { "epoch": 9.30633544921875e-06, "model_forward_time": 0.02528977394104004, "step": 6099 }, { "epoch": 9.30633544921875e-06, "step": 6099, "training_step_time": 0.11120748519897461 }, { "epoch": 9.307861328125e-06, "grad_norm": 0.621437132358551, "learning_rate": 9.370869974092629e-05, "loss": 0.1028, "step": 6100 }, { "epoch": 9.307861328125e-06, "model_forward_time": 0.02544379234313965, "step": 6100 }, { "epoch": 9.307861328125e-06, "step": 6100, "training_step_time": 0.11160445213317871 }, { "epoch": 9.30938720703125e-06, "model_forward_time": 0.025341033935546875, "step": 6101 }, { "epoch": 9.30938720703125e-06, "step": 6101, "training_step_time": 0.21409273147583008 }, { "epoch": 9.3109130859375e-06, "model_forward_time": 0.025267362594604492, "step": 6102 }, { "epoch": 9.3109130859375e-06, "step": 6102, "training_step_time": 0.12935900688171387 }, { "epoch": 9.31243896484375e-06, "model_forward_time": 0.024766921997070312, "step": 6103 }, { "epoch": 9.31243896484375e-06, "step": 6103, "training_step_time": 0.1256544589996338 }, { "epoch": 9.31396484375e-06, "model_forward_time": 0.024988174438476562, "step": 6104 }, { "epoch": 9.31396484375e-06, "step": 6104, "training_step_time": 0.12459301948547363 }, { "epoch": 9.31549072265625e-06, "model_forward_time": 0.025059223175048828, "step": 6105 }, { "epoch": 9.31549072265625e-06, "step": 6105, "training_step_time": 0.12719130516052246 }, { "epoch": 9.3170166015625e-06, "model_forward_time": 0.025389671325683594, "step": 6106 }, { "epoch": 9.3170166015625e-06, "step": 6106, "training_step_time": 0.1249997615814209 }, { "epoch": 9.31854248046875e-06, "model_forward_time": 0.024181842803955078, "step": 6107 }, { "epoch": 9.31854248046875e-06, "step": 6107, "training_step_time": 0.12222790718078613 }, { "epoch": 9.320068359375e-06, "model_forward_time": 0.02405714988708496, "step": 6108 }, { "epoch": 9.320068359375e-06, "step": 6108, "training_step_time": 0.12341189384460449 }, { "epoch": 9.32159423828125e-06, "model_forward_time": 0.024137020111083984, "step": 6109 }, { "epoch": 9.32159423828125e-06, "step": 6109, "training_step_time": 0.11735177040100098 }, { "epoch": 9.3231201171875e-06, "grad_norm": 0.6244925856590271, "learning_rate": 9.368190831898724e-05, "loss": 0.0705, "step": 6110 }, { "epoch": 9.3231201171875e-06, "model_forward_time": 0.024169921875, "step": 6110 }, { "epoch": 9.3231201171875e-06, "step": 6110, "training_step_time": 0.11231660842895508 }, { "epoch": 9.32464599609375e-06, "model_forward_time": 0.025360822677612305, "step": 6111 }, { "epoch": 9.32464599609375e-06, "step": 6111, "training_step_time": 0.1119542121887207 }, { "epoch": 9.326171875e-06, "model_forward_time": 0.02526235580444336, "step": 6112 }, { "epoch": 9.326171875e-06, "step": 6112, "training_step_time": 0.12076354026794434 }, { "epoch": 9.32769775390625e-06, "model_forward_time": 0.025669574737548828, "step": 6113 }, { "epoch": 9.32769775390625e-06, "step": 6113, "training_step_time": 0.11294817924499512 }, { "epoch": 9.3292236328125e-06, "model_forward_time": 0.025708436965942383, "step": 6114 }, { "epoch": 9.3292236328125e-06, "step": 6114, "training_step_time": 0.11118698120117188 }, { "epoch": 9.33074951171875e-06, "model_forward_time": 0.025431156158447266, "step": 6115 }, { "epoch": 9.33074951171875e-06, "step": 6115, "training_step_time": 0.11300110816955566 }, { "epoch": 9.332275390625e-06, "model_forward_time": 0.02508091926574707, "step": 6116 }, { "epoch": 9.332275390625e-06, "step": 6116, "training_step_time": 0.14181160926818848 }, { "epoch": 9.33380126953125e-06, "model_forward_time": 0.028625011444091797, "step": 6117 }, { "epoch": 9.33380126953125e-06, "step": 6117, "training_step_time": 0.1242678165435791 }, { "epoch": 9.3353271484375e-06, "model_forward_time": 0.02494978904724121, "step": 6118 }, { "epoch": 9.3353271484375e-06, "step": 6118, "training_step_time": 0.13138628005981445 }, { "epoch": 9.33685302734375e-06, "model_forward_time": 0.025508642196655273, "step": 6119 }, { "epoch": 9.33685302734375e-06, "step": 6119, "training_step_time": 0.1087806224822998 }, { "epoch": 9.33837890625e-06, "grad_norm": 0.3642801344394684, "learning_rate": 9.365506381941066e-05, "loss": 0.0719, "step": 6120 }, { "epoch": 9.33837890625e-06, "model_forward_time": 0.02524399757385254, "step": 6120 }, { "epoch": 9.33837890625e-06, "step": 6120, "training_step_time": 0.17859911918640137 }, { "epoch": 9.33990478515625e-06, "model_forward_time": 0.025277137756347656, "step": 6121 }, { "epoch": 9.33990478515625e-06, "step": 6121, "training_step_time": 0.21158599853515625 }, { "epoch": 9.3414306640625e-06, "model_forward_time": 0.025077104568481445, "step": 6122 }, { "epoch": 9.3414306640625e-06, "step": 6122, "training_step_time": 0.10510659217834473 }, { "epoch": 9.34295654296875e-06, "model_forward_time": 0.026541471481323242, "step": 6123 }, { "epoch": 9.34295654296875e-06, "step": 6123, "training_step_time": 0.11869454383850098 }, { "epoch": 9.344482421875e-06, "model_forward_time": 0.025813817977905273, "step": 6124 }, { "epoch": 9.344482421875e-06, "step": 6124, "training_step_time": 0.11863088607788086 }, { "epoch": 9.34600830078125e-06, "model_forward_time": 0.025994300842285156, "step": 6125 }, { "epoch": 9.34600830078125e-06, "step": 6125, "training_step_time": 0.10991549491882324 }, { "epoch": 9.3475341796875e-06, "model_forward_time": 0.02545905113220215, "step": 6126 }, { "epoch": 9.3475341796875e-06, "step": 6126, "training_step_time": 0.19866538047790527 }, { "epoch": 9.34906005859375e-06, "model_forward_time": 0.02476978302001953, "step": 6127 }, { "epoch": 9.34906005859375e-06, "step": 6127, "training_step_time": 0.10454964637756348 }, { "epoch": 9.3505859375e-06, "model_forward_time": 0.02542424201965332, "step": 6128 }, { "epoch": 9.3505859375e-06, "step": 6128, "training_step_time": 0.11229300498962402 }, { "epoch": 9.35211181640625e-06, "model_forward_time": 0.025456905364990234, "step": 6129 }, { "epoch": 9.35211181640625e-06, "step": 6129, "training_step_time": 0.10899591445922852 }, { "epoch": 9.3536376953125e-06, "grad_norm": 0.30002570152282715, "learning_rate": 9.362816627481512e-05, "loss": 0.0783, "step": 6130 }, { "epoch": 9.3536376953125e-06, "model_forward_time": 0.02555537223815918, "step": 6130 }, { "epoch": 9.3536376953125e-06, "step": 6130, "training_step_time": 0.10988306999206543 }, { "epoch": 9.35516357421875e-06, "model_forward_time": 0.02541518211364746, "step": 6131 }, { "epoch": 9.35516357421875e-06, "step": 6131, "training_step_time": 0.1168522834777832 }, { "epoch": 9.356689453125e-06, "model_forward_time": 0.025153636932373047, "step": 6132 }, { "epoch": 9.356689453125e-06, "step": 6132, "training_step_time": 0.10937237739562988 }, { "epoch": 9.35821533203125e-06, "model_forward_time": 0.025398969650268555, "step": 6133 }, { "epoch": 9.35821533203125e-06, "step": 6133, "training_step_time": 0.1102149486541748 }, { "epoch": 9.3597412109375e-06, "model_forward_time": 0.02559661865234375, "step": 6134 }, { "epoch": 9.3597412109375e-06, "step": 6134, "training_step_time": 0.10921001434326172 }, { "epoch": 9.36126708984375e-06, "model_forward_time": 0.025552749633789062, "step": 6135 }, { "epoch": 9.36126708984375e-06, "step": 6135, "training_step_time": 0.18670964241027832 }, { "epoch": 9.36279296875e-06, "model_forward_time": 0.02482128143310547, "step": 6136 }, { "epoch": 9.36279296875e-06, "step": 6136, "training_step_time": 0.10925698280334473 }, { "epoch": 9.36431884765625e-06, "model_forward_time": 0.0248110294342041, "step": 6137 }, { "epoch": 9.36431884765625e-06, "step": 6137, "training_step_time": 0.10845065116882324 }, { "epoch": 9.3658447265625e-06, "model_forward_time": 0.025116920471191406, "step": 6138 }, { "epoch": 9.3658447265625e-06, "step": 6138, "training_step_time": 0.12160754203796387 }, { "epoch": 9.36737060546875e-06, "model_forward_time": 0.025736331939697266, "step": 6139 }, { "epoch": 9.36737060546875e-06, "step": 6139, "training_step_time": 0.13090991973876953 }, { "epoch": 9.368896484375e-06, "grad_norm": 0.35604357719421387, "learning_rate": 9.360121571788371e-05, "loss": 0.0863, "step": 6140 }, { "epoch": 9.368896484375e-06, "model_forward_time": 0.02489185333251953, "step": 6140 }, { "epoch": 9.368896484375e-06, "step": 6140, "training_step_time": 0.1155397891998291 }, { "epoch": 9.37042236328125e-06, "model_forward_time": 0.025765657424926758, "step": 6141 }, { "epoch": 9.37042236328125e-06, "step": 6141, "training_step_time": 0.10877752304077148 }, { "epoch": 9.3719482421875e-06, "model_forward_time": 0.025333404541015625, "step": 6142 }, { "epoch": 9.3719482421875e-06, "step": 6142, "training_step_time": 0.21905922889709473 }, { "epoch": 9.37347412109375e-06, "model_forward_time": 0.02503657341003418, "step": 6143 }, { "epoch": 9.37347412109375e-06, "step": 6143, "training_step_time": 0.11533713340759277 }, { "epoch": 9.375e-06, "model_forward_time": 0.024666786193847656, "step": 6144 }, { "epoch": 9.375e-06, "step": 6144, "training_step_time": 0.1097874641418457 }, { "epoch": 9.37652587890625e-06, "model_forward_time": 0.025460243225097656, "step": 6145 }, { "epoch": 9.37652587890625e-06, "step": 6145, "training_step_time": 0.21968793869018555 }, { "epoch": 9.3780517578125e-06, "model_forward_time": 0.02520298957824707, "step": 6146 }, { "epoch": 9.3780517578125e-06, "step": 6146, "training_step_time": 0.11222696304321289 }, { "epoch": 9.37957763671875e-06, "model_forward_time": 0.0250399112701416, "step": 6147 }, { "epoch": 9.37957763671875e-06, "step": 6147, "training_step_time": 0.10654711723327637 }, { "epoch": 9.381103515625e-06, "model_forward_time": 0.025681018829345703, "step": 6148 }, { "epoch": 9.381103515625e-06, "step": 6148, "training_step_time": 0.11099672317504883 }, { "epoch": 9.38262939453125e-06, "model_forward_time": 0.02589869499206543, "step": 6149 }, { "epoch": 9.38262939453125e-06, "step": 6149, "training_step_time": 0.10991072654724121 }, { "epoch": 9.3841552734375e-06, "grad_norm": 0.37343111634254456, "learning_rate": 9.357421218136386e-05, "loss": 0.0946, "step": 6150 }, { "epoch": 9.3841552734375e-06, "model_forward_time": 0.02591109275817871, "step": 6150 }, { "epoch": 9.3841552734375e-06, "step": 6150, "training_step_time": 0.11036872863769531 }, { "epoch": 9.38568115234375e-06, "model_forward_time": 0.025465011596679688, "step": 6151 }, { "epoch": 9.38568115234375e-06, "step": 6151, "training_step_time": 0.11012673377990723 }, { "epoch": 9.38720703125e-06, "model_forward_time": 0.025464296340942383, "step": 6152 }, { "epoch": 9.38720703125e-06, "step": 6152, "training_step_time": 0.10990166664123535 }, { "epoch": 9.38873291015625e-06, "model_forward_time": 0.02550649642944336, "step": 6153 }, { "epoch": 9.38873291015625e-06, "step": 6153, "training_step_time": 0.11498093605041504 }, { "epoch": 9.3902587890625e-06, "model_forward_time": 0.025923728942871094, "step": 6154 }, { "epoch": 9.3902587890625e-06, "step": 6154, "training_step_time": 0.1114034652709961 }, { "epoch": 9.39178466796875e-06, "model_forward_time": 0.025403499603271484, "step": 6155 }, { "epoch": 9.39178466796875e-06, "step": 6155, "training_step_time": 0.11058354377746582 }, { "epoch": 9.393310546875e-06, "model_forward_time": 0.025498151779174805, "step": 6156 }, { "epoch": 9.393310546875e-06, "step": 6156, "training_step_time": 0.10887289047241211 }, { "epoch": 9.39483642578125e-06, "model_forward_time": 0.025255203247070312, "step": 6157 }, { "epoch": 9.39483642578125e-06, "step": 6157, "training_step_time": 0.10874533653259277 }, { "epoch": 9.3963623046875e-06, "model_forward_time": 0.025488615036010742, "step": 6158 }, { "epoch": 9.3963623046875e-06, "step": 6158, "training_step_time": 0.10801100730895996 }, { "epoch": 9.39788818359375e-06, "model_forward_time": 0.025959491729736328, "step": 6159 }, { "epoch": 9.39788818359375e-06, "step": 6159, "training_step_time": 0.11063981056213379 }, { "epoch": 9.3994140625e-06, "grad_norm": 0.564619779586792, "learning_rate": 9.354715569806744e-05, "loss": 0.0981, "step": 6160 }, { "epoch": 9.3994140625e-06, "model_forward_time": 0.02518439292907715, "step": 6160 }, { "epoch": 9.3994140625e-06, "step": 6160, "training_step_time": 0.11025691032409668 }, { "epoch": 9.40093994140625e-06, "model_forward_time": 0.025241851806640625, "step": 6161 }, { "epoch": 9.40093994140625e-06, "step": 6161, "training_step_time": 0.20263051986694336 }, { "epoch": 9.4024658203125e-06, "model_forward_time": 0.02475738525390625, "step": 6162 }, { "epoch": 9.4024658203125e-06, "step": 6162, "training_step_time": 0.11385011672973633 }, { "epoch": 9.40399169921875e-06, "model_forward_time": 0.0248873233795166, "step": 6163 }, { "epoch": 9.40399169921875e-06, "step": 6163, "training_step_time": 0.12442660331726074 }, { "epoch": 9.405517578125e-06, "model_forward_time": 0.025155305862426758, "step": 6164 }, { "epoch": 9.405517578125e-06, "step": 6164, "training_step_time": 0.14322829246520996 }, { "epoch": 9.40704345703125e-06, "model_forward_time": 0.0251619815826416, "step": 6165 }, { "epoch": 9.40704345703125e-06, "step": 6165, "training_step_time": 0.11675572395324707 }, { "epoch": 9.4085693359375e-06, "model_forward_time": 0.02509331703186035, "step": 6166 }, { "epoch": 9.4085693359375e-06, "step": 6166, "training_step_time": 0.13042712211608887 }, { "epoch": 9.41009521484375e-06, "model_forward_time": 0.025488853454589844, "step": 6167 }, { "epoch": 9.41009521484375e-06, "step": 6167, "training_step_time": 0.1405041217803955 }, { "epoch": 9.41162109375e-06, "model_forward_time": 0.024792194366455078, "step": 6168 }, { "epoch": 9.41162109375e-06, "step": 6168, "training_step_time": 0.11320900917053223 }, { "epoch": 9.41314697265625e-06, "model_forward_time": 0.02507495880126953, "step": 6169 }, { "epoch": 9.41314697265625e-06, "step": 6169, "training_step_time": 0.11163187026977539 }, { "epoch": 9.4146728515625e-06, "grad_norm": 0.583834171295166, "learning_rate": 9.352004630087062e-05, "loss": 0.0866, "step": 6170 }, { "epoch": 9.4146728515625e-06, "model_forward_time": 0.02593088150024414, "step": 6170 }, { "epoch": 9.4146728515625e-06, "step": 6170, "training_step_time": 0.1136016845703125 }, { "epoch": 9.41619873046875e-06, "model_forward_time": 0.025444507598876953, "step": 6171 }, { "epoch": 9.41619873046875e-06, "step": 6171, "training_step_time": 0.1195216178894043 }, { "epoch": 9.417724609375e-06, "model_forward_time": 0.025081396102905273, "step": 6172 }, { "epoch": 9.417724609375e-06, "step": 6172, "training_step_time": 0.12510013580322266 }, { "epoch": 9.41925048828125e-06, "model_forward_time": 0.025089740753173828, "step": 6173 }, { "epoch": 9.41925048828125e-06, "step": 6173, "training_step_time": 0.13684582710266113 }, { "epoch": 9.4207763671875e-06, "model_forward_time": 0.024561643600463867, "step": 6174 }, { "epoch": 9.4207763671875e-06, "step": 6174, "training_step_time": 0.13260364532470703 }, { "epoch": 9.42230224609375e-06, "model_forward_time": 0.02380514144897461, "step": 6175 }, { "epoch": 9.42230224609375e-06, "step": 6175, "training_step_time": 0.12708663940429688 }, { "epoch": 9.423828125e-06, "model_forward_time": 0.024834156036376953, "step": 6176 }, { "epoch": 9.423828125e-06, "step": 6176, "training_step_time": 0.12423372268676758 }, { "epoch": 9.42535400390625e-06, "model_forward_time": 0.025018692016601562, "step": 6177 }, { "epoch": 9.42535400390625e-06, "step": 6177, "training_step_time": 0.11737179756164551 }, { "epoch": 9.4268798828125e-06, "model_forward_time": 0.02531266212463379, "step": 6178 }, { "epoch": 9.4268798828125e-06, "step": 6178, "training_step_time": 0.1186058521270752 }, { "epoch": 9.42840576171875e-06, "model_forward_time": 0.025248050689697266, "step": 6179 }, { "epoch": 9.42840576171875e-06, "step": 6179, "training_step_time": 0.11556363105773926 }, { "epoch": 9.429931640625e-06, "grad_norm": 0.41108426451683044, "learning_rate": 9.349288402271388e-05, "loss": 0.0706, "step": 6180 }, { "epoch": 9.429931640625e-06, "model_forward_time": 0.02515387535095215, "step": 6180 }, { "epoch": 9.429931640625e-06, "step": 6180, "training_step_time": 0.11102104187011719 }, { "epoch": 9.43145751953125e-06, "model_forward_time": 0.02567458152770996, "step": 6181 }, { "epoch": 9.43145751953125e-06, "step": 6181, "training_step_time": 0.12255430221557617 }, { "epoch": 9.4329833984375e-06, "model_forward_time": 0.02473163604736328, "step": 6182 }, { "epoch": 9.4329833984375e-06, "step": 6182, "training_step_time": 0.1137089729309082 }, { "epoch": 9.43450927734375e-06, "model_forward_time": 0.024974346160888672, "step": 6183 }, { "epoch": 9.43450927734375e-06, "step": 6183, "training_step_time": 0.23195481300354004 }, { "epoch": 9.43603515625e-06, "model_forward_time": 0.024399518966674805, "step": 6184 }, { "epoch": 9.43603515625e-06, "step": 6184, "training_step_time": 0.12193107604980469 }, { "epoch": 9.43756103515625e-06, "model_forward_time": 0.024416446685791016, "step": 6185 }, { "epoch": 9.43756103515625e-06, "step": 6185, "training_step_time": 0.11677813529968262 }, { "epoch": 9.4390869140625e-06, "model_forward_time": 0.025165557861328125, "step": 6186 }, { "epoch": 9.4390869140625e-06, "step": 6186, "training_step_time": 0.10760855674743652 }, { "epoch": 9.44061279296875e-06, "model_forward_time": 0.024599790573120117, "step": 6187 }, { "epoch": 9.44061279296875e-06, "step": 6187, "training_step_time": 0.1706552505493164 }, { "epoch": 9.442138671875e-06, "model_forward_time": 0.024727821350097656, "step": 6188 }, { "epoch": 9.442138671875e-06, "step": 6188, "training_step_time": 0.15900611877441406 }, { "epoch": 9.44366455078125e-06, "model_forward_time": 0.02461409568786621, "step": 6189 }, { "epoch": 9.44366455078125e-06, "step": 6189, "training_step_time": 0.12340497970581055 }, { "epoch": 9.4451904296875e-06, "grad_norm": 0.30063387751579285, "learning_rate": 9.346566889660193e-05, "loss": 0.0882, "step": 6190 }, { "epoch": 9.4451904296875e-06, "model_forward_time": 0.024756908416748047, "step": 6190 }, { "epoch": 9.4451904296875e-06, "step": 6190, "training_step_time": 0.2031841278076172 }, { "epoch": 9.44671630859375e-06, "model_forward_time": 0.024410724639892578, "step": 6191 }, { "epoch": 9.44671630859375e-06, "step": 6191, "training_step_time": 0.1258711814880371 }, { "epoch": 9.4482421875e-06, "model_forward_time": 0.024598360061645508, "step": 6192 }, { "epoch": 9.4482421875e-06, "step": 6192, "training_step_time": 0.10519027709960938 }, { "epoch": 9.44976806640625e-06, "model_forward_time": 0.025116920471191406, "step": 6193 }, { "epoch": 9.44976806640625e-06, "step": 6193, "training_step_time": 0.10613679885864258 }, { "epoch": 9.4512939453125e-06, "model_forward_time": 0.025054931640625, "step": 6194 }, { "epoch": 9.4512939453125e-06, "step": 6194, "training_step_time": 0.10699892044067383 }, { "epoch": 9.45281982421875e-06, "model_forward_time": 0.025023221969604492, "step": 6195 }, { "epoch": 9.45281982421875e-06, "step": 6195, "training_step_time": 0.1083076000213623 }, { "epoch": 9.454345703125e-06, "model_forward_time": 0.025154829025268555, "step": 6196 }, { "epoch": 9.454345703125e-06, "step": 6196, "training_step_time": 0.1218252182006836 }, { "epoch": 9.45587158203125e-06, "model_forward_time": 0.025255441665649414, "step": 6197 }, { "epoch": 9.45587158203125e-06, "step": 6197, "training_step_time": 0.13233566284179688 }, { "epoch": 9.4573974609375e-06, "model_forward_time": 0.0247042179107666, "step": 6198 }, { "epoch": 9.4573974609375e-06, "step": 6198, "training_step_time": 0.12758111953735352 }, { "epoch": 9.45892333984375e-06, "model_forward_time": 0.02450275421142578, "step": 6199 }, { "epoch": 9.45892333984375e-06, "step": 6199, "training_step_time": 0.1211395263671875 }, { "epoch": 9.46044921875e-06, "grad_norm": 0.42109575867652893, "learning_rate": 9.343840095560372e-05, "loss": 0.0643, "step": 6200 }, { "epoch": 9.46044921875e-06, "model_forward_time": 0.02542853355407715, "step": 6200 }, { "epoch": 9.46044921875e-06, "step": 6200, "training_step_time": 0.11516880989074707 }, { "epoch": 9.46197509765625e-06, "model_forward_time": 0.025655269622802734, "step": 6201 }, { "epoch": 9.46197509765625e-06, "step": 6201, "training_step_time": 0.11709213256835938 }, { "epoch": 9.4635009765625e-06, "model_forward_time": 0.024913311004638672, "step": 6202 }, { "epoch": 9.4635009765625e-06, "step": 6202, "training_step_time": 0.11503815650939941 }, { "epoch": 9.46502685546875e-06, "model_forward_time": 0.0254209041595459, "step": 6203 }, { "epoch": 9.46502685546875e-06, "step": 6203, "training_step_time": 0.11393857002258301 }, { "epoch": 9.466552734375e-06, "model_forward_time": 0.024818897247314453, "step": 6204 }, { "epoch": 9.466552734375e-06, "step": 6204, "training_step_time": 0.1094815731048584 }, { "epoch": 9.46807861328125e-06, "model_forward_time": 0.02553558349609375, "step": 6205 }, { "epoch": 9.46807861328125e-06, "step": 6205, "training_step_time": 0.11114192008972168 }, { "epoch": 9.4696044921875e-06, "model_forward_time": 0.024440288543701172, "step": 6206 }, { "epoch": 9.4696044921875e-06, "step": 6206, "training_step_time": 0.10841751098632812 }, { "epoch": 9.47113037109375e-06, "model_forward_time": 0.02457880973815918, "step": 6207 }, { "epoch": 9.47113037109375e-06, "step": 6207, "training_step_time": 0.17777109146118164 }, { "epoch": 9.47265625e-06, "model_forward_time": 0.024361848831176758, "step": 6208 }, { "epoch": 9.47265625e-06, "step": 6208, "training_step_time": 0.1142737865447998 }, { "epoch": 9.47418212890625e-06, "model_forward_time": 0.024552345275878906, "step": 6209 }, { "epoch": 9.47418212890625e-06, "step": 6209, "training_step_time": 0.13236141204833984 }, { "epoch": 9.4757080078125e-06, "grad_norm": 0.3996535837650299, "learning_rate": 9.341108023285238e-05, "loss": 0.0814, "step": 6210 }, { "epoch": 9.4757080078125e-06, "model_forward_time": 0.025156259536743164, "step": 6210 }, { "epoch": 9.4757080078125e-06, "step": 6210, "training_step_time": 0.1558079719543457 }, { "epoch": 9.47723388671875e-06, "model_forward_time": 0.024362802505493164, "step": 6211 }, { "epoch": 9.47723388671875e-06, "step": 6211, "training_step_time": 0.22547006607055664 }, { "epoch": 9.478759765625e-06, "model_forward_time": 0.02451014518737793, "step": 6212 }, { "epoch": 9.478759765625e-06, "step": 6212, "training_step_time": 0.11100244522094727 }, { "epoch": 9.48028564453125e-06, "model_forward_time": 0.025341510772705078, "step": 6213 }, { "epoch": 9.48028564453125e-06, "step": 6213, "training_step_time": 0.10994076728820801 }, { "epoch": 9.4818115234375e-06, "model_forward_time": 0.02511906623840332, "step": 6214 }, { "epoch": 9.4818115234375e-06, "step": 6214, "training_step_time": 0.1165621280670166 }, { "epoch": 9.48333740234375e-06, "model_forward_time": 0.024886608123779297, "step": 6215 }, { "epoch": 9.48333740234375e-06, "step": 6215, "training_step_time": 0.10929584503173828 }, { "epoch": 9.48486328125e-06, "model_forward_time": 0.02535867691040039, "step": 6216 }, { "epoch": 9.48486328125e-06, "step": 6216, "training_step_time": 0.19078516960144043 }, { "epoch": 9.48638916015625e-06, "model_forward_time": 0.02449178695678711, "step": 6217 }, { "epoch": 9.48638916015625e-06, "step": 6217, "training_step_time": 0.1040031909942627 }, { "epoch": 9.4879150390625e-06, "model_forward_time": 0.023343324661254883, "step": 6218 }, { "epoch": 9.4879150390625e-06, "step": 6218, "training_step_time": 0.10399699211120605 }, { "epoch": 9.48944091796875e-06, "model_forward_time": 0.025333404541015625, "step": 6219 }, { "epoch": 9.48944091796875e-06, "step": 6219, "training_step_time": 0.11214232444763184 }, { "epoch": 9.490966796875e-06, "grad_norm": 0.6419973969459534, "learning_rate": 9.338370676154516e-05, "loss": 0.0668, "step": 6220 }, { "epoch": 9.490966796875e-06, "model_forward_time": 0.025419235229492188, "step": 6220 }, { "epoch": 9.490966796875e-06, "step": 6220, "training_step_time": 0.10859036445617676 }, { "epoch": 9.49249267578125e-06, "model_forward_time": 0.02519702911376953, "step": 6221 }, { "epoch": 9.49249267578125e-06, "step": 6221, "training_step_time": 0.1115415096282959 }, { "epoch": 9.4940185546875e-06, "model_forward_time": 0.025003910064697266, "step": 6222 }, { "epoch": 9.4940185546875e-06, "step": 6222, "training_step_time": 0.12275409698486328 }, { "epoch": 9.49554443359375e-06, "model_forward_time": 0.02475595474243164, "step": 6223 }, { "epoch": 9.49554443359375e-06, "step": 6223, "training_step_time": 0.11707901954650879 }, { "epoch": 9.4970703125e-06, "model_forward_time": 0.0244448184967041, "step": 6224 }, { "epoch": 9.4970703125e-06, "step": 6224, "training_step_time": 0.11331820487976074 }, { "epoch": 9.49859619140625e-06, "model_forward_time": 0.025983810424804688, "step": 6225 }, { "epoch": 9.49859619140625e-06, "step": 6225, "training_step_time": 0.167982816696167 }, { "epoch": 9.5001220703125e-06, "model_forward_time": 0.02460169792175293, "step": 6226 }, { "epoch": 9.5001220703125e-06, "step": 6226, "training_step_time": 0.11571502685546875 }, { "epoch": 9.50164794921875e-06, "model_forward_time": 0.02713298797607422, "step": 6227 }, { "epoch": 9.50164794921875e-06, "step": 6227, "training_step_time": 0.17585968971252441 }, { "epoch": 9.503173828125e-06, "model_forward_time": 0.024763822555541992, "step": 6228 }, { "epoch": 9.503173828125e-06, "step": 6228, "training_step_time": 0.18323731422424316 }, { "epoch": 9.50469970703125e-06, "model_forward_time": 0.023694515228271484, "step": 6229 }, { "epoch": 9.50469970703125e-06, "step": 6229, "training_step_time": 0.11025428771972656 }, { "epoch": 9.5062255859375e-06, "grad_norm": 0.36760419607162476, "learning_rate": 9.335628057494341e-05, "loss": 0.0705, "step": 6230 }, { "epoch": 9.5062255859375e-06, "model_forward_time": 0.024565458297729492, "step": 6230 }, { "epoch": 9.5062255859375e-06, "step": 6230, "training_step_time": 0.18889498710632324 }, { "epoch": 9.50775146484375e-06, "model_forward_time": 0.026060104370117188, "step": 6231 }, { "epoch": 9.50775146484375e-06, "step": 6231, "training_step_time": 0.12735915184020996 }, { "epoch": 9.50927734375e-06, "model_forward_time": 0.02496933937072754, "step": 6232 }, { "epoch": 9.50927734375e-06, "step": 6232, "training_step_time": 0.10544681549072266 }, { "epoch": 9.51080322265625e-06, "model_forward_time": 0.025466203689575195, "step": 6233 }, { "epoch": 9.51080322265625e-06, "step": 6233, "training_step_time": 0.10987114906311035 }, { "epoch": 9.5123291015625e-06, "model_forward_time": 0.025264501571655273, "step": 6234 }, { "epoch": 9.5123291015625e-06, "step": 6234, "training_step_time": 0.17144393920898438 }, { "epoch": 9.51385498046875e-06, "model_forward_time": 0.024371862411499023, "step": 6235 }, { "epoch": 9.51385498046875e-06, "step": 6235, "training_step_time": 0.16897082328796387 }, { "epoch": 9.515380859375e-06, "model_forward_time": 0.02393341064453125, "step": 6236 }, { "epoch": 9.515380859375e-06, "step": 6236, "training_step_time": 0.10353326797485352 }, { "epoch": 9.51690673828125e-06, "model_forward_time": 0.024433374404907227, "step": 6237 }, { "epoch": 9.51690673828125e-06, "step": 6237, "training_step_time": 0.10612702369689941 }, { "epoch": 9.5184326171875e-06, "model_forward_time": 0.024900197982788086, "step": 6238 }, { "epoch": 9.5184326171875e-06, "step": 6238, "training_step_time": 0.10895013809204102 }, { "epoch": 9.51995849609375e-06, "model_forward_time": 0.02509784698486328, "step": 6239 }, { "epoch": 9.51995849609375e-06, "step": 6239, "training_step_time": 0.10925722122192383 }, { "epoch": 9.521484375e-06, "grad_norm": 0.4853487014770508, "learning_rate": 9.332880170637252e-05, "loss": 0.0732, "step": 6240 }, { "epoch": 9.521484375e-06, "model_forward_time": 0.02447199821472168, "step": 6240 }, { "epoch": 9.521484375e-06, "step": 6240, "training_step_time": 0.1110687255859375 }, { "epoch": 9.52301025390625e-06, "model_forward_time": 0.0251615047454834, "step": 6241 }, { "epoch": 9.52301025390625e-06, "step": 6241, "training_step_time": 0.10802507400512695 }, { "epoch": 9.5245361328125e-06, "model_forward_time": 0.024557113647460938, "step": 6242 }, { "epoch": 9.5245361328125e-06, "step": 6242, "training_step_time": 0.10723423957824707 }, { "epoch": 9.52606201171875e-06, "model_forward_time": 0.025428295135498047, "step": 6243 }, { "epoch": 9.52606201171875e-06, "step": 6243, "training_step_time": 0.10991764068603516 }, { "epoch": 9.527587890625e-06, "model_forward_time": 0.024806976318359375, "step": 6244 }, { "epoch": 9.527587890625e-06, "step": 6244, "training_step_time": 0.11141562461853027 }, { "epoch": 9.52911376953125e-06, "model_forward_time": 0.024971485137939453, "step": 6245 }, { "epoch": 9.52911376953125e-06, "step": 6245, "training_step_time": 0.10636734962463379 }, { "epoch": 9.5306396484375e-06, "model_forward_time": 0.024776458740234375, "step": 6246 }, { "epoch": 9.5306396484375e-06, "step": 6246, "training_step_time": 0.13183140754699707 }, { "epoch": 9.53216552734375e-06, "model_forward_time": 0.025059223175048828, "step": 6247 }, { "epoch": 9.53216552734375e-06, "step": 6247, "training_step_time": 0.15092754364013672 }, { "epoch": 9.53369140625e-06, "model_forward_time": 0.024275541305541992, "step": 6248 }, { "epoch": 9.53369140625e-06, "step": 6248, "training_step_time": 0.142836332321167 }, { "epoch": 9.53521728515625e-06, "model_forward_time": 0.024199485778808594, "step": 6249 }, { "epoch": 9.53521728515625e-06, "step": 6249, "training_step_time": 0.12726211547851562 }, { "epoch": 9.5367431640625e-06, "grad_norm": 0.4221380949020386, "learning_rate": 9.330127018922194e-05, "loss": 0.0843, "step": 6250 }, { "epoch": 9.5367431640625e-06, "model_forward_time": 0.02413344383239746, "step": 6250 }, { "epoch": 9.5367431640625e-06, "step": 6250, "training_step_time": 0.12211441993713379 }, { "epoch": 9.53826904296875e-06, "model_forward_time": 0.024712085723876953, "step": 6251 }, { "epoch": 9.53826904296875e-06, "step": 6251, "training_step_time": 0.19897794723510742 }, { "epoch": 9.539794921875e-06, "model_forward_time": 0.024135828018188477, "step": 6252 }, { "epoch": 9.539794921875e-06, "step": 6252, "training_step_time": 0.11450624465942383 }, { "epoch": 9.54132080078125e-06, "model_forward_time": 0.024530887603759766, "step": 6253 }, { "epoch": 9.54132080078125e-06, "step": 6253, "training_step_time": 0.1367018222808838 }, { "epoch": 9.5428466796875e-06, "model_forward_time": 0.02434539794921875, "step": 6254 }, { "epoch": 9.5428466796875e-06, "step": 6254, "training_step_time": 0.16207027435302734 }, { "epoch": 9.54437255859375e-06, "model_forward_time": 0.024758100509643555, "step": 6255 }, { "epoch": 9.54437255859375e-06, "step": 6255, "training_step_time": 0.2168560028076172 }, { "epoch": 9.5458984375e-06, "model_forward_time": 0.02465987205505371, "step": 6256 }, { "epoch": 9.5458984375e-06, "step": 6256, "training_step_time": 0.11516547203063965 }, { "epoch": 9.54742431640625e-06, "model_forward_time": 0.024525880813598633, "step": 6257 }, { "epoch": 9.54742431640625e-06, "step": 6257, "training_step_time": 0.11011433601379395 }, { "epoch": 9.5489501953125e-06, "model_forward_time": 0.024698495864868164, "step": 6258 }, { "epoch": 9.5489501953125e-06, "step": 6258, "training_step_time": 0.11081743240356445 }, { "epoch": 9.55047607421875e-06, "model_forward_time": 0.025320053100585938, "step": 6259 }, { "epoch": 9.55047607421875e-06, "step": 6259, "training_step_time": 0.1095888614654541 }, { "epoch": 9.552001953125e-06, "grad_norm": 0.3353863060474396, "learning_rate": 9.327368605694502e-05, "loss": 0.0624, "step": 6260 }, { "epoch": 9.552001953125e-06, "model_forward_time": 0.02522420883178711, "step": 6260 }, { "epoch": 9.552001953125e-06, "step": 6260, "training_step_time": 0.1951580047607422 }, { "epoch": 9.55352783203125e-06, "model_forward_time": 0.02455282211303711, "step": 6261 }, { "epoch": 9.55352783203125e-06, "step": 6261, "training_step_time": 0.10666394233703613 }, { "epoch": 9.5550537109375e-06, "model_forward_time": 0.02463984489440918, "step": 6262 }, { "epoch": 9.5550537109375e-06, "step": 6262, "training_step_time": 0.1113584041595459 }, { "epoch": 9.55657958984375e-06, "model_forward_time": 0.024731874465942383, "step": 6263 }, { "epoch": 9.55657958984375e-06, "step": 6263, "training_step_time": 0.11738872528076172 }, { "epoch": 9.55810546875e-06, "model_forward_time": 0.025014877319335938, "step": 6264 }, { "epoch": 9.55810546875e-06, "step": 6264, "training_step_time": 0.11180663108825684 }, { "epoch": 9.55963134765625e-06, "model_forward_time": 0.025120019912719727, "step": 6265 }, { "epoch": 9.55963134765625e-06, "step": 6265, "training_step_time": 0.1087641716003418 }, { "epoch": 9.5611572265625e-06, "model_forward_time": 0.025112390518188477, "step": 6266 }, { "epoch": 9.5611572265625e-06, "step": 6266, "training_step_time": 0.11778473854064941 }, { "epoch": 9.56268310546875e-06, "model_forward_time": 0.0248105525970459, "step": 6267 }, { "epoch": 9.56268310546875e-06, "step": 6267, "training_step_time": 0.1143186092376709 }, { "epoch": 9.564208984375e-06, "model_forward_time": 0.02477717399597168, "step": 6268 }, { "epoch": 9.564208984375e-06, "step": 6268, "training_step_time": 0.11207222938537598 }, { "epoch": 9.56573486328125e-06, "model_forward_time": 0.025023937225341797, "step": 6269 }, { "epoch": 9.56573486328125e-06, "step": 6269, "training_step_time": 0.14268803596496582 }, { "epoch": 9.5672607421875e-06, "grad_norm": 0.38684239983558655, "learning_rate": 9.32460493430591e-05, "loss": 0.0619, "step": 6270 }, { "epoch": 9.5672607421875e-06, "model_forward_time": 0.024422883987426758, "step": 6270 }, { "epoch": 9.5672607421875e-06, "step": 6270, "training_step_time": 0.11173820495605469 }, { "epoch": 9.56878662109375e-06, "model_forward_time": 0.024866580963134766, "step": 6271 }, { "epoch": 9.56878662109375e-06, "step": 6271, "training_step_time": 0.11045074462890625 }, { "epoch": 9.5703125e-06, "model_forward_time": 0.02553415298461914, "step": 6272 }, { "epoch": 9.5703125e-06, "step": 6272, "training_step_time": 0.11159992218017578 }, { "epoch": 9.57183837890625e-06, "model_forward_time": 0.02513861656188965, "step": 6273 }, { "epoch": 9.57183837890625e-06, "step": 6273, "training_step_time": 0.13217711448669434 }, { "epoch": 9.5733642578125e-06, "model_forward_time": 0.02485799789428711, "step": 6274 }, { "epoch": 9.5733642578125e-06, "step": 6274, "training_step_time": 0.10873818397521973 }, { "epoch": 9.57489013671875e-06, "model_forward_time": 0.024221420288085938, "step": 6275 }, { "epoch": 9.57489013671875e-06, "step": 6275, "training_step_time": 0.16686153411865234 }, { "epoch": 9.576416015625e-06, "model_forward_time": 0.02460002899169922, "step": 6276 }, { "epoch": 9.576416015625e-06, "step": 6276, "training_step_time": 0.17150664329528809 }, { "epoch": 9.57794189453125e-06, "model_forward_time": 0.02502131462097168, "step": 6277 }, { "epoch": 9.57794189453125e-06, "step": 6277, "training_step_time": 0.11208891868591309 }, { "epoch": 9.5794677734375e-06, "model_forward_time": 0.024988412857055664, "step": 6278 }, { "epoch": 9.5794677734375e-06, "step": 6278, "training_step_time": 0.2189488410949707 }, { "epoch": 9.58099365234375e-06, "model_forward_time": 0.024121761322021484, "step": 6279 }, { "epoch": 9.58099365234375e-06, "step": 6279, "training_step_time": 0.1135091781616211 }, { "epoch": 9.58251953125e-06, "grad_norm": 0.4543576240539551, "learning_rate": 9.321836008114539e-05, "loss": 0.0786, "step": 6280 }, { "epoch": 9.58251953125e-06, "model_forward_time": 0.02466416358947754, "step": 6280 }, { "epoch": 9.58251953125e-06, "step": 6280, "training_step_time": 0.1085960865020752 }, { "epoch": 9.58404541015625e-06, "model_forward_time": 0.02508854866027832, "step": 6281 }, { "epoch": 9.58404541015625e-06, "step": 6281, "training_step_time": 0.10630202293395996 }, { "epoch": 9.5855712890625e-06, "model_forward_time": 0.024652957916259766, "step": 6282 }, { "epoch": 9.5855712890625e-06, "step": 6282, "training_step_time": 0.11135077476501465 }, { "epoch": 9.58709716796875e-06, "model_forward_time": 0.02559351921081543, "step": 6283 }, { "epoch": 9.58709716796875e-06, "step": 6283, "training_step_time": 0.1100010871887207 }, { "epoch": 9.588623046875e-06, "model_forward_time": 0.025285959243774414, "step": 6284 }, { "epoch": 9.588623046875e-06, "step": 6284, "training_step_time": 0.11015105247497559 }, { "epoch": 9.59014892578125e-06, "model_forward_time": 0.025254011154174805, "step": 6285 }, { "epoch": 9.59014892578125e-06, "step": 6285, "training_step_time": 0.11470675468444824 }, { "epoch": 9.5916748046875e-06, "model_forward_time": 0.02506422996520996, "step": 6286 }, { "epoch": 9.5916748046875e-06, "step": 6286, "training_step_time": 0.11157894134521484 }, { "epoch": 9.59320068359375e-06, "model_forward_time": 0.025619983673095703, "step": 6287 }, { "epoch": 9.59320068359375e-06, "step": 6287, "training_step_time": 0.10990262031555176 }, { "epoch": 9.5947265625e-06, "model_forward_time": 0.025167465209960938, "step": 6288 }, { "epoch": 9.5947265625e-06, "step": 6288, "training_step_time": 0.16965055465698242 }, { "epoch": 9.59625244140625e-06, "model_forward_time": 0.024462223052978516, "step": 6289 }, { "epoch": 9.59625244140625e-06, "step": 6289, "training_step_time": 0.15226197242736816 }, { "epoch": 9.5977783203125e-06, "grad_norm": 0.428813099861145, "learning_rate": 9.319061830484898e-05, "loss": 0.0796, "step": 6290 }, { "epoch": 9.5977783203125e-06, "model_forward_time": 0.024161100387573242, "step": 6290 }, { "epoch": 9.5977783203125e-06, "step": 6290, "training_step_time": 0.13245582580566406 }, { "epoch": 9.59930419921875e-06, "model_forward_time": 0.024697303771972656, "step": 6291 }, { "epoch": 9.59930419921875e-06, "step": 6291, "training_step_time": 0.12979745864868164 }, { "epoch": 9.600830078125e-06, "model_forward_time": 0.024670124053955078, "step": 6292 }, { "epoch": 9.600830078125e-06, "step": 6292, "training_step_time": 0.12748312950134277 }, { "epoch": 9.60235595703125e-06, "model_forward_time": 0.024752378463745117, "step": 6293 }, { "epoch": 9.60235595703125e-06, "step": 6293, "training_step_time": 0.11943197250366211 }, { "epoch": 9.6038818359375e-06, "model_forward_time": 0.02444005012512207, "step": 6294 }, { "epoch": 9.6038818359375e-06, "step": 6294, "training_step_time": 0.11942100524902344 }, { "epoch": 9.60540771484375e-06, "model_forward_time": 0.0250093936920166, "step": 6295 }, { "epoch": 9.60540771484375e-06, "step": 6295, "training_step_time": 0.13805007934570312 }, { "epoch": 9.60693359375e-06, "model_forward_time": 0.02575373649597168, "step": 6296 }, { "epoch": 9.60693359375e-06, "step": 6296, "training_step_time": 0.11794281005859375 }, { "epoch": 9.60845947265625e-06, "model_forward_time": 0.0251615047454834, "step": 6297 }, { "epoch": 9.60845947265625e-06, "step": 6297, "training_step_time": 0.12678050994873047 }, { "epoch": 9.6099853515625e-06, "model_forward_time": 0.02752089500427246, "step": 6298 }, { "epoch": 9.6099853515625e-06, "step": 6298, "training_step_time": 0.14595413208007812 }, { "epoch": 9.61151123046875e-06, "model_forward_time": 0.024409055709838867, "step": 6299 }, { "epoch": 9.61151123046875e-06, "step": 6299, "training_step_time": 0.22737455368041992 }, { "epoch": 9.613037109375e-06, "grad_norm": 0.7419989109039307, "learning_rate": 9.316282404787871e-05, "loss": 0.067, "step": 6300 }, { "epoch": 9.613037109375e-06, "model_forward_time": 0.024438142776489258, "step": 6300 }, { "epoch": 9.613037109375e-06, "step": 6300, "training_step_time": 0.13497567176818848 }, { "epoch": 9.61456298828125e-06, "model_forward_time": 0.02456068992614746, "step": 6301 }, { "epoch": 9.61456298828125e-06, "step": 6301, "training_step_time": 0.189544677734375 }, { "epoch": 9.6160888671875e-06, "model_forward_time": 0.024581432342529297, "step": 6302 }, { "epoch": 9.6160888671875e-06, "step": 6302, "training_step_time": 0.11003494262695312 }, { "epoch": 9.61761474609375e-06, "model_forward_time": 0.024633169174194336, "step": 6303 }, { "epoch": 9.61761474609375e-06, "step": 6303, "training_step_time": 0.15676069259643555 }, { "epoch": 9.619140625e-06, "model_forward_time": 0.024286746978759766, "step": 6304 }, { "epoch": 9.619140625e-06, "step": 6304, "training_step_time": 0.11244535446166992 }, { "epoch": 9.62066650390625e-06, "model_forward_time": 0.02470254898071289, "step": 6305 }, { "epoch": 9.62066650390625e-06, "step": 6305, "training_step_time": 0.1061868667602539 }, { "epoch": 9.6221923828125e-06, "model_forward_time": 0.0251009464263916, "step": 6306 }, { "epoch": 9.6221923828125e-06, "step": 6306, "training_step_time": 0.11033749580383301 }, { "epoch": 9.62371826171875e-06, "model_forward_time": 0.024740219116210938, "step": 6307 }, { "epoch": 9.62371826171875e-06, "step": 6307, "training_step_time": 0.10691952705383301 }, { "epoch": 9.625244140625e-06, "model_forward_time": 0.024940967559814453, "step": 6308 }, { "epoch": 9.625244140625e-06, "step": 6308, "training_step_time": 0.10649633407592773 }, { "epoch": 9.62677001953125e-06, "model_forward_time": 0.025206327438354492, "step": 6309 }, { "epoch": 9.62677001953125e-06, "step": 6309, "training_step_time": 0.11043024063110352 }, { "epoch": 9.6282958984375e-06, "grad_norm": 0.7096858620643616, "learning_rate": 9.313497734400722e-05, "loss": 0.074, "step": 6310 }, { "epoch": 9.6282958984375e-06, "model_forward_time": 0.024983882904052734, "step": 6310 }, { "epoch": 9.6282958984375e-06, "step": 6310, "training_step_time": 0.11170029640197754 }, { "epoch": 9.62982177734375e-06, "model_forward_time": 0.025534868240356445, "step": 6311 }, { "epoch": 9.62982177734375e-06, "step": 6311, "training_step_time": 0.1065671443939209 }, { "epoch": 9.63134765625e-06, "model_forward_time": 0.024755477905273438, "step": 6312 }, { "epoch": 9.63134765625e-06, "step": 6312, "training_step_time": 0.10960102081298828 }, { "epoch": 9.63287353515625e-06, "model_forward_time": 0.025238990783691406, "step": 6313 }, { "epoch": 9.63287353515625e-06, "step": 6313, "training_step_time": 0.17888092994689941 }, { "epoch": 9.6343994140625e-06, "model_forward_time": 0.024356842041015625, "step": 6314 }, { "epoch": 9.6343994140625e-06, "step": 6314, "training_step_time": 0.11677289009094238 }, { "epoch": 9.63592529296875e-06, "model_forward_time": 0.024043798446655273, "step": 6315 }, { "epoch": 9.63592529296875e-06, "step": 6315, "training_step_time": 0.1807842254638672 }, { "epoch": 9.637451171875e-06, "model_forward_time": 0.02550673484802246, "step": 6316 }, { "epoch": 9.637451171875e-06, "step": 6316, "training_step_time": 0.17399907112121582 }, { "epoch": 9.63897705078125e-06, "model_forward_time": 0.025082826614379883, "step": 6317 }, { "epoch": 9.63897705078125e-06, "step": 6317, "training_step_time": 0.10758781433105469 }, { "epoch": 9.6405029296875e-06, "model_forward_time": 0.02474212646484375, "step": 6318 }, { "epoch": 9.6405029296875e-06, "step": 6318, "training_step_time": 0.2243356704711914 }, { "epoch": 9.64202880859375e-06, "model_forward_time": 0.0251007080078125, "step": 6319 }, { "epoch": 9.64202880859375e-06, "step": 6319, "training_step_time": 0.10608887672424316 }, { "epoch": 9.6435546875e-06, "grad_norm": 0.40498948097229004, "learning_rate": 9.31070782270709e-05, "loss": 0.0725, "step": 6320 }, { "epoch": 9.6435546875e-06, "model_forward_time": 0.02472209930419922, "step": 6320 }, { "epoch": 9.6435546875e-06, "step": 6320, "training_step_time": 0.11642622947692871 }, { "epoch": 9.64508056640625e-06, "model_forward_time": 0.02520298957824707, "step": 6321 }, { "epoch": 9.64508056640625e-06, "step": 6321, "training_step_time": 0.2090015411376953 }, { "epoch": 9.6466064453125e-06, "model_forward_time": 0.02429819107055664, "step": 6322 }, { "epoch": 9.6466064453125e-06, "step": 6322, "training_step_time": 0.11702394485473633 }, { "epoch": 9.64813232421875e-06, "model_forward_time": 0.024440765380859375, "step": 6323 }, { "epoch": 9.64813232421875e-06, "step": 6323, "training_step_time": 0.10973167419433594 }, { "epoch": 9.649658203125e-06, "model_forward_time": 0.024803876876831055, "step": 6324 }, { "epoch": 9.649658203125e-06, "step": 6324, "training_step_time": 0.10746574401855469 }, { "epoch": 9.65118408203125e-06, "model_forward_time": 0.02493906021118164, "step": 6325 }, { "epoch": 9.65118408203125e-06, "step": 6325, "training_step_time": 0.10668706893920898 }, { "epoch": 9.6527099609375e-06, "model_forward_time": 0.025269508361816406, "step": 6326 }, { "epoch": 9.6527099609375e-06, "step": 6326, "training_step_time": 0.11152958869934082 }, { "epoch": 9.65423583984375e-06, "model_forward_time": 0.02511453628540039, "step": 6327 }, { "epoch": 9.65423583984375e-06, "step": 6327, "training_step_time": 0.10652971267700195 }, { "epoch": 9.65576171875e-06, "model_forward_time": 0.024953603744506836, "step": 6328 }, { "epoch": 9.65576171875e-06, "step": 6328, "training_step_time": 0.10905265808105469 }, { "epoch": 9.65728759765625e-06, "model_forward_time": 0.02517414093017578, "step": 6329 }, { "epoch": 9.65728759765625e-06, "step": 6329, "training_step_time": 0.1081702709197998 }, { "epoch": 9.6588134765625e-06, "grad_norm": 0.38989371061325073, "learning_rate": 9.30791267309698e-05, "loss": 0.0732, "step": 6330 }, { "epoch": 9.6588134765625e-06, "model_forward_time": 0.02578139305114746, "step": 6330 }, { "epoch": 9.6588134765625e-06, "step": 6330, "training_step_time": 0.10853290557861328 }, { "epoch": 9.66033935546875e-06, "model_forward_time": 0.02470541000366211, "step": 6331 }, { "epoch": 9.66033935546875e-06, "step": 6331, "training_step_time": 0.1062326431274414 }, { "epoch": 9.661865234375e-06, "model_forward_time": 0.025282621383666992, "step": 6332 }, { "epoch": 9.661865234375e-06, "step": 6332, "training_step_time": 0.1095738410949707 }, { "epoch": 9.66339111328125e-06, "model_forward_time": 0.024968385696411133, "step": 6333 }, { "epoch": 9.66339111328125e-06, "step": 6333, "training_step_time": 0.10760498046875 }, { "epoch": 9.6649169921875e-06, "model_forward_time": 0.025264501571655273, "step": 6334 }, { "epoch": 9.6649169921875e-06, "step": 6334, "training_step_time": 0.11597156524658203 }, { "epoch": 9.66644287109375e-06, "model_forward_time": 0.025235414505004883, "step": 6335 }, { "epoch": 9.66644287109375e-06, "step": 6335, "training_step_time": 0.1047053337097168 }, { "epoch": 9.66796875e-06, "model_forward_time": 0.025152921676635742, "step": 6336 }, { "epoch": 9.66796875e-06, "step": 6336, "training_step_time": 0.10790801048278809 }, { "epoch": 9.66949462890625e-06, "model_forward_time": 0.025397300720214844, "step": 6337 }, { "epoch": 9.66949462890625e-06, "step": 6337, "training_step_time": 0.10804605484008789 }, { "epoch": 9.6710205078125e-06, "model_forward_time": 0.025082826614379883, "step": 6338 }, { "epoch": 9.6710205078125e-06, "step": 6338, "training_step_time": 0.1320970058441162 }, { "epoch": 9.67254638671875e-06, "model_forward_time": 0.024398326873779297, "step": 6339 }, { "epoch": 9.67254638671875e-06, "step": 6339, "training_step_time": 0.20021557807922363 }, { "epoch": 9.674072265625e-06, "grad_norm": 0.4955099821090698, "learning_rate": 9.305112288966761e-05, "loss": 0.0828, "step": 6340 }, { "epoch": 9.674072265625e-06, "model_forward_time": 0.024440765380859375, "step": 6340 }, { "epoch": 9.674072265625e-06, "step": 6340, "training_step_time": 0.16709351539611816 }, { "epoch": 9.67559814453125e-06, "model_forward_time": 0.0248868465423584, "step": 6341 }, { "epoch": 9.67559814453125e-06, "step": 6341, "training_step_time": 0.1698284149169922 }, { "epoch": 9.6771240234375e-06, "model_forward_time": 0.024354219436645508, "step": 6342 }, { "epoch": 9.6771240234375e-06, "step": 6342, "training_step_time": 0.19667887687683105 }, { "epoch": 9.67864990234375e-06, "model_forward_time": 0.024588823318481445, "step": 6343 }, { "epoch": 9.67864990234375e-06, "step": 6343, "training_step_time": 0.19199872016906738 }, { "epoch": 9.68017578125e-06, "model_forward_time": 0.024130582809448242, "step": 6344 }, { "epoch": 9.68017578125e-06, "step": 6344, "training_step_time": 0.1731119155883789 }, { "epoch": 9.68170166015625e-06, "model_forward_time": 0.024015188217163086, "step": 6345 }, { "epoch": 9.68170166015625e-06, "step": 6345, "training_step_time": 0.12573671340942383 }, { "epoch": 9.6832275390625e-06, "model_forward_time": 0.024234771728515625, "step": 6346 }, { "epoch": 9.6832275390625e-06, "step": 6346, "training_step_time": 0.18442654609680176 }, { "epoch": 9.68475341796875e-06, "model_forward_time": 0.024148941040039062, "step": 6347 }, { "epoch": 9.68475341796875e-06, "step": 6347, "training_step_time": 0.11178421974182129 }, { "epoch": 9.686279296875e-06, "model_forward_time": 0.024275779724121094, "step": 6348 }, { "epoch": 9.686279296875e-06, "step": 6348, "training_step_time": 0.1101071834564209 }, { "epoch": 9.68780517578125e-06, "model_forward_time": 0.024898290634155273, "step": 6349 }, { "epoch": 9.68780517578125e-06, "step": 6349, "training_step_time": 0.10902762413024902 }, { "epoch": 9.6893310546875e-06, "grad_norm": 0.5339387655258179, "learning_rate": 9.30230667371917e-05, "loss": 0.0752, "step": 6350 }, { "epoch": 9.6893310546875e-06, "model_forward_time": 0.02393794059753418, "step": 6350 }, { "epoch": 9.6893310546875e-06, "step": 6350, "training_step_time": 0.11030197143554688 }, { "epoch": 9.69085693359375e-06, "model_forward_time": 0.025090694427490234, "step": 6351 }, { "epoch": 9.69085693359375e-06, "step": 6351, "training_step_time": 0.11490869522094727 }, { "epoch": 9.6923828125e-06, "model_forward_time": 0.025071382522583008, "step": 6352 }, { "epoch": 9.6923828125e-06, "step": 6352, "training_step_time": 0.10852313041687012 }, { "epoch": 9.69390869140625e-06, "model_forward_time": 0.025237321853637695, "step": 6353 }, { "epoch": 9.69390869140625e-06, "step": 6353, "training_step_time": 0.10785460472106934 }, { "epoch": 9.6954345703125e-06, "model_forward_time": 0.025052309036254883, "step": 6354 }, { "epoch": 9.6954345703125e-06, "step": 6354, "training_step_time": 0.11221599578857422 }, { "epoch": 9.69696044921875e-06, "model_forward_time": 0.025387048721313477, "step": 6355 }, { "epoch": 9.69696044921875e-06, "step": 6355, "training_step_time": 0.1149601936340332 }, { "epoch": 9.698486328125e-06, "model_forward_time": 0.025333881378173828, "step": 6356 }, { "epoch": 9.698486328125e-06, "step": 6356, "training_step_time": 0.20598673820495605 }, { "epoch": 9.70001220703125e-06, "model_forward_time": 0.024985074996948242, "step": 6357 }, { "epoch": 9.70001220703125e-06, "step": 6357, "training_step_time": 0.11120963096618652 }, { "epoch": 9.7015380859375e-06, "model_forward_time": 0.024375200271606445, "step": 6358 }, { "epoch": 9.7015380859375e-06, "step": 6358, "training_step_time": 0.10765981674194336 }, { "epoch": 9.70306396484375e-06, "model_forward_time": 0.025217533111572266, "step": 6359 }, { "epoch": 9.70306396484375e-06, "step": 6359, "training_step_time": 0.12080144882202148 }, { "epoch": 9.70458984375e-06, "grad_norm": 0.39799192547798157, "learning_rate": 9.299495830763286e-05, "loss": 0.0784, "step": 6360 }, { "epoch": 9.70458984375e-06, "model_forward_time": 0.02470684051513672, "step": 6360 }, { "epoch": 9.70458984375e-06, "step": 6360, "training_step_time": 0.10517120361328125 }, { "epoch": 9.70611572265625e-06, "model_forward_time": 0.02449965476989746, "step": 6361 }, { "epoch": 9.70611572265625e-06, "step": 6361, "training_step_time": 0.1679856777191162 }, { "epoch": 9.7076416015625e-06, "model_forward_time": 0.02473163604736328, "step": 6362 }, { "epoch": 9.7076416015625e-06, "step": 6362, "training_step_time": 0.17008018493652344 }, { "epoch": 9.70916748046875e-06, "model_forward_time": 0.02400684356689453, "step": 6363 }, { "epoch": 9.70916748046875e-06, "step": 6363, "training_step_time": 0.11502933502197266 }, { "epoch": 9.710693359375e-06, "model_forward_time": 0.025376081466674805, "step": 6364 }, { "epoch": 9.710693359375e-06, "step": 6364, "training_step_time": 0.1728818416595459 }, { "epoch": 9.71221923828125e-06, "model_forward_time": 0.02463531494140625, "step": 6365 }, { "epoch": 9.71221923828125e-06, "step": 6365, "training_step_time": 0.1679515838623047 }, { "epoch": 9.7137451171875e-06, "model_forward_time": 0.025004863739013672, "step": 6366 }, { "epoch": 9.7137451171875e-06, "step": 6366, "training_step_time": 0.1052093505859375 }, { "epoch": 9.71527099609375e-06, "model_forward_time": 0.02472066879272461, "step": 6367 }, { "epoch": 9.71527099609375e-06, "step": 6367, "training_step_time": 0.10825943946838379 }, { "epoch": 9.716796875e-06, "model_forward_time": 0.025316953659057617, "step": 6368 }, { "epoch": 9.716796875e-06, "step": 6368, "training_step_time": 0.11265206336975098 }, { "epoch": 9.71832275390625e-06, "model_forward_time": 0.02568817138671875, "step": 6369 }, { "epoch": 9.71832275390625e-06, "step": 6369, "training_step_time": 0.11026954650878906 }, { "epoch": 9.7198486328125e-06, "grad_norm": 0.3123718500137329, "learning_rate": 9.296679763514552e-05, "loss": 0.0696, "step": 6370 }, { "epoch": 9.7198486328125e-06, "model_forward_time": 0.02479076385498047, "step": 6370 }, { "epoch": 9.7198486328125e-06, "step": 6370, "training_step_time": 0.10735440254211426 }, { "epoch": 9.72137451171875e-06, "model_forward_time": 0.02542257308959961, "step": 6371 }, { "epoch": 9.72137451171875e-06, "step": 6371, "training_step_time": 0.11055397987365723 }, { "epoch": 9.722900390625e-06, "model_forward_time": 0.02499079704284668, "step": 6372 }, { "epoch": 9.722900390625e-06, "step": 6372, "training_step_time": 0.11044192314147949 }, { "epoch": 9.72442626953125e-06, "model_forward_time": 0.025032758712768555, "step": 6373 }, { "epoch": 9.72442626953125e-06, "step": 6373, "training_step_time": 0.10766029357910156 }, { "epoch": 9.7259521484375e-06, "model_forward_time": 0.024792909622192383, "step": 6374 }, { "epoch": 9.7259521484375e-06, "step": 6374, "training_step_time": 0.10824060440063477 }, { "epoch": 9.72747802734375e-06, "model_forward_time": 0.02501845359802246, "step": 6375 }, { "epoch": 9.72747802734375e-06, "step": 6375, "training_step_time": 0.11243414878845215 }, { "epoch": 9.72900390625e-06, "model_forward_time": 0.02621150016784668, "step": 6376 }, { "epoch": 9.72900390625e-06, "step": 6376, "training_step_time": 0.1194758415222168 }, { "epoch": 9.73052978515625e-06, "model_forward_time": 0.02504897117614746, "step": 6377 }, { "epoch": 9.73052978515625e-06, "step": 6377, "training_step_time": 0.10839653015136719 }, { "epoch": 9.7320556640625e-06, "model_forward_time": 0.02498626708984375, "step": 6378 }, { "epoch": 9.7320556640625e-06, "step": 6378, "training_step_time": 0.1061544418334961 }, { "epoch": 9.73358154296875e-06, "model_forward_time": 0.02496051788330078, "step": 6379 }, { "epoch": 9.73358154296875e-06, "step": 6379, "training_step_time": 0.11384892463684082 }, { "epoch": 9.735107421875e-06, "grad_norm": 0.3744323253631592, "learning_rate": 9.293858475394754e-05, "loss": 0.0723, "step": 6380 }, { "epoch": 9.735107421875e-06, "model_forward_time": 0.02501201629638672, "step": 6380 }, { "epoch": 9.735107421875e-06, "step": 6380, "training_step_time": 0.10827994346618652 }, { "epoch": 9.73663330078125e-06, "model_forward_time": 0.02487468719482422, "step": 6381 }, { "epoch": 9.73663330078125e-06, "step": 6381, "training_step_time": 0.10662484169006348 }, { "epoch": 9.7381591796875e-06, "model_forward_time": 0.02529740333557129, "step": 6382 }, { "epoch": 9.7381591796875e-06, "step": 6382, "training_step_time": 0.1964564323425293 }, { "epoch": 9.73968505859375e-06, "model_forward_time": 0.024744510650634766, "step": 6383 }, { "epoch": 9.73968505859375e-06, "step": 6383, "training_step_time": 0.1131277084350586 }, { "epoch": 9.7412109375e-06, "model_forward_time": 0.02750420570373535, "step": 6384 }, { "epoch": 9.7412109375e-06, "step": 6384, "training_step_time": 0.1359543800354004 }, { "epoch": 9.74273681640625e-06, "model_forward_time": 0.025117874145507812, "step": 6385 }, { "epoch": 9.74273681640625e-06, "step": 6385, "training_step_time": 0.16062355041503906 }, { "epoch": 9.7442626953125e-06, "model_forward_time": 0.024236202239990234, "step": 6386 }, { "epoch": 9.7442626953125e-06, "step": 6386, "training_step_time": 0.20752811431884766 }, { "epoch": 9.74578857421875e-06, "model_forward_time": 0.02451610565185547, "step": 6387 }, { "epoch": 9.74578857421875e-06, "step": 6387, "training_step_time": 0.14438366889953613 }, { "epoch": 9.747314453125e-06, "model_forward_time": 0.024376630783081055, "step": 6388 }, { "epoch": 9.747314453125e-06, "step": 6388, "training_step_time": 0.11850690841674805 }, { "epoch": 9.74884033203125e-06, "model_forward_time": 0.02434372901916504, "step": 6389 }, { "epoch": 9.74884033203125e-06, "step": 6389, "training_step_time": 0.1134023666381836 }, { "epoch": 9.7503662109375e-06, "grad_norm": 0.6585614085197449, "learning_rate": 9.291031969832026e-05, "loss": 0.088, "step": 6390 }, { "epoch": 9.7503662109375e-06, "model_forward_time": 0.02542710304260254, "step": 6390 }, { "epoch": 9.7503662109375e-06, "step": 6390, "training_step_time": 0.11016464233398438 }, { "epoch": 9.75189208984375e-06, "model_forward_time": 0.025087833404541016, "step": 6391 }, { "epoch": 9.75189208984375e-06, "step": 6391, "training_step_time": 0.19121432304382324 }, { "epoch": 9.75341796875e-06, "model_forward_time": 0.024791240692138672, "step": 6392 }, { "epoch": 9.75341796875e-06, "step": 6392, "training_step_time": 0.10965704917907715 }, { "epoch": 9.75494384765625e-06, "model_forward_time": 0.0244753360748291, "step": 6393 }, { "epoch": 9.75494384765625e-06, "step": 6393, "training_step_time": 0.11123037338256836 }, { "epoch": 9.7564697265625e-06, "model_forward_time": 0.025005340576171875, "step": 6394 }, { "epoch": 9.7564697265625e-06, "step": 6394, "training_step_time": 0.11133456230163574 }, { "epoch": 9.75799560546875e-06, "model_forward_time": 0.025513887405395508, "step": 6395 }, { "epoch": 9.75799560546875e-06, "step": 6395, "training_step_time": 0.11153912544250488 }, { "epoch": 9.759521484375e-06, "model_forward_time": 0.025590896606445312, "step": 6396 }, { "epoch": 9.759521484375e-06, "step": 6396, "training_step_time": 0.11120057106018066 }, { "epoch": 9.76104736328125e-06, "model_forward_time": 0.025491952896118164, "step": 6397 }, { "epoch": 9.76104736328125e-06, "step": 6397, "training_step_time": 0.11074995994567871 }, { "epoch": 9.7625732421875e-06, "model_forward_time": 0.025948524475097656, "step": 6398 }, { "epoch": 9.7625732421875e-06, "step": 6398, "training_step_time": 0.11573386192321777 }, { "epoch": 9.76409912109375e-06, "model_forward_time": 0.024886131286621094, "step": 6399 }, { "epoch": 9.76409912109375e-06, "step": 6399, "training_step_time": 0.11252522468566895 }, { "epoch": 9.765625e-06, "grad_norm": 0.4097931981086731, "learning_rate": 9.288200250260836e-05, "loss": 0.0853, "step": 6400 }, { "epoch": 9.765625e-06, "model_forward_time": 0.02485060691833496, "step": 6400 }, { "epoch": 9.765625e-06, "step": 6400, "training_step_time": 0.1148531436920166 }, { "epoch": 9.76715087890625e-06, "model_forward_time": 0.025554180145263672, "step": 6401 }, { "epoch": 9.76715087890625e-06, "step": 6401, "training_step_time": 0.12636327743530273 }, { "epoch": 9.7686767578125e-06, "model_forward_time": 0.02586197853088379, "step": 6402 }, { "epoch": 9.7686767578125e-06, "step": 6402, "training_step_time": 0.11365318298339844 }, { "epoch": 9.77020263671875e-06, "model_forward_time": 0.025874853134155273, "step": 6403 }, { "epoch": 9.77020263671875e-06, "step": 6403, "training_step_time": 0.21921563148498535 }, { "epoch": 9.771728515625e-06, "model_forward_time": 0.02479720115661621, "step": 6404 }, { "epoch": 9.771728515625e-06, "step": 6404, "training_step_time": 0.10559582710266113 }, { "epoch": 9.77325439453125e-06, "model_forward_time": 0.024697542190551758, "step": 6405 }, { "epoch": 9.77325439453125e-06, "step": 6405, "training_step_time": 0.1693267822265625 }, { "epoch": 9.7747802734375e-06, "model_forward_time": 0.024753570556640625, "step": 6406 }, { "epoch": 9.7747802734375e-06, "step": 6406, "training_step_time": 0.16687965393066406 }, { "epoch": 9.77630615234375e-06, "model_forward_time": 0.024847745895385742, "step": 6407 }, { "epoch": 9.77630615234375e-06, "step": 6407, "training_step_time": 0.10904407501220703 }, { "epoch": 9.77783203125e-06, "model_forward_time": 0.025545597076416016, "step": 6408 }, { "epoch": 9.77783203125e-06, "step": 6408, "training_step_time": 0.18384981155395508 }, { "epoch": 9.77935791015625e-06, "model_forward_time": 0.024743318557739258, "step": 6409 }, { "epoch": 9.77935791015625e-06, "step": 6409, "training_step_time": 0.1584453582763672 }, { "epoch": 9.7808837890625e-06, "grad_norm": 0.3660637438297272, "learning_rate": 9.285363320121992e-05, "loss": 0.0619, "step": 6410 }, { "epoch": 9.7808837890625e-06, "model_forward_time": 0.02489757537841797, "step": 6410 }, { "epoch": 9.7808837890625e-06, "step": 6410, "training_step_time": 0.11624026298522949 }, { "epoch": 9.78240966796875e-06, "model_forward_time": 0.02508831024169922, "step": 6411 }, { "epoch": 9.78240966796875e-06, "step": 6411, "training_step_time": 0.1113882064819336 }, { "epoch": 9.783935546875e-06, "model_forward_time": 0.02545452117919922, "step": 6412 }, { "epoch": 9.783935546875e-06, "step": 6412, "training_step_time": 0.10992646217346191 }, { "epoch": 9.78546142578125e-06, "model_forward_time": 0.02508544921875, "step": 6413 }, { "epoch": 9.78546142578125e-06, "step": 6413, "training_step_time": 0.10876345634460449 }, { "epoch": 9.7869873046875e-06, "model_forward_time": 0.025025606155395508, "step": 6414 }, { "epoch": 9.7869873046875e-06, "step": 6414, "training_step_time": 0.11002326011657715 }, { "epoch": 9.78851318359375e-06, "model_forward_time": 0.025349855422973633, "step": 6415 }, { "epoch": 9.78851318359375e-06, "step": 6415, "training_step_time": 0.10778021812438965 }, { "epoch": 9.7900390625e-06, "model_forward_time": 0.02498459815979004, "step": 6416 }, { "epoch": 9.7900390625e-06, "step": 6416, "training_step_time": 0.10670876502990723 }, { "epoch": 9.79156494140625e-06, "model_forward_time": 0.02531147003173828, "step": 6417 }, { "epoch": 9.79156494140625e-06, "step": 6417, "training_step_time": 0.10906839370727539 }, { "epoch": 9.7930908203125e-06, "model_forward_time": 0.025927305221557617, "step": 6418 }, { "epoch": 9.7930908203125e-06, "step": 6418, "training_step_time": 0.10847711563110352 }, { "epoch": 9.79461669921875e-06, "model_forward_time": 0.025086402893066406, "step": 6419 }, { "epoch": 9.79461669921875e-06, "step": 6419, "training_step_time": 0.1091775894165039 }, { "epoch": 9.796142578125e-06, "grad_norm": 0.2623238265514374, "learning_rate": 9.282521182862629e-05, "loss": 0.0743, "step": 6420 }, { "epoch": 9.796142578125e-06, "model_forward_time": 0.02553248405456543, "step": 6420 }, { "epoch": 9.796142578125e-06, "step": 6420, "training_step_time": 0.11011600494384766 }, { "epoch": 9.79766845703125e-06, "model_forward_time": 0.02510547637939453, "step": 6421 }, { "epoch": 9.79766845703125e-06, "step": 6421, "training_step_time": 0.10996389389038086 }, { "epoch": 9.7991943359375e-06, "model_forward_time": 0.02573561668395996, "step": 6422 }, { "epoch": 9.7991943359375e-06, "step": 6422, "training_step_time": 0.10902929306030273 }, { "epoch": 9.80072021484375e-06, "model_forward_time": 0.025168180465698242, "step": 6423 }, { "epoch": 9.80072021484375e-06, "step": 6423, "training_step_time": 0.10947895050048828 }, { "epoch": 9.80224609375e-06, "model_forward_time": 0.0266265869140625, "step": 6424 }, { "epoch": 9.80224609375e-06, "step": 6424, "training_step_time": 0.11413717269897461 }, { "epoch": 9.80377197265625e-06, "model_forward_time": 0.025608301162719727, "step": 6425 }, { "epoch": 9.80377197265625e-06, "step": 6425, "training_step_time": 0.10844206809997559 }, { "epoch": 9.8052978515625e-06, "model_forward_time": 0.025483131408691406, "step": 6426 }, { "epoch": 9.8052978515625e-06, "step": 6426, "training_step_time": 0.17925524711608887 }, { "epoch": 9.80682373046875e-06, "model_forward_time": 0.024312257766723633, "step": 6427 }, { "epoch": 9.80682373046875e-06, "step": 6427, "training_step_time": 0.12234783172607422 }, { "epoch": 9.808349609375e-06, "model_forward_time": 0.02454996109008789, "step": 6428 }, { "epoch": 9.808349609375e-06, "step": 6428, "training_step_time": 0.1272134780883789 }, { "epoch": 9.80987548828125e-06, "model_forward_time": 0.025434494018554688, "step": 6429 }, { "epoch": 9.80987548828125e-06, "step": 6429, "training_step_time": 0.15738320350646973 }, { "epoch": 9.8114013671875e-06, "grad_norm": 0.4843827784061432, "learning_rate": 9.279673841936214e-05, "loss": 0.0752, "step": 6430 }, { "epoch": 9.8114013671875e-06, "model_forward_time": 0.024613618850708008, "step": 6430 }, { "epoch": 9.8114013671875e-06, "step": 6430, "training_step_time": 0.22339153289794922 }, { "epoch": 9.81292724609375e-06, "model_forward_time": 0.024805307388305664, "step": 6431 }, { "epoch": 9.81292724609375e-06, "step": 6431, "training_step_time": 0.17174720764160156 }, { "epoch": 9.814453125e-06, "model_forward_time": 0.024548768997192383, "step": 6432 }, { "epoch": 9.814453125e-06, "step": 6432, "training_step_time": 0.11900544166564941 }, { "epoch": 9.81597900390625e-06, "model_forward_time": 0.022927522659301758, "step": 6433 }, { "epoch": 9.81597900390625e-06, "step": 6433, "training_step_time": 0.12465405464172363 }, { "epoch": 9.8175048828125e-06, "model_forward_time": 0.025338411331176758, "step": 6434 }, { "epoch": 9.8175048828125e-06, "step": 6434, "training_step_time": 0.18746566772460938 }, { "epoch": 9.81903076171875e-06, "model_forward_time": 0.024634122848510742, "step": 6435 }, { "epoch": 9.81903076171875e-06, "step": 6435, "training_step_time": 0.10889267921447754 }, { "epoch": 9.820556640625e-06, "model_forward_time": 0.024967432022094727, "step": 6436 }, { "epoch": 9.820556640625e-06, "step": 6436, "training_step_time": 0.11101484298706055 }, { "epoch": 9.82208251953125e-06, "model_forward_time": 0.025379419326782227, "step": 6437 }, { "epoch": 9.82208251953125e-06, "step": 6437, "training_step_time": 0.11189937591552734 }, { "epoch": 9.8236083984375e-06, "model_forward_time": 0.025327444076538086, "step": 6438 }, { "epoch": 9.8236083984375e-06, "step": 6438, "training_step_time": 0.11257028579711914 }, { "epoch": 9.82513427734375e-06, "model_forward_time": 0.02582383155822754, "step": 6439 }, { "epoch": 9.82513427734375e-06, "step": 6439, "training_step_time": 0.11294841766357422 }, { "epoch": 9.82666015625e-06, "grad_norm": 0.3442363142967224, "learning_rate": 9.276821300802534e-05, "loss": 0.0761, "step": 6440 }, { "epoch": 9.82666015625e-06, "model_forward_time": 0.025915145874023438, "step": 6440 }, { "epoch": 9.82666015625e-06, "step": 6440, "training_step_time": 0.10849928855895996 }, { "epoch": 9.82818603515625e-06, "model_forward_time": 0.025716066360473633, "step": 6441 }, { "epoch": 9.82818603515625e-06, "step": 6441, "training_step_time": 0.11442422866821289 }, { "epoch": 9.8297119140625e-06, "model_forward_time": 0.02488231658935547, "step": 6442 }, { "epoch": 9.8297119140625e-06, "step": 6442, "training_step_time": 0.11401581764221191 }, { "epoch": 9.83123779296875e-06, "model_forward_time": 0.02488875389099121, "step": 6443 }, { "epoch": 9.83123779296875e-06, "step": 6443, "training_step_time": 0.10969710350036621 }, { "epoch": 9.832763671875e-06, "model_forward_time": 0.025012493133544922, "step": 6444 }, { "epoch": 9.832763671875e-06, "step": 6444, "training_step_time": 0.11367964744567871 }, { "epoch": 9.83428955078125e-06, "model_forward_time": 0.025678634643554688, "step": 6445 }, { "epoch": 9.83428955078125e-06, "step": 6445, "training_step_time": 0.1939716339111328 }, { "epoch": 9.8358154296875e-06, "model_forward_time": 0.024347543716430664, "step": 6446 }, { "epoch": 9.8358154296875e-06, "step": 6446, "training_step_time": 0.11507987976074219 }, { "epoch": 9.83734130859375e-06, "model_forward_time": 0.02462291717529297, "step": 6447 }, { "epoch": 9.83734130859375e-06, "step": 6447, "training_step_time": 0.17828774452209473 }, { "epoch": 9.8388671875e-06, "model_forward_time": 0.02552628517150879, "step": 6448 }, { "epoch": 9.8388671875e-06, "step": 6448, "training_step_time": 0.1516726016998291 }, { "epoch": 9.84039306640625e-06, "model_forward_time": 0.024512767791748047, "step": 6449 }, { "epoch": 9.84039306640625e-06, "step": 6449, "training_step_time": 0.18554449081420898 }, { "epoch": 9.8419189453125e-06, "grad_norm": 0.391694039106369, "learning_rate": 9.273963562927695e-05, "loss": 0.0723, "step": 6450 }, { "epoch": 9.8419189453125e-06, "model_forward_time": 0.024385452270507812, "step": 6450 }, { "epoch": 9.8419189453125e-06, "step": 6450, "training_step_time": 0.14000201225280762 }, { "epoch": 9.84344482421875e-06, "model_forward_time": 0.024399757385253906, "step": 6451 }, { "epoch": 9.84344482421875e-06, "step": 6451, "training_step_time": 0.16803574562072754 }, { "epoch": 9.844970703125e-06, "model_forward_time": 0.024952173233032227, "step": 6452 }, { "epoch": 9.844970703125e-06, "step": 6452, "training_step_time": 0.13135313987731934 }, { "epoch": 9.84649658203125e-06, "model_forward_time": 0.025227069854736328, "step": 6453 }, { "epoch": 9.84649658203125e-06, "step": 6453, "training_step_time": 0.11016607284545898 }, { "epoch": 9.8480224609375e-06, "model_forward_time": 0.025941848754882812, "step": 6454 }, { "epoch": 9.8480224609375e-06, "step": 6454, "training_step_time": 0.11260294914245605 }, { "epoch": 9.84954833984375e-06, "model_forward_time": 0.02573370933532715, "step": 6455 }, { "epoch": 9.84954833984375e-06, "step": 6455, "training_step_time": 0.10840106010437012 }, { "epoch": 9.85107421875e-06, "model_forward_time": 0.025121688842773438, "step": 6456 }, { "epoch": 9.85107421875e-06, "step": 6456, "training_step_time": 0.11309599876403809 }, { "epoch": 9.85260009765625e-06, "model_forward_time": 0.025045156478881836, "step": 6457 }, { "epoch": 9.85260009765625e-06, "step": 6457, "training_step_time": 0.11169910430908203 }, { "epoch": 9.8541259765625e-06, "model_forward_time": 0.02533125877380371, "step": 6458 }, { "epoch": 9.8541259765625e-06, "step": 6458, "training_step_time": 0.11044979095458984 }, { "epoch": 9.85565185546875e-06, "model_forward_time": 0.024950027465820312, "step": 6459 }, { "epoch": 9.85565185546875e-06, "step": 6459, "training_step_time": 0.10910606384277344 }, { "epoch": 9.857177734375e-06, "grad_norm": 0.3379170298576355, "learning_rate": 9.27110063178412e-05, "loss": 0.0729, "step": 6460 }, { "epoch": 9.857177734375e-06, "model_forward_time": 0.025556087493896484, "step": 6460 }, { "epoch": 9.857177734375e-06, "step": 6460, "training_step_time": 0.11236000061035156 }, { "epoch": 9.85870361328125e-06, "model_forward_time": 0.025185108184814453, "step": 6461 }, { "epoch": 9.85870361328125e-06, "step": 6461, "training_step_time": 0.10976648330688477 }, { "epoch": 9.8602294921875e-06, "model_forward_time": 0.02523016929626465, "step": 6462 }, { "epoch": 9.8602294921875e-06, "step": 6462, "training_step_time": 0.11237835884094238 }, { "epoch": 9.86175537109375e-06, "model_forward_time": 0.02500176429748535, "step": 6463 }, { "epoch": 9.86175537109375e-06, "step": 6463, "training_step_time": 0.11214208602905273 }, { "epoch": 9.86328125e-06, "model_forward_time": 0.025069236755371094, "step": 6464 }, { "epoch": 9.86328125e-06, "step": 6464, "training_step_time": 0.11614775657653809 }, { "epoch": 9.86480712890625e-06, "model_forward_time": 0.025235891342163086, "step": 6465 }, { "epoch": 9.86480712890625e-06, "step": 6465, "training_step_time": 0.11234688758850098 }, { "epoch": 9.8663330078125e-06, "model_forward_time": 0.025141000747680664, "step": 6466 }, { "epoch": 9.8663330078125e-06, "step": 6466, "training_step_time": 0.10916805267333984 }, { "epoch": 9.86785888671875e-06, "model_forward_time": 0.025653839111328125, "step": 6467 }, { "epoch": 9.86785888671875e-06, "step": 6467, "training_step_time": 0.10910892486572266 }, { "epoch": 9.869384765625e-06, "model_forward_time": 0.025215864181518555, "step": 6468 }, { "epoch": 9.869384765625e-06, "step": 6468, "training_step_time": 0.11259675025939941 }, { "epoch": 9.87091064453125e-06, "model_forward_time": 0.02528095245361328, "step": 6469 }, { "epoch": 9.87091064453125e-06, "step": 6469, "training_step_time": 0.10969185829162598 }, { "epoch": 9.8724365234375e-06, "grad_norm": 0.5433868169784546, "learning_rate": 9.268232510850539e-05, "loss": 0.0655, "step": 6470 }, { "epoch": 9.8724365234375e-06, "model_forward_time": 0.02533721923828125, "step": 6470 }, { "epoch": 9.8724365234375e-06, "step": 6470, "training_step_time": 0.11114692687988281 }, { "epoch": 9.87396240234375e-06, "model_forward_time": 0.025383949279785156, "step": 6471 }, { "epoch": 9.87396240234375e-06, "step": 6471, "training_step_time": 0.18553638458251953 }, { "epoch": 9.87548828125e-06, "model_forward_time": 0.024715662002563477, "step": 6472 }, { "epoch": 9.87548828125e-06, "step": 6472, "training_step_time": 0.12156844139099121 }, { "epoch": 9.87701416015625e-06, "model_forward_time": 0.024167776107788086, "step": 6473 }, { "epoch": 9.87701416015625e-06, "step": 6473, "training_step_time": 0.1274569034576416 }, { "epoch": 9.8785400390625e-06, "model_forward_time": 0.024987459182739258, "step": 6474 }, { "epoch": 9.8785400390625e-06, "step": 6474, "training_step_time": 0.19722533226013184 }, { "epoch": 9.88006591796875e-06, "model_forward_time": 0.024302005767822266, "step": 6475 }, { "epoch": 9.88006591796875e-06, "step": 6475, "training_step_time": 0.1802213191986084 }, { "epoch": 9.881591796875e-06, "model_forward_time": 0.024502992630004883, "step": 6476 }, { "epoch": 9.881591796875e-06, "step": 6476, "training_step_time": 0.18155741691589355 }, { "epoch": 9.88311767578125e-06, "model_forward_time": 0.02438521385192871, "step": 6477 }, { "epoch": 9.88311767578125e-06, "step": 6477, "training_step_time": 0.11775541305541992 }, { "epoch": 9.8846435546875e-06, "model_forward_time": 0.02451181411743164, "step": 6478 }, { "epoch": 9.8846435546875e-06, "step": 6478, "training_step_time": 0.10991883277893066 }, { "epoch": 9.88616943359375e-06, "model_forward_time": 0.02526998519897461, "step": 6479 }, { "epoch": 9.88616943359375e-06, "step": 6479, "training_step_time": 0.20051026344299316 }, { "epoch": 9.8876953125e-06, "grad_norm": 0.45038890838623047, "learning_rate": 9.265359203611987e-05, "loss": 0.0694, "step": 6480 }, { "epoch": 9.8876953125e-06, "model_forward_time": 0.024538040161132812, "step": 6480 }, { "epoch": 9.8876953125e-06, "step": 6480, "training_step_time": 0.1041109561920166 }, { "epoch": 9.88922119140625e-06, "model_forward_time": 0.025287151336669922, "step": 6481 }, { "epoch": 9.88922119140625e-06, "step": 6481, "training_step_time": 0.10660171508789062 }, { "epoch": 9.8907470703125e-06, "model_forward_time": 0.025284528732299805, "step": 6482 }, { "epoch": 9.8907470703125e-06, "step": 6482, "training_step_time": 0.10610008239746094 }, { "epoch": 9.89227294921875e-06, "model_forward_time": 0.025357484817504883, "step": 6483 }, { "epoch": 9.89227294921875e-06, "step": 6483, "training_step_time": 0.10704517364501953 }, { "epoch": 9.893798828125e-06, "model_forward_time": 0.028635740280151367, "step": 6484 }, { "epoch": 9.893798828125e-06, "step": 6484, "training_step_time": 0.11926507949829102 }, { "epoch": 9.89532470703125e-06, "model_forward_time": 0.025227069854736328, "step": 6485 }, { "epoch": 9.89532470703125e-06, "step": 6485, "training_step_time": 0.11953353881835938 }, { "epoch": 9.8968505859375e-06, "model_forward_time": 0.02532362937927246, "step": 6486 }, { "epoch": 9.8968505859375e-06, "step": 6486, "training_step_time": 0.10961198806762695 }, { "epoch": 9.89837646484375e-06, "model_forward_time": 0.025210857391357422, "step": 6487 }, { "epoch": 9.89837646484375e-06, "step": 6487, "training_step_time": 0.10686731338500977 }, { "epoch": 9.89990234375e-06, "model_forward_time": 0.025217533111572266, "step": 6488 }, { "epoch": 9.89990234375e-06, "step": 6488, "training_step_time": 0.1099550724029541 }, { "epoch": 9.90142822265625e-06, "model_forward_time": 0.02546525001525879, "step": 6489 }, { "epoch": 9.90142822265625e-06, "step": 6489, "training_step_time": 0.19519448280334473 }, { "epoch": 9.9029541015625e-06, "grad_norm": 0.39201900362968445, "learning_rate": 9.262480713559808e-05, "loss": 0.091, "step": 6490 }, { "epoch": 9.9029541015625e-06, "model_forward_time": 0.024413108825683594, "step": 6490 }, { "epoch": 9.9029541015625e-06, "step": 6490, "training_step_time": 0.11302399635314941 }, { "epoch": 9.90447998046875e-06, "model_forward_time": 0.024483203887939453, "step": 6491 }, { "epoch": 9.90447998046875e-06, "step": 6491, "training_step_time": 0.10732817649841309 }, { "epoch": 9.906005859375e-06, "model_forward_time": 0.025440216064453125, "step": 6492 }, { "epoch": 9.906005859375e-06, "step": 6492, "training_step_time": 0.11038470268249512 }, { "epoch": 9.90753173828125e-06, "model_forward_time": 0.024969100952148438, "step": 6493 }, { "epoch": 9.90753173828125e-06, "step": 6493, "training_step_time": 0.19565677642822266 }, { "epoch": 9.9090576171875e-06, "model_forward_time": 0.02492666244506836, "step": 6494 }, { "epoch": 9.9090576171875e-06, "step": 6494, "training_step_time": 0.1360912322998047 }, { "epoch": 9.91058349609375e-06, "model_forward_time": 0.025622129440307617, "step": 6495 }, { "epoch": 9.91058349609375e-06, "step": 6495, "training_step_time": 0.11145210266113281 }, { "epoch": 9.912109375e-06, "model_forward_time": 0.026498079299926758, "step": 6496 }, { "epoch": 9.912109375e-06, "step": 6496, "training_step_time": 0.17135167121887207 }, { "epoch": 9.91363525390625e-06, "model_forward_time": 0.02469658851623535, "step": 6497 }, { "epoch": 9.91363525390625e-06, "step": 6497, "training_step_time": 0.17390871047973633 }, { "epoch": 9.9151611328125e-06, "model_forward_time": 0.0252382755279541, "step": 6498 }, { "epoch": 9.9151611328125e-06, "step": 6498, "training_step_time": 0.10881257057189941 }, { "epoch": 9.91668701171875e-06, "model_forward_time": 0.02506256103515625, "step": 6499 }, { "epoch": 9.91668701171875e-06, "step": 6499, "training_step_time": 0.11243844032287598 }, { "epoch": 9.918212890625e-06, "grad_norm": 0.5705287456512451, "learning_rate": 9.259597044191636e-05, "loss": 0.066, "step": 6500 }, { "epoch": 9.918212890625e-06, "model_forward_time": 0.02579665184020996, "step": 6500 }, { "epoch": 9.918212890625e-06, "step": 6500, "training_step_time": 0.10969090461730957 }, { "epoch": 9.91973876953125e-06, "model_forward_time": 0.025682926177978516, "step": 6501 }, { "epoch": 9.91973876953125e-06, "step": 6501, "training_step_time": 0.10774374008178711 }, { "epoch": 9.9212646484375e-06, "model_forward_time": 0.027169227600097656, "step": 6502 }, { "epoch": 9.9212646484375e-06, "step": 6502, "training_step_time": 0.10848450660705566 }, { "epoch": 9.92279052734375e-06, "model_forward_time": 0.02458477020263672, "step": 6503 }, { "epoch": 9.92279052734375e-06, "step": 6503, "training_step_time": 0.10886192321777344 }, { "epoch": 9.92431640625e-06, "model_forward_time": 0.02572774887084961, "step": 6504 }, { "epoch": 9.92431640625e-06, "step": 6504, "training_step_time": 0.10667586326599121 }, { "epoch": 9.92584228515625e-06, "model_forward_time": 0.025885343551635742, "step": 6505 }, { "epoch": 9.92584228515625e-06, "step": 6505, "training_step_time": 0.11089348793029785 }, { "epoch": 9.9273681640625e-06, "model_forward_time": 0.025759458541870117, "step": 6506 }, { "epoch": 9.9273681640625e-06, "step": 6506, "training_step_time": 0.11117839813232422 }, { "epoch": 9.92889404296875e-06, "model_forward_time": 0.02552652359008789, "step": 6507 }, { "epoch": 9.92889404296875e-06, "step": 6507, "training_step_time": 0.10968399047851562 }, { "epoch": 9.930419921875e-06, "model_forward_time": 0.025144100189208984, "step": 6508 }, { "epoch": 9.930419921875e-06, "step": 6508, "training_step_time": 0.16504144668579102 }, { "epoch": 9.93194580078125e-06, "model_forward_time": 0.024913311004638672, "step": 6509 }, { "epoch": 9.93194580078125e-06, "step": 6509, "training_step_time": 0.17327237129211426 }, { "epoch": 9.9334716796875e-06, "grad_norm": 0.33714815974235535, "learning_rate": 9.256708199011401e-05, "loss": 0.0672, "step": 6510 }, { "epoch": 9.9334716796875e-06, "model_forward_time": 0.024626493453979492, "step": 6510 }, { "epoch": 9.9334716796875e-06, "step": 6510, "training_step_time": 0.16625308990478516 }, { "epoch": 9.93499755859375e-06, "model_forward_time": 0.024190902709960938, "step": 6511 }, { "epoch": 9.93499755859375e-06, "step": 6511, "training_step_time": 0.16745352745056152 }, { "epoch": 9.9365234375e-06, "model_forward_time": 0.024586915969848633, "step": 6512 }, { "epoch": 9.9365234375e-06, "step": 6512, "training_step_time": 0.15546774864196777 }, { "epoch": 9.93804931640625e-06, "model_forward_time": 0.02459430694580078, "step": 6513 }, { "epoch": 9.93804931640625e-06, "step": 6513, "training_step_time": 0.17969632148742676 }, { "epoch": 9.9395751953125e-06, "model_forward_time": 0.024633407592773438, "step": 6514 }, { "epoch": 9.9395751953125e-06, "step": 6514, "training_step_time": 0.1299731731414795 }, { "epoch": 9.94110107421875e-06, "model_forward_time": 0.02414870262145996, "step": 6515 }, { "epoch": 9.94110107421875e-06, "step": 6515, "training_step_time": 0.12489056587219238 }, { "epoch": 9.942626953125e-06, "model_forward_time": 0.0241239070892334, "step": 6516 }, { "epoch": 9.942626953125e-06, "step": 6516, "training_step_time": 0.19248151779174805 }, { "epoch": 9.94415283203125e-06, "model_forward_time": 0.02426314353942871, "step": 6517 }, { "epoch": 9.94415283203125e-06, "step": 6517, "training_step_time": 0.1783006191253662 }, { "epoch": 9.9456787109375e-06, "model_forward_time": 0.024959325790405273, "step": 6518 }, { "epoch": 9.9456787109375e-06, "step": 6518, "training_step_time": 0.18056678771972656 }, { "epoch": 9.94720458984375e-06, "model_forward_time": 0.02393960952758789, "step": 6519 }, { "epoch": 9.94720458984375e-06, "step": 6519, "training_step_time": 0.12206649780273438 }, { "epoch": 9.94873046875e-06, "grad_norm": 0.2646665871143341, "learning_rate": 9.253814181529323e-05, "loss": 0.0717, "step": 6520 }, { "epoch": 9.94873046875e-06, "model_forward_time": 0.024686813354492188, "step": 6520 }, { "epoch": 9.94873046875e-06, "step": 6520, "training_step_time": 0.15739727020263672 }, { "epoch": 9.95025634765625e-06, "model_forward_time": 0.02528524398803711, "step": 6521 }, { "epoch": 9.95025634765625e-06, "step": 6521, "training_step_time": 0.15546536445617676 }, { "epoch": 9.9517822265625e-06, "model_forward_time": 0.024883270263671875, "step": 6522 }, { "epoch": 9.9517822265625e-06, "step": 6522, "training_step_time": 0.10444355010986328 }, { "epoch": 9.95330810546875e-06, "model_forward_time": 0.0251767635345459, "step": 6523 }, { "epoch": 9.95330810546875e-06, "step": 6523, "training_step_time": 0.1049337387084961 }, { "epoch": 9.954833984375e-06, "model_forward_time": 0.02523517608642578, "step": 6524 }, { "epoch": 9.954833984375e-06, "step": 6524, "training_step_time": 0.10724020004272461 }, { "epoch": 9.95635986328125e-06, "model_forward_time": 0.025401592254638672, "step": 6525 }, { "epoch": 9.95635986328125e-06, "step": 6525, "training_step_time": 0.11230969429016113 }, { "epoch": 9.9578857421875e-06, "model_forward_time": 0.02580094337463379, "step": 6526 }, { "epoch": 9.9578857421875e-06, "step": 6526, "training_step_time": 0.10893130302429199 }, { "epoch": 9.95941162109375e-06, "model_forward_time": 0.025339841842651367, "step": 6527 }, { "epoch": 9.95941162109375e-06, "step": 6527, "training_step_time": 0.10904169082641602 }, { "epoch": 9.9609375e-06, "model_forward_time": 0.02503228187561035, "step": 6528 }, { "epoch": 9.9609375e-06, "step": 6528, "training_step_time": 0.11194920539855957 }, { "epoch": 9.96246337890625e-06, "model_forward_time": 0.025620698928833008, "step": 6529 }, { "epoch": 9.96246337890625e-06, "step": 6529, "training_step_time": 0.17990803718566895 }, { "epoch": 9.9639892578125e-06, "grad_norm": 0.42773929238319397, "learning_rate": 9.250914995261905e-05, "loss": 0.0704, "step": 6530 }, { "epoch": 9.9639892578125e-06, "model_forward_time": 0.024761438369750977, "step": 6530 }, { "epoch": 9.9639892578125e-06, "step": 6530, "training_step_time": 0.19832634925842285 }, { "epoch": 9.96551513671875e-06, "model_forward_time": 0.02418804168701172, "step": 6531 }, { "epoch": 9.96551513671875e-06, "step": 6531, "training_step_time": 0.20506024360656738 }, { "epoch": 9.967041015625e-06, "model_forward_time": 0.024600505828857422, "step": 6532 }, { "epoch": 9.967041015625e-06, "step": 6532, "training_step_time": 0.19393205642700195 }, { "epoch": 9.96856689453125e-06, "model_forward_time": 0.02482318878173828, "step": 6533 }, { "epoch": 9.96856689453125e-06, "step": 6533, "training_step_time": 0.2174544334411621 }, { "epoch": 9.9700927734375e-06, "model_forward_time": 0.024936676025390625, "step": 6534 }, { "epoch": 9.9700927734375e-06, "step": 6534, "training_step_time": 0.19177937507629395 }, { "epoch": 9.97161865234375e-06, "model_forward_time": 0.02472662925720215, "step": 6535 }, { "epoch": 9.97161865234375e-06, "step": 6535, "training_step_time": 0.15485143661499023 }, { "epoch": 9.97314453125e-06, "model_forward_time": 0.02605581283569336, "step": 6536 }, { "epoch": 9.97314453125e-06, "step": 6536, "training_step_time": 0.18163514137268066 }, { "epoch": 9.97467041015625e-06, "model_forward_time": 0.02474212646484375, "step": 6537 }, { "epoch": 9.97467041015625e-06, "step": 6537, "training_step_time": 0.13070344924926758 }, { "epoch": 9.9761962890625e-06, "model_forward_time": 0.02474689483642578, "step": 6538 }, { "epoch": 9.9761962890625e-06, "step": 6538, "training_step_time": 0.12535762786865234 }, { "epoch": 9.97772216796875e-06, "model_forward_time": 0.025102853775024414, "step": 6539 }, { "epoch": 9.97772216796875e-06, "step": 6539, "training_step_time": 0.12402009963989258 }, { "epoch": 9.979248046875e-06, "grad_norm": 0.414929062128067, "learning_rate": 9.248010643731935e-05, "loss": 0.0835, "step": 6540 }, { "epoch": 9.979248046875e-06, "model_forward_time": 0.024689435958862305, "step": 6540 }, { "epoch": 9.979248046875e-06, "step": 6540, "training_step_time": 0.11315751075744629 }, { "epoch": 9.98077392578125e-06, "model_forward_time": 0.025402069091796875, "step": 6541 }, { "epoch": 9.98077392578125e-06, "step": 6541, "training_step_time": 0.11596894264221191 }, { "epoch": 9.9822998046875e-06, "model_forward_time": 0.02563762664794922, "step": 6542 }, { "epoch": 9.9822998046875e-06, "step": 6542, "training_step_time": 0.11794137954711914 }, { "epoch": 9.98382568359375e-06, "model_forward_time": 0.025556325912475586, "step": 6543 }, { "epoch": 9.98382568359375e-06, "step": 6543, "training_step_time": 0.10900497436523438 }, { "epoch": 9.9853515625e-06, "model_forward_time": 0.02540898323059082, "step": 6544 }, { "epoch": 9.9853515625e-06, "step": 6544, "training_step_time": 0.11584973335266113 }, { "epoch": 9.98687744140625e-06, "model_forward_time": 0.025455951690673828, "step": 6545 }, { "epoch": 9.98687744140625e-06, "step": 6545, "training_step_time": 0.1203455924987793 }, { "epoch": 9.9884033203125e-06, "model_forward_time": 0.025952577590942383, "step": 6546 }, { "epoch": 9.9884033203125e-06, "step": 6546, "training_step_time": 0.10809850692749023 }, { "epoch": 9.98992919921875e-06, "model_forward_time": 0.025254249572753906, "step": 6547 }, { "epoch": 9.98992919921875e-06, "step": 6547, "training_step_time": 0.10940718650817871 }, { "epoch": 9.991455078125e-06, "model_forward_time": 0.02501678466796875, "step": 6548 }, { "epoch": 9.991455078125e-06, "step": 6548, "training_step_time": 0.10718178749084473 }, { "epoch": 9.99298095703125e-06, "model_forward_time": 0.025507211685180664, "step": 6549 }, { "epoch": 9.99298095703125e-06, "step": 6549, "training_step_time": 0.10961318016052246 }, { "epoch": 9.9945068359375e-06, "grad_norm": 0.34869563579559326, "learning_rate": 9.24510113046847e-05, "loss": 0.0621, "step": 6550 }, { "epoch": 9.9945068359375e-06, "model_forward_time": 0.0255281925201416, "step": 6550 }, { "epoch": 9.9945068359375e-06, "step": 6550, "training_step_time": 0.11459684371948242 }, { "epoch": 9.99603271484375e-06, "model_forward_time": 0.02547907829284668, "step": 6551 }, { "epoch": 9.99603271484375e-06, "step": 6551, "training_step_time": 0.10655784606933594 }, { "epoch": 9.99755859375e-06, "model_forward_time": 0.025511741638183594, "step": 6552 }, { "epoch": 9.99755859375e-06, "step": 6552, "training_step_time": 0.11295914649963379 }, { "epoch": 9.99908447265625e-06, "model_forward_time": 0.025484561920166016, "step": 6553 }, { "epoch": 9.99908447265625e-06, "step": 6553, "training_step_time": 0.165130615234375 }, { "epoch": 1.00006103515625e-05, "model_forward_time": 0.02457904815673828, "step": 6554 }, { "epoch": 1.00006103515625e-05, "step": 6554, "training_step_time": 0.17719817161560059 }, { "epoch": 1.000213623046875e-05, "model_forward_time": 0.024677753448486328, "step": 6555 }, { "epoch": 1.000213623046875e-05, "step": 6555, "training_step_time": 0.22609853744506836 }, { "epoch": 1.0003662109375e-05, "model_forward_time": 0.025181293487548828, "step": 6556 }, { "epoch": 1.0003662109375e-05, "step": 6556, "training_step_time": 0.16385102272033691 }, { "epoch": 1.000518798828125e-05, "model_forward_time": 0.02461409568786621, "step": 6557 }, { "epoch": 1.000518798828125e-05, "step": 6557, "training_step_time": 0.20233583450317383 }, { "epoch": 1.00067138671875e-05, "model_forward_time": 0.025184154510498047, "step": 6558 }, { "epoch": 1.00067138671875e-05, "step": 6558, "training_step_time": 0.14763879776000977 }, { "epoch": 1.000823974609375e-05, "model_forward_time": 0.024624347686767578, "step": 6559 }, { "epoch": 1.000823974609375e-05, "step": 6559, "training_step_time": 0.10206198692321777 }, { "epoch": 1.0009765625e-05, "grad_norm": 0.5837518572807312, "learning_rate": 9.242186459006845e-05, "loss": 0.0912, "step": 6560 }, { "epoch": 1.0009765625e-05, "model_forward_time": 0.024824857711791992, "step": 6560 }, { "epoch": 1.0009765625e-05, "step": 6560, "training_step_time": 0.11862015724182129 }, { "epoch": 1.001129150390625e-05, "model_forward_time": 0.02503180503845215, "step": 6561 }, { "epoch": 1.001129150390625e-05, "step": 6561, "training_step_time": 0.11403226852416992 }, { "epoch": 1.00128173828125e-05, "model_forward_time": 0.025346994400024414, "step": 6562 }, { "epoch": 1.00128173828125e-05, "step": 6562, "training_step_time": 0.19987177848815918 }, { "epoch": 1.001434326171875e-05, "model_forward_time": 0.024656295776367188, "step": 6563 }, { "epoch": 1.001434326171875e-05, "step": 6563, "training_step_time": 0.10623526573181152 }, { "epoch": 1.0015869140625e-05, "model_forward_time": 0.024497270584106445, "step": 6564 }, { "epoch": 1.0015869140625e-05, "step": 6564, "training_step_time": 0.10821032524108887 }, { "epoch": 1.001739501953125e-05, "model_forward_time": 0.025483131408691406, "step": 6565 }, { "epoch": 1.001739501953125e-05, "step": 6565, "training_step_time": 0.1135711669921875 }, { "epoch": 1.00189208984375e-05, "model_forward_time": 0.025506019592285156, "step": 6566 }, { "epoch": 1.00189208984375e-05, "step": 6566, "training_step_time": 0.10773110389709473 }, { "epoch": 1.002044677734375e-05, "model_forward_time": 0.025345802307128906, "step": 6567 }, { "epoch": 1.002044677734375e-05, "step": 6567, "training_step_time": 0.10838103294372559 }, { "epoch": 1.002197265625e-05, "model_forward_time": 0.025368452072143555, "step": 6568 }, { "epoch": 1.002197265625e-05, "step": 6568, "training_step_time": 0.11011695861816406 }, { "epoch": 1.002349853515625e-05, "model_forward_time": 0.02463841438293457, "step": 6569 }, { "epoch": 1.002349853515625e-05, "step": 6569, "training_step_time": 0.11744475364685059 }, { "epoch": 1.00250244140625e-05, "grad_norm": 0.6058138608932495, "learning_rate": 9.239266632888659e-05, "loss": 0.0873, "step": 6570 }, { "epoch": 1.00250244140625e-05, "model_forward_time": 0.02502894401550293, "step": 6570 }, { "epoch": 1.00250244140625e-05, "step": 6570, "training_step_time": 0.11164259910583496 }, { "epoch": 1.002655029296875e-05, "model_forward_time": 0.025824308395385742, "step": 6571 }, { "epoch": 1.002655029296875e-05, "step": 6571, "training_step_time": 0.10733318328857422 }, { "epoch": 1.0028076171875e-05, "model_forward_time": 0.025097370147705078, "step": 6572 }, { "epoch": 1.0028076171875e-05, "step": 6572, "training_step_time": 0.1066129207611084 }, { "epoch": 1.002960205078125e-05, "model_forward_time": 0.02521491050720215, "step": 6573 }, { "epoch": 1.002960205078125e-05, "step": 6573, "training_step_time": 0.16582345962524414 }, { "epoch": 1.00311279296875e-05, "model_forward_time": 0.0251004695892334, "step": 6574 }, { "epoch": 1.00311279296875e-05, "step": 6574, "training_step_time": 0.11115622520446777 }, { "epoch": 1.003265380859375e-05, "model_forward_time": 0.02578258514404297, "step": 6575 }, { "epoch": 1.003265380859375e-05, "step": 6575, "training_step_time": 0.1152796745300293 }, { "epoch": 1.00341796875e-05, "model_forward_time": 0.02480173110961914, "step": 6576 }, { "epoch": 1.00341796875e-05, "step": 6576, "training_step_time": 0.21740508079528809 }, { "epoch": 1.003570556640625e-05, "model_forward_time": 0.024617910385131836, "step": 6577 }, { "epoch": 1.003570556640625e-05, "step": 6577, "training_step_time": 0.2051553726196289 }, { "epoch": 1.00372314453125e-05, "model_forward_time": 0.024477720260620117, "step": 6578 }, { "epoch": 1.00372314453125e-05, "step": 6578, "training_step_time": 0.11720871925354004 }, { "epoch": 1.003875732421875e-05, "model_forward_time": 0.02523183822631836, "step": 6579 }, { "epoch": 1.003875732421875e-05, "step": 6579, "training_step_time": 0.12525200843811035 }, { "epoch": 1.0040283203125e-05, "grad_norm": 0.6062305569648743, "learning_rate": 9.236341655661778e-05, "loss": 0.0628, "step": 6580 }, { "epoch": 1.0040283203125e-05, "model_forward_time": 0.02523660659790039, "step": 6580 }, { "epoch": 1.0040283203125e-05, "step": 6580, "training_step_time": 0.11180639266967773 }, { "epoch": 1.004180908203125e-05, "model_forward_time": 0.025545120239257812, "step": 6581 }, { "epoch": 1.004180908203125e-05, "step": 6581, "training_step_time": 0.1099853515625 }, { "epoch": 1.00433349609375e-05, "model_forward_time": 0.02547430992126465, "step": 6582 }, { "epoch": 1.00433349609375e-05, "step": 6582, "training_step_time": 0.11046457290649414 }, { "epoch": 1.004486083984375e-05, "model_forward_time": 0.02477717399597168, "step": 6583 }, { "epoch": 1.004486083984375e-05, "step": 6583, "training_step_time": 0.1072385311126709 }, { "epoch": 1.004638671875e-05, "model_forward_time": 0.025068283081054688, "step": 6584 }, { "epoch": 1.004638671875e-05, "step": 6584, "training_step_time": 0.10965681076049805 }, { "epoch": 1.004791259765625e-05, "model_forward_time": 0.02513599395751953, "step": 6585 }, { "epoch": 1.004791259765625e-05, "step": 6585, "training_step_time": 0.10981392860412598 }, { "epoch": 1.00494384765625e-05, "model_forward_time": 0.0253293514251709, "step": 6586 }, { "epoch": 1.00494384765625e-05, "step": 6586, "training_step_time": 0.10671186447143555 }, { "epoch": 1.005096435546875e-05, "model_forward_time": 0.025168180465698242, "step": 6587 }, { "epoch": 1.005096435546875e-05, "step": 6587, "training_step_time": 0.11273598670959473 }, { "epoch": 1.0052490234375e-05, "model_forward_time": 0.025272846221923828, "step": 6588 }, { "epoch": 1.0052490234375e-05, "step": 6588, "training_step_time": 0.10802078247070312 }, { "epoch": 1.005401611328125e-05, "model_forward_time": 0.02510356903076172, "step": 6589 }, { "epoch": 1.005401611328125e-05, "step": 6589, "training_step_time": 0.11127281188964844 }, { "epoch": 1.00555419921875e-05, "grad_norm": 0.5910700559616089, "learning_rate": 9.233411530880326e-05, "loss": 0.0951, "step": 6590 }, { "epoch": 1.00555419921875e-05, "model_forward_time": 0.0242159366607666, "step": 6590 }, { "epoch": 1.00555419921875e-05, "step": 6590, "training_step_time": 0.10978293418884277 }, { "epoch": 1.005706787109375e-05, "model_forward_time": 0.025275468826293945, "step": 6591 }, { "epoch": 1.005706787109375e-05, "step": 6591, "training_step_time": 0.10733270645141602 }, { "epoch": 1.005859375e-05, "model_forward_time": 0.025221586227416992, "step": 6592 }, { "epoch": 1.005859375e-05, "step": 6592, "training_step_time": 0.10641694068908691 }, { "epoch": 1.006011962890625e-05, "model_forward_time": 0.025393247604370117, "step": 6593 }, { "epoch": 1.006011962890625e-05, "step": 6593, "training_step_time": 0.10719633102416992 }, { "epoch": 1.00616455078125e-05, "model_forward_time": 0.025210142135620117, "step": 6594 }, { "epoch": 1.00616455078125e-05, "step": 6594, "training_step_time": 0.10864496231079102 }, { "epoch": 1.006317138671875e-05, "model_forward_time": 0.025278568267822266, "step": 6595 }, { "epoch": 1.006317138671875e-05, "step": 6595, "training_step_time": 0.1097726821899414 }, { "epoch": 1.0064697265625e-05, "model_forward_time": 0.02552509307861328, "step": 6596 }, { "epoch": 1.0064697265625e-05, "step": 6596, "training_step_time": 0.11342620849609375 }, { "epoch": 1.006622314453125e-05, "model_forward_time": 0.024968624114990234, "step": 6597 }, { "epoch": 1.006622314453125e-05, "step": 6597, "training_step_time": 0.10930562019348145 }, { "epoch": 1.00677490234375e-05, "model_forward_time": 0.027837753295898438, "step": 6598 }, { "epoch": 1.00677490234375e-05, "step": 6598, "training_step_time": 0.16094255447387695 }, { "epoch": 1.006927490234375e-05, "model_forward_time": 0.024677753448486328, "step": 6599 }, { "epoch": 1.006927490234375e-05, "step": 6599, "training_step_time": 0.1149604320526123 }, { "epoch": 1.007080078125e-05, "grad_norm": 0.3668150007724762, "learning_rate": 9.230476262104677e-05, "loss": 0.0887, "step": 6600 }, { "epoch": 1.007080078125e-05, "model_forward_time": 0.0246431827545166, "step": 6600 }, { "epoch": 1.007080078125e-05, "step": 6600, "training_step_time": 0.12270092964172363 }, { "epoch": 1.007232666015625e-05, "model_forward_time": 0.025225400924682617, "step": 6601 }, { "epoch": 1.007232666015625e-05, "step": 6601, "training_step_time": 0.15770483016967773 }, { "epoch": 1.00738525390625e-05, "model_forward_time": 0.02454209327697754, "step": 6602 }, { "epoch": 1.00738525390625e-05, "step": 6602, "training_step_time": 0.17799139022827148 }, { "epoch": 1.007537841796875e-05, "model_forward_time": 0.024739503860473633, "step": 6603 }, { "epoch": 1.007537841796875e-05, "step": 6603, "training_step_time": 0.156998872756958 }, { "epoch": 1.0076904296875e-05, "model_forward_time": 0.024178266525268555, "step": 6604 }, { "epoch": 1.0076904296875e-05, "step": 6604, "training_step_time": 0.20427346229553223 }, { "epoch": 1.007843017578125e-05, "model_forward_time": 0.02445840835571289, "step": 6605 }, { "epoch": 1.007843017578125e-05, "step": 6605, "training_step_time": 0.11992216110229492 }, { "epoch": 1.00799560546875e-05, "model_forward_time": 0.02454543113708496, "step": 6606 }, { "epoch": 1.00799560546875e-05, "step": 6606, "training_step_time": 0.10874819755554199 }, { "epoch": 1.008148193359375e-05, "model_forward_time": 0.02534937858581543, "step": 6607 }, { "epoch": 1.008148193359375e-05, "step": 6607, "training_step_time": 0.19548892974853516 }, { "epoch": 1.00830078125e-05, "model_forward_time": 0.024621009826660156, "step": 6608 }, { "epoch": 1.00830078125e-05, "step": 6608, "training_step_time": 0.10390520095825195 }, { "epoch": 1.008453369140625e-05, "model_forward_time": 0.02482748031616211, "step": 6609 }, { "epoch": 1.008453369140625e-05, "step": 6609, "training_step_time": 0.11417055130004883 }, { "epoch": 1.00860595703125e-05, "grad_norm": 0.33050212264060974, "learning_rate": 9.227535852901463e-05, "loss": 0.0624, "step": 6610 }, { "epoch": 1.00860595703125e-05, "model_forward_time": 0.0255126953125, "step": 6610 }, { "epoch": 1.00860595703125e-05, "step": 6610, "training_step_time": 0.10654592514038086 }, { "epoch": 1.008758544921875e-05, "model_forward_time": 0.025337696075439453, "step": 6611 }, { "epoch": 1.008758544921875e-05, "step": 6611, "training_step_time": 0.10657715797424316 }, { "epoch": 1.0089111328125e-05, "model_forward_time": 0.025542020797729492, "step": 6612 }, { "epoch": 1.0089111328125e-05, "step": 6612, "training_step_time": 0.10787487030029297 }, { "epoch": 1.009063720703125e-05, "model_forward_time": 0.026583194732666016, "step": 6613 }, { "epoch": 1.009063720703125e-05, "step": 6613, "training_step_time": 0.10704326629638672 }, { "epoch": 1.00921630859375e-05, "model_forward_time": 0.025339603424072266, "step": 6614 }, { "epoch": 1.00921630859375e-05, "step": 6614, "training_step_time": 0.1078641414642334 }, { "epoch": 1.009368896484375e-05, "model_forward_time": 0.025077104568481445, "step": 6615 }, { "epoch": 1.009368896484375e-05, "step": 6615, "training_step_time": 0.10654044151306152 }, { "epoch": 1.009521484375e-05, "model_forward_time": 0.025461435317993164, "step": 6616 }, { "epoch": 1.009521484375e-05, "step": 6616, "training_step_time": 0.10643291473388672 }, { "epoch": 1.009674072265625e-05, "model_forward_time": 0.026859760284423828, "step": 6617 }, { "epoch": 1.009674072265625e-05, "step": 6617, "training_step_time": 0.11926865577697754 }, { "epoch": 1.00982666015625e-05, "model_forward_time": 0.02557849884033203, "step": 6618 }, { "epoch": 1.00982666015625e-05, "step": 6618, "training_step_time": 0.10747122764587402 }, { "epoch": 1.009979248046875e-05, "model_forward_time": 0.02520585060119629, "step": 6619 }, { "epoch": 1.009979248046875e-05, "step": 6619, "training_step_time": 0.18399500846862793 }, { "epoch": 1.0101318359375e-05, "grad_norm": 0.31240516901016235, "learning_rate": 9.224590306843558e-05, "loss": 0.0566, "step": 6620 }, { "epoch": 1.0101318359375e-05, "model_forward_time": 0.024890899658203125, "step": 6620 }, { "epoch": 1.0101318359375e-05, "step": 6620, "training_step_time": 0.2280712127685547 }, { "epoch": 1.010284423828125e-05, "model_forward_time": 0.024544239044189453, "step": 6621 }, { "epoch": 1.010284423828125e-05, "step": 6621, "training_step_time": 0.2417900562286377 }, { "epoch": 1.01043701171875e-05, "model_forward_time": 0.024407386779785156, "step": 6622 }, { "epoch": 1.01043701171875e-05, "step": 6622, "training_step_time": 0.23080086708068848 }, { "epoch": 1.010589599609375e-05, "model_forward_time": 0.024460315704345703, "step": 6623 }, { "epoch": 1.010589599609375e-05, "step": 6623, "training_step_time": 0.22805118560791016 }, { "epoch": 1.0107421875e-05, "model_forward_time": 0.024762868881225586, "step": 6624 }, { "epoch": 1.0107421875e-05, "step": 6624, "training_step_time": 0.17116379737854004 }, { "epoch": 1.010894775390625e-05, "model_forward_time": 0.024512290954589844, "step": 6625 }, { "epoch": 1.010894775390625e-05, "step": 6625, "training_step_time": 0.15546727180480957 }, { "epoch": 1.01104736328125e-05, "model_forward_time": 0.025127410888671875, "step": 6626 }, { "epoch": 1.01104736328125e-05, "step": 6626, "training_step_time": 0.10763359069824219 }, { "epoch": 1.011199951171875e-05, "model_forward_time": 0.02610635757446289, "step": 6627 }, { "epoch": 1.011199951171875e-05, "step": 6627, "training_step_time": 0.10792875289916992 }, { "epoch": 1.0113525390625e-05, "model_forward_time": 0.025419950485229492, "step": 6628 }, { "epoch": 1.0113525390625e-05, "step": 6628, "training_step_time": 0.11095404624938965 }, { "epoch": 1.011505126953125e-05, "model_forward_time": 0.024516582489013672, "step": 6629 }, { "epoch": 1.011505126953125e-05, "step": 6629, "training_step_time": 0.10919022560119629 }, { "epoch": 1.01165771484375e-05, "grad_norm": 0.29019224643707275, "learning_rate": 9.221639627510076e-05, "loss": 0.0638, "step": 6630 }, { "epoch": 1.01165771484375e-05, "model_forward_time": 0.025113344192504883, "step": 6630 }, { "epoch": 1.01165771484375e-05, "step": 6630, "training_step_time": 0.10723304748535156 }, { "epoch": 1.011810302734375e-05, "model_forward_time": 0.025109529495239258, "step": 6631 }, { "epoch": 1.011810302734375e-05, "step": 6631, "training_step_time": 0.11158347129821777 }, { "epoch": 1.011962890625e-05, "model_forward_time": 0.02535247802734375, "step": 6632 }, { "epoch": 1.011962890625e-05, "step": 6632, "training_step_time": 0.11148571968078613 }, { "epoch": 1.012115478515625e-05, "model_forward_time": 0.025548219680786133, "step": 6633 }, { "epoch": 1.012115478515625e-05, "step": 6633, "training_step_time": 0.11294007301330566 }, { "epoch": 1.01226806640625e-05, "model_forward_time": 0.026001453399658203, "step": 6634 }, { "epoch": 1.01226806640625e-05, "step": 6634, "training_step_time": 0.1767728328704834 }, { "epoch": 1.012420654296875e-05, "model_forward_time": 0.02497553825378418, "step": 6635 }, { "epoch": 1.012420654296875e-05, "step": 6635, "training_step_time": 0.19539332389831543 }, { "epoch": 1.0125732421875e-05, "model_forward_time": 0.024483203887939453, "step": 6636 }, { "epoch": 1.0125732421875e-05, "step": 6636, "training_step_time": 0.1976184844970703 }, { "epoch": 1.012725830078125e-05, "model_forward_time": 0.02395033836364746, "step": 6637 }, { "epoch": 1.012725830078125e-05, "step": 6637, "training_step_time": 0.17644810676574707 }, { "epoch": 1.01287841796875e-05, "model_forward_time": 0.024905920028686523, "step": 6638 }, { "epoch": 1.01287841796875e-05, "step": 6638, "training_step_time": 0.17102837562561035 }, { "epoch": 1.013031005859375e-05, "model_forward_time": 0.024414539337158203, "step": 6639 }, { "epoch": 1.013031005859375e-05, "step": 6639, "training_step_time": 0.17267775535583496 }, { "epoch": 1.01318359375e-05, "grad_norm": 0.3535989224910736, "learning_rate": 9.218683818486372e-05, "loss": 0.0565, "step": 6640 }, { "epoch": 1.01318359375e-05, "model_forward_time": 0.024838924407958984, "step": 6640 }, { "epoch": 1.01318359375e-05, "step": 6640, "training_step_time": 0.1535487174987793 }, { "epoch": 1.013336181640625e-05, "model_forward_time": 0.02460026741027832, "step": 6641 }, { "epoch": 1.013336181640625e-05, "step": 6641, "training_step_time": 0.17336249351501465 }, { "epoch": 1.01348876953125e-05, "model_forward_time": 0.02479839324951172, "step": 6642 }, { "epoch": 1.01348876953125e-05, "step": 6642, "training_step_time": 0.17738080024719238 }, { "epoch": 1.013641357421875e-05, "model_forward_time": 0.02465653419494629, "step": 6643 }, { "epoch": 1.013641357421875e-05, "step": 6643, "training_step_time": 0.10442662239074707 }, { "epoch": 1.0137939453125e-05, "model_forward_time": 0.02462911605834961, "step": 6644 }, { "epoch": 1.0137939453125e-05, "step": 6644, "training_step_time": 0.11913657188415527 }, { "epoch": 1.013946533203125e-05, "model_forward_time": 0.025424957275390625, "step": 6645 }, { "epoch": 1.013946533203125e-05, "step": 6645, "training_step_time": 0.11937212944030762 }, { "epoch": 1.01409912109375e-05, "model_forward_time": 0.02550530433654785, "step": 6646 }, { "epoch": 1.01409912109375e-05, "step": 6646, "training_step_time": 0.10855770111083984 }, { "epoch": 1.014251708984375e-05, "model_forward_time": 0.0254666805267334, "step": 6647 }, { "epoch": 1.014251708984375e-05, "step": 6647, "training_step_time": 0.2111661434173584 }, { "epoch": 1.014404296875e-05, "model_forward_time": 0.025254487991333008, "step": 6648 }, { "epoch": 1.014404296875e-05, "step": 6648, "training_step_time": 0.10758090019226074 }, { "epoch": 1.014556884765625e-05, "model_forward_time": 0.02412271499633789, "step": 6649 }, { "epoch": 1.014556884765625e-05, "step": 6649, "training_step_time": 0.10734963417053223 }, { "epoch": 1.01470947265625e-05, "grad_norm": 0.3252510130405426, "learning_rate": 9.215722883364033e-05, "loss": 0.0866, "step": 6650 }, { "epoch": 1.01470947265625e-05, "model_forward_time": 0.02480626106262207, "step": 6650 }, { "epoch": 1.01470947265625e-05, "step": 6650, "training_step_time": 0.1477358341217041 }, { "epoch": 1.014862060546875e-05, "model_forward_time": 0.025113821029663086, "step": 6651 }, { "epoch": 1.014862060546875e-05, "step": 6651, "training_step_time": 0.14652562141418457 }, { "epoch": 1.0150146484375e-05, "model_forward_time": 0.024847984313964844, "step": 6652 }, { "epoch": 1.0150146484375e-05, "step": 6652, "training_step_time": 0.14195489883422852 }, { "epoch": 1.015167236328125e-05, "model_forward_time": 0.024892807006835938, "step": 6653 }, { "epoch": 1.015167236328125e-05, "step": 6653, "training_step_time": 0.12610769271850586 }, { "epoch": 1.01531982421875e-05, "model_forward_time": 0.02494668960571289, "step": 6654 }, { "epoch": 1.01531982421875e-05, "step": 6654, "training_step_time": 0.11930966377258301 }, { "epoch": 1.015472412109375e-05, "model_forward_time": 0.027321577072143555, "step": 6655 }, { "epoch": 1.015472412109375e-05, "step": 6655, "training_step_time": 0.12143421173095703 }, { "epoch": 1.015625e-05, "model_forward_time": 0.02546215057373047, "step": 6656 }, { "epoch": 1.015625e-05, "step": 6656, "training_step_time": 0.11566615104675293 }, { "epoch": 1.015777587890625e-05, "model_forward_time": 0.0250399112701416, "step": 6657 }, { "epoch": 1.015777587890625e-05, "step": 6657, "training_step_time": 0.17594289779663086 }, { "epoch": 1.01593017578125e-05, "model_forward_time": 0.025894641876220703, "step": 6658 }, { "epoch": 1.01593017578125e-05, "step": 6658, "training_step_time": 0.10976433753967285 }, { "epoch": 1.016082763671875e-05, "model_forward_time": 0.024956703186035156, "step": 6659 }, { "epoch": 1.016082763671875e-05, "step": 6659, "training_step_time": 0.21849536895751953 }, { "epoch": 1.0162353515625e-05, "grad_norm": 0.2549554109573364, "learning_rate": 9.212756825740873e-05, "loss": 0.0588, "step": 6660 }, { "epoch": 1.0162353515625e-05, "model_forward_time": 0.024979829788208008, "step": 6660 }, { "epoch": 1.0162353515625e-05, "step": 6660, "training_step_time": 0.11991143226623535 }, { "epoch": 1.016387939453125e-05, "model_forward_time": 0.025058984756469727, "step": 6661 }, { "epoch": 1.016387939453125e-05, "step": 6661, "training_step_time": 0.15906882286071777 }, { "epoch": 1.01654052734375e-05, "model_forward_time": 0.025599002838134766, "step": 6662 }, { "epoch": 1.01654052734375e-05, "step": 6662, "training_step_time": 0.17911100387573242 }, { "epoch": 1.016693115234375e-05, "model_forward_time": 0.02510523796081543, "step": 6663 }, { "epoch": 1.016693115234375e-05, "step": 6663, "training_step_time": 0.10828399658203125 }, { "epoch": 1.016845703125e-05, "model_forward_time": 0.02490830421447754, "step": 6664 }, { "epoch": 1.016845703125e-05, "step": 6664, "training_step_time": 0.10892319679260254 }, { "epoch": 1.016998291015625e-05, "model_forward_time": 0.025702476501464844, "step": 6665 }, { "epoch": 1.016998291015625e-05, "step": 6665, "training_step_time": 0.10903215408325195 }, { "epoch": 1.01715087890625e-05, "model_forward_time": 0.025539636611938477, "step": 6666 }, { "epoch": 1.01715087890625e-05, "step": 6666, "training_step_time": 0.10861897468566895 }, { "epoch": 1.017303466796875e-05, "model_forward_time": 0.025807857513427734, "step": 6667 }, { "epoch": 1.017303466796875e-05, "step": 6667, "training_step_time": 0.11297774314880371 }, { "epoch": 1.0174560546875e-05, "model_forward_time": 0.0253751277923584, "step": 6668 }, { "epoch": 1.0174560546875e-05, "step": 6668, "training_step_time": 0.11098408699035645 }, { "epoch": 1.017608642578125e-05, "model_forward_time": 0.025780677795410156, "step": 6669 }, { "epoch": 1.017608642578125e-05, "step": 6669, "training_step_time": 0.10950779914855957 }, { "epoch": 1.01776123046875e-05, "grad_norm": 0.43726256489753723, "learning_rate": 9.209785649220935e-05, "loss": 0.0595, "step": 6670 }, { "epoch": 1.01776123046875e-05, "model_forward_time": 0.025224685668945312, "step": 6670 }, { "epoch": 1.01776123046875e-05, "step": 6670, "training_step_time": 0.11468982696533203 }, { "epoch": 1.017913818359375e-05, "model_forward_time": 0.02541661262512207, "step": 6671 }, { "epoch": 1.017913818359375e-05, "step": 6671, "training_step_time": 0.11031198501586914 }, { "epoch": 1.01806640625e-05, "model_forward_time": 0.025704383850097656, "step": 6672 }, { "epoch": 1.01806640625e-05, "step": 6672, "training_step_time": 0.11162042617797852 }, { "epoch": 1.018218994140625e-05, "model_forward_time": 0.025445938110351562, "step": 6673 }, { "epoch": 1.018218994140625e-05, "step": 6673, "training_step_time": 0.11043286323547363 }, { "epoch": 1.01837158203125e-05, "model_forward_time": 0.025633573532104492, "step": 6674 }, { "epoch": 1.01837158203125e-05, "step": 6674, "training_step_time": 0.1081540584564209 }, { "epoch": 1.018524169921875e-05, "model_forward_time": 0.025513410568237305, "step": 6675 }, { "epoch": 1.018524169921875e-05, "step": 6675, "training_step_time": 0.10905671119689941 }, { "epoch": 1.0186767578125e-05, "model_forward_time": 0.025923490524291992, "step": 6676 }, { "epoch": 1.0186767578125e-05, "step": 6676, "training_step_time": 0.1147162914276123 }, { "epoch": 1.018829345703125e-05, "model_forward_time": 0.027506113052368164, "step": 6677 }, { "epoch": 1.018829345703125e-05, "step": 6677, "training_step_time": 0.11569929122924805 }, { "epoch": 1.01898193359375e-05, "model_forward_time": 0.02564239501953125, "step": 6678 }, { "epoch": 1.01898193359375e-05, "step": 6678, "training_step_time": 0.10983085632324219 }, { "epoch": 1.019134521484375e-05, "model_forward_time": 0.025661468505859375, "step": 6679 }, { "epoch": 1.019134521484375e-05, "step": 6679, "training_step_time": 0.10880470275878906 }, { "epoch": 1.019287109375e-05, "grad_norm": 0.593154788017273, "learning_rate": 9.206809357414474e-05, "loss": 0.0824, "step": 6680 }, { "epoch": 1.019287109375e-05, "model_forward_time": 0.025086402893066406, "step": 6680 }, { "epoch": 1.019287109375e-05, "step": 6680, "training_step_time": 0.10748863220214844 }, { "epoch": 1.019439697265625e-05, "model_forward_time": 0.025897502899169922, "step": 6681 }, { "epoch": 1.019439697265625e-05, "step": 6681, "training_step_time": 0.18822598457336426 }, { "epoch": 1.01959228515625e-05, "model_forward_time": 0.02459859848022461, "step": 6682 }, { "epoch": 1.01959228515625e-05, "step": 6682, "training_step_time": 0.11741161346435547 }, { "epoch": 1.019744873046875e-05, "model_forward_time": 0.02447962760925293, "step": 6683 }, { "epoch": 1.019744873046875e-05, "step": 6683, "training_step_time": 0.12725615501403809 }, { "epoch": 1.0198974609375e-05, "model_forward_time": 0.025165796279907227, "step": 6684 }, { "epoch": 1.0198974609375e-05, "step": 6684, "training_step_time": 0.16658878326416016 }, { "epoch": 1.020050048828125e-05, "model_forward_time": 0.024655818939208984, "step": 6685 }, { "epoch": 1.020050048828125e-05, "step": 6685, "training_step_time": 0.21158623695373535 }, { "epoch": 1.02020263671875e-05, "model_forward_time": 0.02469038963317871, "step": 6686 }, { "epoch": 1.02020263671875e-05, "step": 6686, "training_step_time": 0.1145026683807373 }, { "epoch": 1.020355224609375e-05, "model_forward_time": 0.024611949920654297, "step": 6687 }, { "epoch": 1.020355224609375e-05, "step": 6687, "training_step_time": 0.11061739921569824 }, { "epoch": 1.0205078125e-05, "model_forward_time": 0.025577783584594727, "step": 6688 }, { "epoch": 1.0205078125e-05, "step": 6688, "training_step_time": 0.11490631103515625 }, { "epoch": 1.020660400390625e-05, "model_forward_time": 0.025063276290893555, "step": 6689 }, { "epoch": 1.020660400390625e-05, "step": 6689, "training_step_time": 0.11049604415893555 }, { "epoch": 1.02081298828125e-05, "grad_norm": 0.43632248044013977, "learning_rate": 9.20382795393797e-05, "loss": 0.0782, "step": 6690 }, { "epoch": 1.02081298828125e-05, "model_forward_time": 0.025624513626098633, "step": 6690 }, { "epoch": 1.02081298828125e-05, "step": 6690, "training_step_time": 0.15600085258483887 }, { "epoch": 1.020965576171875e-05, "model_forward_time": 0.027201414108276367, "step": 6691 }, { "epoch": 1.020965576171875e-05, "step": 6691, "training_step_time": 0.1472001075744629 }, { "epoch": 1.0211181640625e-05, "model_forward_time": 0.025005817413330078, "step": 6692 }, { "epoch": 1.0211181640625e-05, "step": 6692, "training_step_time": 0.13500022888183594 }, { "epoch": 1.021270751953125e-05, "model_forward_time": 0.024970054626464844, "step": 6693 }, { "epoch": 1.021270751953125e-05, "step": 6693, "training_step_time": 0.18063092231750488 }, { "epoch": 1.02142333984375e-05, "model_forward_time": 0.02487635612487793, "step": 6694 }, { "epoch": 1.02142333984375e-05, "step": 6694, "training_step_time": 0.18192648887634277 }, { "epoch": 1.021575927734375e-05, "model_forward_time": 0.024602890014648438, "step": 6695 }, { "epoch": 1.021575927734375e-05, "step": 6695, "training_step_time": 0.17698454856872559 }, { "epoch": 1.021728515625e-05, "model_forward_time": 0.024635791778564453, "step": 6696 }, { "epoch": 1.021728515625e-05, "step": 6696, "training_step_time": 0.17298436164855957 }, { "epoch": 1.021881103515625e-05, "model_forward_time": 0.02493762969970703, "step": 6697 }, { "epoch": 1.021881103515625e-05, "step": 6697, "training_step_time": 0.15532946586608887 }, { "epoch": 1.02203369140625e-05, "model_forward_time": 0.024669647216796875, "step": 6698 }, { "epoch": 1.02203369140625e-05, "step": 6698, "training_step_time": 0.13764238357543945 }, { "epoch": 1.022186279296875e-05, "model_forward_time": 0.024527311325073242, "step": 6699 }, { "epoch": 1.022186279296875e-05, "step": 6699, "training_step_time": 0.13286733627319336 }, { "epoch": 1.0223388671875e-05, "grad_norm": 0.7008652687072754, "learning_rate": 9.200841442414106e-05, "loss": 0.0794, "step": 6700 }, { "epoch": 1.0223388671875e-05, "model_forward_time": 0.024979591369628906, "step": 6700 }, { "epoch": 1.0223388671875e-05, "step": 6700, "training_step_time": 0.1236116886138916 }, { "epoch": 1.022491455078125e-05, "model_forward_time": 0.02466607093811035, "step": 6701 }, { "epoch": 1.022491455078125e-05, "step": 6701, "training_step_time": 0.1890411376953125 }, { "epoch": 1.02264404296875e-05, "model_forward_time": 0.024779081344604492, "step": 6702 }, { "epoch": 1.02264404296875e-05, "step": 6702, "training_step_time": 0.13500738143920898 }, { "epoch": 1.022796630859375e-05, "model_forward_time": 0.02444005012512207, "step": 6703 }, { "epoch": 1.022796630859375e-05, "step": 6703, "training_step_time": 0.13224196434020996 }, { "epoch": 1.02294921875e-05, "model_forward_time": 0.024773120880126953, "step": 6704 }, { "epoch": 1.02294921875e-05, "step": 6704, "training_step_time": 0.17796826362609863 }, { "epoch": 1.023101806640625e-05, "model_forward_time": 0.02533102035522461, "step": 6705 }, { "epoch": 1.023101806640625e-05, "step": 6705, "training_step_time": 0.14831233024597168 }, { "epoch": 1.02325439453125e-05, "model_forward_time": 0.025710582733154297, "step": 6706 }, { "epoch": 1.02325439453125e-05, "step": 6706, "training_step_time": 0.10755181312561035 }, { "epoch": 1.023406982421875e-05, "model_forward_time": 0.025962114334106445, "step": 6707 }, { "epoch": 1.023406982421875e-05, "step": 6707, "training_step_time": 0.11071038246154785 }, { "epoch": 1.0235595703125e-05, "model_forward_time": 0.025438785552978516, "step": 6708 }, { "epoch": 1.0235595703125e-05, "step": 6708, "training_step_time": 0.1090400218963623 }, { "epoch": 1.023712158203125e-05, "model_forward_time": 0.02598714828491211, "step": 6709 }, { "epoch": 1.023712158203125e-05, "step": 6709, "training_step_time": 0.1110529899597168 }, { "epoch": 1.02386474609375e-05, "grad_norm": 0.5296297669410706, "learning_rate": 9.197849826471774e-05, "loss": 0.0832, "step": 6710 }, { "epoch": 1.02386474609375e-05, "model_forward_time": 0.025888442993164062, "step": 6710 }, { "epoch": 1.02386474609375e-05, "step": 6710, "training_step_time": 0.1121983528137207 }, { "epoch": 1.024017333984375e-05, "model_forward_time": 0.025605440139770508, "step": 6711 }, { "epoch": 1.024017333984375e-05, "step": 6711, "training_step_time": 0.11295032501220703 }, { "epoch": 1.024169921875e-05, "model_forward_time": 0.025949716567993164, "step": 6712 }, { "epoch": 1.024169921875e-05, "step": 6712, "training_step_time": 0.10803723335266113 }, { "epoch": 1.024322509765625e-05, "model_forward_time": 0.02553391456604004, "step": 6713 }, { "epoch": 1.024322509765625e-05, "step": 6713, "training_step_time": 0.10935521125793457 }, { "epoch": 1.02447509765625e-05, "model_forward_time": 0.02526545524597168, "step": 6714 }, { "epoch": 1.02447509765625e-05, "step": 6714, "training_step_time": 0.11267876625061035 }, { "epoch": 1.024627685546875e-05, "model_forward_time": 0.029499530792236328, "step": 6715 }, { "epoch": 1.024627685546875e-05, "step": 6715, "training_step_time": 0.11208701133728027 }, { "epoch": 1.0247802734375e-05, "model_forward_time": 0.025459766387939453, "step": 6716 }, { "epoch": 1.0247802734375e-05, "step": 6716, "training_step_time": 0.10940074920654297 }, { "epoch": 1.024932861328125e-05, "model_forward_time": 0.02602076530456543, "step": 6717 }, { "epoch": 1.024932861328125e-05, "step": 6717, "training_step_time": 0.11237454414367676 }, { "epoch": 1.02508544921875e-05, "model_forward_time": 0.024964570999145508, "step": 6718 }, { "epoch": 1.02508544921875e-05, "step": 6718, "training_step_time": 0.11144852638244629 }, { "epoch": 1.025238037109375e-05, "model_forward_time": 0.026040315628051758, "step": 6719 }, { "epoch": 1.025238037109375e-05, "step": 6719, "training_step_time": 0.11171674728393555 }, { "epoch": 1.025390625e-05, "grad_norm": 0.36391812562942505, "learning_rate": 9.194853109746074e-05, "loss": 0.0844, "step": 6720 }, { "epoch": 1.025390625e-05, "model_forward_time": 0.025341272354125977, "step": 6720 }, { "epoch": 1.025390625e-05, "step": 6720, "training_step_time": 0.11127972602844238 }, { "epoch": 1.025543212890625e-05, "model_forward_time": 0.025410175323486328, "step": 6721 }, { "epoch": 1.025543212890625e-05, "step": 6721, "training_step_time": 0.10713624954223633 }, { "epoch": 1.02569580078125e-05, "model_forward_time": 0.02526688575744629, "step": 6722 }, { "epoch": 1.02569580078125e-05, "step": 6722, "training_step_time": 0.10897207260131836 }, { "epoch": 1.025848388671875e-05, "model_forward_time": 0.02539229393005371, "step": 6723 }, { "epoch": 1.025848388671875e-05, "step": 6723, "training_step_time": 0.10737085342407227 }, { "epoch": 1.0260009765625e-05, "model_forward_time": 0.02553582191467285, "step": 6724 }, { "epoch": 1.0260009765625e-05, "step": 6724, "training_step_time": 0.155792236328125 }, { "epoch": 1.026153564453125e-05, "model_forward_time": 0.02562546730041504, "step": 6725 }, { "epoch": 1.026153564453125e-05, "step": 6725, "training_step_time": 0.11450815200805664 }, { "epoch": 1.02630615234375e-05, "model_forward_time": 0.024576187133789062, "step": 6726 }, { "epoch": 1.02630615234375e-05, "step": 6726, "training_step_time": 0.13769054412841797 }, { "epoch": 1.026458740234375e-05, "model_forward_time": 0.0250091552734375, "step": 6727 }, { "epoch": 1.026458740234375e-05, "step": 6727, "training_step_time": 0.1526026725769043 }, { "epoch": 1.026611328125e-05, "model_forward_time": 0.025135517120361328, "step": 6728 }, { "epoch": 1.026611328125e-05, "step": 6728, "training_step_time": 0.19364094734191895 }, { "epoch": 1.026763916015625e-05, "model_forward_time": 0.025769472122192383, "step": 6729 }, { "epoch": 1.026763916015625e-05, "step": 6729, "training_step_time": 0.15213584899902344 }, { "epoch": 1.02691650390625e-05, "grad_norm": 0.37396296858787537, "learning_rate": 9.191851295878295e-05, "loss": 0.0766, "step": 6730 }, { "epoch": 1.02691650390625e-05, "model_forward_time": 0.0246121883392334, "step": 6730 }, { "epoch": 1.02691650390625e-05, "step": 6730, "training_step_time": 0.20453286170959473 }, { "epoch": 1.027069091796875e-05, "model_forward_time": 0.024776458740234375, "step": 6731 }, { "epoch": 1.027069091796875e-05, "step": 6731, "training_step_time": 0.11318707466125488 }, { "epoch": 1.0272216796875e-05, "model_forward_time": 0.02440643310546875, "step": 6732 }, { "epoch": 1.0272216796875e-05, "step": 6732, "training_step_time": 0.10942816734313965 }, { "epoch": 1.027374267578125e-05, "model_forward_time": 0.025505542755126953, "step": 6733 }, { "epoch": 1.027374267578125e-05, "step": 6733, "training_step_time": 0.19609928131103516 }, { "epoch": 1.02752685546875e-05, "model_forward_time": 0.02491903305053711, "step": 6734 }, { "epoch": 1.02752685546875e-05, "step": 6734, "training_step_time": 0.1049954891204834 }, { "epoch": 1.027679443359375e-05, "model_forward_time": 0.02437138557434082, "step": 6735 }, { "epoch": 1.027679443359375e-05, "step": 6735, "training_step_time": 0.10743236541748047 }, { "epoch": 1.02783203125e-05, "model_forward_time": 0.02556777000427246, "step": 6736 }, { "epoch": 1.02783203125e-05, "step": 6736, "training_step_time": 0.1072227954864502 }, { "epoch": 1.027984619140625e-05, "model_forward_time": 0.02550029754638672, "step": 6737 }, { "epoch": 1.027984619140625e-05, "step": 6737, "training_step_time": 0.10777139663696289 }, { "epoch": 1.02813720703125e-05, "model_forward_time": 0.02562570571899414, "step": 6738 }, { "epoch": 1.02813720703125e-05, "step": 6738, "training_step_time": 0.10840296745300293 }, { "epoch": 1.028289794921875e-05, "model_forward_time": 0.025302648544311523, "step": 6739 }, { "epoch": 1.028289794921875e-05, "step": 6739, "training_step_time": 0.10816168785095215 }, { "epoch": 1.0284423828125e-05, "grad_norm": 0.42670416831970215, "learning_rate": 9.188844388515926e-05, "loss": 0.0931, "step": 6740 }, { "epoch": 1.0284423828125e-05, "model_forward_time": 0.025083065032958984, "step": 6740 }, { "epoch": 1.0284423828125e-05, "step": 6740, "training_step_time": 0.10978221893310547 }, { "epoch": 1.028594970703125e-05, "model_forward_time": 0.025543212890625, "step": 6741 }, { "epoch": 1.028594970703125e-05, "step": 6741, "training_step_time": 0.10684990882873535 }, { "epoch": 1.02874755859375e-05, "model_forward_time": 0.025813579559326172, "step": 6742 }, { "epoch": 1.02874755859375e-05, "step": 6742, "training_step_time": 0.10875988006591797 }, { "epoch": 1.028900146484375e-05, "model_forward_time": 0.025463581085205078, "step": 6743 }, { "epoch": 1.028900146484375e-05, "step": 6743, "training_step_time": 0.1079552173614502 }, { "epoch": 1.029052734375e-05, "model_forward_time": 0.02527761459350586, "step": 6744 }, { "epoch": 1.029052734375e-05, "step": 6744, "training_step_time": 0.10619711875915527 }, { "epoch": 1.029205322265625e-05, "model_forward_time": 0.025319814682006836, "step": 6745 }, { "epoch": 1.029205322265625e-05, "step": 6745, "training_step_time": 0.19970440864562988 }, { "epoch": 1.02935791015625e-05, "model_forward_time": 0.02437901496887207, "step": 6746 }, { "epoch": 1.02935791015625e-05, "step": 6746, "training_step_time": 0.18084716796875 }, { "epoch": 1.029510498046875e-05, "model_forward_time": 0.02436995506286621, "step": 6747 }, { "epoch": 1.029510498046875e-05, "step": 6747, "training_step_time": 0.16920948028564453 }, { "epoch": 1.0296630859375e-05, "model_forward_time": 0.025168895721435547, "step": 6748 }, { "epoch": 1.0296630859375e-05, "step": 6748, "training_step_time": 0.17935466766357422 }, { "epoch": 1.029815673828125e-05, "model_forward_time": 0.025011777877807617, "step": 6749 }, { "epoch": 1.029815673828125e-05, "step": 6749, "training_step_time": 0.177933931350708 }, { "epoch": 1.02996826171875e-05, "grad_norm": 0.39730721712112427, "learning_rate": 9.185832391312644e-05, "loss": 0.0651, "step": 6750 }, { "epoch": 1.02996826171875e-05, "model_forward_time": 0.024962186813354492, "step": 6750 }, { "epoch": 1.02996826171875e-05, "step": 6750, "training_step_time": 0.15660643577575684 }, { "epoch": 1.030120849609375e-05, "model_forward_time": 0.024784564971923828, "step": 6751 }, { "epoch": 1.030120849609375e-05, "step": 6751, "training_step_time": 0.10580635070800781 }, { "epoch": 1.0302734375e-05, "model_forward_time": 0.025832414627075195, "step": 6752 }, { "epoch": 1.0302734375e-05, "step": 6752, "training_step_time": 0.10579276084899902 }, { "epoch": 1.030426025390625e-05, "model_forward_time": 0.02552938461303711, "step": 6753 }, { "epoch": 1.030426025390625e-05, "step": 6753, "training_step_time": 0.10761046409606934 }, { "epoch": 1.03057861328125e-05, "model_forward_time": 0.02854323387145996, "step": 6754 }, { "epoch": 1.03057861328125e-05, "step": 6754, "training_step_time": 0.11634421348571777 }, { "epoch": 1.030731201171875e-05, "model_forward_time": 0.025571584701538086, "step": 6755 }, { "epoch": 1.030731201171875e-05, "step": 6755, "training_step_time": 0.1069326400756836 }, { "epoch": 1.0308837890625e-05, "model_forward_time": 0.025213003158569336, "step": 6756 }, { "epoch": 1.0308837890625e-05, "step": 6756, "training_step_time": 0.10791325569152832 }, { "epoch": 1.031036376953125e-05, "model_forward_time": 0.02558112144470215, "step": 6757 }, { "epoch": 1.031036376953125e-05, "step": 6757, "training_step_time": 0.10666370391845703 }, { "epoch": 1.03118896484375e-05, "model_forward_time": 0.02504253387451172, "step": 6758 }, { "epoch": 1.03118896484375e-05, "step": 6758, "training_step_time": 0.13380169868469238 }, { "epoch": 1.031341552734375e-05, "model_forward_time": 0.025493621826171875, "step": 6759 }, { "epoch": 1.031341552734375e-05, "step": 6759, "training_step_time": 0.15464329719543457 }, { "epoch": 1.031494140625e-05, "grad_norm": 0.5684757232666016, "learning_rate": 9.182815307928307e-05, "loss": 0.0703, "step": 6760 }, { "epoch": 1.031494140625e-05, "model_forward_time": 0.024649620056152344, "step": 6760 }, { "epoch": 1.031494140625e-05, "step": 6760, "training_step_time": 0.15729761123657227 }, { "epoch": 1.031646728515625e-05, "model_forward_time": 0.024744510650634766, "step": 6761 }, { "epoch": 1.031646728515625e-05, "step": 6761, "training_step_time": 0.15435123443603516 }, { "epoch": 1.03179931640625e-05, "model_forward_time": 0.025151968002319336, "step": 6762 }, { "epoch": 1.03179931640625e-05, "step": 6762, "training_step_time": 0.13762664794921875 }, { "epoch": 1.031951904296875e-05, "model_forward_time": 0.024779796600341797, "step": 6763 }, { "epoch": 1.031951904296875e-05, "step": 6763, "training_step_time": 0.1277782917022705 }, { "epoch": 1.0321044921875e-05, "model_forward_time": 0.024873018264770508, "step": 6764 }, { "epoch": 1.0321044921875e-05, "step": 6764, "training_step_time": 0.12342333793640137 }, { "epoch": 1.032257080078125e-05, "model_forward_time": 0.02537393569946289, "step": 6765 }, { "epoch": 1.032257080078125e-05, "step": 6765, "training_step_time": 0.1369800567626953 }, { "epoch": 1.03240966796875e-05, "model_forward_time": 0.02530837059020996, "step": 6766 }, { "epoch": 1.03240966796875e-05, "step": 6766, "training_step_time": 0.19207310676574707 }, { "epoch": 1.032562255859375e-05, "model_forward_time": 0.024490833282470703, "step": 6767 }, { "epoch": 1.032562255859375e-05, "step": 6767, "training_step_time": 0.12000656127929688 }, { "epoch": 1.03271484375e-05, "model_forward_time": 0.0269777774810791, "step": 6768 }, { "epoch": 1.03271484375e-05, "step": 6768, "training_step_time": 0.12020754814147949 }, { "epoch": 1.032867431640625e-05, "model_forward_time": 0.025759220123291016, "step": 6769 }, { "epoch": 1.032867431640625e-05, "step": 6769, "training_step_time": 0.11267638206481934 }, { "epoch": 1.03302001953125e-05, "grad_norm": 0.38645869493484497, "learning_rate": 9.179793142028959e-05, "loss": 0.0655, "step": 6770 }, { "epoch": 1.03302001953125e-05, "model_forward_time": 0.025322675704956055, "step": 6770 }, { "epoch": 1.03302001953125e-05, "step": 6770, "training_step_time": 0.1320514678955078 }, { "epoch": 1.033172607421875e-05, "model_forward_time": 0.0254361629486084, "step": 6771 }, { "epoch": 1.033172607421875e-05, "step": 6771, "training_step_time": 0.19279909133911133 }, { "epoch": 1.0333251953125e-05, "model_forward_time": 0.024619340896606445, "step": 6772 }, { "epoch": 1.0333251953125e-05, "step": 6772, "training_step_time": 0.156111478805542 }, { "epoch": 1.033477783203125e-05, "model_forward_time": 0.024389982223510742, "step": 6773 }, { "epoch": 1.033477783203125e-05, "step": 6773, "training_step_time": 0.12191057205200195 }, { "epoch": 1.03363037109375e-05, "model_forward_time": 0.024698972702026367, "step": 6774 }, { "epoch": 1.03363037109375e-05, "step": 6774, "training_step_time": 0.119110107421875 }, { "epoch": 1.033782958984375e-05, "model_forward_time": 0.025611162185668945, "step": 6775 }, { "epoch": 1.033782958984375e-05, "step": 6775, "training_step_time": 0.10812234878540039 }, { "epoch": 1.033935546875e-05, "model_forward_time": 0.025439977645874023, "step": 6776 }, { "epoch": 1.033935546875e-05, "step": 6776, "training_step_time": 0.19219470024108887 }, { "epoch": 1.034088134765625e-05, "model_forward_time": 0.024503231048583984, "step": 6777 }, { "epoch": 1.034088134765625e-05, "step": 6777, "training_step_time": 0.10391712188720703 }, { "epoch": 1.03424072265625e-05, "model_forward_time": 0.024680137634277344, "step": 6778 }, { "epoch": 1.03424072265625e-05, "step": 6778, "training_step_time": 0.10871386528015137 }, { "epoch": 1.034393310546875e-05, "model_forward_time": 0.024979114532470703, "step": 6779 }, { "epoch": 1.034393310546875e-05, "step": 6779, "training_step_time": 0.11033987998962402 }, { "epoch": 1.0345458984375e-05, "grad_norm": 0.36881861090660095, "learning_rate": 9.176765897286813e-05, "loss": 0.0735, "step": 6780 }, { "epoch": 1.0345458984375e-05, "model_forward_time": 0.024012088775634766, "step": 6780 }, { "epoch": 1.0345458984375e-05, "step": 6780, "training_step_time": 0.10874104499816895 }, { "epoch": 1.034698486328125e-05, "model_forward_time": 0.02436971664428711, "step": 6781 }, { "epoch": 1.034698486328125e-05, "step": 6781, "training_step_time": 0.1107327938079834 }, { "epoch": 1.03485107421875e-05, "model_forward_time": 0.025145292282104492, "step": 6782 }, { "epoch": 1.03485107421875e-05, "step": 6782, "training_step_time": 0.10963177680969238 }, { "epoch": 1.035003662109375e-05, "model_forward_time": 0.025675296783447266, "step": 6783 }, { "epoch": 1.035003662109375e-05, "step": 6783, "training_step_time": 0.1100163459777832 }, { "epoch": 1.03515625e-05, "model_forward_time": 0.02542710304260254, "step": 6784 }, { "epoch": 1.03515625e-05, "step": 6784, "training_step_time": 0.11111569404602051 }, { "epoch": 1.035308837890625e-05, "model_forward_time": 0.025756359100341797, "step": 6785 }, { "epoch": 1.035308837890625e-05, "step": 6785, "training_step_time": 0.11312198638916016 }, { "epoch": 1.03546142578125e-05, "model_forward_time": 0.025445938110351562, "step": 6786 }, { "epoch": 1.03546142578125e-05, "step": 6786, "training_step_time": 0.1082923412322998 }, { "epoch": 1.035614013671875e-05, "model_forward_time": 0.02658820152282715, "step": 6787 }, { "epoch": 1.035614013671875e-05, "step": 6787, "training_step_time": 0.11227202415466309 }, { "epoch": 1.0357666015625e-05, "model_forward_time": 0.025177955627441406, "step": 6788 }, { "epoch": 1.0357666015625e-05, "step": 6788, "training_step_time": 0.19746804237365723 }, { "epoch": 1.035919189453125e-05, "model_forward_time": 0.024490833282470703, "step": 6789 }, { "epoch": 1.035919189453125e-05, "step": 6789, "training_step_time": 0.16613340377807617 }, { "epoch": 1.03607177734375e-05, "grad_norm": 0.5300776958465576, "learning_rate": 9.173733577380258e-05, "loss": 0.0643, "step": 6790 }, { "epoch": 1.03607177734375e-05, "model_forward_time": 0.02492237091064453, "step": 6790 }, { "epoch": 1.03607177734375e-05, "step": 6790, "training_step_time": 0.17789149284362793 }, { "epoch": 1.036224365234375e-05, "model_forward_time": 0.024981260299682617, "step": 6791 }, { "epoch": 1.036224365234375e-05, "step": 6791, "training_step_time": 0.15999388694763184 }, { "epoch": 1.036376953125e-05, "model_forward_time": 0.02533888816833496, "step": 6792 }, { "epoch": 1.036376953125e-05, "step": 6792, "training_step_time": 0.20302867889404297 }, { "epoch": 1.036529541015625e-05, "model_forward_time": 0.024639368057250977, "step": 6793 }, { "epoch": 1.036529541015625e-05, "step": 6793, "training_step_time": 0.1438910961151123 }, { "epoch": 1.03668212890625e-05, "model_forward_time": 0.024744033813476562, "step": 6794 }, { "epoch": 1.03668212890625e-05, "step": 6794, "training_step_time": 0.10484528541564941 }, { "epoch": 1.036834716796875e-05, "model_forward_time": 0.025305509567260742, "step": 6795 }, { "epoch": 1.036834716796875e-05, "step": 6795, "training_step_time": 0.10884857177734375 }, { "epoch": 1.0369873046875e-05, "model_forward_time": 0.028499126434326172, "step": 6796 }, { "epoch": 1.0369873046875e-05, "step": 6796, "training_step_time": 0.11509394645690918 }, { "epoch": 1.037139892578125e-05, "model_forward_time": 0.025892257690429688, "step": 6797 }, { "epoch": 1.037139892578125e-05, "step": 6797, "training_step_time": 0.10813784599304199 }, { "epoch": 1.03729248046875e-05, "model_forward_time": 0.0252077579498291, "step": 6798 }, { "epoch": 1.03729248046875e-05, "step": 6798, "training_step_time": 0.11166524887084961 }, { "epoch": 1.037445068359375e-05, "model_forward_time": 0.02540755271911621, "step": 6799 }, { "epoch": 1.037445068359375e-05, "step": 6799, "training_step_time": 0.10682511329650879 }, { "epoch": 1.03759765625e-05, "grad_norm": 0.4445255994796753, "learning_rate": 9.17069618599385e-05, "loss": 0.083, "step": 6800 }, { "epoch": 1.03759765625e-05, "model_forward_time": 0.026111841201782227, "step": 6800 }, { "epoch": 1.03759765625e-05, "step": 6800, "training_step_time": 0.11031579971313477 }, { "epoch": 1.037750244140625e-05, "model_forward_time": 0.025699615478515625, "step": 6801 }, { "epoch": 1.037750244140625e-05, "step": 6801, "training_step_time": 0.10898399353027344 }, { "epoch": 1.03790283203125e-05, "model_forward_time": 0.025450468063354492, "step": 6802 }, { "epoch": 1.03790283203125e-05, "step": 6802, "training_step_time": 0.10847973823547363 }, { "epoch": 1.038055419921875e-05, "model_forward_time": 0.025058746337890625, "step": 6803 }, { "epoch": 1.038055419921875e-05, "step": 6803, "training_step_time": 0.10778331756591797 }, { "epoch": 1.0382080078125e-05, "model_forward_time": 0.02533102035522461, "step": 6804 }, { "epoch": 1.0382080078125e-05, "step": 6804, "training_step_time": 0.11117935180664062 }, { "epoch": 1.038360595703125e-05, "model_forward_time": 0.024915456771850586, "step": 6805 }, { "epoch": 1.038360595703125e-05, "step": 6805, "training_step_time": 0.1074991226196289 }, { "epoch": 1.03851318359375e-05, "model_forward_time": 0.025461673736572266, "step": 6806 }, { "epoch": 1.03851318359375e-05, "step": 6806, "training_step_time": 0.10694098472595215 }, { "epoch": 1.038665771484375e-05, "model_forward_time": 0.025527238845825195, "step": 6807 }, { "epoch": 1.038665771484375e-05, "step": 6807, "training_step_time": 0.1068115234375 }, { "epoch": 1.038818359375e-05, "model_forward_time": 0.025472640991210938, "step": 6808 }, { "epoch": 1.038818359375e-05, "step": 6808, "training_step_time": 0.10971713066101074 }, { "epoch": 1.038970947265625e-05, "model_forward_time": 0.025383710861206055, "step": 6809 }, { "epoch": 1.038970947265625e-05, "step": 6809, "training_step_time": 0.10813021659851074 }, { "epoch": 1.03912353515625e-05, "grad_norm": 0.41912150382995605, "learning_rate": 9.167653726818305e-05, "loss": 0.0563, "step": 6810 }, { "epoch": 1.03912353515625e-05, "model_forward_time": 0.02541637420654297, "step": 6810 }, { "epoch": 1.03912353515625e-05, "step": 6810, "training_step_time": 0.17542695999145508 }, { "epoch": 1.039276123046875e-05, "model_forward_time": 0.02513909339904785, "step": 6811 }, { "epoch": 1.039276123046875e-05, "step": 6811, "training_step_time": 0.2097764015197754 }, { "epoch": 1.0394287109375e-05, "model_forward_time": 0.024834156036376953, "step": 6812 }, { "epoch": 1.0394287109375e-05, "step": 6812, "training_step_time": 0.2441103458404541 }, { "epoch": 1.039581298828125e-05, "model_forward_time": 0.026102066040039062, "step": 6813 }, { "epoch": 1.039581298828125e-05, "step": 6813, "training_step_time": 0.1863689422607422 }, { "epoch": 1.03973388671875e-05, "model_forward_time": 0.02456831932067871, "step": 6814 }, { "epoch": 1.03973388671875e-05, "step": 6814, "training_step_time": 0.21054291725158691 }, { "epoch": 1.039886474609375e-05, "model_forward_time": 0.024891138076782227, "step": 6815 }, { "epoch": 1.039886474609375e-05, "step": 6815, "training_step_time": 0.14827871322631836 }, { "epoch": 1.0400390625e-05, "model_forward_time": 0.02440333366394043, "step": 6816 }, { "epoch": 1.0400390625e-05, "step": 6816, "training_step_time": 0.1231389045715332 }, { "epoch": 1.040191650390625e-05, "model_forward_time": 0.025834321975708008, "step": 6817 }, { "epoch": 1.040191650390625e-05, "step": 6817, "training_step_time": 0.11193180084228516 }, { "epoch": 1.04034423828125e-05, "model_forward_time": 0.025318384170532227, "step": 6818 }, { "epoch": 1.04034423828125e-05, "step": 6818, "training_step_time": 0.1058201789855957 }, { "epoch": 1.040496826171875e-05, "model_forward_time": 0.02589106559753418, "step": 6819 }, { "epoch": 1.040496826171875e-05, "step": 6819, "training_step_time": 0.19548344612121582 }, { "epoch": 1.0406494140625e-05, "grad_norm": 0.5922017693519592, "learning_rate": 9.164606203550497e-05, "loss": 0.0896, "step": 6820 }, { "epoch": 1.0406494140625e-05, "model_forward_time": 0.025278806686401367, "step": 6820 }, { "epoch": 1.0406494140625e-05, "step": 6820, "training_step_time": 0.1424579620361328 }, { "epoch": 1.040802001953125e-05, "model_forward_time": 0.024781465530395508, "step": 6821 }, { "epoch": 1.040802001953125e-05, "step": 6821, "training_step_time": 0.16158580780029297 }, { "epoch": 1.04095458984375e-05, "model_forward_time": 0.02426433563232422, "step": 6822 }, { "epoch": 1.04095458984375e-05, "step": 6822, "training_step_time": 0.15207862854003906 }, { "epoch": 1.041107177734375e-05, "model_forward_time": 0.024457216262817383, "step": 6823 }, { "epoch": 1.041107177734375e-05, "step": 6823, "training_step_time": 0.13353395462036133 }, { "epoch": 1.041259765625e-05, "model_forward_time": 0.024419546127319336, "step": 6824 }, { "epoch": 1.041259765625e-05, "step": 6824, "training_step_time": 0.125596284866333 }, { "epoch": 1.041412353515625e-05, "model_forward_time": 0.024992704391479492, "step": 6825 }, { "epoch": 1.041412353515625e-05, "step": 6825, "training_step_time": 0.12492012977600098 }, { "epoch": 1.04156494140625e-05, "model_forward_time": 0.025506973266601562, "step": 6826 }, { "epoch": 1.04156494140625e-05, "step": 6826, "training_step_time": 0.12287163734436035 }, { "epoch": 1.041717529296875e-05, "model_forward_time": 0.025490760803222656, "step": 6827 }, { "epoch": 1.041717529296875e-05, "step": 6827, "training_step_time": 0.11313962936401367 }, { "epoch": 1.0418701171875e-05, "model_forward_time": 0.025393247604370117, "step": 6828 }, { "epoch": 1.0418701171875e-05, "step": 6828, "training_step_time": 0.1130528450012207 }, { "epoch": 1.042022705078125e-05, "model_forward_time": 0.025601625442504883, "step": 6829 }, { "epoch": 1.042022705078125e-05, "step": 6829, "training_step_time": 0.1140587329864502 }, { "epoch": 1.04217529296875e-05, "grad_norm": 0.5026640892028809, "learning_rate": 9.161553619893457e-05, "loss": 0.091, "step": 6830 }, { "epoch": 1.04217529296875e-05, "model_forward_time": 0.025165796279907227, "step": 6830 }, { "epoch": 1.04217529296875e-05, "step": 6830, "training_step_time": 0.10871076583862305 }, { "epoch": 1.042327880859375e-05, "model_forward_time": 0.024888992309570312, "step": 6831 }, { "epoch": 1.042327880859375e-05, "step": 6831, "training_step_time": 0.19790339469909668 }, { "epoch": 1.04248046875e-05, "model_forward_time": 0.026005268096923828, "step": 6832 }, { "epoch": 1.04248046875e-05, "step": 6832, "training_step_time": 0.13216757774353027 }, { "epoch": 1.042633056640625e-05, "model_forward_time": 0.024621248245239258, "step": 6833 }, { "epoch": 1.042633056640625e-05, "step": 6833, "training_step_time": 0.12093043327331543 }, { "epoch": 1.04278564453125e-05, "model_forward_time": 0.02511286735534668, "step": 6834 }, { "epoch": 1.04278564453125e-05, "step": 6834, "training_step_time": 0.21353650093078613 }, { "epoch": 1.042938232421875e-05, "model_forward_time": 0.024446964263916016, "step": 6835 }, { "epoch": 1.042938232421875e-05, "step": 6835, "training_step_time": 0.1211555004119873 }, { "epoch": 1.0430908203125e-05, "model_forward_time": 0.024619579315185547, "step": 6836 }, { "epoch": 1.0430908203125e-05, "step": 6836, "training_step_time": 0.10722780227661133 }, { "epoch": 1.043243408203125e-05, "model_forward_time": 0.027858257293701172, "step": 6837 }, { "epoch": 1.043243408203125e-05, "step": 6837, "training_step_time": 0.11323404312133789 }, { "epoch": 1.04339599609375e-05, "model_forward_time": 0.025473833084106445, "step": 6838 }, { "epoch": 1.04339599609375e-05, "step": 6838, "training_step_time": 0.10867667198181152 }, { "epoch": 1.043548583984375e-05, "model_forward_time": 0.025340557098388672, "step": 6839 }, { "epoch": 1.043548583984375e-05, "step": 6839, "training_step_time": 0.10857868194580078 }, { "epoch": 1.043701171875e-05, "grad_norm": 0.6259903907775879, "learning_rate": 9.158495979556358e-05, "loss": 0.0976, "step": 6840 }, { "epoch": 1.043701171875e-05, "model_forward_time": 0.02539229393005371, "step": 6840 }, { "epoch": 1.043701171875e-05, "step": 6840, "training_step_time": 0.11167383193969727 }, { "epoch": 1.043853759765625e-05, "model_forward_time": 0.02536606788635254, "step": 6841 }, { "epoch": 1.043853759765625e-05, "step": 6841, "training_step_time": 0.10842156410217285 }, { "epoch": 1.04400634765625e-05, "model_forward_time": 0.02516317367553711, "step": 6842 }, { "epoch": 1.04400634765625e-05, "step": 6842, "training_step_time": 0.10797691345214844 }, { "epoch": 1.044158935546875e-05, "model_forward_time": 0.025162458419799805, "step": 6843 }, { "epoch": 1.044158935546875e-05, "step": 6843, "training_step_time": 0.11037540435791016 }, { "epoch": 1.0443115234375e-05, "model_forward_time": 0.025346994400024414, "step": 6844 }, { "epoch": 1.0443115234375e-05, "step": 6844, "training_step_time": 0.1098787784576416 }, { "epoch": 1.044464111328125e-05, "model_forward_time": 0.025025367736816406, "step": 6845 }, { "epoch": 1.044464111328125e-05, "step": 6845, "training_step_time": 0.11203169822692871 }, { "epoch": 1.04461669921875e-05, "model_forward_time": 0.024965524673461914, "step": 6846 }, { "epoch": 1.04461669921875e-05, "step": 6846, "training_step_time": 0.11276793479919434 }, { "epoch": 1.044769287109375e-05, "model_forward_time": 0.025529861450195312, "step": 6847 }, { "epoch": 1.044769287109375e-05, "step": 6847, "training_step_time": 0.10617733001708984 }, { "epoch": 1.044921875e-05, "model_forward_time": 0.025123119354248047, "step": 6848 }, { "epoch": 1.044921875e-05, "step": 6848, "training_step_time": 0.10662651062011719 }, { "epoch": 1.045074462890625e-05, "model_forward_time": 0.025099992752075195, "step": 6849 }, { "epoch": 1.045074462890625e-05, "step": 6849, "training_step_time": 0.10772442817687988 }, { "epoch": 1.04522705078125e-05, "grad_norm": 0.3904078006744385, "learning_rate": 9.155433286254525e-05, "loss": 0.0869, "step": 6850 }, { "epoch": 1.04522705078125e-05, "model_forward_time": 0.02514195442199707, "step": 6850 }, { "epoch": 1.04522705078125e-05, "step": 6850, "training_step_time": 0.10655665397644043 }, { "epoch": 1.045379638671875e-05, "model_forward_time": 0.025073528289794922, "step": 6851 }, { "epoch": 1.045379638671875e-05, "step": 6851, "training_step_time": 0.10752987861633301 }, { "epoch": 1.0455322265625e-05, "model_forward_time": 0.025771379470825195, "step": 6852 }, { "epoch": 1.0455322265625e-05, "step": 6852, "training_step_time": 0.11004471778869629 }, { "epoch": 1.045684814453125e-05, "model_forward_time": 0.02509140968322754, "step": 6853 }, { "epoch": 1.045684814453125e-05, "step": 6853, "training_step_time": 0.10846352577209473 }, { "epoch": 1.04583740234375e-05, "model_forward_time": 0.026437759399414062, "step": 6854 }, { "epoch": 1.04583740234375e-05, "step": 6854, "training_step_time": 0.1535472869873047 }, { "epoch": 1.045989990234375e-05, "model_forward_time": 0.025159358978271484, "step": 6855 }, { "epoch": 1.045989990234375e-05, "step": 6855, "training_step_time": 0.1153264045715332 }, { "epoch": 1.046142578125e-05, "model_forward_time": 0.025094985961914062, "step": 6856 }, { "epoch": 1.046142578125e-05, "step": 6856, "training_step_time": 0.1332230567932129 }, { "epoch": 1.046295166015625e-05, "model_forward_time": 0.02551579475402832, "step": 6857 }, { "epoch": 1.046295166015625e-05, "step": 6857, "training_step_time": 0.15721726417541504 }, { "epoch": 1.04644775390625e-05, "model_forward_time": 0.024110794067382812, "step": 6858 }, { "epoch": 1.04644775390625e-05, "step": 6858, "training_step_time": 0.17815899848937988 }, { "epoch": 1.046600341796875e-05, "model_forward_time": 0.024344205856323242, "step": 6859 }, { "epoch": 1.046600341796875e-05, "step": 6859, "training_step_time": 0.1632683277130127 }, { "epoch": 1.0467529296875e-05, "grad_norm": 0.49598586559295654, "learning_rate": 9.152365543709416e-05, "loss": 0.0621, "step": 6860 }, { "epoch": 1.0467529296875e-05, "model_forward_time": 0.024263858795166016, "step": 6860 }, { "epoch": 1.0467529296875e-05, "step": 6860, "training_step_time": 0.16974520683288574 }, { "epoch": 1.046905517578125e-05, "model_forward_time": 0.0242769718170166, "step": 6861 }, { "epoch": 1.046905517578125e-05, "step": 6861, "training_step_time": 0.10838723182678223 }, { "epoch": 1.04705810546875e-05, "model_forward_time": 0.024440526962280273, "step": 6862 }, { "epoch": 1.04705810546875e-05, "step": 6862, "training_step_time": 0.18919014930725098 }, { "epoch": 1.047210693359375e-05, "model_forward_time": 0.02396702766418457, "step": 6863 }, { "epoch": 1.047210693359375e-05, "step": 6863, "training_step_time": 0.20218777656555176 }, { "epoch": 1.04736328125e-05, "model_forward_time": 0.02405071258544922, "step": 6864 }, { "epoch": 1.04736328125e-05, "step": 6864, "training_step_time": 0.19723796844482422 }, { "epoch": 1.047515869140625e-05, "model_forward_time": 0.023938417434692383, "step": 6865 }, { "epoch": 1.047515869140625e-05, "step": 6865, "training_step_time": 0.18705320358276367 }, { "epoch": 1.04766845703125e-05, "model_forward_time": 0.024021387100219727, "step": 6866 }, { "epoch": 1.04766845703125e-05, "step": 6866, "training_step_time": 0.1728525161743164 }, { "epoch": 1.047821044921875e-05, "model_forward_time": 0.024511337280273438, "step": 6867 }, { "epoch": 1.047821044921875e-05, "step": 6867, "training_step_time": 0.17067623138427734 }, { "epoch": 1.0479736328125e-05, "model_forward_time": 0.024166345596313477, "step": 6868 }, { "epoch": 1.0479736328125e-05, "step": 6868, "training_step_time": 0.11936259269714355 }, { "epoch": 1.048126220703125e-05, "model_forward_time": 0.024559497833251953, "step": 6869 }, { "epoch": 1.048126220703125e-05, "step": 6869, "training_step_time": 0.10615658760070801 }, { "epoch": 1.04827880859375e-05, "grad_norm": 0.5252742767333984, "learning_rate": 9.14929275564863e-05, "loss": 0.0848, "step": 6870 }, { "epoch": 1.04827880859375e-05, "model_forward_time": 0.024663448333740234, "step": 6870 }, { "epoch": 1.04827880859375e-05, "step": 6870, "training_step_time": 0.10719823837280273 }, { "epoch": 1.048431396484375e-05, "model_forward_time": 0.02483534812927246, "step": 6871 }, { "epoch": 1.048431396484375e-05, "step": 6871, "training_step_time": 0.10860753059387207 }, { "epoch": 1.048583984375e-05, "model_forward_time": 0.024970531463623047, "step": 6872 }, { "epoch": 1.048583984375e-05, "step": 6872, "training_step_time": 0.11089205741882324 }, { "epoch": 1.048736572265625e-05, "model_forward_time": 0.024719715118408203, "step": 6873 }, { "epoch": 1.048736572265625e-05, "step": 6873, "training_step_time": 0.17366385459899902 }, { "epoch": 1.04888916015625e-05, "model_forward_time": 0.02436375617980957, "step": 6874 }, { "epoch": 1.04888916015625e-05, "step": 6874, "training_step_time": 0.1891021728515625 }, { "epoch": 1.049041748046875e-05, "model_forward_time": 0.02434372901916504, "step": 6875 }, { "epoch": 1.049041748046875e-05, "step": 6875, "training_step_time": 0.18416285514831543 }, { "epoch": 1.0491943359375e-05, "model_forward_time": 0.024489641189575195, "step": 6876 }, { "epoch": 1.0491943359375e-05, "step": 6876, "training_step_time": 0.17869210243225098 }, { "epoch": 1.049346923828125e-05, "model_forward_time": 0.024214744567871094, "step": 6877 }, { "epoch": 1.049346923828125e-05, "step": 6877, "training_step_time": 0.15127015113830566 }, { "epoch": 1.04949951171875e-05, "model_forward_time": 0.027262449264526367, "step": 6878 }, { "epoch": 1.04949951171875e-05, "step": 6878, "training_step_time": 0.11522150039672852 }, { "epoch": 1.049652099609375e-05, "model_forward_time": 0.023425579071044922, "step": 6879 }, { "epoch": 1.049652099609375e-05, "step": 6879, "training_step_time": 0.10960936546325684 }, { "epoch": 1.0498046875e-05, "grad_norm": 0.2893292307853699, "learning_rate": 9.146214925805891e-05, "loss": 0.0822, "step": 6880 }, { "epoch": 1.0498046875e-05, "model_forward_time": 0.026083707809448242, "step": 6880 }, { "epoch": 1.0498046875e-05, "step": 6880, "training_step_time": 0.11514163017272949 }, { "epoch": 1.049957275390625e-05, "model_forward_time": 0.025149822235107422, "step": 6881 }, { "epoch": 1.049957275390625e-05, "step": 6881, "training_step_time": 0.10846257209777832 }, { "epoch": 1.05010986328125e-05, "model_forward_time": 0.024936914443969727, "step": 6882 }, { "epoch": 1.05010986328125e-05, "step": 6882, "training_step_time": 0.11062002182006836 }, { "epoch": 1.050262451171875e-05, "model_forward_time": 0.025113821029663086, "step": 6883 }, { "epoch": 1.050262451171875e-05, "step": 6883, "training_step_time": 0.10923910140991211 }, { "epoch": 1.0504150390625e-05, "model_forward_time": 0.024729013442993164, "step": 6884 }, { "epoch": 1.0504150390625e-05, "step": 6884, "training_step_time": 0.10837125778198242 }, { "epoch": 1.050567626953125e-05, "model_forward_time": 0.025251150131225586, "step": 6885 }, { "epoch": 1.050567626953125e-05, "step": 6885, "training_step_time": 0.10841155052185059 }, { "epoch": 1.05072021484375e-05, "model_forward_time": 0.025059223175048828, "step": 6886 }, { "epoch": 1.05072021484375e-05, "step": 6886, "training_step_time": 0.11151838302612305 }, { "epoch": 1.050872802734375e-05, "model_forward_time": 0.02523207664489746, "step": 6887 }, { "epoch": 1.050872802734375e-05, "step": 6887, "training_step_time": 0.11017608642578125 }, { "epoch": 1.051025390625e-05, "model_forward_time": 0.02539992332458496, "step": 6888 }, { "epoch": 1.051025390625e-05, "step": 6888, "training_step_time": 0.10846590995788574 }, { "epoch": 1.051177978515625e-05, "model_forward_time": 0.024913787841796875, "step": 6889 }, { "epoch": 1.051177978515625e-05, "step": 6889, "training_step_time": 0.10684013366699219 }, { "epoch": 1.05133056640625e-05, "grad_norm": 0.4509742558002472, "learning_rate": 9.143132057921058e-05, "loss": 0.0736, "step": 6890 }, { "epoch": 1.05133056640625e-05, "model_forward_time": 0.025509357452392578, "step": 6890 }, { "epoch": 1.05133056640625e-05, "step": 6890, "training_step_time": 0.10841894149780273 }, { "epoch": 1.051483154296875e-05, "model_forward_time": 0.025149106979370117, "step": 6891 }, { "epoch": 1.051483154296875e-05, "step": 6891, "training_step_time": 0.10986566543579102 }, { "epoch": 1.0516357421875e-05, "model_forward_time": 0.024235963821411133, "step": 6892 }, { "epoch": 1.0516357421875e-05, "step": 6892, "training_step_time": 0.10765886306762695 }, { "epoch": 1.051788330078125e-05, "model_forward_time": 0.02395153045654297, "step": 6893 }, { "epoch": 1.051788330078125e-05, "step": 6893, "training_step_time": 0.1080479621887207 }, { "epoch": 1.05194091796875e-05, "model_forward_time": 0.024648189544677734, "step": 6894 }, { "epoch": 1.05194091796875e-05, "step": 6894, "training_step_time": 0.10867524147033691 }, { "epoch": 1.052093505859375e-05, "model_forward_time": 0.025477886199951172, "step": 6895 }, { "epoch": 1.052093505859375e-05, "step": 6895, "training_step_time": 0.21281933784484863 }, { "epoch": 1.05224609375e-05, "model_forward_time": 0.02380228042602539, "step": 6896 }, { "epoch": 1.05224609375e-05, "step": 6896, "training_step_time": 0.11900568008422852 }, { "epoch": 1.052398681640625e-05, "model_forward_time": 0.024505138397216797, "step": 6897 }, { "epoch": 1.052398681640625e-05, "step": 6897, "training_step_time": 0.13282513618469238 }, { "epoch": 1.05255126953125e-05, "model_forward_time": 0.02491140365600586, "step": 6898 }, { "epoch": 1.05255126953125e-05, "step": 6898, "training_step_time": 0.16254496574401855 }, { "epoch": 1.052703857421875e-05, "model_forward_time": 0.024431467056274414, "step": 6899 }, { "epoch": 1.052703857421875e-05, "step": 6899, "training_step_time": 0.21403288841247559 }, { "epoch": 1.0528564453125e-05, "grad_norm": 0.45346662402153015, "learning_rate": 9.140044155740101e-05, "loss": 0.0765, "step": 6900 }, { "epoch": 1.0528564453125e-05, "model_forward_time": 0.024194002151489258, "step": 6900 }, { "epoch": 1.0528564453125e-05, "step": 6900, "training_step_time": 0.1487438678741455 }, { "epoch": 1.053009033203125e-05, "model_forward_time": 0.024287939071655273, "step": 6901 }, { "epoch": 1.053009033203125e-05, "step": 6901, "training_step_time": 0.11983561515808105 }, { "epoch": 1.05316162109375e-05, "model_forward_time": 0.024675607681274414, "step": 6902 }, { "epoch": 1.05316162109375e-05, "step": 6902, "training_step_time": 0.11600899696350098 }, { "epoch": 1.053314208984375e-05, "model_forward_time": 0.025053739547729492, "step": 6903 }, { "epoch": 1.053314208984375e-05, "step": 6903, "training_step_time": 0.10934877395629883 }, { "epoch": 1.053466796875e-05, "model_forward_time": 0.025212526321411133, "step": 6904 }, { "epoch": 1.053466796875e-05, "step": 6904, "training_step_time": 0.20821809768676758 }, { "epoch": 1.053619384765625e-05, "model_forward_time": 0.024994373321533203, "step": 6905 }, { "epoch": 1.053619384765625e-05, "step": 6905, "training_step_time": 0.11135983467102051 }, { "epoch": 1.05377197265625e-05, "model_forward_time": 0.024297714233398438, "step": 6906 }, { "epoch": 1.05377197265625e-05, "step": 6906, "training_step_time": 0.1109781265258789 }, { "epoch": 1.053924560546875e-05, "model_forward_time": 0.02540111541748047, "step": 6907 }, { "epoch": 1.053924560546875e-05, "step": 6907, "training_step_time": 0.11577272415161133 }, { "epoch": 1.0540771484375e-05, "model_forward_time": 0.02454376220703125, "step": 6908 }, { "epoch": 1.0540771484375e-05, "step": 6908, "training_step_time": 0.1179506778717041 }, { "epoch": 1.054229736328125e-05, "model_forward_time": 0.024923086166381836, "step": 6909 }, { "epoch": 1.054229736328125e-05, "step": 6909, "training_step_time": 0.10774946212768555 }, { "epoch": 1.05438232421875e-05, "grad_norm": 0.6097220778465271, "learning_rate": 9.136951223015113e-05, "loss": 0.0682, "step": 6910 }, { "epoch": 1.05438232421875e-05, "model_forward_time": 0.02468109130859375, "step": 6910 }, { "epoch": 1.05438232421875e-05, "step": 6910, "training_step_time": 0.10899472236633301 }, { "epoch": 1.054534912109375e-05, "model_forward_time": 0.02492666244506836, "step": 6911 }, { "epoch": 1.054534912109375e-05, "step": 6911, "training_step_time": 0.10904860496520996 }, { "epoch": 1.0546875e-05, "model_forward_time": 0.025363683700561523, "step": 6912 }, { "epoch": 1.0546875e-05, "step": 6912, "training_step_time": 0.11029481887817383 }, { "epoch": 1.054840087890625e-05, "model_forward_time": 0.024826526641845703, "step": 6913 }, { "epoch": 1.054840087890625e-05, "step": 6913, "training_step_time": 0.10861897468566895 }, { "epoch": 1.05499267578125e-05, "model_forward_time": 0.024974346160888672, "step": 6914 }, { "epoch": 1.05499267578125e-05, "step": 6914, "training_step_time": 0.10750460624694824 }, { "epoch": 1.055145263671875e-05, "model_forward_time": 0.02500009536743164, "step": 6915 }, { "epoch": 1.055145263671875e-05, "step": 6915, "training_step_time": 0.10692739486694336 }, { "epoch": 1.0552978515625e-05, "model_forward_time": 0.028398513793945312, "step": 6916 }, { "epoch": 1.0552978515625e-05, "step": 6916, "training_step_time": 0.11512517929077148 }, { "epoch": 1.055450439453125e-05, "model_forward_time": 0.025870561599731445, "step": 6917 }, { "epoch": 1.055450439453125e-05, "step": 6917, "training_step_time": 0.10952019691467285 }, { "epoch": 1.05560302734375e-05, "model_forward_time": 0.025049209594726562, "step": 6918 }, { "epoch": 1.05560302734375e-05, "step": 6918, "training_step_time": 0.20692658424377441 }, { "epoch": 1.055755615234375e-05, "model_forward_time": 0.024576663970947266, "step": 6919 }, { "epoch": 1.055755615234375e-05, "step": 6919, "training_step_time": 0.20017027854919434 }, { "epoch": 1.055908203125e-05, "grad_norm": 0.37026605010032654, "learning_rate": 9.133853263504302e-05, "loss": 0.0919, "step": 6920 }, { "epoch": 1.055908203125e-05, "model_forward_time": 0.02448415756225586, "step": 6920 }, { "epoch": 1.055908203125e-05, "step": 6920, "training_step_time": 0.18847084045410156 }, { "epoch": 1.056060791015625e-05, "model_forward_time": 0.0244293212890625, "step": 6921 }, { "epoch": 1.056060791015625e-05, "step": 6921, "training_step_time": 0.1820390224456787 }, { "epoch": 1.05621337890625e-05, "model_forward_time": 0.024730205535888672, "step": 6922 }, { "epoch": 1.05621337890625e-05, "step": 6922, "training_step_time": 0.18834781646728516 }, { "epoch": 1.056365966796875e-05, "model_forward_time": 0.024193763732910156, "step": 6923 }, { "epoch": 1.056365966796875e-05, "step": 6923, "training_step_time": 0.10442686080932617 }, { "epoch": 1.0565185546875e-05, "model_forward_time": 0.02419447898864746, "step": 6924 }, { "epoch": 1.0565185546875e-05, "step": 6924, "training_step_time": 0.11005306243896484 }, { "epoch": 1.056671142578125e-05, "model_forward_time": 0.024891138076782227, "step": 6925 }, { "epoch": 1.056671142578125e-05, "step": 6925, "training_step_time": 0.10966086387634277 }, { "epoch": 1.05682373046875e-05, "model_forward_time": 0.025177955627441406, "step": 6926 }, { "epoch": 1.05682373046875e-05, "step": 6926, "training_step_time": 0.11630845069885254 }, { "epoch": 1.056976318359375e-05, "model_forward_time": 0.025228023529052734, "step": 6927 }, { "epoch": 1.056976318359375e-05, "step": 6927, "training_step_time": 0.10709166526794434 }, { "epoch": 1.05712890625e-05, "model_forward_time": 0.0254361629486084, "step": 6928 }, { "epoch": 1.05712890625e-05, "step": 6928, "training_step_time": 0.1087031364440918 }, { "epoch": 1.057281494140625e-05, "model_forward_time": 0.025231361389160156, "step": 6929 }, { "epoch": 1.057281494140625e-05, "step": 6929, "training_step_time": 0.10891246795654297 }, { "epoch": 1.05743408203125e-05, "grad_norm": 0.45468053221702576, "learning_rate": 9.130750280971978e-05, "loss": 0.0978, "step": 6930 }, { "epoch": 1.05743408203125e-05, "model_forward_time": 0.025106191635131836, "step": 6930 }, { "epoch": 1.05743408203125e-05, "step": 6930, "training_step_time": 0.10860562324523926 }, { "epoch": 1.057586669921875e-05, "model_forward_time": 0.025057554244995117, "step": 6931 }, { "epoch": 1.057586669921875e-05, "step": 6931, "training_step_time": 0.10863113403320312 }, { "epoch": 1.0577392578125e-05, "model_forward_time": 0.026504039764404297, "step": 6932 }, { "epoch": 1.0577392578125e-05, "step": 6932, "training_step_time": 0.10920596122741699 }, { "epoch": 1.057891845703125e-05, "model_forward_time": 0.024993896484375, "step": 6933 }, { "epoch": 1.057891845703125e-05, "step": 6933, "training_step_time": 0.10929608345031738 }, { "epoch": 1.05804443359375e-05, "model_forward_time": 0.025240182876586914, "step": 6934 }, { "epoch": 1.05804443359375e-05, "step": 6934, "training_step_time": 0.10766887664794922 }, { "epoch": 1.058197021484375e-05, "model_forward_time": 0.025053024291992188, "step": 6935 }, { "epoch": 1.058197021484375e-05, "step": 6935, "training_step_time": 0.1256873607635498 }, { "epoch": 1.058349609375e-05, "model_forward_time": 0.024502992630004883, "step": 6936 }, { "epoch": 1.058349609375e-05, "step": 6936, "training_step_time": 0.14051365852355957 }, { "epoch": 1.058502197265625e-05, "model_forward_time": 0.02467799186706543, "step": 6937 }, { "epoch": 1.058502197265625e-05, "step": 6937, "training_step_time": 0.13616371154785156 }, { "epoch": 1.05865478515625e-05, "model_forward_time": 0.02424454689025879, "step": 6938 }, { "epoch": 1.05865478515625e-05, "step": 6938, "training_step_time": 0.12135004997253418 }, { "epoch": 1.058807373046875e-05, "model_forward_time": 0.02514815330505371, "step": 6939 }, { "epoch": 1.058807373046875e-05, "step": 6939, "training_step_time": 0.13232088088989258 }, { "epoch": 1.0589599609375e-05, "grad_norm": 0.47305402159690857, "learning_rate": 9.127642279188558e-05, "loss": 0.0641, "step": 6940 }, { "epoch": 1.0589599609375e-05, "model_forward_time": 0.025082111358642578, "step": 6940 }, { "epoch": 1.0589599609375e-05, "step": 6940, "training_step_time": 0.11889147758483887 }, { "epoch": 1.059112548828125e-05, "model_forward_time": 0.024838685989379883, "step": 6941 }, { "epoch": 1.059112548828125e-05, "step": 6941, "training_step_time": 0.12120175361633301 }, { "epoch": 1.05926513671875e-05, "model_forward_time": 0.02487969398498535, "step": 6942 }, { "epoch": 1.05926513671875e-05, "step": 6942, "training_step_time": 0.15819287300109863 }, { "epoch": 1.059417724609375e-05, "model_forward_time": 0.0242156982421875, "step": 6943 }, { "epoch": 1.059417724609375e-05, "step": 6943, "training_step_time": 0.2102794647216797 }, { "epoch": 1.0595703125e-05, "model_forward_time": 0.02479720115661621, "step": 6944 }, { "epoch": 1.0595703125e-05, "step": 6944, "training_step_time": 0.1717240810394287 }, { "epoch": 1.059722900390625e-05, "model_forward_time": 0.0239102840423584, "step": 6945 }, { "epoch": 1.059722900390625e-05, "step": 6945, "training_step_time": 0.12453269958496094 }, { "epoch": 1.05987548828125e-05, "model_forward_time": 0.024402141571044922, "step": 6946 }, { "epoch": 1.05987548828125e-05, "step": 6946, "training_step_time": 0.11365866661071777 }, { "epoch": 1.060028076171875e-05, "model_forward_time": 0.02542901039123535, "step": 6947 }, { "epoch": 1.060028076171875e-05, "step": 6947, "training_step_time": 0.11139893531799316 }, { "epoch": 1.0601806640625e-05, "model_forward_time": 0.024979829788208008, "step": 6948 }, { "epoch": 1.0601806640625e-05, "step": 6948, "training_step_time": 0.19571137428283691 }, { "epoch": 1.060333251953125e-05, "model_forward_time": 0.024342060089111328, "step": 6949 }, { "epoch": 1.060333251953125e-05, "step": 6949, "training_step_time": 0.10512733459472656 }, { "epoch": 1.06048583984375e-05, "grad_norm": 0.5234485864639282, "learning_rate": 9.124529261930559e-05, "loss": 0.0716, "step": 6950 }, { "epoch": 1.06048583984375e-05, "model_forward_time": 0.024337053298950195, "step": 6950 }, { "epoch": 1.06048583984375e-05, "step": 6950, "training_step_time": 0.10312104225158691 }, { "epoch": 1.060638427734375e-05, "model_forward_time": 0.02665567398071289, "step": 6951 }, { "epoch": 1.060638427734375e-05, "step": 6951, "training_step_time": 0.1150050163269043 }, { "epoch": 1.060791015625e-05, "model_forward_time": 0.023973941802978516, "step": 6952 }, { "epoch": 1.060791015625e-05, "step": 6952, "training_step_time": 0.1068732738494873 }, { "epoch": 1.060943603515625e-05, "model_forward_time": 0.02396416664123535, "step": 6953 }, { "epoch": 1.060943603515625e-05, "step": 6953, "training_step_time": 0.10594987869262695 }, { "epoch": 1.06109619140625e-05, "model_forward_time": 0.024998188018798828, "step": 6954 }, { "epoch": 1.06109619140625e-05, "step": 6954, "training_step_time": 0.11004972457885742 }, { "epoch": 1.061248779296875e-05, "model_forward_time": 0.024957895278930664, "step": 6955 }, { "epoch": 1.061248779296875e-05, "step": 6955, "training_step_time": 0.10573744773864746 }, { "epoch": 1.0614013671875e-05, "model_forward_time": 0.025057077407836914, "step": 6956 }, { "epoch": 1.0614013671875e-05, "step": 6956, "training_step_time": 0.11009478569030762 }, { "epoch": 1.061553955078125e-05, "model_forward_time": 0.02498912811279297, "step": 6957 }, { "epoch": 1.061553955078125e-05, "step": 6957, "training_step_time": 0.10709834098815918 }, { "epoch": 1.06170654296875e-05, "model_forward_time": 0.025204896926879883, "step": 6958 }, { "epoch": 1.06170654296875e-05, "step": 6958, "training_step_time": 0.11341285705566406 }, { "epoch": 1.061859130859375e-05, "model_forward_time": 0.02502584457397461, "step": 6959 }, { "epoch": 1.061859130859375e-05, "step": 6959, "training_step_time": 0.14218664169311523 }, { "epoch": 1.06201171875e-05, "grad_norm": 0.556512713432312, "learning_rate": 9.121411232980588e-05, "loss": 0.0746, "step": 6960 }, { "epoch": 1.06201171875e-05, "model_forward_time": 0.025212764739990234, "step": 6960 }, { "epoch": 1.06201171875e-05, "step": 6960, "training_step_time": 0.11321234703063965 }, { "epoch": 1.062164306640625e-05, "model_forward_time": 0.024501800537109375, "step": 6961 }, { "epoch": 1.062164306640625e-05, "step": 6961, "training_step_time": 0.13434553146362305 }, { "epoch": 1.06231689453125e-05, "model_forward_time": 0.02434539794921875, "step": 6962 }, { "epoch": 1.06231689453125e-05, "step": 6962, "training_step_time": 0.2034461498260498 }, { "epoch": 1.062469482421875e-05, "model_forward_time": 0.024527788162231445, "step": 6963 }, { "epoch": 1.062469482421875e-05, "step": 6963, "training_step_time": 0.1347362995147705 }, { "epoch": 1.0626220703125e-05, "model_forward_time": 0.02398228645324707, "step": 6964 }, { "epoch": 1.0626220703125e-05, "step": 6964, "training_step_time": 0.20968294143676758 }, { "epoch": 1.062774658203125e-05, "model_forward_time": 0.0247194766998291, "step": 6965 }, { "epoch": 1.062774658203125e-05, "step": 6965, "training_step_time": 0.13364553451538086 }, { "epoch": 1.06292724609375e-05, "model_forward_time": 0.02466583251953125, "step": 6966 }, { "epoch": 1.06292724609375e-05, "step": 6966, "training_step_time": 0.11752462387084961 }, { "epoch": 1.063079833984375e-05, "model_forward_time": 0.025668859481811523, "step": 6967 }, { "epoch": 1.063079833984375e-05, "step": 6967, "training_step_time": 0.11880373954772949 }, { "epoch": 1.063232421875e-05, "model_forward_time": 0.025139808654785156, "step": 6968 }, { "epoch": 1.063232421875e-05, "step": 6968, "training_step_time": 0.11307787895202637 }, { "epoch": 1.063385009765625e-05, "model_forward_time": 0.025094032287597656, "step": 6969 }, { "epoch": 1.063385009765625e-05, "step": 6969, "training_step_time": 0.11216187477111816 }, { "epoch": 1.06353759765625e-05, "grad_norm": 0.5842373371124268, "learning_rate": 9.118288196127345e-05, "loss": 0.084, "step": 6970 }, { "epoch": 1.06353759765625e-05, "model_forward_time": 0.025012969970703125, "step": 6970 }, { "epoch": 1.06353759765625e-05, "step": 6970, "training_step_time": 0.11043500900268555 }, { "epoch": 1.063690185546875e-05, "model_forward_time": 0.024838685989379883, "step": 6971 }, { "epoch": 1.063690185546875e-05, "step": 6971, "training_step_time": 0.10912013053894043 }, { "epoch": 1.0638427734375e-05, "model_forward_time": 0.024827957153320312, "step": 6972 }, { "epoch": 1.0638427734375e-05, "step": 6972, "training_step_time": 0.10993099212646484 }, { "epoch": 1.063995361328125e-05, "model_forward_time": 0.025302886962890625, "step": 6973 }, { "epoch": 1.063995361328125e-05, "step": 6973, "training_step_time": 0.11309957504272461 }, { "epoch": 1.06414794921875e-05, "model_forward_time": 0.025162458419799805, "step": 6974 }, { "epoch": 1.06414794921875e-05, "step": 6974, "training_step_time": 0.10836243629455566 }, { "epoch": 1.064300537109375e-05, "model_forward_time": 0.025098085403442383, "step": 6975 }, { "epoch": 1.064300537109375e-05, "step": 6975, "training_step_time": 0.10652303695678711 }, { "epoch": 1.064453125e-05, "model_forward_time": 0.027965545654296875, "step": 6976 }, { "epoch": 1.064453125e-05, "step": 6976, "training_step_time": 0.1163339614868164 }, { "epoch": 1.064605712890625e-05, "model_forward_time": 0.024166584014892578, "step": 6977 }, { "epoch": 1.064605712890625e-05, "step": 6977, "training_step_time": 0.11110782623291016 }, { "epoch": 1.06475830078125e-05, "model_forward_time": 0.023968935012817383, "step": 6978 }, { "epoch": 1.06475830078125e-05, "step": 6978, "training_step_time": 0.10535955429077148 }, { "epoch": 1.064910888671875e-05, "model_forward_time": 0.02504110336303711, "step": 6979 }, { "epoch": 1.064910888671875e-05, "step": 6979, "training_step_time": 0.10996055603027344 }, { "epoch": 1.0650634765625e-05, "grad_norm": 0.5641760230064392, "learning_rate": 9.115160155165614e-05, "loss": 0.0789, "step": 6980 }, { "epoch": 1.0650634765625e-05, "model_forward_time": 0.02630448341369629, "step": 6980 }, { "epoch": 1.0650634765625e-05, "step": 6980, "training_step_time": 0.1129293441772461 }, { "epoch": 1.065216064453125e-05, "model_forward_time": 0.027592182159423828, "step": 6981 }, { "epoch": 1.065216064453125e-05, "step": 6981, "training_step_time": 0.11294436454772949 }, { "epoch": 1.06536865234375e-05, "model_forward_time": 0.02497243881225586, "step": 6982 }, { "epoch": 1.06536865234375e-05, "step": 6982, "training_step_time": 0.11072969436645508 }, { "epoch": 1.065521240234375e-05, "model_forward_time": 0.02498459815979004, "step": 6983 }, { "epoch": 1.065521240234375e-05, "step": 6983, "training_step_time": 0.19438672065734863 }, { "epoch": 1.065673828125e-05, "model_forward_time": 0.024617671966552734, "step": 6984 }, { "epoch": 1.065673828125e-05, "step": 6984, "training_step_time": 0.11960458755493164 }, { "epoch": 1.065826416015625e-05, "model_forward_time": 0.024527549743652344, "step": 6985 }, { "epoch": 1.065826416015625e-05, "step": 6985, "training_step_time": 0.12544631958007812 }, { "epoch": 1.06597900390625e-05, "model_forward_time": 0.02514934539794922, "step": 6986 }, { "epoch": 1.06597900390625e-05, "step": 6986, "training_step_time": 0.15999865531921387 }, { "epoch": 1.066131591796875e-05, "model_forward_time": 0.02427983283996582, "step": 6987 }, { "epoch": 1.066131591796875e-05, "step": 6987, "training_step_time": 0.17305231094360352 }, { "epoch": 1.0662841796875e-05, "model_forward_time": 0.02447342872619629, "step": 6988 }, { "epoch": 1.0662841796875e-05, "step": 6988, "training_step_time": 0.17518877983093262 }, { "epoch": 1.066436767578125e-05, "model_forward_time": 0.0241849422454834, "step": 6989 }, { "epoch": 1.066436767578125e-05, "step": 6989, "training_step_time": 0.10628914833068848 }, { "epoch": 1.06658935546875e-05, "grad_norm": 0.38509079813957214, "learning_rate": 9.112027113896262e-05, "loss": 0.0674, "step": 6990 }, { "epoch": 1.06658935546875e-05, "model_forward_time": 0.024780750274658203, "step": 6990 }, { "epoch": 1.06658935546875e-05, "step": 6990, "training_step_time": 0.11836552619934082 }, { "epoch": 1.066741943359375e-05, "model_forward_time": 0.025345325469970703, "step": 6991 }, { "epoch": 1.066741943359375e-05, "step": 6991, "training_step_time": 0.11522483825683594 }, { "epoch": 1.06689453125e-05, "model_forward_time": 0.025555133819580078, "step": 6992 }, { "epoch": 1.06689453125e-05, "step": 6992, "training_step_time": 0.1119081974029541 }, { "epoch": 1.067047119140625e-05, "model_forward_time": 0.02512502670288086, "step": 6993 }, { "epoch": 1.067047119140625e-05, "step": 6993, "training_step_time": 0.19302845001220703 }, { "epoch": 1.06719970703125e-05, "model_forward_time": 0.024240970611572266, "step": 6994 }, { "epoch": 1.06719970703125e-05, "step": 6994, "training_step_time": 0.1082918643951416 }, { "epoch": 1.067352294921875e-05, "model_forward_time": 0.024778366088867188, "step": 6995 }, { "epoch": 1.067352294921875e-05, "step": 6995, "training_step_time": 0.11216950416564941 }, { "epoch": 1.0675048828125e-05, "model_forward_time": 0.02550816535949707, "step": 6996 }, { "epoch": 1.0675048828125e-05, "step": 6996, "training_step_time": 0.1091001033782959 }, { "epoch": 1.067657470703125e-05, "model_forward_time": 0.025396108627319336, "step": 6997 }, { "epoch": 1.067657470703125e-05, "step": 6997, "training_step_time": 0.10845518112182617 }, { "epoch": 1.06781005859375e-05, "model_forward_time": 0.025339365005493164, "step": 6998 }, { "epoch": 1.06781005859375e-05, "step": 6998, "training_step_time": 0.10641217231750488 }, { "epoch": 1.067962646484375e-05, "model_forward_time": 0.025295257568359375, "step": 6999 }, { "epoch": 1.067962646484375e-05, "step": 6999, "training_step_time": 0.10755085945129395 }, { "epoch": 1.068115234375e-05, "grad_norm": 0.41960352659225464, "learning_rate": 9.108889076126226e-05, "loss": 0.0711, "step": 7000 }, { "epoch": 1.068115234375e-05, "model_forward_time": 0.02619338035583496, "step": 7000 }, { "epoch": 1.068115234375e-05, "step": 7000, "training_step_time": 0.10446834564208984 }, { "epoch": 1.068267822265625e-05, "model_forward_time": 0.023566722869873047, "step": 7001 }, { "epoch": 1.068267822265625e-05, "step": 7001, "training_step_time": 0.1801450252532959 }, { "epoch": 1.06842041015625e-05, "model_forward_time": 0.024695634841918945, "step": 7002 }, { "epoch": 1.06842041015625e-05, "step": 7002, "training_step_time": 0.10543465614318848 }, { "epoch": 1.068572998046875e-05, "model_forward_time": 0.02454376220703125, "step": 7003 }, { "epoch": 1.068572998046875e-05, "step": 7003, "training_step_time": 0.19732403755187988 }, { "epoch": 1.0687255859375e-05, "model_forward_time": 0.02497696876525879, "step": 7004 }, { "epoch": 1.0687255859375e-05, "step": 7004, "training_step_time": 0.10558462142944336 }, { "epoch": 1.068878173828125e-05, "model_forward_time": 0.02477264404296875, "step": 7005 }, { "epoch": 1.068878173828125e-05, "step": 7005, "training_step_time": 0.10427308082580566 }, { "epoch": 1.06903076171875e-05, "model_forward_time": 0.02538156509399414, "step": 7006 }, { "epoch": 1.06903076171875e-05, "step": 7006, "training_step_time": 0.10601615905761719 }, { "epoch": 1.069183349609375e-05, "model_forward_time": 0.025115013122558594, "step": 7007 }, { "epoch": 1.069183349609375e-05, "step": 7007, "training_step_time": 0.1067347526550293 }, { "epoch": 1.0693359375e-05, "model_forward_time": 0.025571823120117188, "step": 7008 }, { "epoch": 1.0693359375e-05, "step": 7008, "training_step_time": 0.10509395599365234 }, { "epoch": 1.069488525390625e-05, "model_forward_time": 0.026918888092041016, "step": 7009 }, { "epoch": 1.069488525390625e-05, "step": 7009, "training_step_time": 0.10778617858886719 }, { "epoch": 1.06964111328125e-05, "grad_norm": 0.6406276822090149, "learning_rate": 9.105746045668521e-05, "loss": 0.0804, "step": 7010 }, { "epoch": 1.06964111328125e-05, "model_forward_time": 0.025259017944335938, "step": 7010 }, { "epoch": 1.06964111328125e-05, "step": 7010, "training_step_time": 0.10605144500732422 }, { "epoch": 1.069793701171875e-05, "model_forward_time": 0.02529311180114746, "step": 7011 }, { "epoch": 1.069793701171875e-05, "step": 7011, "training_step_time": 0.10574507713317871 }, { "epoch": 1.0699462890625e-05, "model_forward_time": 0.024935483932495117, "step": 7012 }, { "epoch": 1.0699462890625e-05, "step": 7012, "training_step_time": 0.11019563674926758 }, { "epoch": 1.070098876953125e-05, "model_forward_time": 0.024545669555664062, "step": 7013 }, { "epoch": 1.070098876953125e-05, "step": 7013, "training_step_time": 0.11034584045410156 }, { "epoch": 1.07025146484375e-05, "model_forward_time": 0.024191617965698242, "step": 7014 }, { "epoch": 1.07025146484375e-05, "step": 7014, "training_step_time": 0.10854029655456543 }, { "epoch": 1.070404052734375e-05, "model_forward_time": 0.02518010139465332, "step": 7015 }, { "epoch": 1.070404052734375e-05, "step": 7015, "training_step_time": 0.1059732437133789 }, { "epoch": 1.070556640625e-05, "model_forward_time": 0.024617910385131836, "step": 7016 }, { "epoch": 1.070556640625e-05, "step": 7016, "training_step_time": 0.12343764305114746 }, { "epoch": 1.070709228515625e-05, "model_forward_time": 0.024818897247314453, "step": 7017 }, { "epoch": 1.070709228515625e-05, "step": 7017, "training_step_time": 0.17769408226013184 }, { "epoch": 1.07086181640625e-05, "model_forward_time": 0.02481675148010254, "step": 7018 }, { "epoch": 1.07086181640625e-05, "step": 7018, "training_step_time": 0.1714038848876953 }, { "epoch": 1.071014404296875e-05, "model_forward_time": 0.027878761291503906, "step": 7019 }, { "epoch": 1.071014404296875e-05, "step": 7019, "training_step_time": 0.2144160270690918 }, { "epoch": 1.0711669921875e-05, "grad_norm": 0.5380091667175293, "learning_rate": 9.102598026342222e-05, "loss": 0.0731, "step": 7020 }, { "epoch": 1.0711669921875e-05, "model_forward_time": 0.02478504180908203, "step": 7020 }, { "epoch": 1.0711669921875e-05, "step": 7020, "training_step_time": 0.12555789947509766 }, { "epoch": 1.071319580078125e-05, "model_forward_time": 0.025376558303833008, "step": 7021 }, { "epoch": 1.071319580078125e-05, "step": 7021, "training_step_time": 0.10780143737792969 }, { "epoch": 1.07147216796875e-05, "model_forward_time": 0.025652647018432617, "step": 7022 }, { "epoch": 1.07147216796875e-05, "step": 7022, "training_step_time": 0.12540078163146973 }, { "epoch": 1.071624755859375e-05, "model_forward_time": 0.025504112243652344, "step": 7023 }, { "epoch": 1.071624755859375e-05, "step": 7023, "training_step_time": 0.10656952857971191 }, { "epoch": 1.07177734375e-05, "model_forward_time": 0.02558159828186035, "step": 7024 }, { "epoch": 1.07177734375e-05, "step": 7024, "training_step_time": 0.10676693916320801 }, { "epoch": 1.071929931640625e-05, "model_forward_time": 0.025756359100341797, "step": 7025 }, { "epoch": 1.071929931640625e-05, "step": 7025, "training_step_time": 0.11246752738952637 }, { "epoch": 1.07208251953125e-05, "model_forward_time": 0.02526712417602539, "step": 7026 }, { "epoch": 1.07208251953125e-05, "step": 7026, "training_step_time": 0.11186075210571289 }, { "epoch": 1.072235107421875e-05, "model_forward_time": 0.025817394256591797, "step": 7027 }, { "epoch": 1.072235107421875e-05, "step": 7027, "training_step_time": 0.10851478576660156 }, { "epoch": 1.0723876953125e-05, "model_forward_time": 0.025493383407592773, "step": 7028 }, { "epoch": 1.0723876953125e-05, "step": 7028, "training_step_time": 0.11330008506774902 }, { "epoch": 1.072540283203125e-05, "model_forward_time": 0.02581787109375, "step": 7029 }, { "epoch": 1.072540283203125e-05, "step": 7029, "training_step_time": 0.10928082466125488 }, { "epoch": 1.07269287109375e-05, "grad_norm": 0.5369435548782349, "learning_rate": 9.099445021972473e-05, "loss": 0.0878, "step": 7030 }, { "epoch": 1.07269287109375e-05, "model_forward_time": 0.025064706802368164, "step": 7030 }, { "epoch": 1.07269287109375e-05, "step": 7030, "training_step_time": 0.11098766326904297 }, { "epoch": 1.072845458984375e-05, "model_forward_time": 0.02506113052368164, "step": 7031 }, { "epoch": 1.072845458984375e-05, "step": 7031, "training_step_time": 0.10520696640014648 }, { "epoch": 1.072998046875e-05, "model_forward_time": 0.02485370635986328, "step": 7032 }, { "epoch": 1.072998046875e-05, "step": 7032, "training_step_time": 0.10779285430908203 }, { "epoch": 1.073150634765625e-05, "model_forward_time": 0.02497410774230957, "step": 7033 }, { "epoch": 1.073150634765625e-05, "step": 7033, "training_step_time": 0.10523414611816406 }, { "epoch": 1.07330322265625e-05, "model_forward_time": 0.025293588638305664, "step": 7034 }, { "epoch": 1.07330322265625e-05, "step": 7034, "training_step_time": 0.1077272891998291 }, { "epoch": 1.073455810546875e-05, "model_forward_time": 0.025935649871826172, "step": 7035 }, { "epoch": 1.073455810546875e-05, "step": 7035, "training_step_time": 0.11211967468261719 }, { "epoch": 1.0736083984375e-05, "model_forward_time": 0.025684595108032227, "step": 7036 }, { "epoch": 1.0736083984375e-05, "step": 7036, "training_step_time": 0.10788726806640625 }, { "epoch": 1.073760986328125e-05, "model_forward_time": 0.02520465850830078, "step": 7037 }, { "epoch": 1.073760986328125e-05, "step": 7037, "training_step_time": 0.10820150375366211 }, { "epoch": 1.07391357421875e-05, "model_forward_time": 0.025285959243774414, "step": 7038 }, { "epoch": 1.07391357421875e-05, "step": 7038, "training_step_time": 0.1331157684326172 }, { "epoch": 1.074066162109375e-05, "model_forward_time": 0.02513742446899414, "step": 7039 }, { "epoch": 1.074066162109375e-05, "step": 7039, "training_step_time": 0.12142276763916016 }, { "epoch": 1.07421875e-05, "grad_norm": 0.2710757851600647, "learning_rate": 9.09628703639047e-05, "loss": 0.0669, "step": 7040 }, { "epoch": 1.07421875e-05, "model_forward_time": 0.0250089168548584, "step": 7040 }, { "epoch": 1.07421875e-05, "step": 7040, "training_step_time": 0.12996506690979004 }, { "epoch": 1.074371337890625e-05, "model_forward_time": 0.02472829818725586, "step": 7041 }, { "epoch": 1.074371337890625e-05, "step": 7041, "training_step_time": 0.15635967254638672 }, { "epoch": 1.07452392578125e-05, "model_forward_time": 0.024196863174438477, "step": 7042 }, { "epoch": 1.07452392578125e-05, "step": 7042, "training_step_time": 0.10655665397644043 }, { "epoch": 1.074676513671875e-05, "model_forward_time": 0.025248289108276367, "step": 7043 }, { "epoch": 1.074676513671875e-05, "step": 7043, "training_step_time": 0.1185302734375 }, { "epoch": 1.0748291015625e-05, "model_forward_time": 0.02496814727783203, "step": 7044 }, { "epoch": 1.0748291015625e-05, "step": 7044, "training_step_time": 0.12171816825866699 }, { "epoch": 1.074981689453125e-05, "model_forward_time": 0.027051448822021484, "step": 7045 }, { "epoch": 1.074981689453125e-05, "step": 7045, "training_step_time": 0.13951444625854492 }, { "epoch": 1.07513427734375e-05, "model_forward_time": 0.02491307258605957, "step": 7046 }, { "epoch": 1.07513427734375e-05, "step": 7046, "training_step_time": 0.11390924453735352 }, { "epoch": 1.075286865234375e-05, "model_forward_time": 0.025099992752075195, "step": 7047 }, { "epoch": 1.075286865234375e-05, "step": 7047, "training_step_time": 0.11038398742675781 }, { "epoch": 1.075439453125e-05, "model_forward_time": 0.024669647216796875, "step": 7048 }, { "epoch": 1.075439453125e-05, "step": 7048, "training_step_time": 0.11325502395629883 }, { "epoch": 1.075592041015625e-05, "model_forward_time": 0.02504706382751465, "step": 7049 }, { "epoch": 1.075592041015625e-05, "step": 7049, "training_step_time": 0.11198687553405762 }, { "epoch": 1.07574462890625e-05, "grad_norm": 0.49168747663497925, "learning_rate": 9.093124073433463e-05, "loss": 0.0635, "step": 7050 }, { "epoch": 1.07574462890625e-05, "model_forward_time": 0.025922298431396484, "step": 7050 }, { "epoch": 1.07574462890625e-05, "step": 7050, "training_step_time": 0.19176888465881348 }, { "epoch": 1.075897216796875e-05, "model_forward_time": 0.02443718910217285, "step": 7051 }, { "epoch": 1.075897216796875e-05, "step": 7051, "training_step_time": 0.10614514350891113 }, { "epoch": 1.0760498046875e-05, "model_forward_time": 0.02424764633178711, "step": 7052 }, { "epoch": 1.0760498046875e-05, "step": 7052, "training_step_time": 0.10701131820678711 }, { "epoch": 1.076202392578125e-05, "model_forward_time": 0.02578139305114746, "step": 7053 }, { "epoch": 1.076202392578125e-05, "step": 7053, "training_step_time": 0.10794782638549805 }, { "epoch": 1.07635498046875e-05, "model_forward_time": 0.02535414695739746, "step": 7054 }, { "epoch": 1.07635498046875e-05, "step": 7054, "training_step_time": 0.10931730270385742 }, { "epoch": 1.076507568359375e-05, "model_forward_time": 0.025298595428466797, "step": 7055 }, { "epoch": 1.076507568359375e-05, "step": 7055, "training_step_time": 0.10750102996826172 }, { "epoch": 1.07666015625e-05, "model_forward_time": 0.02509927749633789, "step": 7056 }, { "epoch": 1.07666015625e-05, "step": 7056, "training_step_time": 0.11254096031188965 }, { "epoch": 1.076812744140625e-05, "model_forward_time": 0.02480292320251465, "step": 7057 }, { "epoch": 1.076812744140625e-05, "step": 7057, "training_step_time": 0.10697579383850098 }, { "epoch": 1.07696533203125e-05, "model_forward_time": 0.02809000015258789, "step": 7058 }, { "epoch": 1.07696533203125e-05, "step": 7058, "training_step_time": 0.1109776496887207 }, { "epoch": 1.077117919921875e-05, "model_forward_time": 0.02514171600341797, "step": 7059 }, { "epoch": 1.077117919921875e-05, "step": 7059, "training_step_time": 0.10888910293579102 }, { "epoch": 1.0772705078125e-05, "grad_norm": 0.30953270196914673, "learning_rate": 9.089956136944751e-05, "loss": 0.0626, "step": 7060 }, { "epoch": 1.0772705078125e-05, "model_forward_time": 0.025087594985961914, "step": 7060 }, { "epoch": 1.0772705078125e-05, "step": 7060, "training_step_time": 0.10701370239257812 }, { "epoch": 1.077423095703125e-05, "model_forward_time": 0.025014638900756836, "step": 7061 }, { "epoch": 1.077423095703125e-05, "step": 7061, "training_step_time": 0.10825872421264648 }, { "epoch": 1.07757568359375e-05, "model_forward_time": 0.025869131088256836, "step": 7062 }, { "epoch": 1.07757568359375e-05, "step": 7062, "training_step_time": 0.10701322555541992 }, { "epoch": 1.077728271484375e-05, "model_forward_time": 0.02526402473449707, "step": 7063 }, { "epoch": 1.077728271484375e-05, "step": 7063, "training_step_time": 0.17486119270324707 }, { "epoch": 1.077880859375e-05, "model_forward_time": 0.02432727813720703, "step": 7064 }, { "epoch": 1.077880859375e-05, "step": 7064, "training_step_time": 0.1943204402923584 }, { "epoch": 1.078033447265625e-05, "model_forward_time": 0.024588823318481445, "step": 7065 }, { "epoch": 1.078033447265625e-05, "step": 7065, "training_step_time": 0.2124476432800293 }, { "epoch": 1.07818603515625e-05, "model_forward_time": 0.024348020553588867, "step": 7066 }, { "epoch": 1.07818603515625e-05, "step": 7066, "training_step_time": 0.1244359016418457 }, { "epoch": 1.078338623046875e-05, "model_forward_time": 0.02512812614440918, "step": 7067 }, { "epoch": 1.078338623046875e-05, "step": 7067, "training_step_time": 0.13160943984985352 }, { "epoch": 1.0784912109375e-05, "model_forward_time": 0.025207042694091797, "step": 7068 }, { "epoch": 1.0784912109375e-05, "step": 7068, "training_step_time": 0.10900139808654785 }, { "epoch": 1.078643798828125e-05, "model_forward_time": 0.025530338287353516, "step": 7069 }, { "epoch": 1.078643798828125e-05, "step": 7069, "training_step_time": 0.1302180290222168 }, { "epoch": 1.07879638671875e-05, "grad_norm": 0.5272168517112732, "learning_rate": 9.086783230773672e-05, "loss": 0.0733, "step": 7070 }, { "epoch": 1.07879638671875e-05, "model_forward_time": 0.025542020797729492, "step": 7070 }, { "epoch": 1.07879638671875e-05, "step": 7070, "training_step_time": 0.10872650146484375 }, { "epoch": 1.078948974609375e-05, "model_forward_time": 0.025114774703979492, "step": 7071 }, { "epoch": 1.078948974609375e-05, "step": 7071, "training_step_time": 0.10934185981750488 }, { "epoch": 1.0791015625e-05, "model_forward_time": 0.025164127349853516, "step": 7072 }, { "epoch": 1.0791015625e-05, "step": 7072, "training_step_time": 0.11220622062683105 }, { "epoch": 1.079254150390625e-05, "model_forward_time": 0.02472090721130371, "step": 7073 }, { "epoch": 1.079254150390625e-05, "step": 7073, "training_step_time": 0.11298203468322754 }, { "epoch": 1.07940673828125e-05, "model_forward_time": 0.02496957778930664, "step": 7074 }, { "epoch": 1.07940673828125e-05, "step": 7074, "training_step_time": 0.1113898754119873 }, { "epoch": 1.079559326171875e-05, "model_forward_time": 0.025299787521362305, "step": 7075 }, { "epoch": 1.079559326171875e-05, "step": 7075, "training_step_time": 0.10976552963256836 }, { "epoch": 1.0797119140625e-05, "model_forward_time": 0.025315523147583008, "step": 7076 }, { "epoch": 1.0797119140625e-05, "step": 7076, "training_step_time": 0.1083989143371582 }, { "epoch": 1.079864501953125e-05, "model_forward_time": 0.02558302879333496, "step": 7077 }, { "epoch": 1.079864501953125e-05, "step": 7077, "training_step_time": 0.10982394218444824 }, { "epoch": 1.08001708984375e-05, "model_forward_time": 0.024932861328125, "step": 7078 }, { "epoch": 1.08001708984375e-05, "step": 7078, "training_step_time": 0.11085820198059082 }, { "epoch": 1.080169677734375e-05, "model_forward_time": 0.024733543395996094, "step": 7079 }, { "epoch": 1.080169677734375e-05, "step": 7079, "training_step_time": 0.10879302024841309 }, { "epoch": 1.080322265625e-05, "grad_norm": 0.26838913559913635, "learning_rate": 9.083605358775612e-05, "loss": 0.0949, "step": 7080 }, { "epoch": 1.080322265625e-05, "model_forward_time": 0.025464534759521484, "step": 7080 }, { "epoch": 1.080322265625e-05, "step": 7080, "training_step_time": 0.11103987693786621 }, { "epoch": 1.080474853515625e-05, "model_forward_time": 0.02545452117919922, "step": 7081 }, { "epoch": 1.080474853515625e-05, "step": 7081, "training_step_time": 0.11041712760925293 }, { "epoch": 1.08062744140625e-05, "model_forward_time": 0.025461196899414062, "step": 7082 }, { "epoch": 1.08062744140625e-05, "step": 7082, "training_step_time": 0.10801887512207031 }, { "epoch": 1.080780029296875e-05, "model_forward_time": 0.02534961700439453, "step": 7083 }, { "epoch": 1.080780029296875e-05, "step": 7083, "training_step_time": 0.11166858673095703 }, { "epoch": 1.0809326171875e-05, "model_forward_time": 0.025315523147583008, "step": 7084 }, { "epoch": 1.0809326171875e-05, "step": 7084, "training_step_time": 0.22837352752685547 }, { "epoch": 1.081085205078125e-05, "model_forward_time": 0.024243831634521484, "step": 7085 }, { "epoch": 1.081085205078125e-05, "step": 7085, "training_step_time": 0.1143944263458252 }, { "epoch": 1.08123779296875e-05, "model_forward_time": 0.024402141571044922, "step": 7086 }, { "epoch": 1.08123779296875e-05, "step": 7086, "training_step_time": 0.13496732711791992 }, { "epoch": 1.081390380859375e-05, "model_forward_time": 0.025428295135498047, "step": 7087 }, { "epoch": 1.081390380859375e-05, "step": 7087, "training_step_time": 0.14142107963562012 }, { "epoch": 1.08154296875e-05, "model_forward_time": 0.02503490447998047, "step": 7088 }, { "epoch": 1.08154296875e-05, "step": 7088, "training_step_time": 0.1221470832824707 }, { "epoch": 1.081695556640625e-05, "model_forward_time": 0.02440476417541504, "step": 7089 }, { "epoch": 1.081695556640625e-05, "step": 7089, "training_step_time": 0.12145471572875977 }, { "epoch": 1.08184814453125e-05, "grad_norm": 0.45701801776885986, "learning_rate": 9.080422524811982e-05, "loss": 0.0807, "step": 7090 }, { "epoch": 1.08184814453125e-05, "model_forward_time": 0.025293827056884766, "step": 7090 }, { "epoch": 1.08184814453125e-05, "step": 7090, "training_step_time": 0.11421799659729004 }, { "epoch": 1.082000732421875e-05, "model_forward_time": 0.025104522705078125, "step": 7091 }, { "epoch": 1.082000732421875e-05, "step": 7091, "training_step_time": 0.14574623107910156 }, { "epoch": 1.0821533203125e-05, "model_forward_time": 0.024726390838623047, "step": 7092 }, { "epoch": 1.0821533203125e-05, "step": 7092, "training_step_time": 0.11138057708740234 }, { "epoch": 1.082305908203125e-05, "model_forward_time": 0.024866580963134766, "step": 7093 }, { "epoch": 1.082305908203125e-05, "step": 7093, "training_step_time": 0.1107630729675293 }, { "epoch": 1.08245849609375e-05, "model_forward_time": 0.02533745765686035, "step": 7094 }, { "epoch": 1.08245849609375e-05, "step": 7094, "training_step_time": 0.11240482330322266 }, { "epoch": 1.082611083984375e-05, "model_forward_time": 0.025426387786865234, "step": 7095 }, { "epoch": 1.082611083984375e-05, "step": 7095, "training_step_time": 0.10952520370483398 }, { "epoch": 1.082763671875e-05, "model_forward_time": 0.025590896606445312, "step": 7096 }, { "epoch": 1.082763671875e-05, "step": 7096, "training_step_time": 0.19913673400878906 }, { "epoch": 1.082916259765625e-05, "model_forward_time": 0.02445077896118164, "step": 7097 }, { "epoch": 1.082916259765625e-05, "step": 7097, "training_step_time": 0.10300159454345703 }, { "epoch": 1.08306884765625e-05, "model_forward_time": 0.024631977081298828, "step": 7098 }, { "epoch": 1.08306884765625e-05, "step": 7098, "training_step_time": 0.10685181617736816 }, { "epoch": 1.083221435546875e-05, "model_forward_time": 0.025111913681030273, "step": 7099 }, { "epoch": 1.083221435546875e-05, "step": 7099, "training_step_time": 0.10706782341003418 }, { "epoch": 1.0833740234375e-05, "grad_norm": 0.531728208065033, "learning_rate": 9.077234732750224e-05, "loss": 0.0637, "step": 7100 }, { "epoch": 1.0833740234375e-05, "model_forward_time": 0.025095224380493164, "step": 7100 }, { "epoch": 1.0833740234375e-05, "step": 7100, "training_step_time": 0.1746680736541748 }, { "epoch": 1.083526611328125e-05, "model_forward_time": 0.024361610412597656, "step": 7101 }, { "epoch": 1.083526611328125e-05, "step": 7101, "training_step_time": 0.19654178619384766 }, { "epoch": 1.08367919921875e-05, "model_forward_time": 0.024446487426757812, "step": 7102 }, { "epoch": 1.08367919921875e-05, "step": 7102, "training_step_time": 0.1746366024017334 }, { "epoch": 1.083831787109375e-05, "model_forward_time": 0.02370762825012207, "step": 7103 }, { "epoch": 1.083831787109375e-05, "step": 7103, "training_step_time": 0.1680285930633545 }, { "epoch": 1.083984375e-05, "model_forward_time": 0.024488449096679688, "step": 7104 }, { "epoch": 1.083984375e-05, "step": 7104, "training_step_time": 0.14678120613098145 }, { "epoch": 1.084136962890625e-05, "model_forward_time": 0.025556325912475586, "step": 7105 }, { "epoch": 1.084136962890625e-05, "step": 7105, "training_step_time": 0.10350632667541504 }, { "epoch": 1.08428955078125e-05, "model_forward_time": 0.02471780776977539, "step": 7106 }, { "epoch": 1.08428955078125e-05, "step": 7106, "training_step_time": 0.11081433296203613 }, { "epoch": 1.084442138671875e-05, "model_forward_time": 0.024974584579467773, "step": 7107 }, { "epoch": 1.084442138671875e-05, "step": 7107, "training_step_time": 0.10358548164367676 }, { "epoch": 1.0845947265625e-05, "model_forward_time": 0.02560710906982422, "step": 7108 }, { "epoch": 1.0845947265625e-05, "step": 7108, "training_step_time": 0.20617341995239258 }, { "epoch": 1.084747314453125e-05, "model_forward_time": 0.02498650550842285, "step": 7109 }, { "epoch": 1.084747314453125e-05, "step": 7109, "training_step_time": 0.1423053741455078 }, { "epoch": 1.08489990234375e-05, "grad_norm": 0.38871708512306213, "learning_rate": 9.074041986463808e-05, "loss": 0.0789, "step": 7110 }, { "epoch": 1.08489990234375e-05, "model_forward_time": 0.024988174438476562, "step": 7110 }, { "epoch": 1.08489990234375e-05, "step": 7110, "training_step_time": 0.20393824577331543 }, { "epoch": 1.085052490234375e-05, "model_forward_time": 0.024208545684814453, "step": 7111 }, { "epoch": 1.085052490234375e-05, "step": 7111, "training_step_time": 0.12516546249389648 }, { "epoch": 1.085205078125e-05, "model_forward_time": 0.024608373641967773, "step": 7112 }, { "epoch": 1.085205078125e-05, "step": 7112, "training_step_time": 0.11145186424255371 }, { "epoch": 1.085357666015625e-05, "model_forward_time": 0.025238752365112305, "step": 7113 }, { "epoch": 1.085357666015625e-05, "step": 7113, "training_step_time": 0.12151646614074707 }, { "epoch": 1.08551025390625e-05, "model_forward_time": 0.025341272354125977, "step": 7114 }, { "epoch": 1.08551025390625e-05, "step": 7114, "training_step_time": 0.1088559627532959 }, { "epoch": 1.085662841796875e-05, "model_forward_time": 0.02471137046813965, "step": 7115 }, { "epoch": 1.085662841796875e-05, "step": 7115, "training_step_time": 0.10973572731018066 }, { "epoch": 1.0858154296875e-05, "model_forward_time": 0.025571584701538086, "step": 7116 }, { "epoch": 1.0858154296875e-05, "step": 7116, "training_step_time": 0.10637497901916504 }, { "epoch": 1.085968017578125e-05, "model_forward_time": 0.024418115615844727, "step": 7117 }, { "epoch": 1.085968017578125e-05, "step": 7117, "training_step_time": 0.10831713676452637 }, { "epoch": 1.08612060546875e-05, "model_forward_time": 0.02518916130065918, "step": 7118 }, { "epoch": 1.08612060546875e-05, "step": 7118, "training_step_time": 0.10754871368408203 }, { "epoch": 1.086273193359375e-05, "model_forward_time": 0.02527475357055664, "step": 7119 }, { "epoch": 1.086273193359375e-05, "step": 7119, "training_step_time": 0.10670089721679688 }, { "epoch": 1.08642578125e-05, "grad_norm": 0.33641284704208374, "learning_rate": 9.070844289832224e-05, "loss": 0.0542, "step": 7120 }, { "epoch": 1.08642578125e-05, "model_forward_time": 0.024882793426513672, "step": 7120 }, { "epoch": 1.08642578125e-05, "step": 7120, "training_step_time": 0.10865974426269531 }, { "epoch": 1.086578369140625e-05, "model_forward_time": 0.02531719207763672, "step": 7121 }, { "epoch": 1.086578369140625e-05, "step": 7121, "training_step_time": 0.10607171058654785 }, { "epoch": 1.08673095703125e-05, "model_forward_time": 0.02509021759033203, "step": 7122 }, { "epoch": 1.08673095703125e-05, "step": 7122, "training_step_time": 0.10604357719421387 }, { "epoch": 1.086883544921875e-05, "model_forward_time": 0.028946399688720703, "step": 7123 }, { "epoch": 1.086883544921875e-05, "step": 7123, "training_step_time": 0.11289429664611816 }, { "epoch": 1.0870361328125e-05, "model_forward_time": 0.02551126480102539, "step": 7124 }, { "epoch": 1.0870361328125e-05, "step": 7124, "training_step_time": 0.11545443534851074 }, { "epoch": 1.087188720703125e-05, "model_forward_time": 0.024940013885498047, "step": 7125 }, { "epoch": 1.087188720703125e-05, "step": 7125, "training_step_time": 0.10780644416809082 }, { "epoch": 1.08734130859375e-05, "model_forward_time": 0.025427579879760742, "step": 7126 }, { "epoch": 1.08734130859375e-05, "step": 7126, "training_step_time": 0.1098470687866211 }, { "epoch": 1.087493896484375e-05, "model_forward_time": 0.025162458419799805, "step": 7127 }, { "epoch": 1.087493896484375e-05, "step": 7127, "training_step_time": 0.10780072212219238 }, { "epoch": 1.087646484375e-05, "model_forward_time": 0.025557994842529297, "step": 7128 }, { "epoch": 1.087646484375e-05, "step": 7128, "training_step_time": 0.15156078338623047 }, { "epoch": 1.087799072265625e-05, "model_forward_time": 0.0252382755279541, "step": 7129 }, { "epoch": 1.087799072265625e-05, "step": 7129, "training_step_time": 0.12155938148498535 }, { "epoch": 1.08795166015625e-05, "grad_norm": 0.6400363445281982, "learning_rate": 9.067641646740968e-05, "loss": 0.0668, "step": 7130 }, { "epoch": 1.08795166015625e-05, "model_forward_time": 0.02494978904724121, "step": 7130 }, { "epoch": 1.08795166015625e-05, "step": 7130, "training_step_time": 0.10848093032836914 }, { "epoch": 1.088104248046875e-05, "model_forward_time": 0.02555108070373535, "step": 7131 }, { "epoch": 1.088104248046875e-05, "step": 7131, "training_step_time": 0.11040353775024414 }, { "epoch": 1.0882568359375e-05, "model_forward_time": 0.024983882904052734, "step": 7132 }, { "epoch": 1.0882568359375e-05, "step": 7132, "training_step_time": 0.10930585861206055 }, { "epoch": 1.088409423828125e-05, "model_forward_time": 0.025233030319213867, "step": 7133 }, { "epoch": 1.088409423828125e-05, "step": 7133, "training_step_time": 0.12082839012145996 }, { "epoch": 1.08856201171875e-05, "model_forward_time": 0.02501654624938965, "step": 7134 }, { "epoch": 1.08856201171875e-05, "step": 7134, "training_step_time": 0.1089637279510498 }, { "epoch": 1.088714599609375e-05, "model_forward_time": 0.025309085845947266, "step": 7135 }, { "epoch": 1.088714599609375e-05, "step": 7135, "training_step_time": 0.11163830757141113 }, { "epoch": 1.0888671875e-05, "model_forward_time": 0.024944543838500977, "step": 7136 }, { "epoch": 1.0888671875e-05, "step": 7136, "training_step_time": 0.1242983341217041 }, { "epoch": 1.089019775390625e-05, "model_forward_time": 0.025161027908325195, "step": 7137 }, { "epoch": 1.089019775390625e-05, "step": 7137, "training_step_time": 0.10615706443786621 }, { "epoch": 1.08917236328125e-05, "model_forward_time": 0.025165319442749023, "step": 7138 }, { "epoch": 1.08917236328125e-05, "step": 7138, "training_step_time": 0.13146185874938965 }, { "epoch": 1.089324951171875e-05, "model_forward_time": 0.0262148380279541, "step": 7139 }, { "epoch": 1.089324951171875e-05, "step": 7139, "training_step_time": 0.11148381233215332 }, { "epoch": 1.0894775390625e-05, "grad_norm": 0.36455848813056946, "learning_rate": 9.064434061081562e-05, "loss": 0.089, "step": 7140 }, { "epoch": 1.0894775390625e-05, "model_forward_time": 0.025578737258911133, "step": 7140 }, { "epoch": 1.0894775390625e-05, "step": 7140, "training_step_time": 0.18718838691711426 }, { "epoch": 1.089630126953125e-05, "model_forward_time": 0.02448248863220215, "step": 7141 }, { "epoch": 1.089630126953125e-05, "step": 7141, "training_step_time": 0.11860108375549316 }, { "epoch": 1.08978271484375e-05, "model_forward_time": 0.02464127540588379, "step": 7142 }, { "epoch": 1.08978271484375e-05, "step": 7142, "training_step_time": 0.10653901100158691 }, { "epoch": 1.089935302734375e-05, "model_forward_time": 0.025600910186767578, "step": 7143 }, { "epoch": 1.089935302734375e-05, "step": 7143, "training_step_time": 0.10887455940246582 }, { "epoch": 1.090087890625e-05, "model_forward_time": 0.025521516799926758, "step": 7144 }, { "epoch": 1.090087890625e-05, "step": 7144, "training_step_time": 0.10848307609558105 }, { "epoch": 1.090240478515625e-05, "model_forward_time": 0.02540445327758789, "step": 7145 }, { "epoch": 1.090240478515625e-05, "step": 7145, "training_step_time": 0.1065664291381836 }, { "epoch": 1.09039306640625e-05, "model_forward_time": 0.02531719207763672, "step": 7146 }, { "epoch": 1.09039306640625e-05, "step": 7146, "training_step_time": 0.1082918643951416 }, { "epoch": 1.090545654296875e-05, "model_forward_time": 0.02496337890625, "step": 7147 }, { "epoch": 1.090545654296875e-05, "step": 7147, "training_step_time": 0.11219668388366699 }, { "epoch": 1.0906982421875e-05, "model_forward_time": 0.025003671646118164, "step": 7148 }, { "epoch": 1.0906982421875e-05, "step": 7148, "training_step_time": 0.10953950881958008 }, { "epoch": 1.090850830078125e-05, "model_forward_time": 0.025734424591064453, "step": 7149 }, { "epoch": 1.090850830078125e-05, "step": 7149, "training_step_time": 0.10737252235412598 }, { "epoch": 1.09100341796875e-05, "grad_norm": 0.3468288481235504, "learning_rate": 9.061221536751517e-05, "loss": 0.0823, "step": 7150 }, { "epoch": 1.09100341796875e-05, "model_forward_time": 0.02486252784729004, "step": 7150 }, { "epoch": 1.09100341796875e-05, "step": 7150, "training_step_time": 0.10548138618469238 }, { "epoch": 1.091156005859375e-05, "model_forward_time": 0.02509617805480957, "step": 7151 }, { "epoch": 1.091156005859375e-05, "step": 7151, "training_step_time": 0.1064445972442627 }, { "epoch": 1.09130859375e-05, "model_forward_time": 0.02535843849182129, "step": 7152 }, { "epoch": 1.09130859375e-05, "step": 7152, "training_step_time": 0.10901260375976562 }, { "epoch": 1.091461181640625e-05, "model_forward_time": 0.02548670768737793, "step": 7153 }, { "epoch": 1.091461181640625e-05, "step": 7153, "training_step_time": 0.1082603931427002 }, { "epoch": 1.09161376953125e-05, "model_forward_time": 0.025157928466796875, "step": 7154 }, { "epoch": 1.09161376953125e-05, "step": 7154, "training_step_time": 0.10576987266540527 }, { "epoch": 1.091766357421875e-05, "model_forward_time": 0.028718233108520508, "step": 7155 }, { "epoch": 1.091766357421875e-05, "step": 7155, "training_step_time": 0.11563873291015625 }, { "epoch": 1.0919189453125e-05, "model_forward_time": 0.025705575942993164, "step": 7156 }, { "epoch": 1.0919189453125e-05, "step": 7156, "training_step_time": 0.10599136352539062 }, { "epoch": 1.092071533203125e-05, "model_forward_time": 0.025582313537597656, "step": 7157 }, { "epoch": 1.092071533203125e-05, "step": 7157, "training_step_time": 0.12227606773376465 }, { "epoch": 1.09222412109375e-05, "model_forward_time": 0.02516913414001465, "step": 7158 }, { "epoch": 1.09222412109375e-05, "step": 7158, "training_step_time": 0.20772910118103027 }, { "epoch": 1.092376708984375e-05, "model_forward_time": 0.025328397750854492, "step": 7159 }, { "epoch": 1.092376708984375e-05, "step": 7159, "training_step_time": 0.12943530082702637 }, { "epoch": 1.092529296875e-05, "grad_norm": 0.4154020845890045, "learning_rate": 9.058004077654359e-05, "loss": 0.0731, "step": 7160 }, { "epoch": 1.092529296875e-05, "model_forward_time": 0.024350404739379883, "step": 7160 }, { "epoch": 1.092529296875e-05, "step": 7160, "training_step_time": 0.12670683860778809 }, { "epoch": 1.092681884765625e-05, "model_forward_time": 0.024982213973999023, "step": 7161 }, { "epoch": 1.092681884765625e-05, "step": 7161, "training_step_time": 0.1072075366973877 }, { "epoch": 1.09283447265625e-05, "model_forward_time": 0.026461362838745117, "step": 7162 }, { "epoch": 1.09283447265625e-05, "step": 7162, "training_step_time": 0.12483382225036621 }, { "epoch": 1.092987060546875e-05, "model_forward_time": 0.025067806243896484, "step": 7163 }, { "epoch": 1.092987060546875e-05, "step": 7163, "training_step_time": 0.10476303100585938 }, { "epoch": 1.0931396484375e-05, "model_forward_time": 0.025358200073242188, "step": 7164 }, { "epoch": 1.0931396484375e-05, "step": 7164, "training_step_time": 0.104888916015625 }, { "epoch": 1.093292236328125e-05, "model_forward_time": 0.02443671226501465, "step": 7165 }, { "epoch": 1.093292236328125e-05, "step": 7165, "training_step_time": 0.10396409034729004 }, { "epoch": 1.09344482421875e-05, "model_forward_time": 0.02460932731628418, "step": 7166 }, { "epoch": 1.09344482421875e-05, "step": 7166, "training_step_time": 0.10518002510070801 }, { "epoch": 1.093597412109375e-05, "model_forward_time": 0.025338411331176758, "step": 7167 }, { "epoch": 1.093597412109375e-05, "step": 7167, "training_step_time": 0.10565471649169922 }, { "epoch": 1.09375e-05, "model_forward_time": 0.025322914123535156, "step": 7168 }, { "epoch": 1.09375e-05, "step": 7168, "training_step_time": 0.10481095314025879 }, { "epoch": 1.093902587890625e-05, "model_forward_time": 0.0251772403717041, "step": 7169 }, { "epoch": 1.093902587890625e-05, "step": 7169, "training_step_time": 0.10966682434082031 }, { "epoch": 1.09405517578125e-05, "grad_norm": 0.46047261357307434, "learning_rate": 9.0547816876996e-05, "loss": 0.0658, "step": 7170 }, { "epoch": 1.09405517578125e-05, "model_forward_time": 0.024871110916137695, "step": 7170 }, { "epoch": 1.09405517578125e-05, "step": 7170, "training_step_time": 0.1083979606628418 }, { "epoch": 1.094207763671875e-05, "model_forward_time": 0.02695012092590332, "step": 7171 }, { "epoch": 1.094207763671875e-05, "step": 7171, "training_step_time": 0.11578702926635742 }, { "epoch": 1.0943603515625e-05, "model_forward_time": 0.02516651153564453, "step": 7172 }, { "epoch": 1.0943603515625e-05, "step": 7172, "training_step_time": 0.11121439933776855 }, { "epoch": 1.094512939453125e-05, "model_forward_time": 0.025144338607788086, "step": 7173 }, { "epoch": 1.094512939453125e-05, "step": 7173, "training_step_time": 0.13921475410461426 }, { "epoch": 1.09466552734375e-05, "model_forward_time": 0.024189233779907227, "step": 7174 }, { "epoch": 1.09466552734375e-05, "step": 7174, "training_step_time": 0.1520841121673584 }, { "epoch": 1.094818115234375e-05, "model_forward_time": 0.02350020408630371, "step": 7175 }, { "epoch": 1.094818115234375e-05, "step": 7175, "training_step_time": 0.13956379890441895 }, { "epoch": 1.094970703125e-05, "model_forward_time": 0.02353835105895996, "step": 7176 }, { "epoch": 1.094970703125e-05, "step": 7176, "training_step_time": 0.1563112735748291 }, { "epoch": 1.095123291015625e-05, "model_forward_time": 0.023143768310546875, "step": 7177 }, { "epoch": 1.095123291015625e-05, "step": 7177, "training_step_time": 0.18925261497497559 }, { "epoch": 1.09527587890625e-05, "model_forward_time": 0.024778366088867188, "step": 7178 }, { "epoch": 1.09527587890625e-05, "step": 7178, "training_step_time": 0.12831521034240723 }, { "epoch": 1.095428466796875e-05, "model_forward_time": 0.024120569229125977, "step": 7179 }, { "epoch": 1.095428466796875e-05, "step": 7179, "training_step_time": 0.21085309982299805 }, { "epoch": 1.0955810546875e-05, "grad_norm": 0.38073450326919556, "learning_rate": 9.05155437080275e-05, "loss": 0.0638, "step": 7180 }, { "epoch": 1.0955810546875e-05, "model_forward_time": 0.024722814559936523, "step": 7180 }, { "epoch": 1.0955810546875e-05, "step": 7180, "training_step_time": 0.16522479057312012 }, { "epoch": 1.095733642578125e-05, "model_forward_time": 0.024158954620361328, "step": 7181 }, { "epoch": 1.095733642578125e-05, "step": 7181, "training_step_time": 0.22240757942199707 }, { "epoch": 1.09588623046875e-05, "model_forward_time": 0.02434706687927246, "step": 7182 }, { "epoch": 1.09588623046875e-05, "step": 7182, "training_step_time": 0.10975885391235352 }, { "epoch": 1.096038818359375e-05, "model_forward_time": 0.02623581886291504, "step": 7183 }, { "epoch": 1.096038818359375e-05, "step": 7183, "training_step_time": 0.11231613159179688 }, { "epoch": 1.09619140625e-05, "model_forward_time": 0.02514505386352539, "step": 7184 }, { "epoch": 1.09619140625e-05, "step": 7184, "training_step_time": 0.11184501647949219 }, { "epoch": 1.096343994140625e-05, "model_forward_time": 0.02528095245361328, "step": 7185 }, { "epoch": 1.096343994140625e-05, "step": 7185, "training_step_time": 0.11185789108276367 }, { "epoch": 1.09649658203125e-05, "model_forward_time": 0.025191068649291992, "step": 7186 }, { "epoch": 1.09649658203125e-05, "step": 7186, "training_step_time": 0.19948053359985352 }, { "epoch": 1.096649169921875e-05, "model_forward_time": 0.024756193161010742, "step": 7187 }, { "epoch": 1.096649169921875e-05, "step": 7187, "training_step_time": 0.10447001457214355 }, { "epoch": 1.0968017578125e-05, "model_forward_time": 0.02440953254699707, "step": 7188 }, { "epoch": 1.0968017578125e-05, "step": 7188, "training_step_time": 0.10707473754882812 }, { "epoch": 1.096954345703125e-05, "model_forward_time": 0.025114059448242188, "step": 7189 }, { "epoch": 1.096954345703125e-05, "step": 7189, "training_step_time": 0.11580729484558105 }, { "epoch": 1.09710693359375e-05, "grad_norm": 0.34728512167930603, "learning_rate": 9.048322130885305e-05, "loss": 0.0823, "step": 7190 }, { "epoch": 1.09710693359375e-05, "model_forward_time": 0.02399611473083496, "step": 7190 }, { "epoch": 1.09710693359375e-05, "step": 7190, "training_step_time": 0.10837864875793457 }, { "epoch": 1.097259521484375e-05, "model_forward_time": 0.024109601974487305, "step": 7191 }, { "epoch": 1.097259521484375e-05, "step": 7191, "training_step_time": 0.116119384765625 }, { "epoch": 1.097412109375e-05, "model_forward_time": 0.024892568588256836, "step": 7192 }, { "epoch": 1.097412109375e-05, "step": 7192, "training_step_time": 0.1058342456817627 }, { "epoch": 1.097564697265625e-05, "model_forward_time": 0.024745702743530273, "step": 7193 }, { "epoch": 1.097564697265625e-05, "step": 7193, "training_step_time": 0.1080021858215332 }, { "epoch": 1.09771728515625e-05, "model_forward_time": 0.025022506713867188, "step": 7194 }, { "epoch": 1.09771728515625e-05, "step": 7194, "training_step_time": 0.11544251441955566 }, { "epoch": 1.097869873046875e-05, "model_forward_time": 0.0252535343170166, "step": 7195 }, { "epoch": 1.097869873046875e-05, "step": 7195, "training_step_time": 0.1085367202758789 }, { "epoch": 1.0980224609375e-05, "model_forward_time": 0.025187969207763672, "step": 7196 }, { "epoch": 1.0980224609375e-05, "step": 7196, "training_step_time": 0.10837101936340332 }, { "epoch": 1.098175048828125e-05, "model_forward_time": 0.025034666061401367, "step": 7197 }, { "epoch": 1.098175048828125e-05, "step": 7197, "training_step_time": 0.10655403137207031 }, { "epoch": 1.09832763671875e-05, "model_forward_time": 0.02567267417907715, "step": 7198 }, { "epoch": 1.09832763671875e-05, "step": 7198, "training_step_time": 0.10844230651855469 }, { "epoch": 1.098480224609375e-05, "model_forward_time": 0.028069257736206055, "step": 7199 }, { "epoch": 1.098480224609375e-05, "step": 7199, "training_step_time": 0.1746354103088379 }, { "epoch": 1.0986328125e-05, "grad_norm": 0.4247555732727051, "learning_rate": 9.045084971874738e-05, "loss": 0.0616, "step": 7200 }, { "epoch": 1.0986328125e-05, "model_forward_time": 0.024900436401367188, "step": 7200 }, { "epoch": 1.0986328125e-05, "step": 7200, "training_step_time": 0.1531538963317871 }, { "epoch": 1.098785400390625e-05, "model_forward_time": 0.024562835693359375, "step": 7201 }, { "epoch": 1.098785400390625e-05, "step": 7201, "training_step_time": 0.16017651557922363 }, { "epoch": 1.09893798828125e-05, "model_forward_time": 0.024556398391723633, "step": 7202 }, { "epoch": 1.09893798828125e-05, "step": 7202, "training_step_time": 0.17667675018310547 }, { "epoch": 1.099090576171875e-05, "model_forward_time": 0.02463364601135254, "step": 7203 }, { "epoch": 1.099090576171875e-05, "step": 7203, "training_step_time": 0.1712172031402588 }, { "epoch": 1.0992431640625e-05, "model_forward_time": 0.025115966796875, "step": 7204 }, { "epoch": 1.0992431640625e-05, "step": 7204, "training_step_time": 0.1172933578491211 }, { "epoch": 1.099395751953125e-05, "model_forward_time": 0.02686285972595215, "step": 7205 }, { "epoch": 1.099395751953125e-05, "step": 7205, "training_step_time": 0.11657524108886719 }, { "epoch": 1.09954833984375e-05, "model_forward_time": 0.025010108947753906, "step": 7206 }, { "epoch": 1.09954833984375e-05, "step": 7206, "training_step_time": 0.11490726470947266 }, { "epoch": 1.099700927734375e-05, "model_forward_time": 0.025209665298461914, "step": 7207 }, { "epoch": 1.099700927734375e-05, "step": 7207, "training_step_time": 0.1080636978149414 }, { "epoch": 1.099853515625e-05, "model_forward_time": 0.025360584259033203, "step": 7208 }, { "epoch": 1.099853515625e-05, "step": 7208, "training_step_time": 0.10703182220458984 }, { "epoch": 1.100006103515625e-05, "model_forward_time": 0.025326251983642578, "step": 7209 }, { "epoch": 1.100006103515625e-05, "step": 7209, "training_step_time": 0.10778617858886719 }, { "epoch": 1.10015869140625e-05, "grad_norm": 0.3541938364505768, "learning_rate": 9.041842897704502e-05, "loss": 0.0654, "step": 7210 }, { "epoch": 1.10015869140625e-05, "model_forward_time": 0.024917125701904297, "step": 7210 }, { "epoch": 1.10015869140625e-05, "step": 7210, "training_step_time": 0.11451578140258789 }, { "epoch": 1.100311279296875e-05, "model_forward_time": 0.025231122970581055, "step": 7211 }, { "epoch": 1.100311279296875e-05, "step": 7211, "training_step_time": 0.17651700973510742 }, { "epoch": 1.1004638671875e-05, "model_forward_time": 0.024654865264892578, "step": 7212 }, { "epoch": 1.1004638671875e-05, "step": 7212, "training_step_time": 0.1771717071533203 }, { "epoch": 1.100616455078125e-05, "model_forward_time": 0.02397465705871582, "step": 7213 }, { "epoch": 1.100616455078125e-05, "step": 7213, "training_step_time": 0.18460941314697266 }, { "epoch": 1.10076904296875e-05, "model_forward_time": 0.024483680725097656, "step": 7214 }, { "epoch": 1.10076904296875e-05, "step": 7214, "training_step_time": 0.18198108673095703 }, { "epoch": 1.100921630859375e-05, "model_forward_time": 0.023537635803222656, "step": 7215 }, { "epoch": 1.100921630859375e-05, "step": 7215, "training_step_time": 0.15772032737731934 }, { "epoch": 1.10107421875e-05, "model_forward_time": 0.02441263198852539, "step": 7216 }, { "epoch": 1.10107421875e-05, "step": 7216, "training_step_time": 0.14050745964050293 }, { "epoch": 1.101226806640625e-05, "model_forward_time": 0.024580001831054688, "step": 7217 }, { "epoch": 1.101226806640625e-05, "step": 7217, "training_step_time": 0.14451980590820312 }, { "epoch": 1.10137939453125e-05, "model_forward_time": 0.024553537368774414, "step": 7218 }, { "epoch": 1.10137939453125e-05, "step": 7218, "training_step_time": 0.12962627410888672 }, { "epoch": 1.101531982421875e-05, "model_forward_time": 0.024059295654296875, "step": 7219 }, { "epoch": 1.101531982421875e-05, "step": 7219, "training_step_time": 0.18365931510925293 }, { "epoch": 1.1016845703125e-05, "grad_norm": 0.3829857110977173, "learning_rate": 9.038595912314027e-05, "loss": 0.0736, "step": 7220 }, { "epoch": 1.1016845703125e-05, "model_forward_time": 0.024135589599609375, "step": 7220 }, { "epoch": 1.1016845703125e-05, "step": 7220, "training_step_time": 0.11563467979431152 }, { "epoch": 1.101837158203125e-05, "model_forward_time": 0.024232864379882812, "step": 7221 }, { "epoch": 1.101837158203125e-05, "step": 7221, "training_step_time": 0.11539173126220703 }, { "epoch": 1.10198974609375e-05, "model_forward_time": 0.02488255500793457, "step": 7222 }, { "epoch": 1.10198974609375e-05, "step": 7222, "training_step_time": 0.18532681465148926 }, { "epoch": 1.102142333984375e-05, "model_forward_time": 0.024862289428710938, "step": 7223 }, { "epoch": 1.102142333984375e-05, "step": 7223, "training_step_time": 0.11067676544189453 }, { "epoch": 1.102294921875e-05, "model_forward_time": 0.024369001388549805, "step": 7224 }, { "epoch": 1.102294921875e-05, "step": 7224, "training_step_time": 0.12307024002075195 }, { "epoch": 1.102447509765625e-05, "model_forward_time": 0.024908781051635742, "step": 7225 }, { "epoch": 1.102447509765625e-05, "step": 7225, "training_step_time": 0.11423397064208984 }, { "epoch": 1.10260009765625e-05, "model_forward_time": 0.025310516357421875, "step": 7226 }, { "epoch": 1.10260009765625e-05, "step": 7226, "training_step_time": 0.12051892280578613 }, { "epoch": 1.102752685546875e-05, "model_forward_time": 0.025153398513793945, "step": 7227 }, { "epoch": 1.102752685546875e-05, "step": 7227, "training_step_time": 0.12036991119384766 }, { "epoch": 1.1029052734375e-05, "model_forward_time": 0.025138139724731445, "step": 7228 }, { "epoch": 1.1029052734375e-05, "step": 7228, "training_step_time": 0.10861778259277344 }, { "epoch": 1.103057861328125e-05, "model_forward_time": 0.025153398513793945, "step": 7229 }, { "epoch": 1.103057861328125e-05, "step": 7229, "training_step_time": 0.1465158462524414 }, { "epoch": 1.10321044921875e-05, "grad_norm": 0.3377252221107483, "learning_rate": 9.035344019648702e-05, "loss": 0.0636, "step": 7230 }, { "epoch": 1.10321044921875e-05, "model_forward_time": 0.024282217025756836, "step": 7230 }, { "epoch": 1.10321044921875e-05, "step": 7230, "training_step_time": 0.11136412620544434 }, { "epoch": 1.103363037109375e-05, "model_forward_time": 0.02481389045715332, "step": 7231 }, { "epoch": 1.103363037109375e-05, "step": 7231, "training_step_time": 0.10992431640625 }, { "epoch": 1.103515625e-05, "model_forward_time": 0.02512955665588379, "step": 7232 }, { "epoch": 1.103515625e-05, "step": 7232, "training_step_time": 0.1082766056060791 }, { "epoch": 1.103668212890625e-05, "model_forward_time": 0.025579452514648438, "step": 7233 }, { "epoch": 1.103668212890625e-05, "step": 7233, "training_step_time": 0.10912609100341797 }, { "epoch": 1.10382080078125e-05, "model_forward_time": 0.024631023406982422, "step": 7234 }, { "epoch": 1.10382080078125e-05, "step": 7234, "training_step_time": 0.10748863220214844 }, { "epoch": 1.103973388671875e-05, "model_forward_time": 0.025499343872070312, "step": 7235 }, { "epoch": 1.103973388671875e-05, "step": 7235, "training_step_time": 0.10778069496154785 }, { "epoch": 1.1041259765625e-05, "model_forward_time": 0.025264739990234375, "step": 7236 }, { "epoch": 1.1041259765625e-05, "step": 7236, "training_step_time": 0.10705137252807617 }, { "epoch": 1.104278564453125e-05, "model_forward_time": 0.025964975357055664, "step": 7237 }, { "epoch": 1.104278564453125e-05, "step": 7237, "training_step_time": 0.10864996910095215 }, { "epoch": 1.10443115234375e-05, "model_forward_time": 0.02418231964111328, "step": 7238 }, { "epoch": 1.10443115234375e-05, "step": 7238, "training_step_time": 0.10707879066467285 }, { "epoch": 1.104583740234375e-05, "model_forward_time": 0.025104522705078125, "step": 7239 }, { "epoch": 1.104583740234375e-05, "step": 7239, "training_step_time": 0.1099095344543457 }, { "epoch": 1.104736328125e-05, "grad_norm": 0.3963189423084259, "learning_rate": 9.032087223659885e-05, "loss": 0.0695, "step": 7240 }, { "epoch": 1.104736328125e-05, "model_forward_time": 0.025391101837158203, "step": 7240 }, { "epoch": 1.104736328125e-05, "step": 7240, "training_step_time": 0.10815811157226562 }, { "epoch": 1.104888916015625e-05, "model_forward_time": 0.02583026885986328, "step": 7241 }, { "epoch": 1.104888916015625e-05, "step": 7241, "training_step_time": 0.10962152481079102 }, { "epoch": 1.10504150390625e-05, "model_forward_time": 0.025299787521362305, "step": 7242 }, { "epoch": 1.10504150390625e-05, "step": 7242, "training_step_time": 0.17074108123779297 }, { "epoch": 1.105194091796875e-05, "model_forward_time": 0.02467966079711914, "step": 7243 }, { "epoch": 1.105194091796875e-05, "step": 7243, "training_step_time": 0.16531896591186523 }, { "epoch": 1.1053466796875e-05, "model_forward_time": 0.024398326873779297, "step": 7244 }, { "epoch": 1.1053466796875e-05, "step": 7244, "training_step_time": 0.1087045669555664 }, { "epoch": 1.105499267578125e-05, "model_forward_time": 0.02461719512939453, "step": 7245 }, { "epoch": 1.105499267578125e-05, "step": 7245, "training_step_time": 0.1721200942993164 }, { "epoch": 1.10565185546875e-05, "model_forward_time": 0.024678468704223633, "step": 7246 }, { "epoch": 1.10565185546875e-05, "step": 7246, "training_step_time": 0.18065905570983887 }, { "epoch": 1.105804443359375e-05, "model_forward_time": 0.024240493774414062, "step": 7247 }, { "epoch": 1.105804443359375e-05, "step": 7247, "training_step_time": 0.14234304428100586 }, { "epoch": 1.10595703125e-05, "model_forward_time": 0.024916648864746094, "step": 7248 }, { "epoch": 1.10595703125e-05, "step": 7248, "training_step_time": 0.11143255233764648 }, { "epoch": 1.106109619140625e-05, "model_forward_time": 0.025053024291992188, "step": 7249 }, { "epoch": 1.106109619140625e-05, "step": 7249, "training_step_time": 0.11568427085876465 }, { "epoch": 1.10626220703125e-05, "grad_norm": 0.4576609134674072, "learning_rate": 9.028825528304892e-05, "loss": 0.0818, "step": 7250 }, { "epoch": 1.10626220703125e-05, "model_forward_time": 0.024919748306274414, "step": 7250 }, { "epoch": 1.10626220703125e-05, "step": 7250, "training_step_time": 0.11223173141479492 }, { "epoch": 1.106414794921875e-05, "model_forward_time": 0.02493453025817871, "step": 7251 }, { "epoch": 1.106414794921875e-05, "step": 7251, "training_step_time": 0.10937166213989258 }, { "epoch": 1.1065673828125e-05, "model_forward_time": 0.024809837341308594, "step": 7252 }, { "epoch": 1.1065673828125e-05, "step": 7252, "training_step_time": 0.10766124725341797 }, { "epoch": 1.106719970703125e-05, "model_forward_time": 0.025313138961791992, "step": 7253 }, { "epoch": 1.106719970703125e-05, "step": 7253, "training_step_time": 0.10795712471008301 }, { "epoch": 1.10687255859375e-05, "model_forward_time": 0.025242328643798828, "step": 7254 }, { "epoch": 1.10687255859375e-05, "step": 7254, "training_step_time": 0.10759353637695312 }, { "epoch": 1.107025146484375e-05, "model_forward_time": 0.02611231803894043, "step": 7255 }, { "epoch": 1.107025146484375e-05, "step": 7255, "training_step_time": 0.10763049125671387 }, { "epoch": 1.107177734375e-05, "model_forward_time": 0.02528858184814453, "step": 7256 }, { "epoch": 1.107177734375e-05, "step": 7256, "training_step_time": 0.1089181900024414 }, { "epoch": 1.107330322265625e-05, "model_forward_time": 0.025150537490844727, "step": 7257 }, { "epoch": 1.107330322265625e-05, "step": 7257, "training_step_time": 0.1080312728881836 }, { "epoch": 1.10748291015625e-05, "model_forward_time": 0.025082826614379883, "step": 7258 }, { "epoch": 1.10748291015625e-05, "step": 7258, "training_step_time": 0.10687136650085449 }, { "epoch": 1.107635498046875e-05, "model_forward_time": 0.0253293514251709, "step": 7259 }, { "epoch": 1.107635498046875e-05, "step": 7259, "training_step_time": 0.10645031929016113 }, { "epoch": 1.1077880859375e-05, "grad_norm": 0.47237181663513184, "learning_rate": 9.025558937546988e-05, "loss": 0.0551, "step": 7260 }, { "epoch": 1.1077880859375e-05, "model_forward_time": 0.0248565673828125, "step": 7260 }, { "epoch": 1.1077880859375e-05, "step": 7260, "training_step_time": 0.10957765579223633 }, { "epoch": 1.107940673828125e-05, "model_forward_time": 0.024677753448486328, "step": 7261 }, { "epoch": 1.107940673828125e-05, "step": 7261, "training_step_time": 0.10764074325561523 }, { "epoch": 1.10809326171875e-05, "model_forward_time": 0.025378704071044922, "step": 7262 }, { "epoch": 1.10809326171875e-05, "step": 7262, "training_step_time": 0.11346673965454102 }, { "epoch": 1.108245849609375e-05, "model_forward_time": 0.024579286575317383, "step": 7263 }, { "epoch": 1.108245849609375e-05, "step": 7263, "training_step_time": 0.16894245147705078 }, { "epoch": 1.1083984375e-05, "model_forward_time": 0.02456212043762207, "step": 7264 }, { "epoch": 1.1083984375e-05, "step": 7264, "training_step_time": 0.10973334312438965 }, { "epoch": 1.108551025390625e-05, "model_forward_time": 0.024558544158935547, "step": 7265 }, { "epoch": 1.108551025390625e-05, "step": 7265, "training_step_time": 0.21234774589538574 }, { "epoch": 1.10870361328125e-05, "model_forward_time": 0.023995637893676758, "step": 7266 }, { "epoch": 1.10870361328125e-05, "step": 7266, "training_step_time": 0.1076805591583252 }, { "epoch": 1.108856201171875e-05, "model_forward_time": 0.024391651153564453, "step": 7267 }, { "epoch": 1.108856201171875e-05, "step": 7267, "training_step_time": 0.11236882209777832 }, { "epoch": 1.1090087890625e-05, "model_forward_time": 0.02426934242248535, "step": 7268 }, { "epoch": 1.1090087890625e-05, "step": 7268, "training_step_time": 0.20067310333251953 }, { "epoch": 1.109161376953125e-05, "model_forward_time": 0.0243222713470459, "step": 7269 }, { "epoch": 1.109161376953125e-05, "step": 7269, "training_step_time": 0.20014715194702148 }, { "epoch": 1.10931396484375e-05, "grad_norm": 0.25583454966545105, "learning_rate": 9.022287455355387e-05, "loss": 0.053, "step": 7270 }, { "epoch": 1.10931396484375e-05, "model_forward_time": 0.025159358978271484, "step": 7270 }, { "epoch": 1.10931396484375e-05, "step": 7270, "training_step_time": 0.10805583000183105 }, { "epoch": 1.109466552734375e-05, "model_forward_time": 0.024270057678222656, "step": 7271 }, { "epoch": 1.109466552734375e-05, "step": 7271, "training_step_time": 0.10420513153076172 }, { "epoch": 1.109619140625e-05, "model_forward_time": 0.02562427520751953, "step": 7272 }, { "epoch": 1.109619140625e-05, "step": 7272, "training_step_time": 0.10866403579711914 }, { "epoch": 1.109771728515625e-05, "model_forward_time": 0.025391817092895508, "step": 7273 }, { "epoch": 1.109771728515625e-05, "step": 7273, "training_step_time": 0.1334521770477295 }, { "epoch": 1.10992431640625e-05, "model_forward_time": 0.025745153427124023, "step": 7274 }, { "epoch": 1.10992431640625e-05, "step": 7274, "training_step_time": 0.10922694206237793 }, { "epoch": 1.110076904296875e-05, "model_forward_time": 0.025252103805541992, "step": 7275 }, { "epoch": 1.110076904296875e-05, "step": 7275, "training_step_time": 0.10742950439453125 }, { "epoch": 1.1102294921875e-05, "model_forward_time": 0.024979829788208008, "step": 7276 }, { "epoch": 1.1102294921875e-05, "step": 7276, "training_step_time": 0.11502671241760254 }, { "epoch": 1.110382080078125e-05, "model_forward_time": 0.025252819061279297, "step": 7277 }, { "epoch": 1.110382080078125e-05, "step": 7277, "training_step_time": 0.10231256484985352 }, { "epoch": 1.11053466796875e-05, "model_forward_time": 0.02502894401550293, "step": 7278 }, { "epoch": 1.11053466796875e-05, "step": 7278, "training_step_time": 0.10949850082397461 }, { "epoch": 1.110687255859375e-05, "model_forward_time": 0.02550649642944336, "step": 7279 }, { "epoch": 1.110687255859375e-05, "step": 7279, "training_step_time": 0.10681366920471191 }, { "epoch": 1.11083984375e-05, "grad_norm": 0.5158094763755798, "learning_rate": 9.019011085705253e-05, "loss": 0.0631, "step": 7280 }, { "epoch": 1.11083984375e-05, "model_forward_time": 0.025570392608642578, "step": 7280 }, { "epoch": 1.11083984375e-05, "step": 7280, "training_step_time": 0.10640549659729004 }, { "epoch": 1.110992431640625e-05, "model_forward_time": 0.025530338287353516, "step": 7281 }, { "epoch": 1.110992431640625e-05, "step": 7281, "training_step_time": 0.10541176795959473 }, { "epoch": 1.11114501953125e-05, "model_forward_time": 0.025226116180419922, "step": 7282 }, { "epoch": 1.11114501953125e-05, "step": 7282, "training_step_time": 0.10879158973693848 }, { "epoch": 1.111297607421875e-05, "model_forward_time": 0.025523662567138672, "step": 7283 }, { "epoch": 1.111297607421875e-05, "step": 7283, "training_step_time": 0.10608220100402832 }, { "epoch": 1.1114501953125e-05, "model_forward_time": 0.025322914123535156, "step": 7284 }, { "epoch": 1.1114501953125e-05, "step": 7284, "training_step_time": 0.10627937316894531 }, { "epoch": 1.111602783203125e-05, "model_forward_time": 0.025860071182250977, "step": 7285 }, { "epoch": 1.111602783203125e-05, "step": 7285, "training_step_time": 0.10731673240661621 }, { "epoch": 1.11175537109375e-05, "model_forward_time": 0.025129079818725586, "step": 7286 }, { "epoch": 1.11175537109375e-05, "step": 7286, "training_step_time": 0.10641765594482422 }, { "epoch": 1.111907958984375e-05, "model_forward_time": 0.02591252326965332, "step": 7287 }, { "epoch": 1.111907958984375e-05, "step": 7287, "training_step_time": 0.1089174747467041 }, { "epoch": 1.112060546875e-05, "model_forward_time": 0.025355100631713867, "step": 7288 }, { "epoch": 1.112060546875e-05, "step": 7288, "training_step_time": 0.10734844207763672 }, { "epoch": 1.112213134765625e-05, "model_forward_time": 0.02515864372253418, "step": 7289 }, { "epoch": 1.112213134765625e-05, "step": 7289, "training_step_time": 0.11225128173828125 }, { "epoch": 1.11236572265625e-05, "grad_norm": 0.49643757939338684, "learning_rate": 9.015729832577681e-05, "loss": 0.0804, "step": 7290 }, { "epoch": 1.11236572265625e-05, "model_forward_time": 0.025411367416381836, "step": 7290 }, { "epoch": 1.11236572265625e-05, "step": 7290, "training_step_time": 0.10703182220458984 }, { "epoch": 1.112518310546875e-05, "model_forward_time": 0.0251920223236084, "step": 7291 }, { "epoch": 1.112518310546875e-05, "step": 7291, "training_step_time": 0.10680437088012695 }, { "epoch": 1.1126708984375e-05, "model_forward_time": 0.025351524353027344, "step": 7292 }, { "epoch": 1.1126708984375e-05, "step": 7292, "training_step_time": 0.17006254196166992 }, { "epoch": 1.112823486328125e-05, "model_forward_time": 0.024634599685668945, "step": 7293 }, { "epoch": 1.112823486328125e-05, "step": 7293, "training_step_time": 0.16485595703125 }, { "epoch": 1.11297607421875e-05, "model_forward_time": 0.025214672088623047, "step": 7294 }, { "epoch": 1.11297607421875e-05, "step": 7294, "training_step_time": 0.18205595016479492 }, { "epoch": 1.113128662109375e-05, "model_forward_time": 0.024909019470214844, "step": 7295 }, { "epoch": 1.113128662109375e-05, "step": 7295, "training_step_time": 0.13115262985229492 }, { "epoch": 1.11328125e-05, "model_forward_time": 0.024552106857299805, "step": 7296 }, { "epoch": 1.11328125e-05, "step": 7296, "training_step_time": 0.11386394500732422 }, { "epoch": 1.113433837890625e-05, "model_forward_time": 0.025427579879760742, "step": 7297 }, { "epoch": 1.113433837890625e-05, "step": 7297, "training_step_time": 0.12169647216796875 }, { "epoch": 1.11358642578125e-05, "model_forward_time": 0.025649547576904297, "step": 7298 }, { "epoch": 1.11358642578125e-05, "step": 7298, "training_step_time": 0.1115255355834961 }, { "epoch": 1.113739013671875e-05, "model_forward_time": 0.02454400062561035, "step": 7299 }, { "epoch": 1.113739013671875e-05, "step": 7299, "training_step_time": 0.11632871627807617 }, { "epoch": 1.1138916015625e-05, "grad_norm": 0.6585777997970581, "learning_rate": 9.012443699959705e-05, "loss": 0.0746, "step": 7300 }, { "epoch": 1.1138916015625e-05, "model_forward_time": 0.024686098098754883, "step": 7300 }, { "epoch": 1.1138916015625e-05, "step": 7300, "training_step_time": 0.10910964012145996 }, { "epoch": 1.114044189453125e-05, "model_forward_time": 0.024530887603759766, "step": 7301 }, { "epoch": 1.114044189453125e-05, "step": 7301, "training_step_time": 0.11162185668945312 }, { "epoch": 1.11419677734375e-05, "model_forward_time": 0.02512073516845703, "step": 7302 }, { "epoch": 1.11419677734375e-05, "step": 7302, "training_step_time": 0.10822820663452148 }, { "epoch": 1.114349365234375e-05, "model_forward_time": 0.025602102279663086, "step": 7303 }, { "epoch": 1.114349365234375e-05, "step": 7303, "training_step_time": 0.10930347442626953 }, { "epoch": 1.114501953125e-05, "model_forward_time": 0.025483369827270508, "step": 7304 }, { "epoch": 1.114501953125e-05, "step": 7304, "training_step_time": 0.10873675346374512 }, { "epoch": 1.114654541015625e-05, "model_forward_time": 0.025824785232543945, "step": 7305 }, { "epoch": 1.114654541015625e-05, "step": 7305, "training_step_time": 0.10851573944091797 }, { "epoch": 1.11480712890625e-05, "model_forward_time": 0.025628089904785156, "step": 7306 }, { "epoch": 1.11480712890625e-05, "step": 7306, "training_step_time": 0.10759186744689941 }, { "epoch": 1.114959716796875e-05, "model_forward_time": 0.02558445930480957, "step": 7307 }, { "epoch": 1.114959716796875e-05, "step": 7307, "training_step_time": 0.1086263656616211 }, { "epoch": 1.1151123046875e-05, "model_forward_time": 0.025326013565063477, "step": 7308 }, { "epoch": 1.1151123046875e-05, "step": 7308, "training_step_time": 0.1112678050994873 }, { "epoch": 1.115264892578125e-05, "model_forward_time": 0.025253772735595703, "step": 7309 }, { "epoch": 1.115264892578125e-05, "step": 7309, "training_step_time": 0.11048650741577148 }, { "epoch": 1.11541748046875e-05, "grad_norm": 0.5398983359336853, "learning_rate": 9.009152691844285e-05, "loss": 0.0816, "step": 7310 }, { "epoch": 1.11541748046875e-05, "model_forward_time": 0.02472066879272461, "step": 7310 }, { "epoch": 1.11541748046875e-05, "step": 7310, "training_step_time": 0.19739842414855957 }, { "epoch": 1.115570068359375e-05, "model_forward_time": 0.024675369262695312, "step": 7311 }, { "epoch": 1.115570068359375e-05, "step": 7311, "training_step_time": 0.20471549034118652 }, { "epoch": 1.11572265625e-05, "model_forward_time": 0.02423691749572754, "step": 7312 }, { "epoch": 1.11572265625e-05, "step": 7312, "training_step_time": 0.10540318489074707 }, { "epoch": 1.115875244140625e-05, "model_forward_time": 0.025855302810668945, "step": 7313 }, { "epoch": 1.115875244140625e-05, "step": 7313, "training_step_time": 0.12966465950012207 }, { "epoch": 1.11602783203125e-05, "model_forward_time": 0.025696516036987305, "step": 7314 }, { "epoch": 1.11602783203125e-05, "step": 7314, "training_step_time": 0.19605040550231934 }, { "epoch": 1.116180419921875e-05, "model_forward_time": 0.024641990661621094, "step": 7315 }, { "epoch": 1.116180419921875e-05, "step": 7315, "training_step_time": 0.10182046890258789 }, { "epoch": 1.1163330078125e-05, "model_forward_time": 0.024964094161987305, "step": 7316 }, { "epoch": 1.1163330078125e-05, "step": 7316, "training_step_time": 0.10999584197998047 }, { "epoch": 1.116485595703125e-05, "model_forward_time": 0.02523064613342285, "step": 7317 }, { "epoch": 1.116485595703125e-05, "step": 7317, "training_step_time": 0.1361536979675293 }, { "epoch": 1.11663818359375e-05, "model_forward_time": 0.025173664093017578, "step": 7318 }, { "epoch": 1.11663818359375e-05, "step": 7318, "training_step_time": 0.1111748218536377 }, { "epoch": 1.116790771484375e-05, "model_forward_time": 0.025420427322387695, "step": 7319 }, { "epoch": 1.116790771484375e-05, "step": 7319, "training_step_time": 0.11510467529296875 }, { "epoch": 1.116943359375e-05, "grad_norm": 0.48828309774398804, "learning_rate": 9.005856812230304e-05, "loss": 0.0583, "step": 7320 }, { "epoch": 1.116943359375e-05, "model_forward_time": 0.025263309478759766, "step": 7320 }, { "epoch": 1.116943359375e-05, "step": 7320, "training_step_time": 0.11714482307434082 }, { "epoch": 1.117095947265625e-05, "model_forward_time": 0.025460481643676758, "step": 7321 }, { "epoch": 1.117095947265625e-05, "step": 7321, "training_step_time": 0.1083533763885498 }, { "epoch": 1.11724853515625e-05, "model_forward_time": 0.02569723129272461, "step": 7322 }, { "epoch": 1.11724853515625e-05, "step": 7322, "training_step_time": 0.19159507751464844 }, { "epoch": 1.117401123046875e-05, "model_forward_time": 0.024570226669311523, "step": 7323 }, { "epoch": 1.117401123046875e-05, "step": 7323, "training_step_time": 0.10987544059753418 }, { "epoch": 1.1175537109375e-05, "model_forward_time": 0.02426934242248535, "step": 7324 }, { "epoch": 1.1175537109375e-05, "step": 7324, "training_step_time": 0.10293054580688477 }, { "epoch": 1.117706298828125e-05, "model_forward_time": 0.024903297424316406, "step": 7325 }, { "epoch": 1.117706298828125e-05, "step": 7325, "training_step_time": 0.10445642471313477 }, { "epoch": 1.11785888671875e-05, "model_forward_time": 0.024820327758789062, "step": 7326 }, { "epoch": 1.11785888671875e-05, "step": 7326, "training_step_time": 0.10518026351928711 }, { "epoch": 1.118011474609375e-05, "model_forward_time": 0.025187015533447266, "step": 7327 }, { "epoch": 1.118011474609375e-05, "step": 7327, "training_step_time": 0.10549211502075195 }, { "epoch": 1.1181640625e-05, "model_forward_time": 0.024790525436401367, "step": 7328 }, { "epoch": 1.1181640625e-05, "step": 7328, "training_step_time": 0.10471796989440918 }, { "epoch": 1.118316650390625e-05, "model_forward_time": 0.025397300720214844, "step": 7329 }, { "epoch": 1.118316650390625e-05, "step": 7329, "training_step_time": 0.10778355598449707 }, { "epoch": 1.11846923828125e-05, "grad_norm": 0.3651945888996124, "learning_rate": 9.002556065122571e-05, "loss": 0.0765, "step": 7330 }, { "epoch": 1.11846923828125e-05, "model_forward_time": 0.024899721145629883, "step": 7330 }, { "epoch": 1.11846923828125e-05, "step": 7330, "training_step_time": 0.10371065139770508 }, { "epoch": 1.118621826171875e-05, "model_forward_time": 0.02550196647644043, "step": 7331 }, { "epoch": 1.118621826171875e-05, "step": 7331, "training_step_time": 0.10548162460327148 }, { "epoch": 1.1187744140625e-05, "model_forward_time": 0.025576114654541016, "step": 7332 }, { "epoch": 1.1187744140625e-05, "step": 7332, "training_step_time": 0.11353325843811035 }, { "epoch": 1.118927001953125e-05, "model_forward_time": 0.025925159454345703, "step": 7333 }, { "epoch": 1.118927001953125e-05, "step": 7333, "training_step_time": 0.11351132392883301 }, { "epoch": 1.11907958984375e-05, "model_forward_time": 0.025793790817260742, "step": 7334 }, { "epoch": 1.11907958984375e-05, "step": 7334, "training_step_time": 0.2113487720489502 }, { "epoch": 1.119232177734375e-05, "model_forward_time": 0.024778366088867188, "step": 7335 }, { "epoch": 1.119232177734375e-05, "step": 7335, "training_step_time": 0.11678290367126465 }, { "epoch": 1.119384765625e-05, "model_forward_time": 0.024832725524902344, "step": 7336 }, { "epoch": 1.119384765625e-05, "step": 7336, "training_step_time": 0.11150908470153809 }, { "epoch": 1.119537353515625e-05, "model_forward_time": 0.02576613426208496, "step": 7337 }, { "epoch": 1.119537353515625e-05, "step": 7337, "training_step_time": 0.2151024341583252 }, { "epoch": 1.11968994140625e-05, "model_forward_time": 0.024915695190429688, "step": 7338 }, { "epoch": 1.11968994140625e-05, "step": 7338, "training_step_time": 0.13201141357421875 }, { "epoch": 1.119842529296875e-05, "model_forward_time": 0.02493453025817871, "step": 7339 }, { "epoch": 1.119842529296875e-05, "step": 7339, "training_step_time": 0.1182854175567627 }, { "epoch": 1.1199951171875e-05, "grad_norm": 0.331667959690094, "learning_rate": 8.999250454531802e-05, "loss": 0.072, "step": 7340 }, { "epoch": 1.1199951171875e-05, "model_forward_time": 0.0255734920501709, "step": 7340 }, { "epoch": 1.1199951171875e-05, "step": 7340, "training_step_time": 0.1233818531036377 }, { "epoch": 1.120147705078125e-05, "model_forward_time": 0.02521491050720215, "step": 7341 }, { "epoch": 1.120147705078125e-05, "step": 7341, "training_step_time": 0.11857318878173828 }, { "epoch": 1.12030029296875e-05, "model_forward_time": 0.02529120445251465, "step": 7342 }, { "epoch": 1.12030029296875e-05, "step": 7342, "training_step_time": 0.11760401725769043 }, { "epoch": 1.120452880859375e-05, "model_forward_time": 0.027306556701660156, "step": 7343 }, { "epoch": 1.120452880859375e-05, "step": 7343, "training_step_time": 0.11627554893493652 }, { "epoch": 1.12060546875e-05, "model_forward_time": 0.025255680084228516, "step": 7344 }, { "epoch": 1.12060546875e-05, "step": 7344, "training_step_time": 0.11273479461669922 }, { "epoch": 1.120758056640625e-05, "model_forward_time": 0.02567267417907715, "step": 7345 }, { "epoch": 1.120758056640625e-05, "step": 7345, "training_step_time": 0.11033272743225098 }, { "epoch": 1.12091064453125e-05, "model_forward_time": 0.025693416595458984, "step": 7346 }, { "epoch": 1.12091064453125e-05, "step": 7346, "training_step_time": 0.11128449440002441 }, { "epoch": 1.121063232421875e-05, "model_forward_time": 0.0255124568939209, "step": 7347 }, { "epoch": 1.121063232421875e-05, "step": 7347, "training_step_time": 0.11049604415893555 }, { "epoch": 1.1212158203125e-05, "model_forward_time": 0.02553534507751465, "step": 7348 }, { "epoch": 1.1212158203125e-05, "step": 7348, "training_step_time": 0.10732245445251465 }, { "epoch": 1.121368408203125e-05, "model_forward_time": 0.025444507598876953, "step": 7349 }, { "epoch": 1.121368408203125e-05, "step": 7349, "training_step_time": 0.10709452629089355 }, { "epoch": 1.12152099609375e-05, "grad_norm": 0.7745429277420044, "learning_rate": 8.995939984474624e-05, "loss": 0.0599, "step": 7350 }, { "epoch": 1.12152099609375e-05, "model_forward_time": 0.02503824234008789, "step": 7350 }, { "epoch": 1.12152099609375e-05, "step": 7350, "training_step_time": 0.10704851150512695 }, { "epoch": 1.121673583984375e-05, "model_forward_time": 0.025383949279785156, "step": 7351 }, { "epoch": 1.121673583984375e-05, "step": 7351, "training_step_time": 0.10941624641418457 }, { "epoch": 1.121826171875e-05, "model_forward_time": 0.025880813598632812, "step": 7352 }, { "epoch": 1.121826171875e-05, "step": 7352, "training_step_time": 0.10666179656982422 }, { "epoch": 1.121978759765625e-05, "model_forward_time": 0.025503158569335938, "step": 7353 }, { "epoch": 1.121978759765625e-05, "step": 7353, "training_step_time": 0.10626888275146484 }, { "epoch": 1.12213134765625e-05, "model_forward_time": 0.025522470474243164, "step": 7354 }, { "epoch": 1.12213134765625e-05, "step": 7354, "training_step_time": 0.11079096794128418 }, { "epoch": 1.122283935546875e-05, "model_forward_time": 0.025725603103637695, "step": 7355 }, { "epoch": 1.122283935546875e-05, "step": 7355, "training_step_time": 0.13513851165771484 }, { "epoch": 1.1224365234375e-05, "model_forward_time": 0.025693893432617188, "step": 7356 }, { "epoch": 1.1224365234375e-05, "step": 7356, "training_step_time": 0.10869884490966797 }, { "epoch": 1.122589111328125e-05, "model_forward_time": 0.02541661262512207, "step": 7357 }, { "epoch": 1.122589111328125e-05, "step": 7357, "training_step_time": 0.21927452087402344 }, { "epoch": 1.12274169921875e-05, "model_forward_time": 0.024211883544921875, "step": 7358 }, { "epoch": 1.12274169921875e-05, "step": 7358, "training_step_time": 0.10668706893920898 }, { "epoch": 1.122894287109375e-05, "model_forward_time": 0.0242002010345459, "step": 7359 }, { "epoch": 1.122894287109375e-05, "step": 7359, "training_step_time": 0.11103606224060059 }, { "epoch": 1.123046875e-05, "grad_norm": 0.6871018409729004, "learning_rate": 8.992624658973574e-05, "loss": 0.0662, "step": 7360 }, { "epoch": 1.123046875e-05, "model_forward_time": 0.024962902069091797, "step": 7360 }, { "epoch": 1.123046875e-05, "step": 7360, "training_step_time": 0.19650936126708984 }, { "epoch": 1.123199462890625e-05, "model_forward_time": 0.02691483497619629, "step": 7361 }, { "epoch": 1.123199462890625e-05, "step": 7361, "training_step_time": 0.11382222175598145 }, { "epoch": 1.12335205078125e-05, "model_forward_time": 0.024154186248779297, "step": 7362 }, { "epoch": 1.12335205078125e-05, "step": 7362, "training_step_time": 0.11993145942687988 }, { "epoch": 1.123504638671875e-05, "model_forward_time": 0.025316238403320312, "step": 7363 }, { "epoch": 1.123504638671875e-05, "step": 7363, "training_step_time": 0.1404893398284912 }, { "epoch": 1.1236572265625e-05, "model_forward_time": 0.024767637252807617, "step": 7364 }, { "epoch": 1.1236572265625e-05, "step": 7364, "training_step_time": 0.12512588500976562 }, { "epoch": 1.123809814453125e-05, "model_forward_time": 0.023523330688476562, "step": 7365 }, { "epoch": 1.123809814453125e-05, "step": 7365, "training_step_time": 0.11969208717346191 }, { "epoch": 1.12396240234375e-05, "model_forward_time": 0.024266481399536133, "step": 7366 }, { "epoch": 1.12396240234375e-05, "step": 7366, "training_step_time": 0.1976485252380371 }, { "epoch": 1.124114990234375e-05, "model_forward_time": 0.023970842361450195, "step": 7367 }, { "epoch": 1.124114990234375e-05, "step": 7367, "training_step_time": 0.11065149307250977 }, { "epoch": 1.124267578125e-05, "model_forward_time": 0.02421259880065918, "step": 7368 }, { "epoch": 1.124267578125e-05, "step": 7368, "training_step_time": 0.10947299003601074 }, { "epoch": 1.124420166015625e-05, "model_forward_time": 0.02505636215209961, "step": 7369 }, { "epoch": 1.124420166015625e-05, "step": 7369, "training_step_time": 0.11038732528686523 }, { "epoch": 1.12457275390625e-05, "grad_norm": 0.41827675700187683, "learning_rate": 8.989304482057084e-05, "loss": 0.0782, "step": 7370 }, { "epoch": 1.12457275390625e-05, "model_forward_time": 0.025074005126953125, "step": 7370 }, { "epoch": 1.12457275390625e-05, "step": 7370, "training_step_time": 0.10799884796142578 }, { "epoch": 1.124725341796875e-05, "model_forward_time": 0.02555251121520996, "step": 7371 }, { "epoch": 1.124725341796875e-05, "step": 7371, "training_step_time": 0.10899734497070312 }, { "epoch": 1.1248779296875e-05, "model_forward_time": 0.025128841400146484, "step": 7372 }, { "epoch": 1.1248779296875e-05, "step": 7372, "training_step_time": 0.10998129844665527 }, { "epoch": 1.125030517578125e-05, "model_forward_time": 0.02503371238708496, "step": 7373 }, { "epoch": 1.125030517578125e-05, "step": 7373, "training_step_time": 0.10831117630004883 }, { "epoch": 1.12518310546875e-05, "model_forward_time": 0.025280237197875977, "step": 7374 }, { "epoch": 1.12518310546875e-05, "step": 7374, "training_step_time": 0.1067967414855957 }, { "epoch": 1.125335693359375e-05, "model_forward_time": 0.025223731994628906, "step": 7375 }, { "epoch": 1.125335693359375e-05, "step": 7375, "training_step_time": 0.10846924781799316 }, { "epoch": 1.12548828125e-05, "model_forward_time": 0.024911880493164062, "step": 7376 }, { "epoch": 1.12548828125e-05, "step": 7376, "training_step_time": 0.11301136016845703 }, { "epoch": 1.125640869140625e-05, "model_forward_time": 0.025600433349609375, "step": 7377 }, { "epoch": 1.125640869140625e-05, "step": 7377, "training_step_time": 0.11171364784240723 }, { "epoch": 1.12579345703125e-05, "model_forward_time": 0.026075124740600586, "step": 7378 }, { "epoch": 1.12579345703125e-05, "step": 7378, "training_step_time": 0.1078798770904541 }, { "epoch": 1.125946044921875e-05, "model_forward_time": 0.025047779083251953, "step": 7379 }, { "epoch": 1.125946044921875e-05, "step": 7379, "training_step_time": 0.1105952262878418 }, { "epoch": 1.1260986328125e-05, "grad_norm": 0.6497596502304077, "learning_rate": 8.98597945775948e-05, "loss": 0.0556, "step": 7380 }, { "epoch": 1.1260986328125e-05, "model_forward_time": 0.024868011474609375, "step": 7380 }, { "epoch": 1.1260986328125e-05, "step": 7380, "training_step_time": 0.10617375373840332 }, { "epoch": 1.126251220703125e-05, "model_forward_time": 0.025554180145263672, "step": 7381 }, { "epoch": 1.126251220703125e-05, "step": 7381, "training_step_time": 0.10843157768249512 }, { "epoch": 1.12640380859375e-05, "model_forward_time": 0.0257108211517334, "step": 7382 }, { "epoch": 1.12640380859375e-05, "step": 7382, "training_step_time": 0.12195611000061035 }, { "epoch": 1.126556396484375e-05, "model_forward_time": 0.025971412658691406, "step": 7383 }, { "epoch": 1.126556396484375e-05, "step": 7383, "training_step_time": 0.21953082084655762 }, { "epoch": 1.126708984375e-05, "model_forward_time": 0.025005340576171875, "step": 7384 }, { "epoch": 1.126708984375e-05, "step": 7384, "training_step_time": 0.17476272583007812 }, { "epoch": 1.126861572265625e-05, "model_forward_time": 0.024931669235229492, "step": 7385 }, { "epoch": 1.126861572265625e-05, "step": 7385, "training_step_time": 0.10980010032653809 }, { "epoch": 1.12701416015625e-05, "model_forward_time": 0.024990081787109375, "step": 7386 }, { "epoch": 1.12701416015625e-05, "step": 7386, "training_step_time": 0.12690210342407227 }, { "epoch": 1.127166748046875e-05, "model_forward_time": 0.025683164596557617, "step": 7387 }, { "epoch": 1.127166748046875e-05, "step": 7387, "training_step_time": 0.11972451210021973 }, { "epoch": 1.1273193359375e-05, "model_forward_time": 0.025560855865478516, "step": 7388 }, { "epoch": 1.1273193359375e-05, "step": 7388, "training_step_time": 0.16822552680969238 }, { "epoch": 1.127471923828125e-05, "model_forward_time": 0.0234375, "step": 7389 }, { "epoch": 1.127471923828125e-05, "step": 7389, "training_step_time": 0.19314885139465332 }, { "epoch": 1.12762451171875e-05, "grad_norm": 0.3705720603466034, "learning_rate": 8.982649590120982e-05, "loss": 0.061, "step": 7390 }, { "epoch": 1.12762451171875e-05, "model_forward_time": 0.025278329849243164, "step": 7390 }, { "epoch": 1.12762451171875e-05, "step": 7390, "training_step_time": 0.16959571838378906 }, { "epoch": 1.127777099609375e-05, "model_forward_time": 0.024254560470581055, "step": 7391 }, { "epoch": 1.127777099609375e-05, "step": 7391, "training_step_time": 0.14531517028808594 }, { "epoch": 1.1279296875e-05, "model_forward_time": 0.02408909797668457, "step": 7392 }, { "epoch": 1.1279296875e-05, "step": 7392, "training_step_time": 0.14297008514404297 }, { "epoch": 1.128082275390625e-05, "model_forward_time": 0.0245211124420166, "step": 7393 }, { "epoch": 1.128082275390625e-05, "step": 7393, "training_step_time": 0.13319897651672363 }, { "epoch": 1.12823486328125e-05, "model_forward_time": 0.024317264556884766, "step": 7394 }, { "epoch": 1.12823486328125e-05, "step": 7394, "training_step_time": 0.11066365242004395 }, { "epoch": 1.128387451171875e-05, "model_forward_time": 0.02541065216064453, "step": 7395 }, { "epoch": 1.128387451171875e-05, "step": 7395, "training_step_time": 0.10962176322937012 }, { "epoch": 1.1285400390625e-05, "model_forward_time": 0.025240182876586914, "step": 7396 }, { "epoch": 1.1285400390625e-05, "step": 7396, "training_step_time": 0.10647153854370117 }, { "epoch": 1.128692626953125e-05, "model_forward_time": 0.025767087936401367, "step": 7397 }, { "epoch": 1.128692626953125e-05, "step": 7397, "training_step_time": 0.11550045013427734 }, { "epoch": 1.12884521484375e-05, "model_forward_time": 0.027737140655517578, "step": 7398 }, { "epoch": 1.12884521484375e-05, "step": 7398, "training_step_time": 0.14266657829284668 }, { "epoch": 1.128997802734375e-05, "model_forward_time": 0.0270388126373291, "step": 7399 }, { "epoch": 1.128997802734375e-05, "step": 7399, "training_step_time": 0.11533951759338379 }, { "epoch": 1.129150390625e-05, "grad_norm": 0.2715272903442383, "learning_rate": 8.979314883187693e-05, "loss": 0.0667, "step": 7400 }, { "epoch": 1.129150390625e-05, "model_forward_time": 0.024865150451660156, "step": 7400 }, { "epoch": 1.129150390625e-05, "step": 7400, "training_step_time": 0.20172691345214844 }, { "epoch": 1.129302978515625e-05, "model_forward_time": 0.025548934936523438, "step": 7401 }, { "epoch": 1.129302978515625e-05, "step": 7401, "training_step_time": 0.10495829582214355 }, { "epoch": 1.12945556640625e-05, "model_forward_time": 0.02457904815673828, "step": 7402 }, { "epoch": 1.12945556640625e-05, "step": 7402, "training_step_time": 0.11523842811584473 }, { "epoch": 1.129608154296875e-05, "model_forward_time": 0.02513718605041504, "step": 7403 }, { "epoch": 1.129608154296875e-05, "step": 7403, "training_step_time": 0.1959671974182129 }, { "epoch": 1.1297607421875e-05, "model_forward_time": 0.02559828758239746, "step": 7404 }, { "epoch": 1.1297607421875e-05, "step": 7404, "training_step_time": 0.10901403427124023 }, { "epoch": 1.129913330078125e-05, "model_forward_time": 0.025177717208862305, "step": 7405 }, { "epoch": 1.129913330078125e-05, "step": 7405, "training_step_time": 0.18027710914611816 }, { "epoch": 1.13006591796875e-05, "model_forward_time": 0.024931907653808594, "step": 7406 }, { "epoch": 1.13006591796875e-05, "step": 7406, "training_step_time": 0.17157816886901855 }, { "epoch": 1.130218505859375e-05, "model_forward_time": 0.02465653419494629, "step": 7407 }, { "epoch": 1.130218505859375e-05, "step": 7407, "training_step_time": 0.20664453506469727 }, { "epoch": 1.13037109375e-05, "model_forward_time": 0.02470254898071289, "step": 7408 }, { "epoch": 1.13037109375e-05, "step": 7408, "training_step_time": 0.17580413818359375 }, { "epoch": 1.130523681640625e-05, "model_forward_time": 0.024215221405029297, "step": 7409 }, { "epoch": 1.130523681640625e-05, "step": 7409, "training_step_time": 0.15679097175598145 }, { "epoch": 1.13067626953125e-05, "grad_norm": 0.48039108514785767, "learning_rate": 8.975975341011596e-05, "loss": 0.0725, "step": 7410 }, { "epoch": 1.13067626953125e-05, "model_forward_time": 0.024577856063842773, "step": 7410 }, { "epoch": 1.13067626953125e-05, "step": 7410, "training_step_time": 0.13814902305603027 }, { "epoch": 1.130828857421875e-05, "model_forward_time": 0.024631977081298828, "step": 7411 }, { "epoch": 1.130828857421875e-05, "step": 7411, "training_step_time": 0.13447833061218262 }, { "epoch": 1.1309814453125e-05, "model_forward_time": 0.024537086486816406, "step": 7412 }, { "epoch": 1.1309814453125e-05, "step": 7412, "training_step_time": 0.13095355033874512 }, { "epoch": 1.131134033203125e-05, "model_forward_time": 0.024105548858642578, "step": 7413 }, { "epoch": 1.131134033203125e-05, "step": 7413, "training_step_time": 0.12012648582458496 }, { "epoch": 1.13128662109375e-05, "model_forward_time": 0.0253603458404541, "step": 7414 }, { "epoch": 1.13128662109375e-05, "step": 7414, "training_step_time": 0.12164807319641113 }, { "epoch": 1.131439208984375e-05, "model_forward_time": 0.02573108673095703, "step": 7415 }, { "epoch": 1.131439208984375e-05, "step": 7415, "training_step_time": 0.11651396751403809 }, { "epoch": 1.131591796875e-05, "model_forward_time": 0.025417089462280273, "step": 7416 }, { "epoch": 1.131591796875e-05, "step": 7416, "training_step_time": 0.11426615715026855 }, { "epoch": 1.131744384765625e-05, "model_forward_time": 0.02568984031677246, "step": 7417 }, { "epoch": 1.131744384765625e-05, "step": 7417, "training_step_time": 0.11350584030151367 }, { "epoch": 1.13189697265625e-05, "model_forward_time": 0.024745702743530273, "step": 7418 }, { "epoch": 1.13189697265625e-05, "step": 7418, "training_step_time": 0.11293458938598633 }, { "epoch": 1.132049560546875e-05, "model_forward_time": 0.0253143310546875, "step": 7419 }, { "epoch": 1.132049560546875e-05, "step": 7419, "training_step_time": 0.10815167427062988 }, { "epoch": 1.1322021484375e-05, "grad_norm": 0.5846309661865234, "learning_rate": 8.972630967650548e-05, "loss": 0.0816, "step": 7420 }, { "epoch": 1.1322021484375e-05, "model_forward_time": 0.024700403213500977, "step": 7420 }, { "epoch": 1.1322021484375e-05, "step": 7420, "training_step_time": 0.11052703857421875 }, { "epoch": 1.132354736328125e-05, "model_forward_time": 0.024944305419921875, "step": 7421 }, { "epoch": 1.132354736328125e-05, "step": 7421, "training_step_time": 0.10870623588562012 }, { "epoch": 1.13250732421875e-05, "model_forward_time": 0.025362730026245117, "step": 7422 }, { "epoch": 1.13250732421875e-05, "step": 7422, "training_step_time": 0.11077380180358887 }, { "epoch": 1.132659912109375e-05, "model_forward_time": 0.025944232940673828, "step": 7423 }, { "epoch": 1.132659912109375e-05, "step": 7423, "training_step_time": 0.11313629150390625 }, { "epoch": 1.1328125e-05, "model_forward_time": 0.025660037994384766, "step": 7424 }, { "epoch": 1.1328125e-05, "step": 7424, "training_step_time": 0.1739051342010498 }, { "epoch": 1.132965087890625e-05, "model_forward_time": 0.02476215362548828, "step": 7425 }, { "epoch": 1.132965087890625e-05, "step": 7425, "training_step_time": 0.16234040260314941 }, { "epoch": 1.13311767578125e-05, "model_forward_time": 0.02507305145263672, "step": 7426 }, { "epoch": 1.13311767578125e-05, "step": 7426, "training_step_time": 0.11198234558105469 }, { "epoch": 1.133270263671875e-05, "model_forward_time": 0.02494359016418457, "step": 7427 }, { "epoch": 1.133270263671875e-05, "step": 7427, "training_step_time": 0.10856127738952637 }, { "epoch": 1.1334228515625e-05, "model_forward_time": 0.025786876678466797, "step": 7428 }, { "epoch": 1.1334228515625e-05, "step": 7428, "training_step_time": 0.11350059509277344 }, { "epoch": 1.133575439453125e-05, "model_forward_time": 0.0250699520111084, "step": 7429 }, { "epoch": 1.133575439453125e-05, "step": 7429, "training_step_time": 0.12893414497375488 }, { "epoch": 1.13372802734375e-05, "grad_norm": 0.3795636296272278, "learning_rate": 8.969281767168283e-05, "loss": 0.0719, "step": 7430 }, { "epoch": 1.13372802734375e-05, "model_forward_time": 0.025446176528930664, "step": 7430 }, { "epoch": 1.13372802734375e-05, "step": 7430, "training_step_time": 0.11540460586547852 }, { "epoch": 1.133880615234375e-05, "model_forward_time": 0.02591729164123535, "step": 7431 }, { "epoch": 1.133880615234375e-05, "step": 7431, "training_step_time": 0.11378955841064453 }, { "epoch": 1.134033203125e-05, "model_forward_time": 0.025783300399780273, "step": 7432 }, { "epoch": 1.134033203125e-05, "step": 7432, "training_step_time": 0.11556839942932129 }, { "epoch": 1.134185791015625e-05, "model_forward_time": 0.0253293514251709, "step": 7433 }, { "epoch": 1.134185791015625e-05, "step": 7433, "training_step_time": 0.10606932640075684 }, { "epoch": 1.13433837890625e-05, "model_forward_time": 0.025543212890625, "step": 7434 }, { "epoch": 1.13433837890625e-05, "step": 7434, "training_step_time": 0.11037468910217285 }, { "epoch": 1.134490966796875e-05, "model_forward_time": 0.025588035583496094, "step": 7435 }, { "epoch": 1.134490966796875e-05, "step": 7435, "training_step_time": 0.10693669319152832 }, { "epoch": 1.1346435546875e-05, "model_forward_time": 0.025462865829467773, "step": 7436 }, { "epoch": 1.1346435546875e-05, "step": 7436, "training_step_time": 0.10628199577331543 }, { "epoch": 1.134796142578125e-05, "model_forward_time": 0.025536775588989258, "step": 7437 }, { "epoch": 1.134796142578125e-05, "step": 7437, "training_step_time": 0.10689353942871094 }, { "epoch": 1.13494873046875e-05, "model_forward_time": 0.025814533233642578, "step": 7438 }, { "epoch": 1.13494873046875e-05, "step": 7438, "training_step_time": 0.10968470573425293 }, { "epoch": 1.135101318359375e-05, "model_forward_time": 0.025473833084106445, "step": 7439 }, { "epoch": 1.135101318359375e-05, "step": 7439, "training_step_time": 0.10853457450866699 }, { "epoch": 1.13525390625e-05, "grad_norm": 0.3715420365333557, "learning_rate": 8.965927743634391e-05, "loss": 0.0674, "step": 7440 }, { "epoch": 1.13525390625e-05, "model_forward_time": 0.025832653045654297, "step": 7440 }, { "epoch": 1.13525390625e-05, "step": 7440, "training_step_time": 0.10965991020202637 }, { "epoch": 1.135406494140625e-05, "model_forward_time": 0.025208234786987305, "step": 7441 }, { "epoch": 1.135406494140625e-05, "step": 7441, "training_step_time": 0.16036725044250488 }, { "epoch": 1.13555908203125e-05, "model_forward_time": 0.024778127670288086, "step": 7442 }, { "epoch": 1.13555908203125e-05, "step": 7442, "training_step_time": 0.10770535469055176 }, { "epoch": 1.135711669921875e-05, "model_forward_time": 0.024984121322631836, "step": 7443 }, { "epoch": 1.135711669921875e-05, "step": 7443, "training_step_time": 0.1269998550415039 }, { "epoch": 1.1358642578125e-05, "model_forward_time": 0.025672435760498047, "step": 7444 }, { "epoch": 1.1358642578125e-05, "step": 7444, "training_step_time": 0.1109006404876709 }, { "epoch": 1.136016845703125e-05, "model_forward_time": 0.02538323402404785, "step": 7445 }, { "epoch": 1.136016845703125e-05, "step": 7445, "training_step_time": 0.1919553279876709 }, { "epoch": 1.13616943359375e-05, "model_forward_time": 0.024901866912841797, "step": 7446 }, { "epoch": 1.13616943359375e-05, "step": 7446, "training_step_time": 0.12483000755310059 }, { "epoch": 1.136322021484375e-05, "model_forward_time": 0.024997711181640625, "step": 7447 }, { "epoch": 1.136322021484375e-05, "step": 7447, "training_step_time": 0.11973786354064941 }, { "epoch": 1.136474609375e-05, "model_forward_time": 0.025373458862304688, "step": 7448 }, { "epoch": 1.136474609375e-05, "step": 7448, "training_step_time": 0.1066751480102539 }, { "epoch": 1.136627197265625e-05, "model_forward_time": 0.025304079055786133, "step": 7449 }, { "epoch": 1.136627197265625e-05, "step": 7449, "training_step_time": 0.19579410552978516 }, { "epoch": 1.13677978515625e-05, "grad_norm": 0.541823148727417, "learning_rate": 8.962568901124327e-05, "loss": 0.0645, "step": 7450 }, { "epoch": 1.13677978515625e-05, "model_forward_time": 0.024516582489013672, "step": 7450 }, { "epoch": 1.13677978515625e-05, "step": 7450, "training_step_time": 0.13893532752990723 }, { "epoch": 1.136932373046875e-05, "model_forward_time": 0.02518630027770996, "step": 7451 }, { "epoch": 1.136932373046875e-05, "step": 7451, "training_step_time": 0.11243033409118652 }, { "epoch": 1.1370849609375e-05, "model_forward_time": 0.025170326232910156, "step": 7452 }, { "epoch": 1.1370849609375e-05, "step": 7452, "training_step_time": 0.10991573333740234 }, { "epoch": 1.137237548828125e-05, "model_forward_time": 0.025527477264404297, "step": 7453 }, { "epoch": 1.137237548828125e-05, "step": 7453, "training_step_time": 0.1188039779663086 }, { "epoch": 1.13739013671875e-05, "model_forward_time": 0.025712966918945312, "step": 7454 }, { "epoch": 1.13739013671875e-05, "step": 7454, "training_step_time": 0.1149134635925293 }, { "epoch": 1.137542724609375e-05, "model_forward_time": 0.02524733543395996, "step": 7455 }, { "epoch": 1.137542724609375e-05, "step": 7455, "training_step_time": 0.18972492218017578 }, { "epoch": 1.1376953125e-05, "model_forward_time": 0.02466440200805664, "step": 7456 }, { "epoch": 1.1376953125e-05, "step": 7456, "training_step_time": 0.10737967491149902 }, { "epoch": 1.137847900390625e-05, "model_forward_time": 0.02499699592590332, "step": 7457 }, { "epoch": 1.137847900390625e-05, "step": 7457, "training_step_time": 0.10535192489624023 }, { "epoch": 1.13800048828125e-05, "model_forward_time": 0.025231599807739258, "step": 7458 }, { "epoch": 1.13800048828125e-05, "step": 7458, "training_step_time": 0.10739731788635254 }, { "epoch": 1.138153076171875e-05, "model_forward_time": 0.026720285415649414, "step": 7459 }, { "epoch": 1.138153076171875e-05, "step": 7459, "training_step_time": 0.10987997055053711 }, { "epoch": 1.1383056640625e-05, "grad_norm": 0.4487568140029907, "learning_rate": 8.959205243719402e-05, "loss": 0.0725, "step": 7460 }, { "epoch": 1.1383056640625e-05, "model_forward_time": 0.02602219581604004, "step": 7460 }, { "epoch": 1.1383056640625e-05, "step": 7460, "training_step_time": 0.10877442359924316 }, { "epoch": 1.138458251953125e-05, "model_forward_time": 0.025221586227416992, "step": 7461 }, { "epoch": 1.138458251953125e-05, "step": 7461, "training_step_time": 0.10827088356018066 }, { "epoch": 1.13861083984375e-05, "model_forward_time": 0.02580404281616211, "step": 7462 }, { "epoch": 1.13861083984375e-05, "step": 7462, "training_step_time": 0.11054730415344238 }, { "epoch": 1.138763427734375e-05, "model_forward_time": 0.02606201171875, "step": 7463 }, { "epoch": 1.138763427734375e-05, "step": 7463, "training_step_time": 0.10782623291015625 }, { "epoch": 1.138916015625e-05, "model_forward_time": 0.025223731994628906, "step": 7464 }, { "epoch": 1.138916015625e-05, "step": 7464, "training_step_time": 0.10737371444702148 }, { "epoch": 1.139068603515625e-05, "model_forward_time": 0.025101661682128906, "step": 7465 }, { "epoch": 1.139068603515625e-05, "step": 7465, "training_step_time": 0.10848331451416016 }, { "epoch": 1.13922119140625e-05, "model_forward_time": 0.025310039520263672, "step": 7466 }, { "epoch": 1.13922119140625e-05, "step": 7466, "training_step_time": 0.11298537254333496 }, { "epoch": 1.139373779296875e-05, "model_forward_time": 0.025574922561645508, "step": 7467 }, { "epoch": 1.139373779296875e-05, "step": 7467, "training_step_time": 0.20866775512695312 }, { "epoch": 1.1395263671875e-05, "model_forward_time": 0.024865150451660156, "step": 7468 }, { "epoch": 1.1395263671875e-05, "step": 7468, "training_step_time": 0.1118307113647461 }, { "epoch": 1.139678955078125e-05, "model_forward_time": 0.024909019470214844, "step": 7469 }, { "epoch": 1.139678955078125e-05, "step": 7469, "training_step_time": 0.11139750480651855 }, { "epoch": 1.13983154296875e-05, "grad_norm": 0.827911913394928, "learning_rate": 8.955836775506776e-05, "loss": 0.0835, "step": 7470 }, { "epoch": 1.13983154296875e-05, "model_forward_time": 0.025642871856689453, "step": 7470 }, { "epoch": 1.13983154296875e-05, "step": 7470, "training_step_time": 0.21308088302612305 }, { "epoch": 1.139984130859375e-05, "model_forward_time": 0.027009010314941406, "step": 7471 }, { "epoch": 1.139984130859375e-05, "step": 7471, "training_step_time": 0.1160123348236084 }, { "epoch": 1.14013671875e-05, "model_forward_time": 0.025164127349853516, "step": 7472 }, { "epoch": 1.14013671875e-05, "step": 7472, "training_step_time": 0.14982175827026367 }, { "epoch": 1.140289306640625e-05, "model_forward_time": 0.0259552001953125, "step": 7473 }, { "epoch": 1.140289306640625e-05, "step": 7473, "training_step_time": 0.10730361938476562 }, { "epoch": 1.14044189453125e-05, "model_forward_time": 0.025359392166137695, "step": 7474 }, { "epoch": 1.14044189453125e-05, "step": 7474, "training_step_time": 0.10889339447021484 }, { "epoch": 1.140594482421875e-05, "model_forward_time": 0.02538752555847168, "step": 7475 }, { "epoch": 1.140594482421875e-05, "step": 7475, "training_step_time": 0.13800287246704102 }, { "epoch": 1.1407470703125e-05, "model_forward_time": 0.0254974365234375, "step": 7476 }, { "epoch": 1.1407470703125e-05, "step": 7476, "training_step_time": 0.13433384895324707 }, { "epoch": 1.140899658203125e-05, "model_forward_time": 0.024883747100830078, "step": 7477 }, { "epoch": 1.140899658203125e-05, "step": 7477, "training_step_time": 0.11044049263000488 }, { "epoch": 1.14105224609375e-05, "model_forward_time": 0.025385379791259766, "step": 7478 }, { "epoch": 1.14105224609375e-05, "step": 7478, "training_step_time": 0.11016201972961426 }, { "epoch": 1.141204833984375e-05, "model_forward_time": 0.02512812614440918, "step": 7479 }, { "epoch": 1.141204833984375e-05, "step": 7479, "training_step_time": 0.10714221000671387 }, { "epoch": 1.141357421875e-05, "grad_norm": 0.44633209705352783, "learning_rate": 8.95246350057946e-05, "loss": 0.0739, "step": 7480 }, { "epoch": 1.141357421875e-05, "model_forward_time": 0.025025129318237305, "step": 7480 }, { "epoch": 1.141357421875e-05, "step": 7480, "training_step_time": 0.11897420883178711 }, { "epoch": 1.141510009765625e-05, "model_forward_time": 0.02547144889831543, "step": 7481 }, { "epoch": 1.141510009765625e-05, "step": 7481, "training_step_time": 0.12049603462219238 }, { "epoch": 1.14166259765625e-05, "model_forward_time": 0.024299144744873047, "step": 7482 }, { "epoch": 1.14166259765625e-05, "step": 7482, "training_step_time": 0.11717367172241211 }, { "epoch": 1.141815185546875e-05, "model_forward_time": 0.0246732234954834, "step": 7483 }, { "epoch": 1.141815185546875e-05, "step": 7483, "training_step_time": 0.11414647102355957 }, { "epoch": 1.1419677734375e-05, "model_forward_time": 0.02502274513244629, "step": 7484 }, { "epoch": 1.1419677734375e-05, "step": 7484, "training_step_time": 0.11986017227172852 }, { "epoch": 1.142120361328125e-05, "model_forward_time": 0.024698734283447266, "step": 7485 }, { "epoch": 1.142120361328125e-05, "step": 7485, "training_step_time": 0.11498546600341797 }, { "epoch": 1.14227294921875e-05, "model_forward_time": 0.025289297103881836, "step": 7486 }, { "epoch": 1.14227294921875e-05, "step": 7486, "training_step_time": 0.1871654987335205 }, { "epoch": 1.142425537109375e-05, "model_forward_time": 0.02476048469543457, "step": 7487 }, { "epoch": 1.142425537109375e-05, "step": 7487, "training_step_time": 0.11253595352172852 }, { "epoch": 1.142578125e-05, "model_forward_time": 0.025255203247070312, "step": 7488 }, { "epoch": 1.142578125e-05, "step": 7488, "training_step_time": 0.1189577579498291 }, { "epoch": 1.142730712890625e-05, "model_forward_time": 0.027173280715942383, "step": 7489 }, { "epoch": 1.142730712890625e-05, "step": 7489, "training_step_time": 0.13217759132385254 }, { "epoch": 1.14288330078125e-05, "grad_norm": 0.4712039530277252, "learning_rate": 8.949085423036296e-05, "loss": 0.0736, "step": 7490 }, { "epoch": 1.14288330078125e-05, "model_forward_time": 0.02729964256286621, "step": 7490 }, { "epoch": 1.14288330078125e-05, "step": 7490, "training_step_time": 0.2835516929626465 }, { "epoch": 1.143035888671875e-05, "model_forward_time": 0.02773261070251465, "step": 7491 }, { "epoch": 1.143035888671875e-05, "step": 7491, "training_step_time": 0.31546521186828613 }, { "epoch": 1.1431884765625e-05, "model_forward_time": 0.027684450149536133, "step": 7492 }, { "epoch": 1.1431884765625e-05, "step": 7492, "training_step_time": 0.23333311080932617 }, { "epoch": 1.143341064453125e-05, "model_forward_time": 0.029149770736694336, "step": 7493 }, { "epoch": 1.143341064453125e-05, "step": 7493, "training_step_time": 0.27862095832824707 }, { "epoch": 1.14349365234375e-05, "model_forward_time": 0.03225994110107422, "step": 7494 }, { "epoch": 1.14349365234375e-05, "step": 7494, "training_step_time": 0.38484764099121094 }, { "epoch": 1.143646240234375e-05, "model_forward_time": 0.032654762268066406, "step": 7495 }, { "epoch": 1.143646240234375e-05, "step": 7495, "training_step_time": 0.330456018447876 }, { "epoch": 1.143798828125e-05, "model_forward_time": 0.03200221061706543, "step": 7496 }, { "epoch": 1.143798828125e-05, "step": 7496, "training_step_time": 0.288083553314209 }, { "epoch": 1.143951416015625e-05, "model_forward_time": 0.03369498252868652, "step": 7497 }, { "epoch": 1.143951416015625e-05, "step": 7497, "training_step_time": 0.2920682430267334 }, { "epoch": 1.14410400390625e-05, "model_forward_time": 0.03727149963378906, "step": 7498 }, { "epoch": 1.14410400390625e-05, "step": 7498, "training_step_time": 0.2603771686553955 }, { "epoch": 1.144256591796875e-05, "model_forward_time": 0.044814109802246094, "step": 7499 }, { "epoch": 1.144256591796875e-05, "step": 7499, "training_step_time": 0.25127720832824707 }, { "epoch": 1.1444091796875e-05, "grad_norm": 0.6195780038833618, "learning_rate": 8.945702546981969e-05, "loss": 0.0586, "step": 7500 }, { "epoch": 1.1444091796875e-05, "model_forward_time": 0.028694868087768555, "step": 7500 }, { "epoch": 1.1444091796875e-05, "step": 7500, "training_step_time": 0.27643513679504395 }, { "epoch": 1.144561767578125e-05, "model_forward_time": 0.03206181526184082, "step": 7501 }, { "epoch": 1.144561767578125e-05, "step": 7501, "training_step_time": 0.3108961582183838 }, { "epoch": 1.14471435546875e-05, "model_forward_time": 0.030891895294189453, "step": 7502 }, { "epoch": 1.14471435546875e-05, "step": 7502, "training_step_time": 0.2969017028808594 }, { "epoch": 1.144866943359375e-05, "model_forward_time": 0.03175759315490723, "step": 7503 }, { "epoch": 1.144866943359375e-05, "step": 7503, "training_step_time": 0.17866063117980957 }, { "epoch": 1.14501953125e-05, "model_forward_time": 0.03013467788696289, "step": 7504 }, { "epoch": 1.14501953125e-05, "step": 7504, "training_step_time": 0.3036816120147705 }, { "epoch": 1.145172119140625e-05, "model_forward_time": 0.028237581253051758, "step": 7505 }, { "epoch": 1.145172119140625e-05, "step": 7505, "training_step_time": 0.17996764183044434 }, { "epoch": 1.14532470703125e-05, "model_forward_time": 0.027724742889404297, "step": 7506 }, { "epoch": 1.14532470703125e-05, "step": 7506, "training_step_time": 0.15885686874389648 }, { "epoch": 1.145477294921875e-05, "model_forward_time": 0.02636861801147461, "step": 7507 }, { "epoch": 1.145477294921875e-05, "step": 7507, "training_step_time": 0.15992069244384766 }, { "epoch": 1.1456298828125e-05, "model_forward_time": 0.026248693466186523, "step": 7508 }, { "epoch": 1.1456298828125e-05, "step": 7508, "training_step_time": 0.11858963966369629 }, { "epoch": 1.145782470703125e-05, "model_forward_time": 0.025864124298095703, "step": 7509 }, { "epoch": 1.145782470703125e-05, "step": 7509, "training_step_time": 0.11260151863098145 }, { "epoch": 1.14593505859375e-05, "grad_norm": 0.463886022567749, "learning_rate": 8.942314876526992e-05, "loss": 0.0605, "step": 7510 }, { "epoch": 1.14593505859375e-05, "model_forward_time": 0.025745391845703125, "step": 7510 }, { "epoch": 1.14593505859375e-05, "step": 7510, "training_step_time": 0.11325716972351074 }, { "epoch": 1.146087646484375e-05, "model_forward_time": 0.025552749633789062, "step": 7511 }, { "epoch": 1.146087646484375e-05, "step": 7511, "training_step_time": 0.11036968231201172 }, { "epoch": 1.146240234375e-05, "model_forward_time": 0.02500605583190918, "step": 7512 }, { "epoch": 1.146240234375e-05, "step": 7512, "training_step_time": 0.1109781265258789 }, { "epoch": 1.146392822265625e-05, "model_forward_time": 0.02545619010925293, "step": 7513 }, { "epoch": 1.146392822265625e-05, "step": 7513, "training_step_time": 0.1268160343170166 }, { "epoch": 1.14654541015625e-05, "model_forward_time": 0.025903940200805664, "step": 7514 }, { "epoch": 1.14654541015625e-05, "step": 7514, "training_step_time": 0.10970020294189453 }, { "epoch": 1.146697998046875e-05, "model_forward_time": 0.025619983673095703, "step": 7515 }, { "epoch": 1.146697998046875e-05, "step": 7515, "training_step_time": 0.21758794784545898 }, { "epoch": 1.1468505859375e-05, "model_forward_time": 0.02518916130065918, "step": 7516 }, { "epoch": 1.1468505859375e-05, "step": 7516, "training_step_time": 0.10937666893005371 }, { "epoch": 1.147003173828125e-05, "model_forward_time": 0.023174285888671875, "step": 7517 }, { "epoch": 1.147003173828125e-05, "step": 7517, "training_step_time": 0.10879373550415039 }, { "epoch": 1.14715576171875e-05, "model_forward_time": 0.024286985397338867, "step": 7518 }, { "epoch": 1.14715576171875e-05, "step": 7518, "training_step_time": 0.21215009689331055 }, { "epoch": 1.147308349609375e-05, "model_forward_time": 0.02474069595336914, "step": 7519 }, { "epoch": 1.147308349609375e-05, "step": 7519, "training_step_time": 0.10425758361816406 }, { "epoch": 1.1474609375e-05, "grad_norm": 0.6711480021476746, "learning_rate": 8.938922415787703e-05, "loss": 0.0565, "step": 7520 }, { "epoch": 1.1474609375e-05, "model_forward_time": 0.024590015411376953, "step": 7520 }, { "epoch": 1.1474609375e-05, "step": 7520, "training_step_time": 0.10497379302978516 }, { "epoch": 1.147613525390625e-05, "model_forward_time": 0.025584936141967773, "step": 7521 }, { "epoch": 1.147613525390625e-05, "step": 7521, "training_step_time": 0.10781288146972656 }, { "epoch": 1.14776611328125e-05, "model_forward_time": 0.025742769241333008, "step": 7522 }, { "epoch": 1.14776611328125e-05, "step": 7522, "training_step_time": 0.15942955017089844 }, { "epoch": 1.147918701171875e-05, "model_forward_time": 0.026860713958740234, "step": 7523 }, { "epoch": 1.147918701171875e-05, "step": 7523, "training_step_time": 0.22969937324523926 }, { "epoch": 1.1480712890625e-05, "model_forward_time": 0.024640798568725586, "step": 7524 }, { "epoch": 1.1480712890625e-05, "step": 7524, "training_step_time": 0.15426349639892578 }, { "epoch": 1.148223876953125e-05, "model_forward_time": 0.024309158325195312, "step": 7525 }, { "epoch": 1.148223876953125e-05, "step": 7525, "training_step_time": 0.1756727695465088 }, { "epoch": 1.14837646484375e-05, "model_forward_time": 0.024947166442871094, "step": 7526 }, { "epoch": 1.14837646484375e-05, "step": 7526, "training_step_time": 0.13421344757080078 }, { "epoch": 1.148529052734375e-05, "model_forward_time": 0.023604154586791992, "step": 7527 }, { "epoch": 1.148529052734375e-05, "step": 7527, "training_step_time": 0.18221139907836914 }, { "epoch": 1.148681640625e-05, "model_forward_time": 0.024515151977539062, "step": 7528 }, { "epoch": 1.148681640625e-05, "step": 7528, "training_step_time": 0.12261390686035156 }, { "epoch": 1.148834228515625e-05, "model_forward_time": 0.0234529972076416, "step": 7529 }, { "epoch": 1.148834228515625e-05, "step": 7529, "training_step_time": 0.11481118202209473 }, { "epoch": 1.14898681640625e-05, "grad_norm": 0.31023868918418884, "learning_rate": 8.935525168886262e-05, "loss": 0.0513, "step": 7530 }, { "epoch": 1.14898681640625e-05, "model_forward_time": 0.024337291717529297, "step": 7530 }, { "epoch": 1.14898681640625e-05, "step": 7530, "training_step_time": 0.11804342269897461 }, { "epoch": 1.149139404296875e-05, "model_forward_time": 0.02403569221496582, "step": 7531 }, { "epoch": 1.149139404296875e-05, "step": 7531, "training_step_time": 0.11221981048583984 }, { "epoch": 1.1492919921875e-05, "model_forward_time": 0.02452254295349121, "step": 7532 }, { "epoch": 1.1492919921875e-05, "step": 7532, "training_step_time": 0.11206412315368652 }, { "epoch": 1.149444580078125e-05, "model_forward_time": 0.025042295455932617, "step": 7533 }, { "epoch": 1.149444580078125e-05, "step": 7533, "training_step_time": 0.10973405838012695 }, { "epoch": 1.14959716796875e-05, "model_forward_time": 0.02460169792175293, "step": 7534 }, { "epoch": 1.14959716796875e-05, "step": 7534, "training_step_time": 0.10980796813964844 }, { "epoch": 1.149749755859375e-05, "model_forward_time": 0.025773048400878906, "step": 7535 }, { "epoch": 1.149749755859375e-05, "step": 7535, "training_step_time": 0.11005377769470215 }, { "epoch": 1.14990234375e-05, "model_forward_time": 0.027606725692749023, "step": 7536 }, { "epoch": 1.14990234375e-05, "step": 7536, "training_step_time": 0.1114509105682373 }, { "epoch": 1.150054931640625e-05, "model_forward_time": 0.025926828384399414, "step": 7537 }, { "epoch": 1.150054931640625e-05, "step": 7537, "training_step_time": 0.11589455604553223 }, { "epoch": 1.15020751953125e-05, "model_forward_time": 0.026102542877197266, "step": 7538 }, { "epoch": 1.15020751953125e-05, "step": 7538, "training_step_time": 0.1090848445892334 }, { "epoch": 1.150360107421875e-05, "model_forward_time": 0.025512218475341797, "step": 7539 }, { "epoch": 1.150360107421875e-05, "step": 7539, "training_step_time": 0.18982887268066406 }, { "epoch": 1.1505126953125e-05, "grad_norm": 0.34551236033439636, "learning_rate": 8.932123139950648e-05, "loss": 0.0543, "step": 7540 }, { "epoch": 1.1505126953125e-05, "model_forward_time": 0.025347471237182617, "step": 7540 }, { "epoch": 1.1505126953125e-05, "step": 7540, "training_step_time": 0.10652685165405273 }, { "epoch": 1.150665283203125e-05, "model_forward_time": 0.024825334548950195, "step": 7541 }, { "epoch": 1.150665283203125e-05, "step": 7541, "training_step_time": 0.15387463569641113 }, { "epoch": 1.15081787109375e-05, "model_forward_time": 0.025041580200195312, "step": 7542 }, { "epoch": 1.15081787109375e-05, "step": 7542, "training_step_time": 0.15796875953674316 }, { "epoch": 1.150970458984375e-05, "model_forward_time": 0.024828433990478516, "step": 7543 }, { "epoch": 1.150970458984375e-05, "step": 7543, "training_step_time": 0.1181643009185791 }, { "epoch": 1.151123046875e-05, "model_forward_time": 0.02532029151916504, "step": 7544 }, { "epoch": 1.151123046875e-05, "step": 7544, "training_step_time": 0.10732269287109375 }, { "epoch": 1.151275634765625e-05, "model_forward_time": 0.025308609008789062, "step": 7545 }, { "epoch": 1.151275634765625e-05, "step": 7545, "training_step_time": 0.19997739791870117 }, { "epoch": 1.15142822265625e-05, "model_forward_time": 0.02466416358947754, "step": 7546 }, { "epoch": 1.15142822265625e-05, "step": 7546, "training_step_time": 0.10839080810546875 }, { "epoch": 1.151580810546875e-05, "model_forward_time": 0.02546095848083496, "step": 7547 }, { "epoch": 1.151580810546875e-05, "step": 7547, "training_step_time": 0.10600161552429199 }, { "epoch": 1.1517333984375e-05, "model_forward_time": 0.025327205657958984, "step": 7548 }, { "epoch": 1.1517333984375e-05, "step": 7548, "training_step_time": 0.11980986595153809 }, { "epoch": 1.151885986328125e-05, "model_forward_time": 0.025332927703857422, "step": 7549 }, { "epoch": 1.151885986328125e-05, "step": 7549, "training_step_time": 0.13177704811096191 }, { "epoch": 1.15203857421875e-05, "grad_norm": 0.3907056450843811, "learning_rate": 8.928716333114643e-05, "loss": 0.052, "step": 7550 }, { "epoch": 1.15203857421875e-05, "model_forward_time": 0.0253298282623291, "step": 7550 }, { "epoch": 1.15203857421875e-05, "step": 7550, "training_step_time": 0.1103363037109375 }, { "epoch": 1.152191162109375e-05, "model_forward_time": 0.025565147399902344, "step": 7551 }, { "epoch": 1.152191162109375e-05, "step": 7551, "training_step_time": 0.11193203926086426 }, { "epoch": 1.15234375e-05, "model_forward_time": 0.025788307189941406, "step": 7552 }, { "epoch": 1.15234375e-05, "step": 7552, "training_step_time": 0.10695981979370117 }, { "epoch": 1.152496337890625e-05, "model_forward_time": 0.025403738021850586, "step": 7553 }, { "epoch": 1.152496337890625e-05, "step": 7553, "training_step_time": 0.10672807693481445 }, { "epoch": 1.15264892578125e-05, "model_forward_time": 0.025153398513793945, "step": 7554 }, { "epoch": 1.15264892578125e-05, "step": 7554, "training_step_time": 0.10427594184875488 }, { "epoch": 1.152801513671875e-05, "model_forward_time": 0.025589704513549805, "step": 7555 }, { "epoch": 1.152801513671875e-05, "step": 7555, "training_step_time": 0.10627865791320801 }, { "epoch": 1.1529541015625e-05, "model_forward_time": 0.025009870529174805, "step": 7556 }, { "epoch": 1.1529541015625e-05, "step": 7556, "training_step_time": 0.17627835273742676 }, { "epoch": 1.153106689453125e-05, "model_forward_time": 0.025344133377075195, "step": 7557 }, { "epoch": 1.153106689453125e-05, "step": 7557, "training_step_time": 0.1110372543334961 }, { "epoch": 1.15325927734375e-05, "model_forward_time": 0.024831533432006836, "step": 7558 }, { "epoch": 1.15325927734375e-05, "step": 7558, "training_step_time": 0.1956171989440918 }, { "epoch": 1.153411865234375e-05, "model_forward_time": 0.0248563289642334, "step": 7559 }, { "epoch": 1.153411865234375e-05, "step": 7559, "training_step_time": 0.18651485443115234 }, { "epoch": 1.153564453125e-05, "grad_norm": 0.20785319805145264, "learning_rate": 8.92530475251784e-05, "loss": 0.0476, "step": 7560 }, { "epoch": 1.153564453125e-05, "model_forward_time": 0.024492502212524414, "step": 7560 }, { "epoch": 1.153564453125e-05, "step": 7560, "training_step_time": 0.12028884887695312 }, { "epoch": 1.153717041015625e-05, "model_forward_time": 0.02514028549194336, "step": 7561 }, { "epoch": 1.153717041015625e-05, "step": 7561, "training_step_time": 0.11856937408447266 }, { "epoch": 1.15386962890625e-05, "model_forward_time": 0.025243282318115234, "step": 7562 }, { "epoch": 1.15386962890625e-05, "step": 7562, "training_step_time": 0.10597634315490723 }, { "epoch": 1.154022216796875e-05, "model_forward_time": 0.026213407516479492, "step": 7563 }, { "epoch": 1.154022216796875e-05, "step": 7563, "training_step_time": 0.10567951202392578 }, { "epoch": 1.1541748046875e-05, "model_forward_time": 0.025548458099365234, "step": 7564 }, { "epoch": 1.1541748046875e-05, "step": 7564, "training_step_time": 0.10509753227233887 }, { "epoch": 1.154327392578125e-05, "model_forward_time": 0.025368213653564453, "step": 7565 }, { "epoch": 1.154327392578125e-05, "step": 7565, "training_step_time": 0.10883307456970215 }, { "epoch": 1.15447998046875e-05, "model_forward_time": 0.026121854782104492, "step": 7566 }, { "epoch": 1.15447998046875e-05, "step": 7566, "training_step_time": 0.13388895988464355 }, { "epoch": 1.154632568359375e-05, "model_forward_time": 0.025322675704956055, "step": 7567 }, { "epoch": 1.154632568359375e-05, "step": 7567, "training_step_time": 0.14200782775878906 }, { "epoch": 1.15478515625e-05, "model_forward_time": 0.024810791015625, "step": 7568 }, { "epoch": 1.15478515625e-05, "step": 7568, "training_step_time": 0.1113128662109375 }, { "epoch": 1.154937744140625e-05, "model_forward_time": 0.025634765625, "step": 7569 }, { "epoch": 1.154937744140625e-05, "step": 7569, "training_step_time": 0.11280298233032227 }, { "epoch": 1.15509033203125e-05, "grad_norm": 0.41076380014419556, "learning_rate": 8.921888402305628e-05, "loss": 0.057, "step": 7570 }, { "epoch": 1.15509033203125e-05, "model_forward_time": 0.025046110153198242, "step": 7570 }, { "epoch": 1.15509033203125e-05, "step": 7570, "training_step_time": 0.11122369766235352 }, { "epoch": 1.155242919921875e-05, "model_forward_time": 0.0251309871673584, "step": 7571 }, { "epoch": 1.155242919921875e-05, "step": 7571, "training_step_time": 0.10721945762634277 }, { "epoch": 1.1553955078125e-05, "model_forward_time": 0.02491450309753418, "step": 7572 }, { "epoch": 1.1553955078125e-05, "step": 7572, "training_step_time": 0.19387412071228027 }, { "epoch": 1.155548095703125e-05, "model_forward_time": 0.024790525436401367, "step": 7573 }, { "epoch": 1.155548095703125e-05, "step": 7573, "training_step_time": 0.10835862159729004 }, { "epoch": 1.15570068359375e-05, "model_forward_time": 0.02400970458984375, "step": 7574 }, { "epoch": 1.15570068359375e-05, "step": 7574, "training_step_time": 0.10474824905395508 }, { "epoch": 1.155853271484375e-05, "model_forward_time": 0.02563929557800293, "step": 7575 }, { "epoch": 1.155853271484375e-05, "step": 7575, "training_step_time": 0.10742783546447754 }, { "epoch": 1.156005859375e-05, "model_forward_time": 0.025474071502685547, "step": 7576 }, { "epoch": 1.156005859375e-05, "step": 7576, "training_step_time": 0.10790824890136719 }, { "epoch": 1.156158447265625e-05, "model_forward_time": 0.025529861450195312, "step": 7577 }, { "epoch": 1.156158447265625e-05, "step": 7577, "training_step_time": 0.10888409614562988 }, { "epoch": 1.15631103515625e-05, "model_forward_time": 0.02574443817138672, "step": 7578 }, { "epoch": 1.15631103515625e-05, "step": 7578, "training_step_time": 0.10878586769104004 }, { "epoch": 1.156463623046875e-05, "model_forward_time": 0.025615930557250977, "step": 7579 }, { "epoch": 1.156463623046875e-05, "step": 7579, "training_step_time": 0.10616827011108398 }, { "epoch": 1.1566162109375e-05, "grad_norm": 0.4428868293762207, "learning_rate": 8.9184672866292e-05, "loss": 0.0636, "step": 7580 }, { "epoch": 1.1566162109375e-05, "model_forward_time": 0.025633573532104492, "step": 7580 }, { "epoch": 1.1566162109375e-05, "step": 7580, "training_step_time": 0.11730504035949707 }, { "epoch": 1.156768798828125e-05, "model_forward_time": 0.025322437286376953, "step": 7581 }, { "epoch": 1.156768798828125e-05, "step": 7581, "training_step_time": 0.11594557762145996 }, { "epoch": 1.15692138671875e-05, "model_forward_time": 0.025382041931152344, "step": 7582 }, { "epoch": 1.15692138671875e-05, "step": 7582, "training_step_time": 0.11431550979614258 }, { "epoch": 1.157073974609375e-05, "model_forward_time": 0.02511906623840332, "step": 7583 }, { "epoch": 1.157073974609375e-05, "step": 7583, "training_step_time": 0.10933399200439453 }, { "epoch": 1.1572265625e-05, "model_forward_time": 0.025600433349609375, "step": 7584 }, { "epoch": 1.1572265625e-05, "step": 7584, "training_step_time": 0.11113572120666504 }, { "epoch": 1.157379150390625e-05, "model_forward_time": 0.025514841079711914, "step": 7585 }, { "epoch": 1.157379150390625e-05, "step": 7585, "training_step_time": 0.11262178421020508 }, { "epoch": 1.15753173828125e-05, "model_forward_time": 0.025684118270874023, "step": 7586 }, { "epoch": 1.15753173828125e-05, "step": 7586, "training_step_time": 0.11214947700500488 }, { "epoch": 1.157684326171875e-05, "model_forward_time": 0.025887727737426758, "step": 7587 }, { "epoch": 1.157684326171875e-05, "step": 7587, "training_step_time": 0.11516928672790527 }, { "epoch": 1.1578369140625e-05, "model_forward_time": 0.02554941177368164, "step": 7588 }, { "epoch": 1.1578369140625e-05, "step": 7588, "training_step_time": 0.22561955451965332 }, { "epoch": 1.157989501953125e-05, "model_forward_time": 0.025089263916015625, "step": 7589 }, { "epoch": 1.157989501953125e-05, "step": 7589, "training_step_time": 0.11248159408569336 }, { "epoch": 1.15814208984375e-05, "grad_norm": 0.497812420129776, "learning_rate": 8.91504140964553e-05, "loss": 0.06, "step": 7590 }, { "epoch": 1.15814208984375e-05, "model_forward_time": 0.02458810806274414, "step": 7590 }, { "epoch": 1.15814208984375e-05, "step": 7590, "training_step_time": 0.1064901351928711 }, { "epoch": 1.158294677734375e-05, "model_forward_time": 0.025667190551757812, "step": 7591 }, { "epoch": 1.158294677734375e-05, "step": 7591, "training_step_time": 0.11013436317443848 }, { "epoch": 1.158447265625e-05, "model_forward_time": 0.02528834342956543, "step": 7592 }, { "epoch": 1.158447265625e-05, "step": 7592, "training_step_time": 0.11440348625183105 }, { "epoch": 1.158599853515625e-05, "model_forward_time": 0.025539636611938477, "step": 7593 }, { "epoch": 1.158599853515625e-05, "step": 7593, "training_step_time": 0.1176600456237793 }, { "epoch": 1.15875244140625e-05, "model_forward_time": 0.02505660057067871, "step": 7594 }, { "epoch": 1.15875244140625e-05, "step": 7594, "training_step_time": 0.2245039939880371 }, { "epoch": 1.158905029296875e-05, "model_forward_time": 0.02477741241455078, "step": 7595 }, { "epoch": 1.158905029296875e-05, "step": 7595, "training_step_time": 0.12923789024353027 }, { "epoch": 1.1590576171875e-05, "model_forward_time": 0.024675846099853516, "step": 7596 }, { "epoch": 1.1590576171875e-05, "step": 7596, "training_step_time": 0.11308979988098145 }, { "epoch": 1.159210205078125e-05, "model_forward_time": 0.02508544921875, "step": 7597 }, { "epoch": 1.159210205078125e-05, "step": 7597, "training_step_time": 0.11894893646240234 }, { "epoch": 1.15936279296875e-05, "model_forward_time": 0.025282621383666992, "step": 7598 }, { "epoch": 1.15936279296875e-05, "step": 7598, "training_step_time": 0.10695528984069824 }, { "epoch": 1.159515380859375e-05, "model_forward_time": 0.025716304779052734, "step": 7599 }, { "epoch": 1.159515380859375e-05, "step": 7599, "training_step_time": 0.1094503402709961 }, { "epoch": 1.15966796875e-05, "grad_norm": 0.4147672951221466, "learning_rate": 8.911610775517382e-05, "loss": 0.0681, "step": 7600 }, { "epoch": 1.15966796875e-05, "model_forward_time": 0.02514934539794922, "step": 7600 }, { "epoch": 1.15966796875e-05, "step": 7600, "training_step_time": 0.10867762565612793 }, { "epoch": 1.159820556640625e-05, "model_forward_time": 0.026123523712158203, "step": 7601 }, { "epoch": 1.159820556640625e-05, "step": 7601, "training_step_time": 0.1785874366760254 }, { "epoch": 1.15997314453125e-05, "model_forward_time": 0.0252685546875, "step": 7602 }, { "epoch": 1.15997314453125e-05, "step": 7602, "training_step_time": 0.10990190505981445 }, { "epoch": 1.160125732421875e-05, "model_forward_time": 0.02790045738220215, "step": 7603 }, { "epoch": 1.160125732421875e-05, "step": 7603, "training_step_time": 0.21428894996643066 }, { "epoch": 1.1602783203125e-05, "model_forward_time": 0.024366378784179688, "step": 7604 }, { "epoch": 1.1602783203125e-05, "step": 7604, "training_step_time": 0.10966753959655762 }, { "epoch": 1.160430908203125e-05, "model_forward_time": 0.024645566940307617, "step": 7605 }, { "epoch": 1.160430908203125e-05, "step": 7605, "training_step_time": 0.10852789878845215 }, { "epoch": 1.16058349609375e-05, "model_forward_time": 0.025670766830444336, "step": 7606 }, { "epoch": 1.16058349609375e-05, "step": 7606, "training_step_time": 0.11270785331726074 }, { "epoch": 1.160736083984375e-05, "model_forward_time": 0.02571392059326172, "step": 7607 }, { "epoch": 1.160736083984375e-05, "step": 7607, "training_step_time": 0.11242079734802246 }, { "epoch": 1.160888671875e-05, "model_forward_time": 0.02544689178466797, "step": 7608 }, { "epoch": 1.160888671875e-05, "step": 7608, "training_step_time": 0.10724973678588867 }, { "epoch": 1.161041259765625e-05, "model_forward_time": 0.025478601455688477, "step": 7609 }, { "epoch": 1.161041259765625e-05, "step": 7609, "training_step_time": 0.10607290267944336 }, { "epoch": 1.16119384765625e-05, "grad_norm": 0.5737213492393494, "learning_rate": 8.908175388413304e-05, "loss": 0.0597, "step": 7610 }, { "epoch": 1.16119384765625e-05, "model_forward_time": 0.025459766387939453, "step": 7610 }, { "epoch": 1.16119384765625e-05, "step": 7610, "training_step_time": 0.10748910903930664 }, { "epoch": 1.161346435546875e-05, "model_forward_time": 0.025223970413208008, "step": 7611 }, { "epoch": 1.161346435546875e-05, "step": 7611, "training_step_time": 0.10920405387878418 }, { "epoch": 1.1614990234375e-05, "model_forward_time": 0.024556875228881836, "step": 7612 }, { "epoch": 1.1614990234375e-05, "step": 7612, "training_step_time": 0.11302423477172852 }, { "epoch": 1.161651611328125e-05, "model_forward_time": 0.026060104370117188, "step": 7613 }, { "epoch": 1.161651611328125e-05, "step": 7613, "training_step_time": 0.12968683242797852 }, { "epoch": 1.16180419921875e-05, "model_forward_time": 0.025667190551757812, "step": 7614 }, { "epoch": 1.16180419921875e-05, "step": 7614, "training_step_time": 0.1146547794342041 }, { "epoch": 1.161956787109375e-05, "model_forward_time": 0.026381254196166992, "step": 7615 }, { "epoch": 1.161956787109375e-05, "step": 7615, "training_step_time": 0.10876011848449707 }, { "epoch": 1.162109375e-05, "model_forward_time": 0.025258779525756836, "step": 7616 }, { "epoch": 1.162109375e-05, "step": 7616, "training_step_time": 0.1150503158569336 }, { "epoch": 1.162261962890625e-05, "model_forward_time": 0.025096416473388672, "step": 7617 }, { "epoch": 1.162261962890625e-05, "step": 7617, "training_step_time": 0.10724091529846191 }, { "epoch": 1.16241455078125e-05, "model_forward_time": 0.02503824234008789, "step": 7618 }, { "epoch": 1.16241455078125e-05, "step": 7618, "training_step_time": 0.19698643684387207 }, { "epoch": 1.162567138671875e-05, "model_forward_time": 0.024817943572998047, "step": 7619 }, { "epoch": 1.162567138671875e-05, "step": 7619, "training_step_time": 0.1044609546661377 }, { "epoch": 1.1627197265625e-05, "grad_norm": 0.7217997908592224, "learning_rate": 8.90473525250761e-05, "loss": 0.0555, "step": 7620 }, { "epoch": 1.1627197265625e-05, "model_forward_time": 0.024636268615722656, "step": 7620 }, { "epoch": 1.1627197265625e-05, "step": 7620, "training_step_time": 0.10553312301635742 }, { "epoch": 1.162872314453125e-05, "model_forward_time": 0.02508234977722168, "step": 7621 }, { "epoch": 1.162872314453125e-05, "step": 7621, "training_step_time": 0.10552549362182617 }, { "epoch": 1.16302490234375e-05, "model_forward_time": 0.025265932083129883, "step": 7622 }, { "epoch": 1.16302490234375e-05, "step": 7622, "training_step_time": 0.10860824584960938 }, { "epoch": 1.163177490234375e-05, "model_forward_time": 0.025378704071044922, "step": 7623 }, { "epoch": 1.163177490234375e-05, "step": 7623, "training_step_time": 0.1076810359954834 }, { "epoch": 1.163330078125e-05, "model_forward_time": 0.025180339813232422, "step": 7624 }, { "epoch": 1.163330078125e-05, "step": 7624, "training_step_time": 0.10910868644714355 }, { "epoch": 1.163482666015625e-05, "model_forward_time": 0.025206327438354492, "step": 7625 }, { "epoch": 1.163482666015625e-05, "step": 7625, "training_step_time": 0.10742974281311035 }, { "epoch": 1.16363525390625e-05, "model_forward_time": 0.02530074119567871, "step": 7626 }, { "epoch": 1.16363525390625e-05, "step": 7626, "training_step_time": 0.1073293685913086 }, { "epoch": 1.163787841796875e-05, "model_forward_time": 0.025426864624023438, "step": 7627 }, { "epoch": 1.163787841796875e-05, "step": 7627, "training_step_time": 0.11109209060668945 }, { "epoch": 1.1639404296875e-05, "model_forward_time": 0.025487184524536133, "step": 7628 }, { "epoch": 1.1639404296875e-05, "step": 7628, "training_step_time": 0.10695099830627441 }, { "epoch": 1.164093017578125e-05, "model_forward_time": 0.02542281150817871, "step": 7629 }, { "epoch": 1.164093017578125e-05, "step": 7629, "training_step_time": 0.10611367225646973 }, { "epoch": 1.16424560546875e-05, "grad_norm": 0.3115273416042328, "learning_rate": 8.901290371980393e-05, "loss": 0.0589, "step": 7630 }, { "epoch": 1.16424560546875e-05, "model_forward_time": 0.025999069213867188, "step": 7630 }, { "epoch": 1.16424560546875e-05, "step": 7630, "training_step_time": 0.10917162895202637 }, { "epoch": 1.164398193359375e-05, "model_forward_time": 0.02528977394104004, "step": 7631 }, { "epoch": 1.164398193359375e-05, "step": 7631, "training_step_time": 0.16918063163757324 }, { "epoch": 1.16455078125e-05, "model_forward_time": 0.024712562561035156, "step": 7632 }, { "epoch": 1.16455078125e-05, "step": 7632, "training_step_time": 0.16158485412597656 }, { "epoch": 1.164703369140625e-05, "model_forward_time": 0.02495598793029785, "step": 7633 }, { "epoch": 1.164703369140625e-05, "step": 7633, "training_step_time": 0.11144614219665527 }, { "epoch": 1.16485595703125e-05, "model_forward_time": 0.025044918060302734, "step": 7634 }, { "epoch": 1.16485595703125e-05, "step": 7634, "training_step_time": 0.17116045951843262 }, { "epoch": 1.165008544921875e-05, "model_forward_time": 0.024498701095581055, "step": 7635 }, { "epoch": 1.165008544921875e-05, "step": 7635, "training_step_time": 0.1633284091949463 }, { "epoch": 1.1651611328125e-05, "model_forward_time": 0.0246734619140625, "step": 7636 }, { "epoch": 1.1651611328125e-05, "step": 7636, "training_step_time": 0.104644775390625 }, { "epoch": 1.165313720703125e-05, "model_forward_time": 0.025096893310546875, "step": 7637 }, { "epoch": 1.165313720703125e-05, "step": 7637, "training_step_time": 0.10618901252746582 }, { "epoch": 1.16546630859375e-05, "model_forward_time": 0.025556087493896484, "step": 7638 }, { "epoch": 1.16546630859375e-05, "step": 7638, "training_step_time": 0.13782405853271484 }, { "epoch": 1.165618896484375e-05, "model_forward_time": 0.024988651275634766, "step": 7639 }, { "epoch": 1.165618896484375e-05, "step": 7639, "training_step_time": 0.1111452579498291 }, { "epoch": 1.165771484375e-05, "grad_norm": 0.3155267834663391, "learning_rate": 8.897840751017506e-05, "loss": 0.0561, "step": 7640 }, { "epoch": 1.165771484375e-05, "model_forward_time": 0.024981021881103516, "step": 7640 }, { "epoch": 1.165771484375e-05, "step": 7640, "training_step_time": 0.22199630737304688 }, { "epoch": 1.165924072265625e-05, "model_forward_time": 0.024469375610351562, "step": 7641 }, { "epoch": 1.165924072265625e-05, "step": 7641, "training_step_time": 0.1360480785369873 }, { "epoch": 1.16607666015625e-05, "model_forward_time": 0.024828672409057617, "step": 7642 }, { "epoch": 1.16607666015625e-05, "step": 7642, "training_step_time": 0.1051030158996582 }, { "epoch": 1.166229248046875e-05, "model_forward_time": 0.024886369705200195, "step": 7643 }, { "epoch": 1.166229248046875e-05, "step": 7643, "training_step_time": 0.12057209014892578 }, { "epoch": 1.1663818359375e-05, "model_forward_time": 0.02578878402709961, "step": 7644 }, { "epoch": 1.1663818359375e-05, "step": 7644, "training_step_time": 0.11903691291809082 }, { "epoch": 1.166534423828125e-05, "model_forward_time": 0.025133609771728516, "step": 7645 }, { "epoch": 1.166534423828125e-05, "step": 7645, "training_step_time": 0.12076044082641602 }, { "epoch": 1.16668701171875e-05, "model_forward_time": 0.022945404052734375, "step": 7646 }, { "epoch": 1.16668701171875e-05, "step": 7646, "training_step_time": 0.11647319793701172 }, { "epoch": 1.166839599609375e-05, "model_forward_time": 0.02434849739074707, "step": 7647 }, { "epoch": 1.166839599609375e-05, "step": 7647, "training_step_time": 0.16655898094177246 }, { "epoch": 1.1669921875e-05, "model_forward_time": 0.024731874465942383, "step": 7648 }, { "epoch": 1.1669921875e-05, "step": 7648, "training_step_time": 0.1195073127746582 }, { "epoch": 1.167144775390625e-05, "model_forward_time": 0.02491021156311035, "step": 7649 }, { "epoch": 1.167144775390625e-05, "step": 7649, "training_step_time": 0.13245201110839844 }, { "epoch": 1.16729736328125e-05, "grad_norm": 0.4117908775806427, "learning_rate": 8.894386393810563e-05, "loss": 0.0502, "step": 7650 }, { "epoch": 1.16729736328125e-05, "model_forward_time": 0.025406360626220703, "step": 7650 }, { "epoch": 1.16729736328125e-05, "step": 7650, "training_step_time": 0.11232972145080566 }, { "epoch": 1.167449951171875e-05, "model_forward_time": 0.025336742401123047, "step": 7651 }, { "epoch": 1.167449951171875e-05, "step": 7651, "training_step_time": 0.17204499244689941 }, { "epoch": 1.1676025390625e-05, "model_forward_time": 0.024525880813598633, "step": 7652 }, { "epoch": 1.1676025390625e-05, "step": 7652, "training_step_time": 0.13030672073364258 }, { "epoch": 1.167755126953125e-05, "model_forward_time": 0.024316072463989258, "step": 7653 }, { "epoch": 1.167755126953125e-05, "step": 7653, "training_step_time": 0.12222647666931152 }, { "epoch": 1.16790771484375e-05, "model_forward_time": 0.024706125259399414, "step": 7654 }, { "epoch": 1.16790771484375e-05, "step": 7654, "training_step_time": 0.1091926097869873 }, { "epoch": 1.168060302734375e-05, "model_forward_time": 0.02550506591796875, "step": 7655 }, { "epoch": 1.168060302734375e-05, "step": 7655, "training_step_time": 0.1096808910369873 }, { "epoch": 1.168212890625e-05, "model_forward_time": 0.024821758270263672, "step": 7656 }, { "epoch": 1.168212890625e-05, "step": 7656, "training_step_time": 0.10886716842651367 }, { "epoch": 1.168365478515625e-05, "model_forward_time": 0.0246274471282959, "step": 7657 }, { "epoch": 1.168365478515625e-05, "step": 7657, "training_step_time": 0.14403724670410156 }, { "epoch": 1.16851806640625e-05, "model_forward_time": 0.02502918243408203, "step": 7658 }, { "epoch": 1.16851806640625e-05, "step": 7658, "training_step_time": 0.13561058044433594 }, { "epoch": 1.168670654296875e-05, "model_forward_time": 0.024547576904296875, "step": 7659 }, { "epoch": 1.168670654296875e-05, "step": 7659, "training_step_time": 0.1077275276184082 }, { "epoch": 1.1688232421875e-05, "grad_norm": 0.5045364499092102, "learning_rate": 8.890927304556935e-05, "loss": 0.054, "step": 7660 }, { "epoch": 1.1688232421875e-05, "model_forward_time": 0.025346755981445312, "step": 7660 }, { "epoch": 1.1688232421875e-05, "step": 7660, "training_step_time": 0.11255931854248047 }, { "epoch": 1.168975830078125e-05, "model_forward_time": 0.02481698989868164, "step": 7661 }, { "epoch": 1.168975830078125e-05, "step": 7661, "training_step_time": 0.1192939281463623 }, { "epoch": 1.16912841796875e-05, "model_forward_time": 0.02497720718383789, "step": 7662 }, { "epoch": 1.16912841796875e-05, "step": 7662, "training_step_time": 0.10813689231872559 }, { "epoch": 1.169281005859375e-05, "model_forward_time": 0.025302410125732422, "step": 7663 }, { "epoch": 1.169281005859375e-05, "step": 7663, "training_step_time": 0.1939830780029297 }, { "epoch": 1.16943359375e-05, "model_forward_time": 0.024466991424560547, "step": 7664 }, { "epoch": 1.16943359375e-05, "step": 7664, "training_step_time": 0.10402035713195801 }, { "epoch": 1.169586181640625e-05, "model_forward_time": 0.025084733963012695, "step": 7665 }, { "epoch": 1.169586181640625e-05, "step": 7665, "training_step_time": 0.11043930053710938 }, { "epoch": 1.16973876953125e-05, "model_forward_time": 0.025323152542114258, "step": 7666 }, { "epoch": 1.16973876953125e-05, "step": 7666, "training_step_time": 0.11245870590209961 }, { "epoch": 1.169891357421875e-05, "model_forward_time": 0.025198698043823242, "step": 7667 }, { "epoch": 1.169891357421875e-05, "step": 7667, "training_step_time": 0.10982131958007812 }, { "epoch": 1.1700439453125e-05, "model_forward_time": 0.025183677673339844, "step": 7668 }, { "epoch": 1.1700439453125e-05, "step": 7668, "training_step_time": 0.10924649238586426 }, { "epoch": 1.170196533203125e-05, "model_forward_time": 0.02520608901977539, "step": 7669 }, { "epoch": 1.170196533203125e-05, "step": 7669, "training_step_time": 0.10728096961975098 }, { "epoch": 1.17034912109375e-05, "grad_norm": 0.5160402655601501, "learning_rate": 8.887463487459742e-05, "loss": 0.0618, "step": 7670 }, { "epoch": 1.17034912109375e-05, "model_forward_time": 0.025020837783813477, "step": 7670 }, { "epoch": 1.17034912109375e-05, "step": 7670, "training_step_time": 0.10979628562927246 }, { "epoch": 1.170501708984375e-05, "model_forward_time": 0.025074005126953125, "step": 7671 }, { "epoch": 1.170501708984375e-05, "step": 7671, "training_step_time": 0.11176061630249023 }, { "epoch": 1.170654296875e-05, "model_forward_time": 0.025228261947631836, "step": 7672 }, { "epoch": 1.170654296875e-05, "step": 7672, "training_step_time": 0.1084904670715332 }, { "epoch": 1.170806884765625e-05, "model_forward_time": 0.025189876556396484, "step": 7673 }, { "epoch": 1.170806884765625e-05, "step": 7673, "training_step_time": 0.10861968994140625 }, { "epoch": 1.17095947265625e-05, "model_forward_time": 0.02464747428894043, "step": 7674 }, { "epoch": 1.17095947265625e-05, "step": 7674, "training_step_time": 0.11990880966186523 }, { "epoch": 1.171112060546875e-05, "model_forward_time": 0.024719953536987305, "step": 7675 }, { "epoch": 1.171112060546875e-05, "step": 7675, "training_step_time": 0.22686266899108887 }, { "epoch": 1.1712646484375e-05, "model_forward_time": 0.024109363555908203, "step": 7676 }, { "epoch": 1.1712646484375e-05, "step": 7676, "training_step_time": 0.1515214443206787 }, { "epoch": 1.171417236328125e-05, "model_forward_time": 0.024283647537231445, "step": 7677 }, { "epoch": 1.171417236328125e-05, "step": 7677, "training_step_time": 0.1644735336303711 }, { "epoch": 1.17156982421875e-05, "model_forward_time": 0.024814844131469727, "step": 7678 }, { "epoch": 1.17156982421875e-05, "step": 7678, "training_step_time": 0.13182544708251953 }, { "epoch": 1.171722412109375e-05, "model_forward_time": 0.025079727172851562, "step": 7679 }, { "epoch": 1.171722412109375e-05, "step": 7679, "training_step_time": 0.12630176544189453 }, { "epoch": 1.171875e-05, "grad_norm": 0.7796112298965454, "learning_rate": 8.883994946727849e-05, "loss": 0.0631, "step": 7680 }, { "epoch": 1.171875e-05, "model_forward_time": 0.02504730224609375, "step": 7680 }, { "epoch": 1.171875e-05, "step": 7680, "training_step_time": 0.12173128128051758 }, { "epoch": 1.172027587890625e-05, "model_forward_time": 0.025322914123535156, "step": 7681 }, { "epoch": 1.172027587890625e-05, "step": 7681, "training_step_time": 0.11916947364807129 }, { "epoch": 1.17218017578125e-05, "model_forward_time": 0.024396419525146484, "step": 7682 }, { "epoch": 1.17218017578125e-05, "step": 7682, "training_step_time": 0.14658784866333008 }, { "epoch": 1.172332763671875e-05, "model_forward_time": 0.024469614028930664, "step": 7683 }, { "epoch": 1.172332763671875e-05, "step": 7683, "training_step_time": 0.11020231246948242 }, { "epoch": 1.1724853515625e-05, "model_forward_time": 0.025097370147705078, "step": 7684 }, { "epoch": 1.1724853515625e-05, "step": 7684, "training_step_time": 0.11536622047424316 }, { "epoch": 1.172637939453125e-05, "model_forward_time": 0.024798870086669922, "step": 7685 }, { "epoch": 1.172637939453125e-05, "step": 7685, "training_step_time": 0.11691856384277344 }, { "epoch": 1.17279052734375e-05, "model_forward_time": 0.025141239166259766, "step": 7686 }, { "epoch": 1.17279052734375e-05, "step": 7686, "training_step_time": 0.13804841041564941 }, { "epoch": 1.172943115234375e-05, "model_forward_time": 0.024703502655029297, "step": 7687 }, { "epoch": 1.172943115234375e-05, "step": 7687, "training_step_time": 0.11431169509887695 }, { "epoch": 1.173095703125e-05, "model_forward_time": 0.025166988372802734, "step": 7688 }, { "epoch": 1.173095703125e-05, "step": 7688, "training_step_time": 0.11120200157165527 }, { "epoch": 1.173248291015625e-05, "model_forward_time": 0.024980783462524414, "step": 7689 }, { "epoch": 1.173248291015625e-05, "step": 7689, "training_step_time": 0.10812258720397949 }, { "epoch": 1.17340087890625e-05, "grad_norm": 1.3057737350463867, "learning_rate": 8.880521686575857e-05, "loss": 0.0636, "step": 7690 }, { "epoch": 1.17340087890625e-05, "model_forward_time": 0.02374267578125, "step": 7690 }, { "epoch": 1.17340087890625e-05, "step": 7690, "training_step_time": 0.10821175575256348 }, { "epoch": 1.173553466796875e-05, "model_forward_time": 0.02387523651123047, "step": 7691 }, { "epoch": 1.173553466796875e-05, "step": 7691, "training_step_time": 0.11259889602661133 }, { "epoch": 1.1737060546875e-05, "model_forward_time": 0.02520895004272461, "step": 7692 }, { "epoch": 1.1737060546875e-05, "step": 7692, "training_step_time": 0.20957446098327637 }, { "epoch": 1.173858642578125e-05, "model_forward_time": 0.024709701538085938, "step": 7693 }, { "epoch": 1.173858642578125e-05, "step": 7693, "training_step_time": 0.1916193962097168 }, { "epoch": 1.17401123046875e-05, "model_forward_time": 0.024043798446655273, "step": 7694 }, { "epoch": 1.17401123046875e-05, "step": 7694, "training_step_time": 0.17976117134094238 }, { "epoch": 1.174163818359375e-05, "model_forward_time": 0.024140119552612305, "step": 7695 }, { "epoch": 1.174163818359375e-05, "step": 7695, "training_step_time": 0.17760729789733887 }, { "epoch": 1.17431640625e-05, "model_forward_time": 0.024209260940551758, "step": 7696 }, { "epoch": 1.17431640625e-05, "step": 7696, "training_step_time": 0.18854856491088867 }, { "epoch": 1.174468994140625e-05, "model_forward_time": 0.025072097778320312, "step": 7697 }, { "epoch": 1.174468994140625e-05, "step": 7697, "training_step_time": 0.14582109451293945 }, { "epoch": 1.17462158203125e-05, "model_forward_time": 0.023628950119018555, "step": 7698 }, { "epoch": 1.17462158203125e-05, "step": 7698, "training_step_time": 0.12953686714172363 }, { "epoch": 1.174774169921875e-05, "model_forward_time": 0.023647546768188477, "step": 7699 }, { "epoch": 1.174774169921875e-05, "step": 7699, "training_step_time": 0.19784903526306152 }, { "epoch": 1.1749267578125e-05, "grad_norm": 0.541006326675415, "learning_rate": 8.877043711224108e-05, "loss": 0.0444, "step": 7700 }, { "epoch": 1.1749267578125e-05, "model_forward_time": 0.024860858917236328, "step": 7700 }, { "epoch": 1.1749267578125e-05, "step": 7700, "training_step_time": 0.1337127685546875 }, { "epoch": 1.175079345703125e-05, "model_forward_time": 0.024201631546020508, "step": 7701 }, { "epoch": 1.175079345703125e-05, "step": 7701, "training_step_time": 0.11581540107727051 }, { "epoch": 1.17523193359375e-05, "model_forward_time": 0.026084423065185547, "step": 7702 }, { "epoch": 1.17523193359375e-05, "step": 7702, "training_step_time": 0.11082935333251953 }, { "epoch": 1.175384521484375e-05, "model_forward_time": 0.025341510772705078, "step": 7703 }, { "epoch": 1.175384521484375e-05, "step": 7703, "training_step_time": 0.11129283905029297 }, { "epoch": 1.175537109375e-05, "model_forward_time": 0.02535414695739746, "step": 7704 }, { "epoch": 1.175537109375e-05, "step": 7704, "training_step_time": 0.18598604202270508 }, { "epoch": 1.175689697265625e-05, "model_forward_time": 0.02468252182006836, "step": 7705 }, { "epoch": 1.175689697265625e-05, "step": 7705, "training_step_time": 0.12248086929321289 }, { "epoch": 1.17584228515625e-05, "model_forward_time": 0.024626970291137695, "step": 7706 }, { "epoch": 1.17584228515625e-05, "step": 7706, "training_step_time": 0.10551333427429199 }, { "epoch": 1.175994873046875e-05, "model_forward_time": 0.025740861892700195, "step": 7707 }, { "epoch": 1.175994873046875e-05, "step": 7707, "training_step_time": 0.10759902000427246 }, { "epoch": 1.1761474609375e-05, "model_forward_time": 0.025030136108398438, "step": 7708 }, { "epoch": 1.1761474609375e-05, "step": 7708, "training_step_time": 0.1120293140411377 }, { "epoch": 1.176300048828125e-05, "model_forward_time": 0.024737834930419922, "step": 7709 }, { "epoch": 1.176300048828125e-05, "step": 7709, "training_step_time": 0.10749530792236328 }, { "epoch": 1.17645263671875e-05, "grad_norm": 0.8377155065536499, "learning_rate": 8.873561024898668e-05, "loss": 0.0627, "step": 7710 }, { "epoch": 1.17645263671875e-05, "model_forward_time": 0.025483369827270508, "step": 7710 }, { "epoch": 1.17645263671875e-05, "step": 7710, "training_step_time": 0.10772919654846191 }, { "epoch": 1.176605224609375e-05, "model_forward_time": 0.02534198760986328, "step": 7711 }, { "epoch": 1.176605224609375e-05, "step": 7711, "training_step_time": 0.11078071594238281 }, { "epoch": 1.1767578125e-05, "model_forward_time": 0.025656700134277344, "step": 7712 }, { "epoch": 1.1767578125e-05, "step": 7712, "training_step_time": 0.11186408996582031 }, { "epoch": 1.176910400390625e-05, "model_forward_time": 0.025247812271118164, "step": 7713 }, { "epoch": 1.176910400390625e-05, "step": 7713, "training_step_time": 0.10705709457397461 }, { "epoch": 1.17706298828125e-05, "model_forward_time": 0.02513599395751953, "step": 7714 }, { "epoch": 1.17706298828125e-05, "step": 7714, "training_step_time": 0.10616803169250488 }, { "epoch": 1.177215576171875e-05, "model_forward_time": 0.02587413787841797, "step": 7715 }, { "epoch": 1.177215576171875e-05, "step": 7715, "training_step_time": 0.10958504676818848 }, { "epoch": 1.1773681640625e-05, "model_forward_time": 0.02522873878479004, "step": 7716 }, { "epoch": 1.1773681640625e-05, "step": 7716, "training_step_time": 0.10745811462402344 }, { "epoch": 1.177520751953125e-05, "model_forward_time": 0.024819612503051758, "step": 7717 }, { "epoch": 1.177520751953125e-05, "step": 7717, "training_step_time": 0.10750555992126465 }, { "epoch": 1.17767333984375e-05, "model_forward_time": 0.025099515914916992, "step": 7718 }, { "epoch": 1.17767333984375e-05, "step": 7718, "training_step_time": 0.11246013641357422 }, { "epoch": 1.177825927734375e-05, "model_forward_time": 0.02562546730041504, "step": 7719 }, { "epoch": 1.177825927734375e-05, "step": 7719, "training_step_time": 0.1166541576385498 }, { "epoch": 1.177978515625e-05, "grad_norm": 0.4356689155101776, "learning_rate": 8.87007363183133e-05, "loss": 0.05, "step": 7720 }, { "epoch": 1.177978515625e-05, "model_forward_time": 0.025404691696166992, "step": 7720 }, { "epoch": 1.177978515625e-05, "step": 7720, "training_step_time": 0.11284756660461426 }, { "epoch": 1.178131103515625e-05, "model_forward_time": 0.025376081466674805, "step": 7721 }, { "epoch": 1.178131103515625e-05, "step": 7721, "training_step_time": 0.211378812789917 }, { "epoch": 1.17828369140625e-05, "model_forward_time": 0.02438521385192871, "step": 7722 }, { "epoch": 1.17828369140625e-05, "step": 7722, "training_step_time": 0.11699438095092773 }, { "epoch": 1.178436279296875e-05, "model_forward_time": 0.02436351776123047, "step": 7723 }, { "epoch": 1.178436279296875e-05, "step": 7723, "training_step_time": 0.1054234504699707 }, { "epoch": 1.1785888671875e-05, "model_forward_time": 0.025081157684326172, "step": 7724 }, { "epoch": 1.1785888671875e-05, "step": 7724, "training_step_time": 0.10701322555541992 }, { "epoch": 1.178741455078125e-05, "model_forward_time": 0.025431156158447266, "step": 7725 }, { "epoch": 1.178741455078125e-05, "step": 7725, "training_step_time": 0.10744166374206543 }, { "epoch": 1.17889404296875e-05, "model_forward_time": 0.02550959587097168, "step": 7726 }, { "epoch": 1.17889404296875e-05, "step": 7726, "training_step_time": 0.16896700859069824 }, { "epoch": 1.179046630859375e-05, "model_forward_time": 0.024020671844482422, "step": 7727 }, { "epoch": 1.179046630859375e-05, "step": 7727, "training_step_time": 0.11316370964050293 }, { "epoch": 1.17919921875e-05, "model_forward_time": 0.02482128143310547, "step": 7728 }, { "epoch": 1.17919921875e-05, "step": 7728, "training_step_time": 0.10796618461608887 }, { "epoch": 1.179351806640625e-05, "model_forward_time": 0.02555537223815918, "step": 7729 }, { "epoch": 1.179351806640625e-05, "step": 7729, "training_step_time": 0.11862707138061523 }, { "epoch": 1.17950439453125e-05, "grad_norm": 0.318733274936676, "learning_rate": 8.866581536259605e-05, "loss": 0.0583, "step": 7730 }, { "epoch": 1.17950439453125e-05, "model_forward_time": 0.02557849884033203, "step": 7730 }, { "epoch": 1.17950439453125e-05, "step": 7730, "training_step_time": 0.1253345012664795 }, { "epoch": 1.179656982421875e-05, "model_forward_time": 0.02507305145263672, "step": 7731 }, { "epoch": 1.179656982421875e-05, "step": 7731, "training_step_time": 0.1106255054473877 }, { "epoch": 1.1798095703125e-05, "model_forward_time": 0.02522444725036621, "step": 7732 }, { "epoch": 1.1798095703125e-05, "step": 7732, "training_step_time": 0.11439371109008789 }, { "epoch": 1.179962158203125e-05, "model_forward_time": 0.025400876998901367, "step": 7733 }, { "epoch": 1.179962158203125e-05, "step": 7733, "training_step_time": 0.11259913444519043 }, { "epoch": 1.18011474609375e-05, "model_forward_time": 0.024643421173095703, "step": 7734 }, { "epoch": 1.18011474609375e-05, "step": 7734, "training_step_time": 0.11178755760192871 }, { "epoch": 1.180267333984375e-05, "model_forward_time": 0.02538776397705078, "step": 7735 }, { "epoch": 1.180267333984375e-05, "step": 7735, "training_step_time": 0.10647177696228027 }, { "epoch": 1.180419921875e-05, "model_forward_time": 0.025136947631835938, "step": 7736 }, { "epoch": 1.180419921875e-05, "step": 7736, "training_step_time": 0.13620376586914062 }, { "epoch": 1.180572509765625e-05, "model_forward_time": 0.025172710418701172, "step": 7737 }, { "epoch": 1.180572509765625e-05, "step": 7737, "training_step_time": 0.10985875129699707 }, { "epoch": 1.18072509765625e-05, "model_forward_time": 0.024355649948120117, "step": 7738 }, { "epoch": 1.18072509765625e-05, "step": 7738, "training_step_time": 0.13913607597351074 }, { "epoch": 1.180877685546875e-05, "model_forward_time": 0.024984121322631836, "step": 7739 }, { "epoch": 1.180877685546875e-05, "step": 7739, "training_step_time": 0.1537153720855713 }, { "epoch": 1.1810302734375e-05, "grad_norm": 0.5249224901199341, "learning_rate": 8.863084742426719e-05, "loss": 0.0526, "step": 7740 }, { "epoch": 1.1810302734375e-05, "model_forward_time": 0.024425029754638672, "step": 7740 }, { "epoch": 1.1810302734375e-05, "step": 7740, "training_step_time": 0.21947836875915527 }, { "epoch": 1.181182861328125e-05, "model_forward_time": 0.025855302810668945, "step": 7741 }, { "epoch": 1.181182861328125e-05, "step": 7741, "training_step_time": 0.12151646614074707 }, { "epoch": 1.18133544921875e-05, "model_forward_time": 0.023589611053466797, "step": 7742 }, { "epoch": 1.18133544921875e-05, "step": 7742, "training_step_time": 0.1079866886138916 }, { "epoch": 1.181488037109375e-05, "model_forward_time": 0.02535223960876465, "step": 7743 }, { "epoch": 1.181488037109375e-05, "step": 7743, "training_step_time": 0.10709810256958008 }, { "epoch": 1.181640625e-05, "model_forward_time": 0.027086257934570312, "step": 7744 }, { "epoch": 1.181640625e-05, "step": 7744, "training_step_time": 0.11165380477905273 }, { "epoch": 1.181793212890625e-05, "model_forward_time": 0.02668166160583496, "step": 7745 }, { "epoch": 1.181793212890625e-05, "step": 7745, "training_step_time": 0.12515664100646973 }, { "epoch": 1.18194580078125e-05, "model_forward_time": 0.024979114532470703, "step": 7746 }, { "epoch": 1.18194580078125e-05, "step": 7746, "training_step_time": 0.12214446067810059 }, { "epoch": 1.182098388671875e-05, "model_forward_time": 0.025182485580444336, "step": 7747 }, { "epoch": 1.182098388671875e-05, "step": 7747, "training_step_time": 0.12251853942871094 }, { "epoch": 1.1822509765625e-05, "model_forward_time": 0.02502608299255371, "step": 7748 }, { "epoch": 1.1822509765625e-05, "step": 7748, "training_step_time": 0.20865368843078613 }, { "epoch": 1.182403564453125e-05, "model_forward_time": 0.024486541748046875, "step": 7749 }, { "epoch": 1.182403564453125e-05, "step": 7749, "training_step_time": 0.11443901062011719 }, { "epoch": 1.18255615234375e-05, "grad_norm": 0.693023681640625, "learning_rate": 8.859583254581605e-05, "loss": 0.0467, "step": 7750 }, { "epoch": 1.18255615234375e-05, "model_forward_time": 0.024216175079345703, "step": 7750 }, { "epoch": 1.18255615234375e-05, "step": 7750, "training_step_time": 0.1847379207611084 }, { "epoch": 1.182708740234375e-05, "model_forward_time": 0.024264812469482422, "step": 7751 }, { "epoch": 1.182708740234375e-05, "step": 7751, "training_step_time": 0.10838794708251953 }, { "epoch": 1.182861328125e-05, "model_forward_time": 0.024150609970092773, "step": 7752 }, { "epoch": 1.182861328125e-05, "step": 7752, "training_step_time": 0.10871672630310059 }, { "epoch": 1.183013916015625e-05, "model_forward_time": 0.025004148483276367, "step": 7753 }, { "epoch": 1.183013916015625e-05, "step": 7753, "training_step_time": 0.10836505889892578 }, { "epoch": 1.18316650390625e-05, "model_forward_time": 0.02517223358154297, "step": 7754 }, { "epoch": 1.18316650390625e-05, "step": 7754, "training_step_time": 0.10822820663452148 }, { "epoch": 1.183319091796875e-05, "model_forward_time": 0.024975061416625977, "step": 7755 }, { "epoch": 1.183319091796875e-05, "step": 7755, "training_step_time": 0.10958290100097656 }, { "epoch": 1.1834716796875e-05, "model_forward_time": 0.025902271270751953, "step": 7756 }, { "epoch": 1.1834716796875e-05, "step": 7756, "training_step_time": 0.11475801467895508 }, { "epoch": 1.183624267578125e-05, "model_forward_time": 0.02540278434753418, "step": 7757 }, { "epoch": 1.183624267578125e-05, "step": 7757, "training_step_time": 0.11082601547241211 }, { "epoch": 1.18377685546875e-05, "model_forward_time": 0.02541375160217285, "step": 7758 }, { "epoch": 1.18377685546875e-05, "step": 7758, "training_step_time": 0.10836911201477051 }, { "epoch": 1.183929443359375e-05, "model_forward_time": 0.025089740753173828, "step": 7759 }, { "epoch": 1.183929443359375e-05, "step": 7759, "training_step_time": 0.1086115837097168 }, { "epoch": 1.18408203125e-05, "grad_norm": 0.4121415317058563, "learning_rate": 8.856077076978902e-05, "loss": 0.0576, "step": 7760 }, { "epoch": 1.18408203125e-05, "model_forward_time": 0.024975299835205078, "step": 7760 }, { "epoch": 1.18408203125e-05, "step": 7760, "training_step_time": 0.10617828369140625 }, { "epoch": 1.184234619140625e-05, "model_forward_time": 0.025573253631591797, "step": 7761 }, { "epoch": 1.184234619140625e-05, "step": 7761, "training_step_time": 0.10631155967712402 }, { "epoch": 1.18438720703125e-05, "model_forward_time": 0.025957584381103516, "step": 7762 }, { "epoch": 1.18438720703125e-05, "step": 7762, "training_step_time": 0.10744476318359375 }, { "epoch": 1.184539794921875e-05, "model_forward_time": 0.025270938873291016, "step": 7763 }, { "epoch": 1.184539794921875e-05, "step": 7763, "training_step_time": 0.10683321952819824 }, { "epoch": 1.1846923828125e-05, "model_forward_time": 0.025553226470947266, "step": 7764 }, { "epoch": 1.1846923828125e-05, "step": 7764, "training_step_time": 0.12814974784851074 }, { "epoch": 1.184844970703125e-05, "model_forward_time": 0.025400638580322266, "step": 7765 }, { "epoch": 1.184844970703125e-05, "step": 7765, "training_step_time": 0.11033129692077637 }, { "epoch": 1.18499755859375e-05, "model_forward_time": 0.02535390853881836, "step": 7766 }, { "epoch": 1.18499755859375e-05, "step": 7766, "training_step_time": 0.11047124862670898 }, { "epoch": 1.185150146484375e-05, "model_forward_time": 0.025304079055786133, "step": 7767 }, { "epoch": 1.185150146484375e-05, "step": 7767, "training_step_time": 0.215501070022583 }, { "epoch": 1.185302734375e-05, "model_forward_time": 0.02460789680480957, "step": 7768 }, { "epoch": 1.185302734375e-05, "step": 7768, "training_step_time": 0.11265802383422852 }, { "epoch": 1.185455322265625e-05, "model_forward_time": 0.024341821670532227, "step": 7769 }, { "epoch": 1.185455322265625e-05, "step": 7769, "training_step_time": 0.10685324668884277 }, { "epoch": 1.18560791015625e-05, "grad_norm": 0.3426803648471832, "learning_rate": 8.852566213878947e-05, "loss": 0.0496, "step": 7770 }, { "epoch": 1.18560791015625e-05, "model_forward_time": 0.024798154830932617, "step": 7770 }, { "epoch": 1.18560791015625e-05, "step": 7770, "training_step_time": 0.16218066215515137 }, { "epoch": 1.185760498046875e-05, "model_forward_time": 0.024543285369873047, "step": 7771 }, { "epoch": 1.185760498046875e-05, "step": 7771, "training_step_time": 0.19092059135437012 }, { "epoch": 1.1859130859375e-05, "model_forward_time": 0.024196624755859375, "step": 7772 }, { "epoch": 1.1859130859375e-05, "step": 7772, "training_step_time": 0.17877650260925293 }, { "epoch": 1.186065673828125e-05, "model_forward_time": 0.024291515350341797, "step": 7773 }, { "epoch": 1.186065673828125e-05, "step": 7773, "training_step_time": 0.213623046875 }, { "epoch": 1.18621826171875e-05, "model_forward_time": 0.024327516555786133, "step": 7774 }, { "epoch": 1.18621826171875e-05, "step": 7774, "training_step_time": 0.12874150276184082 }, { "epoch": 1.186370849609375e-05, "model_forward_time": 0.024349212646484375, "step": 7775 }, { "epoch": 1.186370849609375e-05, "step": 7775, "training_step_time": 0.10973215103149414 }, { "epoch": 1.1865234375e-05, "model_forward_time": 0.025148391723632812, "step": 7776 }, { "epoch": 1.1865234375e-05, "step": 7776, "training_step_time": 0.10922837257385254 }, { "epoch": 1.186676025390625e-05, "model_forward_time": 0.026033401489257812, "step": 7777 }, { "epoch": 1.186676025390625e-05, "step": 7777, "training_step_time": 0.11815357208251953 }, { "epoch": 1.18682861328125e-05, "model_forward_time": 0.025121688842773438, "step": 7778 }, { "epoch": 1.18682861328125e-05, "step": 7778, "training_step_time": 0.10730695724487305 }, { "epoch": 1.186981201171875e-05, "model_forward_time": 0.025010347366333008, "step": 7779 }, { "epoch": 1.186981201171875e-05, "step": 7779, "training_step_time": 0.11321854591369629 }, { "epoch": 1.1871337890625e-05, "grad_norm": 0.4797683656215668, "learning_rate": 8.849050669547768e-05, "loss": 0.0631, "step": 7780 }, { "epoch": 1.1871337890625e-05, "model_forward_time": 0.02532815933227539, "step": 7780 }, { "epoch": 1.1871337890625e-05, "step": 7780, "training_step_time": 0.1437220573425293 }, { "epoch": 1.187286376953125e-05, "model_forward_time": 0.025399446487426758, "step": 7781 }, { "epoch": 1.187286376953125e-05, "step": 7781, "training_step_time": 0.11267495155334473 }, { "epoch": 1.18743896484375e-05, "model_forward_time": 0.024983644485473633, "step": 7782 }, { "epoch": 1.18743896484375e-05, "step": 7782, "training_step_time": 0.19978928565979004 }, { "epoch": 1.187591552734375e-05, "model_forward_time": 0.024271011352539062, "step": 7783 }, { "epoch": 1.187591552734375e-05, "step": 7783, "training_step_time": 0.20637893676757812 }, { "epoch": 1.187744140625e-05, "model_forward_time": 0.024370670318603516, "step": 7784 }, { "epoch": 1.187744140625e-05, "step": 7784, "training_step_time": 0.20615744590759277 }, { "epoch": 1.187896728515625e-05, "model_forward_time": 0.023543357849121094, "step": 7785 }, { "epoch": 1.187896728515625e-05, "step": 7785, "training_step_time": 0.18914556503295898 }, { "epoch": 1.18804931640625e-05, "model_forward_time": 0.024684429168701172, "step": 7786 }, { "epoch": 1.18804931640625e-05, "step": 7786, "training_step_time": 0.10544657707214355 }, { "epoch": 1.188201904296875e-05, "model_forward_time": 0.024766206741333008, "step": 7787 }, { "epoch": 1.188201904296875e-05, "step": 7787, "training_step_time": 0.14011359214782715 }, { "epoch": 1.1883544921875e-05, "model_forward_time": 0.025393247604370117, "step": 7788 }, { "epoch": 1.1883544921875e-05, "step": 7788, "training_step_time": 0.1330416202545166 }, { "epoch": 1.188507080078125e-05, "model_forward_time": 0.024828672409057617, "step": 7789 }, { "epoch": 1.188507080078125e-05, "step": 7789, "training_step_time": 0.11042213439941406 }, { "epoch": 1.18865966796875e-05, "grad_norm": 0.6274198889732361, "learning_rate": 8.845530448257085e-05, "loss": 0.0545, "step": 7790 }, { "epoch": 1.18865966796875e-05, "model_forward_time": 0.0285036563873291, "step": 7790 }, { "epoch": 1.18865966796875e-05, "step": 7790, "training_step_time": 0.11602091789245605 }, { "epoch": 1.188812255859375e-05, "model_forward_time": 0.02609086036682129, "step": 7791 }, { "epoch": 1.188812255859375e-05, "step": 7791, "training_step_time": 0.10823225975036621 }, { "epoch": 1.18896484375e-05, "model_forward_time": 0.026993274688720703, "step": 7792 }, { "epoch": 1.18896484375e-05, "step": 7792, "training_step_time": 0.17877626419067383 }, { "epoch": 1.189117431640625e-05, "model_forward_time": 0.025026321411132812, "step": 7793 }, { "epoch": 1.189117431640625e-05, "step": 7793, "training_step_time": 0.20397710800170898 }, { "epoch": 1.18927001953125e-05, "model_forward_time": 0.024626493453979492, "step": 7794 }, { "epoch": 1.18927001953125e-05, "step": 7794, "training_step_time": 0.19957852363586426 }, { "epoch": 1.189422607421875e-05, "model_forward_time": 0.02411341667175293, "step": 7795 }, { "epoch": 1.189422607421875e-05, "step": 7795, "training_step_time": 0.19173884391784668 }, { "epoch": 1.1895751953125e-05, "model_forward_time": 0.025067567825317383, "step": 7796 }, { "epoch": 1.1895751953125e-05, "step": 7796, "training_step_time": 0.17390775680541992 }, { "epoch": 1.189727783203125e-05, "model_forward_time": 0.02463245391845703, "step": 7797 }, { "epoch": 1.189727783203125e-05, "step": 7797, "training_step_time": 0.17256402969360352 }, { "epoch": 1.18988037109375e-05, "model_forward_time": 0.0251924991607666, "step": 7798 }, { "epoch": 1.18988037109375e-05, "step": 7798, "training_step_time": 0.11508059501647949 }, { "epoch": 1.190032958984375e-05, "model_forward_time": 0.025096654891967773, "step": 7799 }, { "epoch": 1.190032958984375e-05, "step": 7799, "training_step_time": 0.10608148574829102 }, { "epoch": 1.190185546875e-05, "grad_norm": 0.5842939615249634, "learning_rate": 8.842005554284296e-05, "loss": 0.0455, "step": 7800 }, { "epoch": 1.190185546875e-05, "model_forward_time": 0.025037288665771484, "step": 7800 }, { "epoch": 1.190185546875e-05, "step": 7800, "training_step_time": 0.10821127891540527 }, { "epoch": 1.190338134765625e-05, "model_forward_time": 0.025350093841552734, "step": 7801 }, { "epoch": 1.190338134765625e-05, "step": 7801, "training_step_time": 0.10711550712585449 }, { "epoch": 1.19049072265625e-05, "model_forward_time": 0.02607274055480957, "step": 7802 }, { "epoch": 1.19049072265625e-05, "step": 7802, "training_step_time": 0.10849118232727051 }, { "epoch": 1.190643310546875e-05, "model_forward_time": 0.025176286697387695, "step": 7803 }, { "epoch": 1.190643310546875e-05, "step": 7803, "training_step_time": 0.10977053642272949 }, { "epoch": 1.1907958984375e-05, "model_forward_time": 0.0251772403717041, "step": 7804 }, { "epoch": 1.1907958984375e-05, "step": 7804, "training_step_time": 0.10600566864013672 }, { "epoch": 1.190948486328125e-05, "model_forward_time": 0.025789260864257812, "step": 7805 }, { "epoch": 1.190948486328125e-05, "step": 7805, "training_step_time": 0.1127021312713623 }, { "epoch": 1.19110107421875e-05, "model_forward_time": 0.02523040771484375, "step": 7806 }, { "epoch": 1.19110107421875e-05, "step": 7806, "training_step_time": 0.10910892486572266 }, { "epoch": 1.191253662109375e-05, "model_forward_time": 0.025269031524658203, "step": 7807 }, { "epoch": 1.191253662109375e-05, "step": 7807, "training_step_time": 0.1363232135772705 }, { "epoch": 1.19140625e-05, "model_forward_time": 0.02669501304626465, "step": 7808 }, { "epoch": 1.19140625e-05, "step": 7808, "training_step_time": 0.11250948905944824 }, { "epoch": 1.191558837890625e-05, "model_forward_time": 0.02630138397216797, "step": 7809 }, { "epoch": 1.191558837890625e-05, "step": 7809, "training_step_time": 0.11458873748779297 }, { "epoch": 1.19171142578125e-05, "grad_norm": 0.3187682330608368, "learning_rate": 8.838475991912482e-05, "loss": 0.051, "step": 7810 }, { "epoch": 1.19171142578125e-05, "model_forward_time": 0.02519369125366211, "step": 7810 }, { "epoch": 1.19171142578125e-05, "step": 7810, "training_step_time": 0.10689353942871094 }, { "epoch": 1.191864013671875e-05, "model_forward_time": 0.026935338973999023, "step": 7811 }, { "epoch": 1.191864013671875e-05, "step": 7811, "training_step_time": 0.11625051498413086 }, { "epoch": 1.1920166015625e-05, "model_forward_time": 0.0253448486328125, "step": 7812 }, { "epoch": 1.1920166015625e-05, "step": 7812, "training_step_time": 0.1224210262298584 }, { "epoch": 1.192169189453125e-05, "model_forward_time": 0.0252840518951416, "step": 7813 }, { "epoch": 1.192169189453125e-05, "step": 7813, "training_step_time": 0.1571178436279297 }, { "epoch": 1.19232177734375e-05, "model_forward_time": 0.025163650512695312, "step": 7814 }, { "epoch": 1.19232177734375e-05, "step": 7814, "training_step_time": 0.11394858360290527 }, { "epoch": 1.192474365234375e-05, "model_forward_time": 0.024710416793823242, "step": 7815 }, { "epoch": 1.192474365234375e-05, "step": 7815, "training_step_time": 0.1816692352294922 }, { "epoch": 1.192626953125e-05, "model_forward_time": 0.024820327758789062, "step": 7816 }, { "epoch": 1.192626953125e-05, "step": 7816, "training_step_time": 0.173844575881958 }, { "epoch": 1.192779541015625e-05, "model_forward_time": 0.02471446990966797, "step": 7817 }, { "epoch": 1.192779541015625e-05, "step": 7817, "training_step_time": 0.11200594902038574 }, { "epoch": 1.19293212890625e-05, "model_forward_time": 0.024959564208984375, "step": 7818 }, { "epoch": 1.19293212890625e-05, "step": 7818, "training_step_time": 0.12364673614501953 }, { "epoch": 1.193084716796875e-05, "model_forward_time": 0.025573253631591797, "step": 7819 }, { "epoch": 1.193084716796875e-05, "step": 7819, "training_step_time": 0.10993337631225586 }, { "epoch": 1.1932373046875e-05, "grad_norm": 0.41468310356140137, "learning_rate": 8.834941765430391e-05, "loss": 0.0337, "step": 7820 }, { "epoch": 1.1932373046875e-05, "model_forward_time": 0.025514841079711914, "step": 7820 }, { "epoch": 1.1932373046875e-05, "step": 7820, "training_step_time": 0.2273268699645996 }, { "epoch": 1.193389892578125e-05, "model_forward_time": 0.02413487434387207, "step": 7821 }, { "epoch": 1.193389892578125e-05, "step": 7821, "training_step_time": 0.12456512451171875 }, { "epoch": 1.19354248046875e-05, "model_forward_time": 0.02417469024658203, "step": 7822 }, { "epoch": 1.19354248046875e-05, "step": 7822, "training_step_time": 0.11142587661743164 }, { "epoch": 1.193695068359375e-05, "model_forward_time": 0.0249483585357666, "step": 7823 }, { "epoch": 1.193695068359375e-05, "step": 7823, "training_step_time": 0.12179327011108398 }, { "epoch": 1.19384765625e-05, "model_forward_time": 0.024895906448364258, "step": 7824 }, { "epoch": 1.19384765625e-05, "step": 7824, "training_step_time": 0.17485642433166504 }, { "epoch": 1.194000244140625e-05, "model_forward_time": 0.024909257888793945, "step": 7825 }, { "epoch": 1.194000244140625e-05, "step": 7825, "training_step_time": 0.12551093101501465 }, { "epoch": 1.19415283203125e-05, "model_forward_time": 0.0243227481842041, "step": 7826 }, { "epoch": 1.19415283203125e-05, "step": 7826, "training_step_time": 0.11665582656860352 }, { "epoch": 1.194305419921875e-05, "model_forward_time": 0.025130033493041992, "step": 7827 }, { "epoch": 1.194305419921875e-05, "step": 7827, "training_step_time": 0.10761833190917969 }, { "epoch": 1.1944580078125e-05, "model_forward_time": 0.025203943252563477, "step": 7828 }, { "epoch": 1.1944580078125e-05, "step": 7828, "training_step_time": 0.10996031761169434 }, { "epoch": 1.194610595703125e-05, "model_forward_time": 0.0250852108001709, "step": 7829 }, { "epoch": 1.194610595703125e-05, "step": 7829, "training_step_time": 0.19105124473571777 }, { "epoch": 1.19476318359375e-05, "grad_norm": 0.46187856793403625, "learning_rate": 8.831402879132446e-05, "loss": 0.0479, "step": 7830 }, { "epoch": 1.19476318359375e-05, "model_forward_time": 0.02467966079711914, "step": 7830 }, { "epoch": 1.19476318359375e-05, "step": 7830, "training_step_time": 0.11877274513244629 }, { "epoch": 1.194915771484375e-05, "model_forward_time": 0.024508953094482422, "step": 7831 }, { "epoch": 1.194915771484375e-05, "step": 7831, "training_step_time": 0.11325526237487793 }, { "epoch": 1.195068359375e-05, "model_forward_time": 0.0252685546875, "step": 7832 }, { "epoch": 1.195068359375e-05, "step": 7832, "training_step_time": 0.11606311798095703 }, { "epoch": 1.195220947265625e-05, "model_forward_time": 0.025557994842529297, "step": 7833 }, { "epoch": 1.195220947265625e-05, "step": 7833, "training_step_time": 0.11269259452819824 }, { "epoch": 1.19537353515625e-05, "model_forward_time": 0.02548503875732422, "step": 7834 }, { "epoch": 1.19537353515625e-05, "step": 7834, "training_step_time": 0.10583639144897461 }, { "epoch": 1.195526123046875e-05, "model_forward_time": 0.02512812614440918, "step": 7835 }, { "epoch": 1.195526123046875e-05, "step": 7835, "training_step_time": 0.19674158096313477 }, { "epoch": 1.1956787109375e-05, "model_forward_time": 0.024318218231201172, "step": 7836 }, { "epoch": 1.1956787109375e-05, "step": 7836, "training_step_time": 0.1079404354095459 }, { "epoch": 1.195831298828125e-05, "model_forward_time": 0.02438068389892578, "step": 7837 }, { "epoch": 1.195831298828125e-05, "step": 7837, "training_step_time": 0.1055917739868164 }, { "epoch": 1.19598388671875e-05, "model_forward_time": 0.025700807571411133, "step": 7838 }, { "epoch": 1.19598388671875e-05, "step": 7838, "training_step_time": 0.10809850692749023 }, { "epoch": 1.196136474609375e-05, "model_forward_time": 0.026479244232177734, "step": 7839 }, { "epoch": 1.196136474609375e-05, "step": 7839, "training_step_time": 0.11246728897094727 }, { "epoch": 1.1962890625e-05, "grad_norm": 0.45973441004753113, "learning_rate": 8.827859337318725e-05, "loss": 0.0397, "step": 7840 }, { "epoch": 1.1962890625e-05, "model_forward_time": 0.02504277229309082, "step": 7840 }, { "epoch": 1.1962890625e-05, "step": 7840, "training_step_time": 0.11144638061523438 }, { "epoch": 1.196441650390625e-05, "model_forward_time": 0.025012493133544922, "step": 7841 }, { "epoch": 1.196441650390625e-05, "step": 7841, "training_step_time": 0.10768508911132812 }, { "epoch": 1.19659423828125e-05, "model_forward_time": 0.025186538696289062, "step": 7842 }, { "epoch": 1.19659423828125e-05, "step": 7842, "training_step_time": 0.10861372947692871 }, { "epoch": 1.196746826171875e-05, "model_forward_time": 0.025285959243774414, "step": 7843 }, { "epoch": 1.196746826171875e-05, "step": 7843, "training_step_time": 0.10761666297912598 }, { "epoch": 1.1968994140625e-05, "model_forward_time": 0.024943113327026367, "step": 7844 }, { "epoch": 1.1968994140625e-05, "step": 7844, "training_step_time": 0.10673880577087402 }, { "epoch": 1.197052001953125e-05, "model_forward_time": 0.024587154388427734, "step": 7845 }, { "epoch": 1.197052001953125e-05, "step": 7845, "training_step_time": 0.1075749397277832 }, { "epoch": 1.19720458984375e-05, "model_forward_time": 0.024624109268188477, "step": 7846 }, { "epoch": 1.19720458984375e-05, "step": 7846, "training_step_time": 0.10741400718688965 }, { "epoch": 1.197357177734375e-05, "model_forward_time": 0.025069475173950195, "step": 7847 }, { "epoch": 1.197357177734375e-05, "step": 7847, "training_step_time": 0.11314702033996582 }, { "epoch": 1.197509765625e-05, "model_forward_time": 0.025876998901367188, "step": 7848 }, { "epoch": 1.197509765625e-05, "step": 7848, "training_step_time": 0.1083214282989502 }, { "epoch": 1.197662353515625e-05, "model_forward_time": 0.02499985694885254, "step": 7849 }, { "epoch": 1.197662353515625e-05, "step": 7849, "training_step_time": 0.10786318778991699 }, { "epoch": 1.19781494140625e-05, "grad_norm": 0.5109211206436157, "learning_rate": 8.824311144294965e-05, "loss": 0.0476, "step": 7850 }, { "epoch": 1.19781494140625e-05, "model_forward_time": 0.025148391723632812, "step": 7850 }, { "epoch": 1.19781494140625e-05, "step": 7850, "training_step_time": 0.10919475555419922 }, { "epoch": 1.197967529296875e-05, "model_forward_time": 0.025360107421875, "step": 7851 }, { "epoch": 1.197967529296875e-05, "step": 7851, "training_step_time": 0.11489629745483398 }, { "epoch": 1.1981201171875e-05, "model_forward_time": 0.02649664878845215, "step": 7852 }, { "epoch": 1.1981201171875e-05, "step": 7852, "training_step_time": 0.11010479927062988 }, { "epoch": 1.198272705078125e-05, "model_forward_time": 0.02671504020690918, "step": 7853 }, { "epoch": 1.198272705078125e-05, "step": 7853, "training_step_time": 0.12367057800292969 }, { "epoch": 1.19842529296875e-05, "model_forward_time": 0.025665283203125, "step": 7854 }, { "epoch": 1.19842529296875e-05, "step": 7854, "training_step_time": 0.1161048412322998 }, { "epoch": 1.198577880859375e-05, "model_forward_time": 0.025107622146606445, "step": 7855 }, { "epoch": 1.198577880859375e-05, "step": 7855, "training_step_time": 0.12682318687438965 }, { "epoch": 1.19873046875e-05, "model_forward_time": 0.025748252868652344, "step": 7856 }, { "epoch": 1.19873046875e-05, "step": 7856, "training_step_time": 0.10806560516357422 }, { "epoch": 1.198883056640625e-05, "model_forward_time": 0.025358200073242188, "step": 7857 }, { "epoch": 1.198883056640625e-05, "step": 7857, "training_step_time": 0.10684776306152344 }, { "epoch": 1.19903564453125e-05, "model_forward_time": 0.0252227783203125, "step": 7858 }, { "epoch": 1.19903564453125e-05, "step": 7858, "training_step_time": 0.11107540130615234 }, { "epoch": 1.199188232421875e-05, "model_forward_time": 0.026148319244384766, "step": 7859 }, { "epoch": 1.199188232421875e-05, "step": 7859, "training_step_time": 0.1456146240234375 }, { "epoch": 1.1993408203125e-05, "grad_norm": 0.6275359392166138, "learning_rate": 8.820758304372557e-05, "loss": 0.0587, "step": 7860 }, { "epoch": 1.1993408203125e-05, "model_forward_time": 0.030323028564453125, "step": 7860 }, { "epoch": 1.1993408203125e-05, "step": 7860, "training_step_time": 0.1139078140258789 }, { "epoch": 1.199493408203125e-05, "model_forward_time": 0.024778366088867188, "step": 7861 }, { "epoch": 1.199493408203125e-05, "step": 7861, "training_step_time": 0.17496585845947266 }, { "epoch": 1.19964599609375e-05, "model_forward_time": 0.02480459213256836, "step": 7862 }, { "epoch": 1.19964599609375e-05, "step": 7862, "training_step_time": 0.18030929565429688 }, { "epoch": 1.199798583984375e-05, "model_forward_time": 0.025185585021972656, "step": 7863 }, { "epoch": 1.199798583984375e-05, "step": 7863, "training_step_time": 0.11366701126098633 }, { "epoch": 1.199951171875e-05, "model_forward_time": 0.023967266082763672, "step": 7864 }, { "epoch": 1.199951171875e-05, "step": 7864, "training_step_time": 0.1141812801361084 }, { "epoch": 1.200103759765625e-05, "model_forward_time": 0.025478124618530273, "step": 7865 }, { "epoch": 1.200103759765625e-05, "step": 7865, "training_step_time": 0.11051058769226074 }, { "epoch": 1.20025634765625e-05, "model_forward_time": 0.02510213851928711, "step": 7866 }, { "epoch": 1.20025634765625e-05, "step": 7866, "training_step_time": 0.2274637222290039 }, { "epoch": 1.200408935546875e-05, "model_forward_time": 0.024494647979736328, "step": 7867 }, { "epoch": 1.200408935546875e-05, "step": 7867, "training_step_time": 0.1239631175994873 }, { "epoch": 1.2005615234375e-05, "model_forward_time": 0.025365114212036133, "step": 7868 }, { "epoch": 1.2005615234375e-05, "step": 7868, "training_step_time": 0.13744449615478516 }, { "epoch": 1.200714111328125e-05, "model_forward_time": 0.02488875389099121, "step": 7869 }, { "epoch": 1.200714111328125e-05, "step": 7869, "training_step_time": 0.11133718490600586 }, { "epoch": 1.20086669921875e-05, "grad_norm": 0.638238787651062, "learning_rate": 8.817200821868533e-05, "loss": 0.0617, "step": 7870 }, { "epoch": 1.20086669921875e-05, "model_forward_time": 0.024882078170776367, "step": 7870 }, { "epoch": 1.20086669921875e-05, "step": 7870, "training_step_time": 0.15382027626037598 }, { "epoch": 1.201019287109375e-05, "model_forward_time": 0.026033401489257812, "step": 7871 }, { "epoch": 1.201019287109375e-05, "step": 7871, "training_step_time": 0.12656283378601074 }, { "epoch": 1.201171875e-05, "model_forward_time": 0.024245738983154297, "step": 7872 }, { "epoch": 1.201171875e-05, "step": 7872, "training_step_time": 0.11639904975891113 }, { "epoch": 1.201324462890625e-05, "model_forward_time": 0.025424718856811523, "step": 7873 }, { "epoch": 1.201324462890625e-05, "step": 7873, "training_step_time": 0.10869479179382324 }, { "epoch": 1.20147705078125e-05, "model_forward_time": 0.026077747344970703, "step": 7874 }, { "epoch": 1.20147705078125e-05, "step": 7874, "training_step_time": 0.10886263847351074 }, { "epoch": 1.201629638671875e-05, "model_forward_time": 0.02504420280456543, "step": 7875 }, { "epoch": 1.201629638671875e-05, "step": 7875, "training_step_time": 0.12500286102294922 }, { "epoch": 1.2017822265625e-05, "model_forward_time": 0.02523040771484375, "step": 7876 }, { "epoch": 1.2017822265625e-05, "step": 7876, "training_step_time": 0.11100172996520996 }, { "epoch": 1.201934814453125e-05, "model_forward_time": 0.029355287551879883, "step": 7877 }, { "epoch": 1.201934814453125e-05, "step": 7877, "training_step_time": 0.19710969924926758 }, { "epoch": 1.20208740234375e-05, "model_forward_time": 0.02463984489440918, "step": 7878 }, { "epoch": 1.20208740234375e-05, "step": 7878, "training_step_time": 0.17306756973266602 }, { "epoch": 1.202239990234375e-05, "model_forward_time": 0.02457261085510254, "step": 7879 }, { "epoch": 1.202239990234375e-05, "step": 7879, "training_step_time": 0.18588757514953613 }, { "epoch": 1.202392578125e-05, "grad_norm": 0.2985515594482422, "learning_rate": 8.813638701105573e-05, "loss": 0.0507, "step": 7880 }, { "epoch": 1.202392578125e-05, "model_forward_time": 0.024352312088012695, "step": 7880 }, { "epoch": 1.202392578125e-05, "step": 7880, "training_step_time": 0.16162371635437012 }, { "epoch": 1.202545166015625e-05, "model_forward_time": 0.02441096305847168, "step": 7881 }, { "epoch": 1.202545166015625e-05, "step": 7881, "training_step_time": 0.18004107475280762 }, { "epoch": 1.20269775390625e-05, "model_forward_time": 0.023365020751953125, "step": 7882 }, { "epoch": 1.20269775390625e-05, "step": 7882, "training_step_time": 0.1710350513458252 }, { "epoch": 1.202850341796875e-05, "model_forward_time": 0.023360490798950195, "step": 7883 }, { "epoch": 1.202850341796875e-05, "step": 7883, "training_step_time": 0.1512889862060547 }, { "epoch": 1.2030029296875e-05, "model_forward_time": 0.024245500564575195, "step": 7884 }, { "epoch": 1.2030029296875e-05, "step": 7884, "training_step_time": 0.13660454750061035 }, { "epoch": 1.203155517578125e-05, "model_forward_time": 0.023772001266479492, "step": 7885 }, { "epoch": 1.203155517578125e-05, "step": 7885, "training_step_time": 0.12547993659973145 }, { "epoch": 1.20330810546875e-05, "model_forward_time": 0.02412724494934082, "step": 7886 }, { "epoch": 1.20330810546875e-05, "step": 7886, "training_step_time": 0.11126852035522461 }, { "epoch": 1.203460693359375e-05, "model_forward_time": 0.02537703514099121, "step": 7887 }, { "epoch": 1.203460693359375e-05, "step": 7887, "training_step_time": 0.10500478744506836 }, { "epoch": 1.20361328125e-05, "model_forward_time": 0.02559351921081543, "step": 7888 }, { "epoch": 1.20361328125e-05, "step": 7888, "training_step_time": 0.10680913925170898 }, { "epoch": 1.203765869140625e-05, "model_forward_time": 0.025312423706054688, "step": 7889 }, { "epoch": 1.203765869140625e-05, "step": 7889, "training_step_time": 0.10815954208374023 }, { "epoch": 1.20391845703125e-05, "grad_norm": 0.5479375720024109, "learning_rate": 8.810071946411989e-05, "loss": 0.0429, "step": 7890 }, { "epoch": 1.20391845703125e-05, "model_forward_time": 0.02578592300415039, "step": 7890 }, { "epoch": 1.20391845703125e-05, "step": 7890, "training_step_time": 0.10888934135437012 }, { "epoch": 1.204071044921875e-05, "model_forward_time": 0.026696205139160156, "step": 7891 }, { "epoch": 1.204071044921875e-05, "step": 7891, "training_step_time": 0.10713934898376465 }, { "epoch": 1.2042236328125e-05, "model_forward_time": 0.025906801223754883, "step": 7892 }, { "epoch": 1.2042236328125e-05, "step": 7892, "training_step_time": 0.10468029975891113 }, { "epoch": 1.204376220703125e-05, "model_forward_time": 0.02537989616394043, "step": 7893 }, { "epoch": 1.204376220703125e-05, "step": 7893, "training_step_time": 0.11684536933898926 }, { "epoch": 1.20452880859375e-05, "model_forward_time": 0.025667428970336914, "step": 7894 }, { "epoch": 1.20452880859375e-05, "step": 7894, "training_step_time": 0.18823695182800293 }, { "epoch": 1.204681396484375e-05, "model_forward_time": 0.025701045989990234, "step": 7895 }, { "epoch": 1.204681396484375e-05, "step": 7895, "training_step_time": 0.14424347877502441 }, { "epoch": 1.204833984375e-05, "model_forward_time": 0.02483391761779785, "step": 7896 }, { "epoch": 1.204833984375e-05, "step": 7896, "training_step_time": 0.10968470573425293 }, { "epoch": 1.204986572265625e-05, "model_forward_time": 0.025405168533325195, "step": 7897 }, { "epoch": 1.204986572265625e-05, "step": 7897, "training_step_time": 0.11185812950134277 }, { "epoch": 1.20513916015625e-05, "model_forward_time": 0.025368928909301758, "step": 7898 }, { "epoch": 1.20513916015625e-05, "step": 7898, "training_step_time": 0.10683465003967285 }, { "epoch": 1.205291748046875e-05, "model_forward_time": 0.02557659149169922, "step": 7899 }, { "epoch": 1.205291748046875e-05, "step": 7899, "training_step_time": 0.10740208625793457 }, { "epoch": 1.2054443359375e-05, "grad_norm": 0.5200270414352417, "learning_rate": 8.806500562121723e-05, "loss": 0.0545, "step": 7900 }, { "epoch": 1.2054443359375e-05, "model_forward_time": 0.02678084373474121, "step": 7900 }, { "epoch": 1.2054443359375e-05, "step": 7900, "training_step_time": 0.10790038108825684 }, { "epoch": 1.205596923828125e-05, "model_forward_time": 0.02628946304321289, "step": 7901 }, { "epoch": 1.205596923828125e-05, "step": 7901, "training_step_time": 0.16756153106689453 }, { "epoch": 1.20574951171875e-05, "model_forward_time": 0.024643421173095703, "step": 7902 }, { "epoch": 1.20574951171875e-05, "step": 7902, "training_step_time": 0.12151384353637695 }, { "epoch": 1.205902099609375e-05, "model_forward_time": 0.02806544303894043, "step": 7903 }, { "epoch": 1.205902099609375e-05, "step": 7903, "training_step_time": 0.22317838668823242 }, { "epoch": 1.2060546875e-05, "model_forward_time": 0.025924205780029297, "step": 7904 }, { "epoch": 1.2060546875e-05, "step": 7904, "training_step_time": 0.13548016548156738 }, { "epoch": 1.206207275390625e-05, "model_forward_time": 0.02471446990966797, "step": 7905 }, { "epoch": 1.206207275390625e-05, "step": 7905, "training_step_time": 0.11276721954345703 }, { "epoch": 1.20635986328125e-05, "model_forward_time": 0.024350881576538086, "step": 7906 }, { "epoch": 1.20635986328125e-05, "step": 7906, "training_step_time": 0.1176750659942627 }, { "epoch": 1.206512451171875e-05, "model_forward_time": 0.02517533302307129, "step": 7907 }, { "epoch": 1.206512451171875e-05, "step": 7907, "training_step_time": 0.11346554756164551 }, { "epoch": 1.2066650390625e-05, "model_forward_time": 0.026131153106689453, "step": 7908 }, { "epoch": 1.2066650390625e-05, "step": 7908, "training_step_time": 0.19850683212280273 }, { "epoch": 1.206817626953125e-05, "model_forward_time": 0.025051116943359375, "step": 7909 }, { "epoch": 1.206817626953125e-05, "step": 7909, "training_step_time": 0.11710286140441895 }, { "epoch": 1.20697021484375e-05, "grad_norm": 0.3905726671218872, "learning_rate": 8.802924552574345e-05, "loss": 0.0562, "step": 7910 }, { "epoch": 1.20697021484375e-05, "model_forward_time": 0.02635955810546875, "step": 7910 }, { "epoch": 1.20697021484375e-05, "step": 7910, "training_step_time": 0.13576769828796387 }, { "epoch": 1.207122802734375e-05, "model_forward_time": 0.025088071823120117, "step": 7911 }, { "epoch": 1.207122802734375e-05, "step": 7911, "training_step_time": 0.15278387069702148 }, { "epoch": 1.207275390625e-05, "model_forward_time": 0.025600433349609375, "step": 7912 }, { "epoch": 1.207275390625e-05, "step": 7912, "training_step_time": 0.17139697074890137 }, { "epoch": 1.207427978515625e-05, "model_forward_time": 0.02432537078857422, "step": 7913 }, { "epoch": 1.207427978515625e-05, "step": 7913, "training_step_time": 0.17067360877990723 }, { "epoch": 1.20758056640625e-05, "model_forward_time": 0.024817466735839844, "step": 7914 }, { "epoch": 1.20758056640625e-05, "step": 7914, "training_step_time": 0.10875439643859863 }, { "epoch": 1.207733154296875e-05, "model_forward_time": 0.02481389045715332, "step": 7915 }, { "epoch": 1.207733154296875e-05, "step": 7915, "training_step_time": 0.1369922161102295 }, { "epoch": 1.2078857421875e-05, "model_forward_time": 0.02545762062072754, "step": 7916 }, { "epoch": 1.2078857421875e-05, "step": 7916, "training_step_time": 0.17713451385498047 }, { "epoch": 1.208038330078125e-05, "model_forward_time": 0.02508378028869629, "step": 7917 }, { "epoch": 1.208038330078125e-05, "step": 7917, "training_step_time": 0.10827136039733887 }, { "epoch": 1.20819091796875e-05, "model_forward_time": 0.024219036102294922, "step": 7918 }, { "epoch": 1.20819091796875e-05, "step": 7918, "training_step_time": 0.10509347915649414 }, { "epoch": 1.208343505859375e-05, "model_forward_time": 0.02587723731994629, "step": 7919 }, { "epoch": 1.208343505859375e-05, "step": 7919, "training_step_time": 0.19098520278930664 }, { "epoch": 1.20849609375e-05, "grad_norm": 0.4202629029750824, "learning_rate": 8.799343922115044e-05, "loss": 0.073, "step": 7920 }, { "epoch": 1.20849609375e-05, "model_forward_time": 0.025252342224121094, "step": 7920 }, { "epoch": 1.20849609375e-05, "step": 7920, "training_step_time": 0.1090538501739502 }, { "epoch": 1.208648681640625e-05, "model_forward_time": 0.025035858154296875, "step": 7921 }, { "epoch": 1.208648681640625e-05, "step": 7921, "training_step_time": 0.10737061500549316 }, { "epoch": 1.20880126953125e-05, "model_forward_time": 0.025863170623779297, "step": 7922 }, { "epoch": 1.20880126953125e-05, "step": 7922, "training_step_time": 0.10735893249511719 }, { "epoch": 1.208953857421875e-05, "model_forward_time": 0.0262453556060791, "step": 7923 }, { "epoch": 1.208953857421875e-05, "step": 7923, "training_step_time": 0.10936737060546875 }, { "epoch": 1.2091064453125e-05, "model_forward_time": 0.026357650756835938, "step": 7924 }, { "epoch": 1.2091064453125e-05, "step": 7924, "training_step_time": 0.1101231575012207 }, { "epoch": 1.209259033203125e-05, "model_forward_time": 0.02567577362060547, "step": 7925 }, { "epoch": 1.209259033203125e-05, "step": 7925, "training_step_time": 0.1090693473815918 }, { "epoch": 1.20941162109375e-05, "model_forward_time": 0.02533435821533203, "step": 7926 }, { "epoch": 1.20941162109375e-05, "step": 7926, "training_step_time": 0.11175966262817383 }, { "epoch": 1.209564208984375e-05, "model_forward_time": 0.024616718292236328, "step": 7927 }, { "epoch": 1.209564208984375e-05, "step": 7927, "training_step_time": 0.10579705238342285 }, { "epoch": 1.209716796875e-05, "model_forward_time": 0.02611517906188965, "step": 7928 }, { "epoch": 1.209716796875e-05, "step": 7928, "training_step_time": 0.10750651359558105 }, { "epoch": 1.209869384765625e-05, "model_forward_time": 0.025504112243652344, "step": 7929 }, { "epoch": 1.209869384765625e-05, "step": 7929, "training_step_time": 0.11054706573486328 }, { "epoch": 1.21002197265625e-05, "grad_norm": 0.7044833302497864, "learning_rate": 8.795758675094621e-05, "loss": 0.044, "step": 7930 }, { "epoch": 1.21002197265625e-05, "model_forward_time": 0.024985551834106445, "step": 7930 }, { "epoch": 1.21002197265625e-05, "step": 7930, "training_step_time": 0.10688519477844238 }, { "epoch": 1.210174560546875e-05, "model_forward_time": 0.02528691291809082, "step": 7931 }, { "epoch": 1.210174560546875e-05, "step": 7931, "training_step_time": 0.12502336502075195 }, { "epoch": 1.2103271484375e-05, "model_forward_time": 0.026308298110961914, "step": 7932 }, { "epoch": 1.2103271484375e-05, "step": 7932, "training_step_time": 0.13570427894592285 }, { "epoch": 1.210479736328125e-05, "model_forward_time": 0.02536296844482422, "step": 7933 }, { "epoch": 1.210479736328125e-05, "step": 7933, "training_step_time": 0.13477849960327148 }, { "epoch": 1.21063232421875e-05, "model_forward_time": 0.02510690689086914, "step": 7934 }, { "epoch": 1.21063232421875e-05, "step": 7934, "training_step_time": 0.12601327896118164 }, { "epoch": 1.210784912109375e-05, "model_forward_time": 0.024953603744506836, "step": 7935 }, { "epoch": 1.210784912109375e-05, "step": 7935, "training_step_time": 0.11086630821228027 }, { "epoch": 1.2109375e-05, "model_forward_time": 0.024572372436523438, "step": 7936 }, { "epoch": 1.2109375e-05, "step": 7936, "training_step_time": 0.21455979347229004 }, { "epoch": 1.211090087890625e-05, "model_forward_time": 0.02447366714477539, "step": 7937 }, { "epoch": 1.211090087890625e-05, "step": 7937, "training_step_time": 0.11578989028930664 }, { "epoch": 1.21124267578125e-05, "model_forward_time": 0.024437665939331055, "step": 7938 }, { "epoch": 1.21124267578125e-05, "step": 7938, "training_step_time": 0.11175775527954102 }, { "epoch": 1.211395263671875e-05, "model_forward_time": 0.02499246597290039, "step": 7939 }, { "epoch": 1.211395263671875e-05, "step": 7939, "training_step_time": 0.11520743370056152 }, { "epoch": 1.2115478515625e-05, "grad_norm": 0.6897473931312561, "learning_rate": 8.792168815869493e-05, "loss": 0.0529, "step": 7940 }, { "epoch": 1.2115478515625e-05, "model_forward_time": 0.02551102638244629, "step": 7940 }, { "epoch": 1.2115478515625e-05, "step": 7940, "training_step_time": 0.10939192771911621 }, { "epoch": 1.211700439453125e-05, "model_forward_time": 0.02597832679748535, "step": 7941 }, { "epoch": 1.211700439453125e-05, "step": 7941, "training_step_time": 0.12726497650146484 }, { "epoch": 1.21185302734375e-05, "model_forward_time": 0.02549886703491211, "step": 7942 }, { "epoch": 1.21185302734375e-05, "step": 7942, "training_step_time": 0.10681533813476562 }, { "epoch": 1.212005615234375e-05, "model_forward_time": 0.02470541000366211, "step": 7943 }, { "epoch": 1.212005615234375e-05, "step": 7943, "training_step_time": 0.10898184776306152 }, { "epoch": 1.212158203125e-05, "model_forward_time": 0.02497553825378418, "step": 7944 }, { "epoch": 1.212158203125e-05, "step": 7944, "training_step_time": 0.10942411422729492 }, { "epoch": 1.212310791015625e-05, "model_forward_time": 0.024882793426513672, "step": 7945 }, { "epoch": 1.212310791015625e-05, "step": 7945, "training_step_time": 0.18957972526550293 }, { "epoch": 1.21246337890625e-05, "model_forward_time": 0.024564027786254883, "step": 7946 }, { "epoch": 1.21246337890625e-05, "step": 7946, "training_step_time": 0.10462427139282227 }, { "epoch": 1.212615966796875e-05, "model_forward_time": 0.025892972946166992, "step": 7947 }, { "epoch": 1.212615966796875e-05, "step": 7947, "training_step_time": 0.11996626853942871 }, { "epoch": 1.2127685546875e-05, "model_forward_time": 0.024728059768676758, "step": 7948 }, { "epoch": 1.2127685546875e-05, "step": 7948, "training_step_time": 0.1269516944885254 }, { "epoch": 1.212921142578125e-05, "model_forward_time": 0.02470993995666504, "step": 7949 }, { "epoch": 1.212921142578125e-05, "step": 7949, "training_step_time": 0.13221526145935059 }, { "epoch": 1.21307373046875e-05, "grad_norm": 0.8151080012321472, "learning_rate": 8.788574348801675e-05, "loss": 0.0419, "step": 7950 }, { "epoch": 1.21307373046875e-05, "model_forward_time": 0.02478194236755371, "step": 7950 }, { "epoch": 1.21307373046875e-05, "step": 7950, "training_step_time": 0.11572003364562988 }, { "epoch": 1.213226318359375e-05, "model_forward_time": 0.025125741958618164, "step": 7951 }, { "epoch": 1.213226318359375e-05, "step": 7951, "training_step_time": 0.11235260963439941 }, { "epoch": 1.21337890625e-05, "model_forward_time": 0.024944067001342773, "step": 7952 }, { "epoch": 1.21337890625e-05, "step": 7952, "training_step_time": 0.20114636421203613 }, { "epoch": 1.213531494140625e-05, "model_forward_time": 0.024495363235473633, "step": 7953 }, { "epoch": 1.213531494140625e-05, "step": 7953, "training_step_time": 0.11314654350280762 }, { "epoch": 1.21368408203125e-05, "model_forward_time": 0.029157400131225586, "step": 7954 }, { "epoch": 1.21368408203125e-05, "step": 7954, "training_step_time": 0.13973259925842285 }, { "epoch": 1.213836669921875e-05, "model_forward_time": 0.02474045753479004, "step": 7955 }, { "epoch": 1.213836669921875e-05, "step": 7955, "training_step_time": 0.15376520156860352 }, { "epoch": 1.2139892578125e-05, "model_forward_time": 0.024340391159057617, "step": 7956 }, { "epoch": 1.2139892578125e-05, "step": 7956, "training_step_time": 0.21885466575622559 }, { "epoch": 1.214141845703125e-05, "model_forward_time": 0.024456262588500977, "step": 7957 }, { "epoch": 1.214141845703125e-05, "step": 7957, "training_step_time": 0.11707091331481934 }, { "epoch": 1.21429443359375e-05, "model_forward_time": 0.02457284927368164, "step": 7958 }, { "epoch": 1.21429443359375e-05, "step": 7958, "training_step_time": 0.18384027481079102 }, { "epoch": 1.214447021484375e-05, "model_forward_time": 0.024204730987548828, "step": 7959 }, { "epoch": 1.214447021484375e-05, "step": 7959, "training_step_time": 0.14910554885864258 }, { "epoch": 1.214599609375e-05, "grad_norm": 0.478555828332901, "learning_rate": 8.784975278258783e-05, "loss": 0.0437, "step": 7960 }, { "epoch": 1.214599609375e-05, "model_forward_time": 0.024885892868041992, "step": 7960 }, { "epoch": 1.214599609375e-05, "step": 7960, "training_step_time": 0.11147737503051758 }, { "epoch": 1.214752197265625e-05, "model_forward_time": 0.024567127227783203, "step": 7961 }, { "epoch": 1.214752197265625e-05, "step": 7961, "training_step_time": 0.11080765724182129 }, { "epoch": 1.21490478515625e-05, "model_forward_time": 0.024933815002441406, "step": 7962 }, { "epoch": 1.21490478515625e-05, "step": 7962, "training_step_time": 0.12792205810546875 }, { "epoch": 1.215057373046875e-05, "model_forward_time": 0.025124549865722656, "step": 7963 }, { "epoch": 1.215057373046875e-05, "step": 7963, "training_step_time": 0.11346960067749023 }, { "epoch": 1.2152099609375e-05, "model_forward_time": 0.024829864501953125, "step": 7964 }, { "epoch": 1.2152099609375e-05, "step": 7964, "training_step_time": 0.1145787239074707 }, { "epoch": 1.215362548828125e-05, "model_forward_time": 0.024878501892089844, "step": 7965 }, { "epoch": 1.215362548828125e-05, "step": 7965, "training_step_time": 0.11946439743041992 }, { "epoch": 1.21551513671875e-05, "model_forward_time": 0.025093793869018555, "step": 7966 }, { "epoch": 1.21551513671875e-05, "step": 7966, "training_step_time": 0.10670924186706543 }, { "epoch": 1.215667724609375e-05, "model_forward_time": 0.02442169189453125, "step": 7967 }, { "epoch": 1.215667724609375e-05, "step": 7967, "training_step_time": 0.10758852958679199 }, { "epoch": 1.2158203125e-05, "model_forward_time": 0.02733469009399414, "step": 7968 }, { "epoch": 1.2158203125e-05, "step": 7968, "training_step_time": 0.11456847190856934 }, { "epoch": 1.215972900390625e-05, "model_forward_time": 0.025136232376098633, "step": 7969 }, { "epoch": 1.215972900390625e-05, "step": 7969, "training_step_time": 0.10651516914367676 }, { "epoch": 1.21612548828125e-05, "grad_norm": 0.5801077485084534, "learning_rate": 8.781371608614029e-05, "loss": 0.0444, "step": 7970 }, { "epoch": 1.21612548828125e-05, "model_forward_time": 0.025835752487182617, "step": 7970 }, { "epoch": 1.21612548828125e-05, "step": 7970, "training_step_time": 0.10868954658508301 }, { "epoch": 1.216278076171875e-05, "model_forward_time": 0.025037765502929688, "step": 7971 }, { "epoch": 1.216278076171875e-05, "step": 7971, "training_step_time": 0.10644912719726562 }, { "epoch": 1.2164306640625e-05, "model_forward_time": 0.024999141693115234, "step": 7972 }, { "epoch": 1.2164306640625e-05, "step": 7972, "training_step_time": 0.10641980171203613 }, { "epoch": 1.216583251953125e-05, "model_forward_time": 0.02485489845275879, "step": 7973 }, { "epoch": 1.216583251953125e-05, "step": 7973, "training_step_time": 0.11394405364990234 }, { "epoch": 1.21673583984375e-05, "model_forward_time": 0.0245211124420166, "step": 7974 }, { "epoch": 1.21673583984375e-05, "step": 7974, "training_step_time": 0.1098182201385498 }, { "epoch": 1.216888427734375e-05, "model_forward_time": 0.02516317367553711, "step": 7975 }, { "epoch": 1.216888427734375e-05, "step": 7975, "training_step_time": 0.12448930740356445 }, { "epoch": 1.217041015625e-05, "model_forward_time": 0.024618864059448242, "step": 7976 }, { "epoch": 1.217041015625e-05, "step": 7976, "training_step_time": 0.12089371681213379 }, { "epoch": 1.217193603515625e-05, "model_forward_time": 0.025037527084350586, "step": 7977 }, { "epoch": 1.217193603515625e-05, "step": 7977, "training_step_time": 0.12023663520812988 }, { "epoch": 1.21734619140625e-05, "model_forward_time": 0.024635791778564453, "step": 7978 }, { "epoch": 1.21734619140625e-05, "step": 7978, "training_step_time": 0.1242685317993164 }, { "epoch": 1.217498779296875e-05, "model_forward_time": 0.024935007095336914, "step": 7979 }, { "epoch": 1.217498779296875e-05, "step": 7979, "training_step_time": 0.12067174911499023 }, { "epoch": 1.2176513671875e-05, "grad_norm": 0.8685769438743591, "learning_rate": 8.77776334424621e-05, "loss": 0.0452, "step": 7980 }, { "epoch": 1.2176513671875e-05, "model_forward_time": 0.025543928146362305, "step": 7980 }, { "epoch": 1.2176513671875e-05, "step": 7980, "training_step_time": 0.10847306251525879 }, { "epoch": 1.217803955078125e-05, "model_forward_time": 0.025130748748779297, "step": 7981 }, { "epoch": 1.217803955078125e-05, "step": 7981, "training_step_time": 0.11330461502075195 }, { "epoch": 1.21795654296875e-05, "model_forward_time": 0.025539636611938477, "step": 7982 }, { "epoch": 1.21795654296875e-05, "step": 7982, "training_step_time": 0.11588811874389648 }, { "epoch": 1.218109130859375e-05, "model_forward_time": 0.02513432502746582, "step": 7983 }, { "epoch": 1.218109130859375e-05, "step": 7983, "training_step_time": 0.1141366958618164 }, { "epoch": 1.21826171875e-05, "model_forward_time": 0.02728724479675293, "step": 7984 }, { "epoch": 1.21826171875e-05, "step": 7984, "training_step_time": 0.11709713935852051 }, { "epoch": 1.218414306640625e-05, "model_forward_time": 0.025393962860107422, "step": 7985 }, { "epoch": 1.218414306640625e-05, "step": 7985, "training_step_time": 0.2055351734161377 }, { "epoch": 1.21856689453125e-05, "model_forward_time": 0.02438807487487793, "step": 7986 }, { "epoch": 1.21856689453125e-05, "step": 7986, "training_step_time": 0.11226773262023926 }, { "epoch": 1.218719482421875e-05, "model_forward_time": 0.024125099182128906, "step": 7987 }, { "epoch": 1.218719482421875e-05, "step": 7987, "training_step_time": 0.10791182518005371 }, { "epoch": 1.2188720703125e-05, "model_forward_time": 0.025109529495239258, "step": 7988 }, { "epoch": 1.2188720703125e-05, "step": 7988, "training_step_time": 0.10952353477478027 }, { "epoch": 1.219024658203125e-05, "model_forward_time": 0.024990081787109375, "step": 7989 }, { "epoch": 1.219024658203125e-05, "step": 7989, "training_step_time": 0.10594606399536133 }, { "epoch": 1.21917724609375e-05, "grad_norm": 0.6445547938346863, "learning_rate": 8.774150489539707e-05, "loss": 0.0384, "step": 7990 }, { "epoch": 1.21917724609375e-05, "model_forward_time": 0.023876428604125977, "step": 7990 }, { "epoch": 1.21917724609375e-05, "step": 7990, "training_step_time": 0.10858273506164551 }, { "epoch": 1.219329833984375e-05, "model_forward_time": 0.024243831634521484, "step": 7991 }, { "epoch": 1.219329833984375e-05, "step": 7991, "training_step_time": 0.12700700759887695 }, { "epoch": 1.219482421875e-05, "model_forward_time": 0.02518320083618164, "step": 7992 }, { "epoch": 1.219482421875e-05, "step": 7992, "training_step_time": 0.11277341842651367 }, { "epoch": 1.219635009765625e-05, "model_forward_time": 0.025277137756347656, "step": 7993 }, { "epoch": 1.219635009765625e-05, "step": 7993, "training_step_time": 0.2225806713104248 }, { "epoch": 1.21978759765625e-05, "model_forward_time": 0.02420783042907715, "step": 7994 }, { "epoch": 1.21978759765625e-05, "step": 7994, "training_step_time": 0.12477612495422363 }, { "epoch": 1.219940185546875e-05, "model_forward_time": 0.024157047271728516, "step": 7995 }, { "epoch": 1.219940185546875e-05, "step": 7995, "training_step_time": 0.10909628868103027 }, { "epoch": 1.2200927734375e-05, "model_forward_time": 0.025439977645874023, "step": 7996 }, { "epoch": 1.2200927734375e-05, "step": 7996, "training_step_time": 0.12809038162231445 }, { "epoch": 1.220245361328125e-05, "model_forward_time": 0.024990558624267578, "step": 7997 }, { "epoch": 1.220245361328125e-05, "step": 7997, "training_step_time": 0.15242266654968262 }, { "epoch": 1.22039794921875e-05, "model_forward_time": 0.02489948272705078, "step": 7998 }, { "epoch": 1.22039794921875e-05, "step": 7998, "training_step_time": 0.12850666046142578 }, { "epoch": 1.220550537109375e-05, "model_forward_time": 0.024712800979614258, "step": 7999 }, { "epoch": 1.220550537109375e-05, "step": 7999, "training_step_time": 0.10808920860290527 }, { "epoch": 1.220703125e-05, "grad_norm": 0.42424267530441284, "learning_rate": 8.770533048884482e-05, "loss": 0.0393, "step": 8000 }, { "epoch": 1.220703125e-05, "model_forward_time": 0.026155710220336914, "step": 8000 }, { "epoch": 1.220703125e-05, "step": 8000, "training_step_time": 0.10521769523620605 }, { "epoch": 1.220855712890625e-05, "model_forward_time": 0.02340412139892578, "step": 8001 }, { "epoch": 1.220855712890625e-05, "step": 8001, "training_step_time": 0.15229058265686035 }, { "epoch": 1.22100830078125e-05, "model_forward_time": 0.024644136428833008, "step": 8002 }, { "epoch": 1.22100830078125e-05, "step": 8002, "training_step_time": 0.11137056350708008 }, { "epoch": 1.221160888671875e-05, "model_forward_time": 0.0244448184967041, "step": 8003 }, { "epoch": 1.221160888671875e-05, "step": 8003, "training_step_time": 0.11009430885314941 }, { "epoch": 1.2213134765625e-05, "model_forward_time": 0.024834871292114258, "step": 8004 }, { "epoch": 1.2213134765625e-05, "step": 8004, "training_step_time": 0.12272524833679199 }, { "epoch": 1.221466064453125e-05, "model_forward_time": 0.025106191635131836, "step": 8005 }, { "epoch": 1.221466064453125e-05, "step": 8005, "training_step_time": 0.13160347938537598 }, { "epoch": 1.22161865234375e-05, "model_forward_time": 0.025022506713867188, "step": 8006 }, { "epoch": 1.22161865234375e-05, "step": 8006, "training_step_time": 0.10595393180847168 }, { "epoch": 1.221771240234375e-05, "model_forward_time": 0.024736881256103516, "step": 8007 }, { "epoch": 1.221771240234375e-05, "step": 8007, "training_step_time": 0.11640000343322754 }, { "epoch": 1.221923828125e-05, "model_forward_time": 0.02558732032775879, "step": 8008 }, { "epoch": 1.221923828125e-05, "step": 8008, "training_step_time": 0.209367036819458 }, { "epoch": 1.222076416015625e-05, "model_forward_time": 0.02422022819519043, "step": 8009 }, { "epoch": 1.222076416015625e-05, "step": 8009, "training_step_time": 0.12210321426391602 }, { "epoch": 1.22222900390625e-05, "grad_norm": 0.989590048789978, "learning_rate": 8.766911026676064e-05, "loss": 0.0385, "step": 8010 }, { "epoch": 1.22222900390625e-05, "model_forward_time": 0.02434086799621582, "step": 8010 }, { "epoch": 1.22222900390625e-05, "step": 8010, "training_step_time": 0.10384702682495117 }, { "epoch": 1.222381591796875e-05, "model_forward_time": 0.027399778366088867, "step": 8011 }, { "epoch": 1.222381591796875e-05, "step": 8011, "training_step_time": 0.1770954132080078 }, { "epoch": 1.2225341796875e-05, "model_forward_time": 0.024376392364501953, "step": 8012 }, { "epoch": 1.2225341796875e-05, "step": 8012, "training_step_time": 0.13405919075012207 }, { "epoch": 1.222686767578125e-05, "model_forward_time": 0.023864269256591797, "step": 8013 }, { "epoch": 1.222686767578125e-05, "step": 8013, "training_step_time": 0.11847400665283203 }, { "epoch": 1.22283935546875e-05, "model_forward_time": 0.026166677474975586, "step": 8014 }, { "epoch": 1.22283935546875e-05, "step": 8014, "training_step_time": 0.10955429077148438 }, { "epoch": 1.222991943359375e-05, "model_forward_time": 0.025537967681884766, "step": 8015 }, { "epoch": 1.222991943359375e-05, "step": 8015, "training_step_time": 0.14167284965515137 }, { "epoch": 1.22314453125e-05, "model_forward_time": 0.024870634078979492, "step": 8016 }, { "epoch": 1.22314453125e-05, "step": 8016, "training_step_time": 0.11117029190063477 }, { "epoch": 1.223297119140625e-05, "model_forward_time": 0.024974346160888672, "step": 8017 }, { "epoch": 1.223297119140625e-05, "step": 8017, "training_step_time": 0.10834479331970215 }, { "epoch": 1.22344970703125e-05, "model_forward_time": 0.025220394134521484, "step": 8018 }, { "epoch": 1.22344970703125e-05, "step": 8018, "training_step_time": 0.11035656929016113 }, { "epoch": 1.223602294921875e-05, "model_forward_time": 0.025110244750976562, "step": 8019 }, { "epoch": 1.223602294921875e-05, "step": 8019, "training_step_time": 0.10979390144348145 }, { "epoch": 1.2237548828125e-05, "grad_norm": 0.5198809504508972, "learning_rate": 8.763284427315551e-05, "loss": 0.0455, "step": 8020 }, { "epoch": 1.2237548828125e-05, "model_forward_time": 0.024939775466918945, "step": 8020 }, { "epoch": 1.2237548828125e-05, "step": 8020, "training_step_time": 0.19510436058044434 }, { "epoch": 1.223907470703125e-05, "model_forward_time": 0.024056434631347656, "step": 8021 }, { "epoch": 1.223907470703125e-05, "step": 8021, "training_step_time": 0.10980510711669922 }, { "epoch": 1.22406005859375e-05, "model_forward_time": 0.024538278579711914, "step": 8022 }, { "epoch": 1.22406005859375e-05, "step": 8022, "training_step_time": 0.10904335975646973 }, { "epoch": 1.224212646484375e-05, "model_forward_time": 0.025308847427368164, "step": 8023 }, { "epoch": 1.224212646484375e-05, "step": 8023, "training_step_time": 0.10912275314331055 }, { "epoch": 1.224365234375e-05, "model_forward_time": 0.024741411209106445, "step": 8024 }, { "epoch": 1.224365234375e-05, "step": 8024, "training_step_time": 0.10647392272949219 }, { "epoch": 1.224517822265625e-05, "model_forward_time": 0.02515244483947754, "step": 8025 }, { "epoch": 1.224517822265625e-05, "step": 8025, "training_step_time": 0.10667657852172852 }, { "epoch": 1.22467041015625e-05, "model_forward_time": 0.02509140968322754, "step": 8026 }, { "epoch": 1.22467041015625e-05, "step": 8026, "training_step_time": 0.10818696022033691 }, { "epoch": 1.224822998046875e-05, "model_forward_time": 0.02521347999572754, "step": 8027 }, { "epoch": 1.224822998046875e-05, "step": 8027, "training_step_time": 0.10985255241394043 }, { "epoch": 1.2249755859375e-05, "model_forward_time": 0.02496957778930664, "step": 8028 }, { "epoch": 1.2249755859375e-05, "step": 8028, "training_step_time": 0.10714507102966309 }, { "epoch": 1.225128173828125e-05, "model_forward_time": 0.025231599807739258, "step": 8029 }, { "epoch": 1.225128173828125e-05, "step": 8029, "training_step_time": 0.10595011711120605 }, { "epoch": 1.22528076171875e-05, "grad_norm": 0.6120517253875732, "learning_rate": 8.759653255209606e-05, "loss": 0.0384, "step": 8030 }, { "epoch": 1.22528076171875e-05, "model_forward_time": 0.02554464340209961, "step": 8030 }, { "epoch": 1.22528076171875e-05, "step": 8030, "training_step_time": 0.10946106910705566 }, { "epoch": 1.225433349609375e-05, "model_forward_time": 0.02524113655090332, "step": 8031 }, { "epoch": 1.225433349609375e-05, "step": 8031, "training_step_time": 0.1143794059753418 }, { "epoch": 1.2255859375e-05, "model_forward_time": 0.02399444580078125, "step": 8032 }, { "epoch": 1.2255859375e-05, "step": 8032, "training_step_time": 0.1059868335723877 }, { "epoch": 1.225738525390625e-05, "model_forward_time": 0.024372339248657227, "step": 8033 }, { "epoch": 1.225738525390625e-05, "step": 8033, "training_step_time": 0.1079108715057373 }, { "epoch": 1.22589111328125e-05, "model_forward_time": 0.025270700454711914, "step": 8034 }, { "epoch": 1.22589111328125e-05, "step": 8034, "training_step_time": 0.10793447494506836 }, { "epoch": 1.226043701171875e-05, "model_forward_time": 0.025335311889648438, "step": 8035 }, { "epoch": 1.226043701171875e-05, "step": 8035, "training_step_time": 0.10961413383483887 }, { "epoch": 1.2261962890625e-05, "model_forward_time": 0.026239871978759766, "step": 8036 }, { "epoch": 1.2261962890625e-05, "step": 8036, "training_step_time": 0.10873675346374512 }, { "epoch": 1.226348876953125e-05, "model_forward_time": 0.025423049926757812, "step": 8037 }, { "epoch": 1.226348876953125e-05, "step": 8037, "training_step_time": 0.10605287551879883 }, { "epoch": 1.22650146484375e-05, "model_forward_time": 0.025758981704711914, "step": 8038 }, { "epoch": 1.22650146484375e-05, "step": 8038, "training_step_time": 0.11978030204772949 }, { "epoch": 1.226654052734375e-05, "model_forward_time": 0.02588939666748047, "step": 8039 }, { "epoch": 1.226654052734375e-05, "step": 8039, "training_step_time": 0.11473894119262695 }, { "epoch": 1.226806640625e-05, "grad_norm": 0.424248605966568, "learning_rate": 8.756017514770443e-05, "loss": 0.0406, "step": 8040 }, { "epoch": 1.226806640625e-05, "model_forward_time": 0.025607585906982422, "step": 8040 }, { "epoch": 1.226806640625e-05, "step": 8040, "training_step_time": 0.11047554016113281 }, { "epoch": 1.226959228515625e-05, "model_forward_time": 0.025839805603027344, "step": 8041 }, { "epoch": 1.226959228515625e-05, "step": 8041, "training_step_time": 0.2095508575439453 }, { "epoch": 1.22711181640625e-05, "model_forward_time": 0.024628877639770508, "step": 8042 }, { "epoch": 1.22711181640625e-05, "step": 8042, "training_step_time": 0.11656570434570312 }, { "epoch": 1.227264404296875e-05, "model_forward_time": 0.02409815788269043, "step": 8043 }, { "epoch": 1.227264404296875e-05, "step": 8043, "training_step_time": 0.10518479347229004 }, { "epoch": 1.2274169921875e-05, "model_forward_time": 0.024974584579467773, "step": 8044 }, { "epoch": 1.2274169921875e-05, "step": 8044, "training_step_time": 0.10662031173706055 }, { "epoch": 1.227569580078125e-05, "model_forward_time": 0.02510857582092285, "step": 8045 }, { "epoch": 1.227569580078125e-05, "step": 8045, "training_step_time": 0.10593032836914062 }, { "epoch": 1.22772216796875e-05, "model_forward_time": 0.02510213851928711, "step": 8046 }, { "epoch": 1.22772216796875e-05, "step": 8046, "training_step_time": 0.11077117919921875 }, { "epoch": 1.227874755859375e-05, "model_forward_time": 0.025410890579223633, "step": 8047 }, { "epoch": 1.227874755859375e-05, "step": 8047, "training_step_time": 0.11119270324707031 }, { "epoch": 1.22802734375e-05, "model_forward_time": 0.025301456451416016, "step": 8048 }, { "epoch": 1.22802734375e-05, "step": 8048, "training_step_time": 0.21284008026123047 }, { "epoch": 1.228179931640625e-05, "model_forward_time": 0.02433490753173828, "step": 8049 }, { "epoch": 1.228179931640625e-05, "step": 8049, "training_step_time": 0.2016582489013672 }, { "epoch": 1.22833251953125e-05, "grad_norm": 0.25658440589904785, "learning_rate": 8.75237721041583e-05, "loss": 0.0494, "step": 8050 }, { "epoch": 1.22833251953125e-05, "model_forward_time": 0.02441692352294922, "step": 8050 }, { "epoch": 1.22833251953125e-05, "step": 8050, "training_step_time": 0.18492412567138672 }, { "epoch": 1.228485107421875e-05, "model_forward_time": 0.0244448184967041, "step": 8051 }, { "epoch": 1.228485107421875e-05, "step": 8051, "training_step_time": 0.18129563331604004 }, { "epoch": 1.2286376953125e-05, "model_forward_time": 0.02532672882080078, "step": 8052 }, { "epoch": 1.2286376953125e-05, "step": 8052, "training_step_time": 0.21875810623168945 }, { "epoch": 1.228790283203125e-05, "model_forward_time": 0.024783849716186523, "step": 8053 }, { "epoch": 1.228790283203125e-05, "step": 8053, "training_step_time": 0.11652278900146484 }, { "epoch": 1.22894287109375e-05, "model_forward_time": 0.024639129638671875, "step": 8054 }, { "epoch": 1.22894287109375e-05, "step": 8054, "training_step_time": 0.13737082481384277 }, { "epoch": 1.229095458984375e-05, "model_forward_time": 0.025293827056884766, "step": 8055 }, { "epoch": 1.229095458984375e-05, "step": 8055, "training_step_time": 0.13673615455627441 }, { "epoch": 1.229248046875e-05, "model_forward_time": 0.02505016326904297, "step": 8056 }, { "epoch": 1.229248046875e-05, "step": 8056, "training_step_time": 0.12302613258361816 }, { "epoch": 1.229400634765625e-05, "model_forward_time": 0.024658679962158203, "step": 8057 }, { "epoch": 1.229400634765625e-05, "step": 8057, "training_step_time": 0.12527871131896973 }, { "epoch": 1.22955322265625e-05, "model_forward_time": 0.025033235549926758, "step": 8058 }, { "epoch": 1.22955322265625e-05, "step": 8058, "training_step_time": 0.11224842071533203 }, { "epoch": 1.229705810546875e-05, "model_forward_time": 0.025592803955078125, "step": 8059 }, { "epoch": 1.229705810546875e-05, "step": 8059, "training_step_time": 0.14414167404174805 }, { "epoch": 1.2298583984375e-05, "grad_norm": 0.47664836049079895, "learning_rate": 8.74873234656908e-05, "loss": 0.0374, "step": 8060 }, { "epoch": 1.2298583984375e-05, "model_forward_time": 0.025231122970581055, "step": 8060 }, { "epoch": 1.2298583984375e-05, "step": 8060, "training_step_time": 0.13704943656921387 }, { "epoch": 1.230010986328125e-05, "model_forward_time": 0.024730443954467773, "step": 8061 }, { "epoch": 1.230010986328125e-05, "step": 8061, "training_step_time": 0.11207270622253418 }, { "epoch": 1.23016357421875e-05, "model_forward_time": 0.024997234344482422, "step": 8062 }, { "epoch": 1.23016357421875e-05, "step": 8062, "training_step_time": 0.10901474952697754 }, { "epoch": 1.230316162109375e-05, "model_forward_time": 0.02504754066467285, "step": 8063 }, { "epoch": 1.230316162109375e-05, "step": 8063, "training_step_time": 0.11756300926208496 }, { "epoch": 1.23046875e-05, "model_forward_time": 0.025216341018676758, "step": 8064 }, { "epoch": 1.23046875e-05, "step": 8064, "training_step_time": 0.11266231536865234 }, { "epoch": 1.230621337890625e-05, "model_forward_time": 0.025602102279663086, "step": 8065 }, { "epoch": 1.230621337890625e-05, "step": 8065, "training_step_time": 0.1920170783996582 }, { "epoch": 1.23077392578125e-05, "model_forward_time": 0.024467945098876953, "step": 8066 }, { "epoch": 1.23077392578125e-05, "step": 8066, "training_step_time": 0.11527323722839355 }, { "epoch": 1.230926513671875e-05, "model_forward_time": 0.024228334426879883, "step": 8067 }, { "epoch": 1.230926513671875e-05, "step": 8067, "training_step_time": 0.10783243179321289 }, { "epoch": 1.2310791015625e-05, "model_forward_time": 0.025952577590942383, "step": 8068 }, { "epoch": 1.2310791015625e-05, "step": 8068, "training_step_time": 0.11552977561950684 }, { "epoch": 1.231231689453125e-05, "model_forward_time": 0.025319814682006836, "step": 8069 }, { "epoch": 1.231231689453125e-05, "step": 8069, "training_step_time": 0.11143112182617188 }, { "epoch": 1.23138427734375e-05, "grad_norm": 0.5555617213249207, "learning_rate": 8.745082927659047e-05, "loss": 0.0395, "step": 8070 }, { "epoch": 1.23138427734375e-05, "model_forward_time": 0.025371789932250977, "step": 8070 }, { "epoch": 1.23138427734375e-05, "step": 8070, "training_step_time": 0.1114654541015625 }, { "epoch": 1.231536865234375e-05, "model_forward_time": 0.02511310577392578, "step": 8071 }, { "epoch": 1.231536865234375e-05, "step": 8071, "training_step_time": 0.10948944091796875 }, { "epoch": 1.231689453125e-05, "model_forward_time": 0.025192737579345703, "step": 8072 }, { "epoch": 1.231689453125e-05, "step": 8072, "training_step_time": 0.11054372787475586 }, { "epoch": 1.231842041015625e-05, "model_forward_time": 0.02531290054321289, "step": 8073 }, { "epoch": 1.231842041015625e-05, "step": 8073, "training_step_time": 0.10719180107116699 }, { "epoch": 1.23199462890625e-05, "model_forward_time": 0.0253293514251709, "step": 8074 }, { "epoch": 1.23199462890625e-05, "step": 8074, "training_step_time": 0.11167025566101074 }, { "epoch": 1.232147216796875e-05, "model_forward_time": 0.02662825584411621, "step": 8075 }, { "epoch": 1.232147216796875e-05, "step": 8075, "training_step_time": 0.11392736434936523 }, { "epoch": 1.2322998046875e-05, "model_forward_time": 0.025458097457885742, "step": 8076 }, { "epoch": 1.2322998046875e-05, "step": 8076, "training_step_time": 0.10980010032653809 }, { "epoch": 1.232452392578125e-05, "model_forward_time": 0.02566814422607422, "step": 8077 }, { "epoch": 1.232452392578125e-05, "step": 8077, "training_step_time": 0.11444497108459473 }, { "epoch": 1.23260498046875e-05, "model_forward_time": 0.025586366653442383, "step": 8078 }, { "epoch": 1.23260498046875e-05, "step": 8078, "training_step_time": 0.1084909439086914 }, { "epoch": 1.232757568359375e-05, "model_forward_time": 0.025228261947631836, "step": 8079 }, { "epoch": 1.232757568359375e-05, "step": 8079, "training_step_time": 0.11124134063720703 }, { "epoch": 1.23291015625e-05, "grad_norm": 0.3775727152824402, "learning_rate": 8.741428958120118e-05, "loss": 0.0614, "step": 8080 }, { "epoch": 1.23291015625e-05, "model_forward_time": 0.02603006362915039, "step": 8080 }, { "epoch": 1.23291015625e-05, "step": 8080, "training_step_time": 0.1084434986114502 }, { "epoch": 1.233062744140625e-05, "model_forward_time": 0.024833202362060547, "step": 8081 }, { "epoch": 1.233062744140625e-05, "step": 8081, "training_step_time": 0.11174130439758301 }, { "epoch": 1.23321533203125e-05, "model_forward_time": 0.025392770767211914, "step": 8082 }, { "epoch": 1.23321533203125e-05, "step": 8082, "training_step_time": 0.11500120162963867 }, { "epoch": 1.233367919921875e-05, "model_forward_time": 0.025792360305786133, "step": 8083 }, { "epoch": 1.233367919921875e-05, "step": 8083, "training_step_time": 0.11186480522155762 }, { "epoch": 1.2335205078125e-05, "model_forward_time": 0.025766849517822266, "step": 8084 }, { "epoch": 1.2335205078125e-05, "step": 8084, "training_step_time": 0.11236715316772461 }, { "epoch": 1.233673095703125e-05, "model_forward_time": 0.02521347999572754, "step": 8085 }, { "epoch": 1.233673095703125e-05, "step": 8085, "training_step_time": 0.2122054100036621 }, { "epoch": 1.23382568359375e-05, "model_forward_time": 0.025249958038330078, "step": 8086 }, { "epoch": 1.23382568359375e-05, "step": 8086, "training_step_time": 0.11527323722839355 }, { "epoch": 1.233978271484375e-05, "model_forward_time": 0.02473139762878418, "step": 8087 }, { "epoch": 1.233978271484375e-05, "step": 8087, "training_step_time": 0.11561703681945801 }, { "epoch": 1.234130859375e-05, "model_forward_time": 0.025363445281982422, "step": 8088 }, { "epoch": 1.234130859375e-05, "step": 8088, "training_step_time": 0.10826849937438965 }, { "epoch": 1.234283447265625e-05, "model_forward_time": 0.02518630027770996, "step": 8089 }, { "epoch": 1.234283447265625e-05, "step": 8089, "training_step_time": 0.1096034049987793 }, { "epoch": 1.23443603515625e-05, "grad_norm": 0.6591735482215881, "learning_rate": 8.737770442392212e-05, "loss": 0.0452, "step": 8090 }, { "epoch": 1.23443603515625e-05, "model_forward_time": 0.02527308464050293, "step": 8090 }, { "epoch": 1.23443603515625e-05, "step": 8090, "training_step_time": 0.10939669609069824 }, { "epoch": 1.234588623046875e-05, "model_forward_time": 0.02529311180114746, "step": 8091 }, { "epoch": 1.234588623046875e-05, "step": 8091, "training_step_time": 0.11006808280944824 }, { "epoch": 1.2347412109375e-05, "model_forward_time": 0.025275707244873047, "step": 8092 }, { "epoch": 1.2347412109375e-05, "step": 8092, "training_step_time": 0.1374979019165039 }, { "epoch": 1.234893798828125e-05, "model_forward_time": 0.02574324607849121, "step": 8093 }, { "epoch": 1.234893798828125e-05, "step": 8093, "training_step_time": 0.11505842208862305 }, { "epoch": 1.23504638671875e-05, "model_forward_time": 0.025410890579223633, "step": 8094 }, { "epoch": 1.23504638671875e-05, "step": 8094, "training_step_time": 0.22623395919799805 }, { "epoch": 1.235198974609375e-05, "model_forward_time": 0.024717092514038086, "step": 8095 }, { "epoch": 1.235198974609375e-05, "step": 8095, "training_step_time": 0.13234162330627441 }, { "epoch": 1.2353515625e-05, "model_forward_time": 0.0240933895111084, "step": 8096 }, { "epoch": 1.2353515625e-05, "step": 8096, "training_step_time": 0.11337447166442871 }, { "epoch": 1.235504150390625e-05, "model_forward_time": 0.025170087814331055, "step": 8097 }, { "epoch": 1.235504150390625e-05, "step": 8097, "training_step_time": 0.11403489112854004 }, { "epoch": 1.23565673828125e-05, "model_forward_time": 0.025411128997802734, "step": 8098 }, { "epoch": 1.23565673828125e-05, "step": 8098, "training_step_time": 0.1303086280822754 }, { "epoch": 1.235809326171875e-05, "model_forward_time": 0.02536606788635254, "step": 8099 }, { "epoch": 1.235809326171875e-05, "step": 8099, "training_step_time": 0.12517261505126953 }, { "epoch": 1.2359619140625e-05, "grad_norm": 0.6788386106491089, "learning_rate": 8.73410738492077e-05, "loss": 0.047, "step": 8100 }, { "epoch": 1.2359619140625e-05, "model_forward_time": 0.025327205657958984, "step": 8100 }, { "epoch": 1.2359619140625e-05, "step": 8100, "training_step_time": 0.11152124404907227 }, { "epoch": 1.236114501953125e-05, "model_forward_time": 0.025552988052368164, "step": 8101 }, { "epoch": 1.236114501953125e-05, "step": 8101, "training_step_time": 0.11388063430786133 }, { "epoch": 1.23626708984375e-05, "model_forward_time": 0.02545332908630371, "step": 8102 }, { "epoch": 1.23626708984375e-05, "step": 8102, "training_step_time": 0.10773468017578125 }, { "epoch": 1.236419677734375e-05, "model_forward_time": 0.02523183822631836, "step": 8103 }, { "epoch": 1.236419677734375e-05, "step": 8103, "training_step_time": 0.11584734916687012 }, { "epoch": 1.236572265625e-05, "model_forward_time": 0.024899721145629883, "step": 8104 }, { "epoch": 1.236572265625e-05, "step": 8104, "training_step_time": 0.15821528434753418 }, { "epoch": 1.236724853515625e-05, "model_forward_time": 0.02474188804626465, "step": 8105 }, { "epoch": 1.236724853515625e-05, "step": 8105, "training_step_time": 0.15355396270751953 }, { "epoch": 1.23687744140625e-05, "model_forward_time": 0.024437904357910156, "step": 8106 }, { "epoch": 1.23687744140625e-05, "step": 8106, "training_step_time": 0.16364073753356934 }, { "epoch": 1.237030029296875e-05, "model_forward_time": 0.02473592758178711, "step": 8107 }, { "epoch": 1.237030029296875e-05, "step": 8107, "training_step_time": 0.18017339706420898 }, { "epoch": 1.2371826171875e-05, "model_forward_time": 0.024404048919677734, "step": 8108 }, { "epoch": 1.2371826171875e-05, "step": 8108, "training_step_time": 0.14826321601867676 }, { "epoch": 1.237335205078125e-05, "model_forward_time": 0.02372574806213379, "step": 8109 }, { "epoch": 1.237335205078125e-05, "step": 8109, "training_step_time": 0.1697988510131836 }, { "epoch": 1.23748779296875e-05, "grad_norm": 0.8109952211380005, "learning_rate": 8.730439790156752e-05, "loss": 0.055, "step": 8110 }, { "epoch": 1.23748779296875e-05, "model_forward_time": 0.025094032287597656, "step": 8110 }, { "epoch": 1.23748779296875e-05, "step": 8110, "training_step_time": 0.13260126113891602 }, { "epoch": 1.237640380859375e-05, "model_forward_time": 0.023451805114746094, "step": 8111 }, { "epoch": 1.237640380859375e-05, "step": 8111, "training_step_time": 0.12503528594970703 }, { "epoch": 1.23779296875e-05, "model_forward_time": 0.02404928207397461, "step": 8112 }, { "epoch": 1.23779296875e-05, "step": 8112, "training_step_time": 0.1221613883972168 }, { "epoch": 1.237945556640625e-05, "model_forward_time": 0.024060726165771484, "step": 8113 }, { "epoch": 1.237945556640625e-05, "step": 8113, "training_step_time": 0.11781787872314453 }, { "epoch": 1.23809814453125e-05, "model_forward_time": 0.024256467819213867, "step": 8114 }, { "epoch": 1.23809814453125e-05, "step": 8114, "training_step_time": 0.11264300346374512 }, { "epoch": 1.238250732421875e-05, "model_forward_time": 0.023929834365844727, "step": 8115 }, { "epoch": 1.238250732421875e-05, "step": 8115, "training_step_time": 0.11896085739135742 }, { "epoch": 1.2384033203125e-05, "model_forward_time": 0.026053905487060547, "step": 8116 }, { "epoch": 1.2384033203125e-05, "step": 8116, "training_step_time": 0.11521601676940918 }, { "epoch": 1.238555908203125e-05, "model_forward_time": 0.02526688575744629, "step": 8117 }, { "epoch": 1.238555908203125e-05, "step": 8117, "training_step_time": 0.11238217353820801 }, { "epoch": 1.23870849609375e-05, "model_forward_time": 0.02555108070373535, "step": 8118 }, { "epoch": 1.23870849609375e-05, "step": 8118, "training_step_time": 0.11261415481567383 }, { "epoch": 1.238861083984375e-05, "model_forward_time": 0.025472640991210938, "step": 8119 }, { "epoch": 1.238861083984375e-05, "step": 8119, "training_step_time": 0.11106419563293457 }, { "epoch": 1.239013671875e-05, "grad_norm": 0.28319039940834045, "learning_rate": 8.72676766255663e-05, "loss": 0.0446, "step": 8120 }, { "epoch": 1.239013671875e-05, "model_forward_time": 0.02457404136657715, "step": 8120 }, { "epoch": 1.239013671875e-05, "step": 8120, "training_step_time": 0.10783815383911133 }, { "epoch": 1.239166259765625e-05, "model_forward_time": 0.024559736251831055, "step": 8121 }, { "epoch": 1.239166259765625e-05, "step": 8121, "training_step_time": 0.10813045501708984 }, { "epoch": 1.23931884765625e-05, "model_forward_time": 0.02554011344909668, "step": 8122 }, { "epoch": 1.23931884765625e-05, "step": 8122, "training_step_time": 0.10766816139221191 }, { "epoch": 1.239471435546875e-05, "model_forward_time": 0.025222301483154297, "step": 8123 }, { "epoch": 1.239471435546875e-05, "step": 8123, "training_step_time": 0.10807418823242188 }, { "epoch": 1.2396240234375e-05, "model_forward_time": 0.02599811553955078, "step": 8124 }, { "epoch": 1.2396240234375e-05, "step": 8124, "training_step_time": 0.11099028587341309 }, { "epoch": 1.239776611328125e-05, "model_forward_time": 0.02555370330810547, "step": 8125 }, { "epoch": 1.239776611328125e-05, "step": 8125, "training_step_time": 0.10835576057434082 }, { "epoch": 1.23992919921875e-05, "model_forward_time": 0.025142431259155273, "step": 8126 }, { "epoch": 1.23992919921875e-05, "step": 8126, "training_step_time": 0.10769772529602051 }, { "epoch": 1.240081787109375e-05, "model_forward_time": 0.02531909942626953, "step": 8127 }, { "epoch": 1.240081787109375e-05, "step": 8127, "training_step_time": 0.10702157020568848 }, { "epoch": 1.240234375e-05, "model_forward_time": 0.02565932273864746, "step": 8128 }, { "epoch": 1.240234375e-05, "step": 8128, "training_step_time": 0.11246895790100098 }, { "epoch": 1.240386962890625e-05, "model_forward_time": 0.02562236785888672, "step": 8129 }, { "epoch": 1.240386962890625e-05, "step": 8129, "training_step_time": 0.11068892478942871 }, { "epoch": 1.24053955078125e-05, "grad_norm": 0.7070178389549255, "learning_rate": 8.723091006582389e-05, "loss": 0.0599, "step": 8130 }, { "epoch": 1.24053955078125e-05, "model_forward_time": 0.02548050880432129, "step": 8130 }, { "epoch": 1.24053955078125e-05, "step": 8130, "training_step_time": 0.21427464485168457 }, { "epoch": 1.240692138671875e-05, "model_forward_time": 0.025106430053710938, "step": 8131 }, { "epoch": 1.240692138671875e-05, "step": 8131, "training_step_time": 0.12172651290893555 }, { "epoch": 1.2408447265625e-05, "model_forward_time": 0.024637699127197266, "step": 8132 }, { "epoch": 1.2408447265625e-05, "step": 8132, "training_step_time": 0.10773253440856934 }, { "epoch": 1.240997314453125e-05, "model_forward_time": 0.025692224502563477, "step": 8133 }, { "epoch": 1.240997314453125e-05, "step": 8133, "training_step_time": 0.11088895797729492 }, { "epoch": 1.24114990234375e-05, "model_forward_time": 0.0256502628326416, "step": 8134 }, { "epoch": 1.24114990234375e-05, "step": 8134, "training_step_time": 0.10803699493408203 }, { "epoch": 1.241302490234375e-05, "model_forward_time": 0.0263674259185791, "step": 8135 }, { "epoch": 1.241302490234375e-05, "step": 8135, "training_step_time": 0.10741662979125977 }, { "epoch": 1.241455078125e-05, "model_forward_time": 0.025513410568237305, "step": 8136 }, { "epoch": 1.241455078125e-05, "step": 8136, "training_step_time": 0.10782289505004883 }, { "epoch": 1.241607666015625e-05, "model_forward_time": 0.025393009185791016, "step": 8137 }, { "epoch": 1.241607666015625e-05, "step": 8137, "training_step_time": 0.13406801223754883 }, { "epoch": 1.24176025390625e-05, "model_forward_time": 0.02581310272216797, "step": 8138 }, { "epoch": 1.24176025390625e-05, "step": 8138, "training_step_time": 0.10785317420959473 }, { "epoch": 1.241912841796875e-05, "model_forward_time": 0.025168418884277344, "step": 8139 }, { "epoch": 1.241912841796875e-05, "step": 8139, "training_step_time": 0.1128695011138916 }, { "epoch": 1.2420654296875e-05, "grad_norm": 0.4179186522960663, "learning_rate": 8.719409826701508e-05, "loss": 0.0562, "step": 8140 }, { "epoch": 1.2420654296875e-05, "model_forward_time": 0.025157451629638672, "step": 8140 }, { "epoch": 1.2420654296875e-05, "step": 8140, "training_step_time": 0.12705111503601074 }, { "epoch": 1.242218017578125e-05, "model_forward_time": 0.025702953338623047, "step": 8141 }, { "epoch": 1.242218017578125e-05, "step": 8141, "training_step_time": 0.13133645057678223 }, { "epoch": 1.24237060546875e-05, "model_forward_time": 0.025308609008789062, "step": 8142 }, { "epoch": 1.24237060546875e-05, "step": 8142, "training_step_time": 0.11477875709533691 }, { "epoch": 1.242523193359375e-05, "model_forward_time": 0.025319337844848633, "step": 8143 }, { "epoch": 1.242523193359375e-05, "step": 8143, "training_step_time": 0.11365652084350586 }, { "epoch": 1.24267578125e-05, "model_forward_time": 0.02421736717224121, "step": 8144 }, { "epoch": 1.24267578125e-05, "step": 8144, "training_step_time": 0.11259293556213379 }, { "epoch": 1.242828369140625e-05, "model_forward_time": 0.025304079055786133, "step": 8145 }, { "epoch": 1.242828369140625e-05, "step": 8145, "training_step_time": 0.19833993911743164 }, { "epoch": 1.24298095703125e-05, "model_forward_time": 0.024834632873535156, "step": 8146 }, { "epoch": 1.24298095703125e-05, "step": 8146, "training_step_time": 0.11335206031799316 }, { "epoch": 1.243133544921875e-05, "model_forward_time": 0.024581432342529297, "step": 8147 }, { "epoch": 1.243133544921875e-05, "step": 8147, "training_step_time": 0.13666725158691406 }, { "epoch": 1.2432861328125e-05, "model_forward_time": 0.025397777557373047, "step": 8148 }, { "epoch": 1.2432861328125e-05, "step": 8148, "training_step_time": 0.11576175689697266 }, { "epoch": 1.243438720703125e-05, "model_forward_time": 0.025118589401245117, "step": 8149 }, { "epoch": 1.243438720703125e-05, "step": 8149, "training_step_time": 0.16438603401184082 }, { "epoch": 1.24359130859375e-05, "grad_norm": 0.5981489419937134, "learning_rate": 8.715724127386972e-05, "loss": 0.0463, "step": 8150 }, { "epoch": 1.24359130859375e-05, "model_forward_time": 0.025103330612182617, "step": 8150 }, { "epoch": 1.24359130859375e-05, "step": 8150, "training_step_time": 0.1981360912322998 }, { "epoch": 1.243743896484375e-05, "model_forward_time": 0.027241945266723633, "step": 8151 }, { "epoch": 1.243743896484375e-05, "step": 8151, "training_step_time": 0.10811877250671387 }, { "epoch": 1.243896484375e-05, "model_forward_time": 0.02480626106262207, "step": 8152 }, { "epoch": 1.243896484375e-05, "step": 8152, "training_step_time": 0.12440848350524902 }, { "epoch": 1.244049072265625e-05, "model_forward_time": 0.025801658630371094, "step": 8153 }, { "epoch": 1.244049072265625e-05, "step": 8153, "training_step_time": 0.11080479621887207 }, { "epoch": 1.24420166015625e-05, "model_forward_time": 0.025527000427246094, "step": 8154 }, { "epoch": 1.24420166015625e-05, "step": 8154, "training_step_time": 0.10692644119262695 }, { "epoch": 1.244354248046875e-05, "model_forward_time": 0.025470495223999023, "step": 8155 }, { "epoch": 1.244354248046875e-05, "step": 8155, "training_step_time": 0.19858312606811523 }, { "epoch": 1.2445068359375e-05, "model_forward_time": 0.024739980697631836, "step": 8156 }, { "epoch": 1.2445068359375e-05, "step": 8156, "training_step_time": 0.10601687431335449 }, { "epoch": 1.244659423828125e-05, "model_forward_time": 0.024831295013427734, "step": 8157 }, { "epoch": 1.244659423828125e-05, "step": 8157, "training_step_time": 0.10599398612976074 }, { "epoch": 1.24481201171875e-05, "model_forward_time": 0.025536060333251953, "step": 8158 }, { "epoch": 1.24481201171875e-05, "step": 8158, "training_step_time": 0.10997915267944336 }, { "epoch": 1.244964599609375e-05, "model_forward_time": 0.025259971618652344, "step": 8159 }, { "epoch": 1.244964599609375e-05, "step": 8159, "training_step_time": 0.10826468467712402 }, { "epoch": 1.2451171875e-05, "grad_norm": 0.704897940158844, "learning_rate": 8.71203391311725e-05, "loss": 0.0522, "step": 8160 }, { "epoch": 1.2451171875e-05, "model_forward_time": 0.0256044864654541, "step": 8160 }, { "epoch": 1.2451171875e-05, "step": 8160, "training_step_time": 0.11163139343261719 }, { "epoch": 1.245269775390625e-05, "model_forward_time": 0.025558948516845703, "step": 8161 }, { "epoch": 1.245269775390625e-05, "step": 8161, "training_step_time": 0.10732913017272949 }, { "epoch": 1.24542236328125e-05, "model_forward_time": 0.025450468063354492, "step": 8162 }, { "epoch": 1.24542236328125e-05, "step": 8162, "training_step_time": 0.10640478134155273 }, { "epoch": 1.245574951171875e-05, "model_forward_time": 0.02585744857788086, "step": 8163 }, { "epoch": 1.245574951171875e-05, "step": 8163, "training_step_time": 0.11344099044799805 }, { "epoch": 1.2457275390625e-05, "model_forward_time": 0.025229454040527344, "step": 8164 }, { "epoch": 1.2457275390625e-05, "step": 8164, "training_step_time": 0.10651540756225586 }, { "epoch": 1.245880126953125e-05, "model_forward_time": 0.02511882781982422, "step": 8165 }, { "epoch": 1.245880126953125e-05, "step": 8165, "training_step_time": 0.10824346542358398 }, { "epoch": 1.24603271484375e-05, "model_forward_time": 0.025422334671020508, "step": 8166 }, { "epoch": 1.24603271484375e-05, "step": 8166, "training_step_time": 0.10669851303100586 }, { "epoch": 1.246185302734375e-05, "model_forward_time": 0.025738239288330078, "step": 8167 }, { "epoch": 1.246185302734375e-05, "step": 8167, "training_step_time": 0.10750603675842285 }, { "epoch": 1.246337890625e-05, "model_forward_time": 0.02566385269165039, "step": 8168 }, { "epoch": 1.246337890625e-05, "step": 8168, "training_step_time": 0.1098947525024414 }, { "epoch": 1.246490478515625e-05, "model_forward_time": 0.025243520736694336, "step": 8169 }, { "epoch": 1.246490478515625e-05, "step": 8169, "training_step_time": 0.11076760292053223 }, { "epoch": 1.24664306640625e-05, "grad_norm": 0.17834721505641937, "learning_rate": 8.708339188376302e-05, "loss": 0.0466, "step": 8170 }, { "epoch": 1.24664306640625e-05, "model_forward_time": 0.025246143341064453, "step": 8170 }, { "epoch": 1.24664306640625e-05, "step": 8170, "training_step_time": 0.10639619827270508 }, { "epoch": 1.246795654296875e-05, "model_forward_time": 0.02529597282409668, "step": 8171 }, { "epoch": 1.246795654296875e-05, "step": 8171, "training_step_time": 0.11070513725280762 }, { "epoch": 1.2469482421875e-05, "model_forward_time": 0.026114940643310547, "step": 8172 }, { "epoch": 1.2469482421875e-05, "step": 8172, "training_step_time": 0.10739803314208984 }, { "epoch": 1.247100830078125e-05, "model_forward_time": 0.027096033096313477, "step": 8173 }, { "epoch": 1.247100830078125e-05, "step": 8173, "training_step_time": 0.11214208602905273 }, { "epoch": 1.24725341796875e-05, "model_forward_time": 0.02624344825744629, "step": 8174 }, { "epoch": 1.24725341796875e-05, "step": 8174, "training_step_time": 0.1440715789794922 }, { "epoch": 1.247406005859375e-05, "model_forward_time": 0.025405168533325195, "step": 8175 }, { "epoch": 1.247406005859375e-05, "step": 8175, "training_step_time": 0.19591522216796875 }, { "epoch": 1.24755859375e-05, "model_forward_time": 0.024134159088134766, "step": 8176 }, { "epoch": 1.24755859375e-05, "step": 8176, "training_step_time": 0.15391206741333008 }, { "epoch": 1.247711181640625e-05, "model_forward_time": 0.025673627853393555, "step": 8177 }, { "epoch": 1.247711181640625e-05, "step": 8177, "training_step_time": 0.14055562019348145 }, { "epoch": 1.24786376953125e-05, "model_forward_time": 0.025114774703979492, "step": 8178 }, { "epoch": 1.24786376953125e-05, "step": 8178, "training_step_time": 0.13091659545898438 }, { "epoch": 1.248016357421875e-05, "model_forward_time": 0.02469491958618164, "step": 8179 }, { "epoch": 1.248016357421875e-05, "step": 8179, "training_step_time": 0.12354660034179688 }, { "epoch": 1.2481689453125e-05, "grad_norm": 0.37783750891685486, "learning_rate": 8.704639957653567e-05, "loss": 0.0421, "step": 8180 }, { "epoch": 1.2481689453125e-05, "model_forward_time": 0.02525472640991211, "step": 8180 }, { "epoch": 1.2481689453125e-05, "step": 8180, "training_step_time": 0.11915230751037598 }, { "epoch": 1.248321533203125e-05, "model_forward_time": 0.025423765182495117, "step": 8181 }, { "epoch": 1.248321533203125e-05, "step": 8181, "training_step_time": 0.11789155006408691 }, { "epoch": 1.24847412109375e-05, "model_forward_time": 0.02539658546447754, "step": 8182 }, { "epoch": 1.24847412109375e-05, "step": 8182, "training_step_time": 0.15716838836669922 }, { "epoch": 1.248626708984375e-05, "model_forward_time": 0.0246734619140625, "step": 8183 }, { "epoch": 1.248626708984375e-05, "step": 8183, "training_step_time": 0.11026191711425781 }, { "epoch": 1.248779296875e-05, "model_forward_time": 0.025103092193603516, "step": 8184 }, { "epoch": 1.248779296875e-05, "step": 8184, "training_step_time": 0.11174130439758301 }, { "epoch": 1.248931884765625e-05, "model_forward_time": 0.02518439292907715, "step": 8185 }, { "epoch": 1.248931884765625e-05, "step": 8185, "training_step_time": 0.11531519889831543 }, { "epoch": 1.24908447265625e-05, "model_forward_time": 0.02520608901977539, "step": 8186 }, { "epoch": 1.24908447265625e-05, "step": 8186, "training_step_time": 0.1285996437072754 }, { "epoch": 1.249237060546875e-05, "model_forward_time": 0.02532052993774414, "step": 8187 }, { "epoch": 1.249237060546875e-05, "step": 8187, "training_step_time": 0.11246442794799805 }, { "epoch": 1.2493896484375e-05, "model_forward_time": 0.025151491165161133, "step": 8188 }, { "epoch": 1.2493896484375e-05, "step": 8188, "training_step_time": 0.12714242935180664 }, { "epoch": 1.249542236328125e-05, "model_forward_time": 0.025516033172607422, "step": 8189 }, { "epoch": 1.249542236328125e-05, "step": 8189, "training_step_time": 0.11196565628051758 }, { "epoch": 1.24969482421875e-05, "grad_norm": 0.38588637113571167, "learning_rate": 8.700936225443959e-05, "loss": 0.0417, "step": 8190 }, { "epoch": 1.24969482421875e-05, "model_forward_time": 0.025374889373779297, "step": 8190 }, { "epoch": 1.24969482421875e-05, "step": 8190, "training_step_time": 0.21080279350280762 }, { "epoch": 1.249847412109375e-05, "model_forward_time": 0.024503469467163086, "step": 8191 }, { "epoch": 1.249847412109375e-05, "step": 8191, "training_step_time": 0.11725592613220215 }, { "epoch": 1.25e-05, "model_forward_time": 0.024637222290039062, "step": 8192 }, { "epoch": 1.25e-05, "step": 8192, "training_step_time": 0.14208698272705078 }, { "epoch": 1.250152587890625e-05, "model_forward_time": 0.024822235107421875, "step": 8193 }, { "epoch": 1.250152587890625e-05, "step": 8193, "training_step_time": 0.14830398559570312 }, { "epoch": 1.25030517578125e-05, "model_forward_time": 0.02435922622680664, "step": 8194 }, { "epoch": 1.25030517578125e-05, "step": 8194, "training_step_time": 0.21439456939697266 }, { "epoch": 1.250457763671875e-05, "model_forward_time": 0.02557516098022461, "step": 8195 }, { "epoch": 1.250457763671875e-05, "step": 8195, "training_step_time": 0.11522245407104492 }, { "epoch": 1.2506103515625e-05, "model_forward_time": 0.024249792098999023, "step": 8196 }, { "epoch": 1.2506103515625e-05, "step": 8196, "training_step_time": 0.10521292686462402 }, { "epoch": 1.250762939453125e-05, "model_forward_time": 0.025287389755249023, "step": 8197 }, { "epoch": 1.250762939453125e-05, "step": 8197, "training_step_time": 0.11505365371704102 }, { "epoch": 1.25091552734375e-05, "model_forward_time": 0.025240421295166016, "step": 8198 }, { "epoch": 1.25091552734375e-05, "step": 8198, "training_step_time": 0.11180973052978516 }, { "epoch": 1.251068115234375e-05, "model_forward_time": 0.025684833526611328, "step": 8199 }, { "epoch": 1.251068115234375e-05, "step": 8199, "training_step_time": 0.11319279670715332 }, { "epoch": 1.251220703125e-05, "grad_norm": 0.7702202796936035, "learning_rate": 8.697227996247861e-05, "loss": 0.0425, "step": 8200 }, { "epoch": 1.251220703125e-05, "model_forward_time": 0.024726152420043945, "step": 8200 }, { "epoch": 1.251220703125e-05, "step": 8200, "training_step_time": 0.18407177925109863 }, { "epoch": 1.251373291015625e-05, "model_forward_time": 0.024876832962036133, "step": 8201 }, { "epoch": 1.251373291015625e-05, "step": 8201, "training_step_time": 0.10600090026855469 }, { "epoch": 1.25152587890625e-05, "model_forward_time": 0.024321317672729492, "step": 8202 }, { "epoch": 1.25152587890625e-05, "step": 8202, "training_step_time": 0.1080012321472168 }, { "epoch": 1.251678466796875e-05, "model_forward_time": 0.02572941780090332, "step": 8203 }, { "epoch": 1.251678466796875e-05, "step": 8203, "training_step_time": 0.11006355285644531 }, { "epoch": 1.2518310546875e-05, "model_forward_time": 0.024849414825439453, "step": 8204 }, { "epoch": 1.2518310546875e-05, "step": 8204, "training_step_time": 0.11139583587646484 }, { "epoch": 1.251983642578125e-05, "model_forward_time": 0.02514791488647461, "step": 8205 }, { "epoch": 1.251983642578125e-05, "step": 8205, "training_step_time": 0.18909645080566406 }, { "epoch": 1.25213623046875e-05, "model_forward_time": 0.024007081985473633, "step": 8206 }, { "epoch": 1.25213623046875e-05, "step": 8206, "training_step_time": 0.19478559494018555 }, { "epoch": 1.252288818359375e-05, "model_forward_time": 0.023978471755981445, "step": 8207 }, { "epoch": 1.252288818359375e-05, "step": 8207, "training_step_time": 0.18808794021606445 }, { "epoch": 1.25244140625e-05, "model_forward_time": 0.024257659912109375, "step": 8208 }, { "epoch": 1.25244140625e-05, "step": 8208, "training_step_time": 0.17804312705993652 }, { "epoch": 1.252593994140625e-05, "model_forward_time": 0.0243074893951416, "step": 8209 }, { "epoch": 1.252593994140625e-05, "step": 8209, "training_step_time": 0.16118907928466797 }, { "epoch": 1.25274658203125e-05, "grad_norm": 0.39683008193969727, "learning_rate": 8.693515274571123e-05, "loss": 0.0297, "step": 8210 }, { "epoch": 1.25274658203125e-05, "model_forward_time": 0.024426698684692383, "step": 8210 }, { "epoch": 1.25274658203125e-05, "step": 8210, "training_step_time": 0.107757568359375 }, { "epoch": 1.252899169921875e-05, "model_forward_time": 0.02456212043762207, "step": 8211 }, { "epoch": 1.252899169921875e-05, "step": 8211, "training_step_time": 0.10657000541687012 }, { "epoch": 1.2530517578125e-05, "model_forward_time": 0.02526235580444336, "step": 8212 }, { "epoch": 1.2530517578125e-05, "step": 8212, "training_step_time": 0.11682629585266113 }, { "epoch": 1.253204345703125e-05, "model_forward_time": 0.02599048614501953, "step": 8213 }, { "epoch": 1.253204345703125e-05, "step": 8213, "training_step_time": 0.10712933540344238 }, { "epoch": 1.25335693359375e-05, "model_forward_time": 0.025165319442749023, "step": 8214 }, { "epoch": 1.25335693359375e-05, "step": 8214, "training_step_time": 0.21603631973266602 }, { "epoch": 1.253509521484375e-05, "model_forward_time": 0.02450108528137207, "step": 8215 }, { "epoch": 1.253509521484375e-05, "step": 8215, "training_step_time": 0.11315226554870605 }, { "epoch": 1.253662109375e-05, "model_forward_time": 0.024524688720703125, "step": 8216 }, { "epoch": 1.253662109375e-05, "step": 8216, "training_step_time": 0.11068987846374512 }, { "epoch": 1.253814697265625e-05, "model_forward_time": 0.025288820266723633, "step": 8217 }, { "epoch": 1.253814697265625e-05, "step": 8217, "training_step_time": 0.17493104934692383 }, { "epoch": 1.25396728515625e-05, "model_forward_time": 0.024574995040893555, "step": 8218 }, { "epoch": 1.25396728515625e-05, "step": 8218, "training_step_time": 0.16621136665344238 }, { "epoch": 1.254119873046875e-05, "model_forward_time": 0.02475452423095703, "step": 8219 }, { "epoch": 1.254119873046875e-05, "step": 8219, "training_step_time": 0.10582661628723145 }, { "epoch": 1.2542724609375e-05, "grad_norm": 0.6069748997688293, "learning_rate": 8.689798064925049e-05, "loss": 0.039, "step": 8220 }, { "epoch": 1.2542724609375e-05, "model_forward_time": 0.02534794807434082, "step": 8220 }, { "epoch": 1.2542724609375e-05, "step": 8220, "training_step_time": 0.12897920608520508 }, { "epoch": 1.254425048828125e-05, "model_forward_time": 0.025569915771484375, "step": 8221 }, { "epoch": 1.254425048828125e-05, "step": 8221, "training_step_time": 0.1462569236755371 }, { "epoch": 1.25457763671875e-05, "model_forward_time": 0.024427175521850586, "step": 8222 }, { "epoch": 1.25457763671875e-05, "step": 8222, "training_step_time": 0.13944578170776367 }, { "epoch": 1.254730224609375e-05, "model_forward_time": 0.02634596824645996, "step": 8223 }, { "epoch": 1.254730224609375e-05, "step": 8223, "training_step_time": 0.12539887428283691 }, { "epoch": 1.2548828125e-05, "model_forward_time": 0.029315471649169922, "step": 8224 }, { "epoch": 1.2548828125e-05, "step": 8224, "training_step_time": 0.12380790710449219 }, { "epoch": 1.255035400390625e-05, "model_forward_time": 0.02501201629638672, "step": 8225 }, { "epoch": 1.255035400390625e-05, "step": 8225, "training_step_time": 0.11406183242797852 }, { "epoch": 1.25518798828125e-05, "model_forward_time": 0.025432348251342773, "step": 8226 }, { "epoch": 1.25518798828125e-05, "step": 8226, "training_step_time": 0.22187042236328125 }, { "epoch": 1.255340576171875e-05, "model_forward_time": 0.024504899978637695, "step": 8227 }, { "epoch": 1.255340576171875e-05, "step": 8227, "training_step_time": 0.12736296653747559 }, { "epoch": 1.2554931640625e-05, "model_forward_time": 0.024104833602905273, "step": 8228 }, { "epoch": 1.2554931640625e-05, "step": 8228, "training_step_time": 0.11540102958679199 }, { "epoch": 1.255645751953125e-05, "model_forward_time": 0.02513885498046875, "step": 8229 }, { "epoch": 1.255645751953125e-05, "step": 8229, "training_step_time": 0.12282252311706543 }, { "epoch": 1.25579833984375e-05, "grad_norm": 0.31822460889816284, "learning_rate": 8.686076371826401e-05, "loss": 0.0414, "step": 8230 }, { "epoch": 1.25579833984375e-05, "model_forward_time": 0.02523970603942871, "step": 8230 }, { "epoch": 1.25579833984375e-05, "step": 8230, "training_step_time": 0.11207962036132812 }, { "epoch": 1.255950927734375e-05, "model_forward_time": 0.02538466453552246, "step": 8231 }, { "epoch": 1.255950927734375e-05, "step": 8231, "training_step_time": 0.19382572174072266 }, { "epoch": 1.256103515625e-05, "model_forward_time": 0.024582862854003906, "step": 8232 }, { "epoch": 1.256103515625e-05, "step": 8232, "training_step_time": 0.11188554763793945 }, { "epoch": 1.256256103515625e-05, "model_forward_time": 0.02440953254699707, "step": 8233 }, { "epoch": 1.256256103515625e-05, "step": 8233, "training_step_time": 0.13836240768432617 }, { "epoch": 1.25640869140625e-05, "model_forward_time": 0.025472640991210938, "step": 8234 }, { "epoch": 1.25640869140625e-05, "step": 8234, "training_step_time": 0.15541815757751465 }, { "epoch": 1.256561279296875e-05, "model_forward_time": 0.02447366714477539, "step": 8235 }, { "epoch": 1.256561279296875e-05, "step": 8235, "training_step_time": 0.17270278930664062 }, { "epoch": 1.2567138671875e-05, "model_forward_time": 0.02411031723022461, "step": 8236 }, { "epoch": 1.2567138671875e-05, "step": 8236, "training_step_time": 0.16235613822937012 }, { "epoch": 1.256866455078125e-05, "model_forward_time": 0.024165868759155273, "step": 8237 }, { "epoch": 1.256866455078125e-05, "step": 8237, "training_step_time": 0.15903329849243164 }, { "epoch": 1.25701904296875e-05, "model_forward_time": 0.024572134017944336, "step": 8238 }, { "epoch": 1.25701904296875e-05, "step": 8238, "training_step_time": 0.15021109580993652 }, { "epoch": 1.257171630859375e-05, "model_forward_time": 0.024484872817993164, "step": 8239 }, { "epoch": 1.257171630859375e-05, "step": 8239, "training_step_time": 0.12743806838989258 }, { "epoch": 1.25732421875e-05, "grad_norm": 0.36488208174705505, "learning_rate": 8.682350199797388e-05, "loss": 0.0382, "step": 8240 }, { "epoch": 1.25732421875e-05, "model_forward_time": 0.025040864944458008, "step": 8240 }, { "epoch": 1.25732421875e-05, "step": 8240, "training_step_time": 0.17049050331115723 }, { "epoch": 1.257476806640625e-05, "model_forward_time": 0.024437427520751953, "step": 8241 }, { "epoch": 1.257476806640625e-05, "step": 8241, "training_step_time": 0.10408997535705566 }, { "epoch": 1.25762939453125e-05, "model_forward_time": 0.024277687072753906, "step": 8242 }, { "epoch": 1.25762939453125e-05, "step": 8242, "training_step_time": 0.11392378807067871 }, { "epoch": 1.257781982421875e-05, "model_forward_time": 0.024971723556518555, "step": 8243 }, { "epoch": 1.257781982421875e-05, "step": 8243, "training_step_time": 0.10902976989746094 }, { "epoch": 1.2579345703125e-05, "model_forward_time": 0.026078224182128906, "step": 8244 }, { "epoch": 1.2579345703125e-05, "step": 8244, "training_step_time": 0.11269617080688477 }, { "epoch": 1.258087158203125e-05, "model_forward_time": 0.025257110595703125, "step": 8245 }, { "epoch": 1.258087158203125e-05, "step": 8245, "training_step_time": 0.10592198371887207 }, { "epoch": 1.25823974609375e-05, "model_forward_time": 0.025606632232666016, "step": 8246 }, { "epoch": 1.25823974609375e-05, "step": 8246, "training_step_time": 0.11192727088928223 }, { "epoch": 1.258392333984375e-05, "model_forward_time": 0.025290489196777344, "step": 8247 }, { "epoch": 1.258392333984375e-05, "step": 8247, "training_step_time": 0.10774350166320801 }, { "epoch": 1.258544921875e-05, "model_forward_time": 0.025415658950805664, "step": 8248 }, { "epoch": 1.258544921875e-05, "step": 8248, "training_step_time": 0.10636353492736816 }, { "epoch": 1.258697509765625e-05, "model_forward_time": 0.02469491958618164, "step": 8249 }, { "epoch": 1.258697509765625e-05, "step": 8249, "training_step_time": 0.10709023475646973 }, { "epoch": 1.25885009765625e-05, "grad_norm": 0.365360289812088, "learning_rate": 8.678619553365659e-05, "loss": 0.0381, "step": 8250 }, { "epoch": 1.25885009765625e-05, "model_forward_time": 0.024948835372924805, "step": 8250 }, { "epoch": 1.25885009765625e-05, "step": 8250, "training_step_time": 0.10601091384887695 }, { "epoch": 1.259002685546875e-05, "model_forward_time": 0.025475502014160156, "step": 8251 }, { "epoch": 1.259002685546875e-05, "step": 8251, "training_step_time": 0.1078944206237793 }, { "epoch": 1.2591552734375e-05, "model_forward_time": 0.024945735931396484, "step": 8252 }, { "epoch": 1.2591552734375e-05, "step": 8252, "training_step_time": 0.10906624794006348 }, { "epoch": 1.259307861328125e-05, "model_forward_time": 0.025359392166137695, "step": 8253 }, { "epoch": 1.259307861328125e-05, "step": 8253, "training_step_time": 0.11066246032714844 }, { "epoch": 1.25946044921875e-05, "model_forward_time": 0.02534031867980957, "step": 8254 }, { "epoch": 1.25946044921875e-05, "step": 8254, "training_step_time": 0.11054682731628418 }, { "epoch": 1.259613037109375e-05, "model_forward_time": 0.02546095848083496, "step": 8255 }, { "epoch": 1.259613037109375e-05, "step": 8255, "training_step_time": 0.11228585243225098 }, { "epoch": 1.259765625e-05, "model_forward_time": 0.026200532913208008, "step": 8256 }, { "epoch": 1.259765625e-05, "step": 8256, "training_step_time": 0.10856294631958008 }, { "epoch": 1.259918212890625e-05, "model_forward_time": 0.025425434112548828, "step": 8257 }, { "epoch": 1.259918212890625e-05, "step": 8257, "training_step_time": 0.1676044464111328 }, { "epoch": 1.26007080078125e-05, "model_forward_time": 0.024544239044189453, "step": 8258 }, { "epoch": 1.26007080078125e-05, "step": 8258, "training_step_time": 0.1586611270904541 }, { "epoch": 1.260223388671875e-05, "model_forward_time": 0.024640321731567383, "step": 8259 }, { "epoch": 1.260223388671875e-05, "step": 8259, "training_step_time": 0.11453056335449219 }, { "epoch": 1.2603759765625e-05, "grad_norm": 0.4181400239467621, "learning_rate": 8.674884437064302e-05, "loss": 0.0301, "step": 8260 }, { "epoch": 1.2603759765625e-05, "model_forward_time": 0.02446269989013672, "step": 8260 }, { "epoch": 1.2603759765625e-05, "step": 8260, "training_step_time": 0.21543145179748535 }, { "epoch": 1.260528564453125e-05, "model_forward_time": 0.024779796600341797, "step": 8261 }, { "epoch": 1.260528564453125e-05, "step": 8261, "training_step_time": 0.11783599853515625 }, { "epoch": 1.26068115234375e-05, "model_forward_time": 0.024695873260498047, "step": 8262 }, { "epoch": 1.26068115234375e-05, "step": 8262, "training_step_time": 0.10457754135131836 }, { "epoch": 1.260833740234375e-05, "model_forward_time": 0.02546238899230957, "step": 8263 }, { "epoch": 1.260833740234375e-05, "step": 8263, "training_step_time": 0.10940814018249512 }, { "epoch": 1.260986328125e-05, "model_forward_time": 0.0253298282623291, "step": 8264 }, { "epoch": 1.260986328125e-05, "step": 8264, "training_step_time": 0.10827374458312988 }, { "epoch": 1.261138916015625e-05, "model_forward_time": 0.025241613388061523, "step": 8265 }, { "epoch": 1.261138916015625e-05, "step": 8265, "training_step_time": 0.10695338249206543 }, { "epoch": 1.26129150390625e-05, "model_forward_time": 0.02699732780456543, "step": 8266 }, { "epoch": 1.26129150390625e-05, "step": 8266, "training_step_time": 0.10977029800415039 }, { "epoch": 1.261444091796875e-05, "model_forward_time": 0.025312185287475586, "step": 8267 }, { "epoch": 1.261444091796875e-05, "step": 8267, "training_step_time": 0.10727429389953613 }, { "epoch": 1.2615966796875e-05, "model_forward_time": 0.02538156509399414, "step": 8268 }, { "epoch": 1.2615966796875e-05, "step": 8268, "training_step_time": 0.15910005569458008 }, { "epoch": 1.261749267578125e-05, "model_forward_time": 0.024812698364257812, "step": 8269 }, { "epoch": 1.261749267578125e-05, "step": 8269, "training_step_time": 0.11165642738342285 }, { "epoch": 1.26190185546875e-05, "grad_norm": 0.7219143509864807, "learning_rate": 8.671144855431833e-05, "loss": 0.0366, "step": 8270 }, { "epoch": 1.26190185546875e-05, "model_forward_time": 0.02467656135559082, "step": 8270 }, { "epoch": 1.26190185546875e-05, "step": 8270, "training_step_time": 0.10815858840942383 }, { "epoch": 1.262054443359375e-05, "model_forward_time": 0.02594470977783203, "step": 8271 }, { "epoch": 1.262054443359375e-05, "step": 8271, "training_step_time": 0.11873769760131836 }, { "epoch": 1.26220703125e-05, "model_forward_time": 0.025385618209838867, "step": 8272 }, { "epoch": 1.26220703125e-05, "step": 8272, "training_step_time": 0.13196349143981934 }, { "epoch": 1.262359619140625e-05, "model_forward_time": 0.025077342987060547, "step": 8273 }, { "epoch": 1.262359619140625e-05, "step": 8273, "training_step_time": 0.11383843421936035 }, { "epoch": 1.26251220703125e-05, "model_forward_time": 0.02555704116821289, "step": 8274 }, { "epoch": 1.26251220703125e-05, "step": 8274, "training_step_time": 0.12500810623168945 }, { "epoch": 1.262664794921875e-05, "model_forward_time": 0.025110483169555664, "step": 8275 }, { "epoch": 1.262664794921875e-05, "step": 8275, "training_step_time": 0.1092996597290039 }, { "epoch": 1.2628173828125e-05, "model_forward_time": 0.025484561920166016, "step": 8276 }, { "epoch": 1.2628173828125e-05, "step": 8276, "training_step_time": 0.17926979064941406 }, { "epoch": 1.262969970703125e-05, "model_forward_time": 0.0254061222076416, "step": 8277 }, { "epoch": 1.262969970703125e-05, "step": 8277, "training_step_time": 0.10682964324951172 }, { "epoch": 1.26312255859375e-05, "model_forward_time": 0.024709701538085938, "step": 8278 }, { "epoch": 1.26312255859375e-05, "step": 8278, "training_step_time": 0.13444924354553223 }, { "epoch": 1.263275146484375e-05, "model_forward_time": 0.02516627311706543, "step": 8279 }, { "epoch": 1.263275146484375e-05, "step": 8279, "training_step_time": 0.13913321495056152 }, { "epoch": 1.263427734375e-05, "grad_norm": 0.5215271711349487, "learning_rate": 8.6674008130122e-05, "loss": 0.0463, "step": 8280 }, { "epoch": 1.263427734375e-05, "model_forward_time": 0.02445363998413086, "step": 8280 }, { "epoch": 1.263427734375e-05, "step": 8280, "training_step_time": 0.11500382423400879 }, { "epoch": 1.263580322265625e-05, "model_forward_time": 0.025279998779296875, "step": 8281 }, { "epoch": 1.263580322265625e-05, "step": 8281, "training_step_time": 0.1218256950378418 }, { "epoch": 1.26373291015625e-05, "model_forward_time": 0.025266647338867188, "step": 8282 }, { "epoch": 1.26373291015625e-05, "step": 8282, "training_step_time": 0.1891002655029297 }, { "epoch": 1.263885498046875e-05, "model_forward_time": 0.02354741096496582, "step": 8283 }, { "epoch": 1.263885498046875e-05, "step": 8283, "training_step_time": 0.1746351718902588 }, { "epoch": 1.2640380859375e-05, "model_forward_time": 0.02398395538330078, "step": 8284 }, { "epoch": 1.2640380859375e-05, "step": 8284, "training_step_time": 0.17761015892028809 }, { "epoch": 1.264190673828125e-05, "model_forward_time": 0.02431941032409668, "step": 8285 }, { "epoch": 1.264190673828125e-05, "step": 8285, "training_step_time": 0.10540461540222168 }, { "epoch": 1.26434326171875e-05, "model_forward_time": 0.02440500259399414, "step": 8286 }, { "epoch": 1.26434326171875e-05, "step": 8286, "training_step_time": 0.10448575019836426 }, { "epoch": 1.264495849609375e-05, "model_forward_time": 0.025258541107177734, "step": 8287 }, { "epoch": 1.264495849609375e-05, "step": 8287, "training_step_time": 0.10839462280273438 }, { "epoch": 1.2646484375e-05, "model_forward_time": 0.025036096572875977, "step": 8288 }, { "epoch": 1.2646484375e-05, "step": 8288, "training_step_time": 0.1069180965423584 }, { "epoch": 1.264801025390625e-05, "model_forward_time": 0.02533578872680664, "step": 8289 }, { "epoch": 1.264801025390625e-05, "step": 8289, "training_step_time": 0.10826706886291504 }, { "epoch": 1.26495361328125e-05, "grad_norm": 0.5088498592376709, "learning_rate": 8.663652314354765e-05, "loss": 0.0305, "step": 8290 }, { "epoch": 1.26495361328125e-05, "model_forward_time": 0.02423095703125, "step": 8290 }, { "epoch": 1.26495361328125e-05, "step": 8290, "training_step_time": 0.10860347747802734 }, { "epoch": 1.265106201171875e-05, "model_forward_time": 0.025333881378173828, "step": 8291 }, { "epoch": 1.265106201171875e-05, "step": 8291, "training_step_time": 0.11097002029418945 }, { "epoch": 1.2652587890625e-05, "model_forward_time": 0.0252230167388916, "step": 8292 }, { "epoch": 1.2652587890625e-05, "step": 8292, "training_step_time": 0.10833239555358887 }, { "epoch": 1.265411376953125e-05, "model_forward_time": 0.024645566940307617, "step": 8293 }, { "epoch": 1.265411376953125e-05, "step": 8293, "training_step_time": 0.1066901683807373 }, { "epoch": 1.26556396484375e-05, "model_forward_time": 0.025157690048217773, "step": 8294 }, { "epoch": 1.26556396484375e-05, "step": 8294, "training_step_time": 0.1079859733581543 }, { "epoch": 1.265716552734375e-05, "model_forward_time": 0.025233745574951172, "step": 8295 }, { "epoch": 1.265716552734375e-05, "step": 8295, "training_step_time": 0.1087646484375 }, { "epoch": 1.265869140625e-05, "model_forward_time": 0.024991750717163086, "step": 8296 }, { "epoch": 1.265869140625e-05, "step": 8296, "training_step_time": 0.10718011856079102 }, { "epoch": 1.266021728515625e-05, "model_forward_time": 0.024930715560913086, "step": 8297 }, { "epoch": 1.266021728515625e-05, "step": 8297, "training_step_time": 0.1100461483001709 }, { "epoch": 1.26617431640625e-05, "model_forward_time": 0.02480292320251465, "step": 8298 }, { "epoch": 1.26617431640625e-05, "step": 8298, "training_step_time": 0.1079554557800293 }, { "epoch": 1.266326904296875e-05, "model_forward_time": 0.025150775909423828, "step": 8299 }, { "epoch": 1.266326904296875e-05, "step": 8299, "training_step_time": 0.10934925079345703 }, { "epoch": 1.2664794921875e-05, "grad_norm": 0.2651950716972351, "learning_rate": 8.659899364014309e-05, "loss": 0.0443, "step": 8300 }, { "epoch": 1.2664794921875e-05, "model_forward_time": 0.025262117385864258, "step": 8300 }, { "epoch": 1.2664794921875e-05, "step": 8300, "training_step_time": 0.11060929298400879 }, { "epoch": 1.266632080078125e-05, "model_forward_time": 0.0248870849609375, "step": 8301 }, { "epoch": 1.266632080078125e-05, "step": 8301, "training_step_time": 0.10633325576782227 }, { "epoch": 1.26678466796875e-05, "model_forward_time": 0.025025606155395508, "step": 8302 }, { "epoch": 1.26678466796875e-05, "step": 8302, "training_step_time": 0.10481858253479004 }, { "epoch": 1.266937255859375e-05, "model_forward_time": 0.025299787521362305, "step": 8303 }, { "epoch": 1.266937255859375e-05, "step": 8303, "training_step_time": 0.1071774959564209 }, { "epoch": 1.26708984375e-05, "model_forward_time": 0.025222063064575195, "step": 8304 }, { "epoch": 1.26708984375e-05, "step": 8304, "training_step_time": 0.1157834529876709 }, { "epoch": 1.267242431640625e-05, "model_forward_time": 0.025131702423095703, "step": 8305 }, { "epoch": 1.267242431640625e-05, "step": 8305, "training_step_time": 0.1990342140197754 }, { "epoch": 1.26739501953125e-05, "model_forward_time": 0.024380922317504883, "step": 8306 }, { "epoch": 1.26739501953125e-05, "step": 8306, "training_step_time": 0.12038278579711914 }, { "epoch": 1.267547607421875e-05, "model_forward_time": 0.024713754653930664, "step": 8307 }, { "epoch": 1.267547607421875e-05, "step": 8307, "training_step_time": 0.1348867416381836 }, { "epoch": 1.2677001953125e-05, "model_forward_time": 0.024938583374023438, "step": 8308 }, { "epoch": 1.2677001953125e-05, "step": 8308, "training_step_time": 0.13088464736938477 }, { "epoch": 1.267852783203125e-05, "model_forward_time": 0.024430274963378906, "step": 8309 }, { "epoch": 1.267852783203125e-05, "step": 8309, "training_step_time": 0.13077998161315918 }, { "epoch": 1.26800537109375e-05, "grad_norm": 0.40368762612342834, "learning_rate": 8.656141966551019e-05, "loss": 0.0333, "step": 8310 }, { "epoch": 1.26800537109375e-05, "model_forward_time": 0.02498149871826172, "step": 8310 }, { "epoch": 1.26800537109375e-05, "step": 8310, "training_step_time": 0.12154078483581543 }, { "epoch": 1.268157958984375e-05, "model_forward_time": 0.024976491928100586, "step": 8311 }, { "epoch": 1.268157958984375e-05, "step": 8311, "training_step_time": 0.13335514068603516 }, { "epoch": 1.268310546875e-05, "model_forward_time": 0.024996519088745117, "step": 8312 }, { "epoch": 1.268310546875e-05, "step": 8312, "training_step_time": 0.1266331672668457 }, { "epoch": 1.268463134765625e-05, "model_forward_time": 0.024685144424438477, "step": 8313 }, { "epoch": 1.268463134765625e-05, "step": 8313, "training_step_time": 0.12082147598266602 }, { "epoch": 1.26861572265625e-05, "model_forward_time": 0.025316953659057617, "step": 8314 }, { "epoch": 1.26861572265625e-05, "step": 8314, "training_step_time": 0.13019490242004395 }, { "epoch": 1.268768310546875e-05, "model_forward_time": 0.02454090118408203, "step": 8315 }, { "epoch": 1.268768310546875e-05, "step": 8315, "training_step_time": 0.11512017250061035 }, { "epoch": 1.2689208984375e-05, "model_forward_time": 0.024876832962036133, "step": 8316 }, { "epoch": 1.2689208984375e-05, "step": 8316, "training_step_time": 0.220841646194458 }, { "epoch": 1.269073486328125e-05, "model_forward_time": 0.024767637252807617, "step": 8317 }, { "epoch": 1.269073486328125e-05, "step": 8317, "training_step_time": 0.13405799865722656 }, { "epoch": 1.26922607421875e-05, "model_forward_time": 0.023891448974609375, "step": 8318 }, { "epoch": 1.26922607421875e-05, "step": 8318, "training_step_time": 0.10958003997802734 }, { "epoch": 1.269378662109375e-05, "model_forward_time": 0.02476024627685547, "step": 8319 }, { "epoch": 1.269378662109375e-05, "step": 8319, "training_step_time": 0.12910246849060059 }, { "epoch": 1.26953125e-05, "grad_norm": 0.36668530106544495, "learning_rate": 8.652380126530488e-05, "loss": 0.0288, "step": 8320 }, { "epoch": 1.26953125e-05, "model_forward_time": 0.025208711624145508, "step": 8320 }, { "epoch": 1.26953125e-05, "step": 8320, "training_step_time": 0.23127484321594238 }, { "epoch": 1.269683837890625e-05, "model_forward_time": 0.024553537368774414, "step": 8321 }, { "epoch": 1.269683837890625e-05, "step": 8321, "training_step_time": 0.11568999290466309 }, { "epoch": 1.26983642578125e-05, "model_forward_time": 0.024027585983276367, "step": 8322 }, { "epoch": 1.26983642578125e-05, "step": 8322, "training_step_time": 0.13327670097351074 }, { "epoch": 1.269989013671875e-05, "model_forward_time": 0.02466750144958496, "step": 8323 }, { "epoch": 1.269989013671875e-05, "step": 8323, "training_step_time": 0.19024968147277832 }, { "epoch": 1.2701416015625e-05, "model_forward_time": 0.024016141891479492, "step": 8324 }, { "epoch": 1.2701416015625e-05, "step": 8324, "training_step_time": 0.20044398307800293 }, { "epoch": 1.270294189453125e-05, "model_forward_time": 0.023749828338623047, "step": 8325 }, { "epoch": 1.270294189453125e-05, "step": 8325, "training_step_time": 0.17215275764465332 }, { "epoch": 1.27044677734375e-05, "model_forward_time": 0.025114059448242188, "step": 8326 }, { "epoch": 1.27044677734375e-05, "step": 8326, "training_step_time": 0.11306548118591309 }, { "epoch": 1.270599365234375e-05, "model_forward_time": 0.024098634719848633, "step": 8327 }, { "epoch": 1.270599365234375e-05, "step": 8327, "training_step_time": 0.1038053035736084 }, { "epoch": 1.270751953125e-05, "model_forward_time": 0.025075197219848633, "step": 8328 }, { "epoch": 1.270751953125e-05, "step": 8328, "training_step_time": 0.19225502014160156 }, { "epoch": 1.270904541015625e-05, "model_forward_time": 0.025210857391357422, "step": 8329 }, { "epoch": 1.270904541015625e-05, "step": 8329, "training_step_time": 0.10590791702270508 }, { "epoch": 1.27105712890625e-05, "grad_norm": 0.3765827715396881, "learning_rate": 8.648613848523707e-05, "loss": 0.0382, "step": 8330 }, { "epoch": 1.27105712890625e-05, "model_forward_time": 0.0246429443359375, "step": 8330 }, { "epoch": 1.27105712890625e-05, "step": 8330, "training_step_time": 0.10933160781860352 }, { "epoch": 1.271209716796875e-05, "model_forward_time": 0.025256633758544922, "step": 8331 }, { "epoch": 1.271209716796875e-05, "step": 8331, "training_step_time": 0.11239266395568848 }, { "epoch": 1.2713623046875e-05, "model_forward_time": 0.02524590492248535, "step": 8332 }, { "epoch": 1.2713623046875e-05, "step": 8332, "training_step_time": 0.10671377182006836 }, { "epoch": 1.271514892578125e-05, "model_forward_time": 0.025117874145507812, "step": 8333 }, { "epoch": 1.271514892578125e-05, "step": 8333, "training_step_time": 0.10737824440002441 }, { "epoch": 1.27166748046875e-05, "model_forward_time": 0.025689125061035156, "step": 8334 }, { "epoch": 1.27166748046875e-05, "step": 8334, "training_step_time": 0.10934138298034668 }, { "epoch": 1.271820068359375e-05, "model_forward_time": 0.025018692016601562, "step": 8335 }, { "epoch": 1.271820068359375e-05, "step": 8335, "training_step_time": 0.11110591888427734 }, { "epoch": 1.27197265625e-05, "model_forward_time": 0.025036096572875977, "step": 8336 }, { "epoch": 1.27197265625e-05, "step": 8336, "training_step_time": 0.10603904724121094 }, { "epoch": 1.272125244140625e-05, "model_forward_time": 0.02524876594543457, "step": 8337 }, { "epoch": 1.272125244140625e-05, "step": 8337, "training_step_time": 0.10672140121459961 }, { "epoch": 1.27227783203125e-05, "model_forward_time": 0.025658369064331055, "step": 8338 }, { "epoch": 1.27227783203125e-05, "step": 8338, "training_step_time": 0.1067667007446289 }, { "epoch": 1.272430419921875e-05, "model_forward_time": 0.02503180503845215, "step": 8339 }, { "epoch": 1.272430419921875e-05, "step": 8339, "training_step_time": 0.1061711311340332 }, { "epoch": 1.2725830078125e-05, "grad_norm": 0.4698609709739685, "learning_rate": 8.644843137107059e-05, "loss": 0.029, "step": 8340 }, { "epoch": 1.2725830078125e-05, "model_forward_time": 0.02399420738220215, "step": 8340 }, { "epoch": 1.2725830078125e-05, "step": 8340, "training_step_time": 0.10959410667419434 }, { "epoch": 1.272735595703125e-05, "model_forward_time": 0.024898290634155273, "step": 8341 }, { "epoch": 1.272735595703125e-05, "step": 8341, "training_step_time": 0.1078944206237793 }, { "epoch": 1.27288818359375e-05, "model_forward_time": 0.023999691009521484, "step": 8342 }, { "epoch": 1.27288818359375e-05, "step": 8342, "training_step_time": 0.11195588111877441 }, { "epoch": 1.273040771484375e-05, "model_forward_time": 0.0266265869140625, "step": 8343 }, { "epoch": 1.273040771484375e-05, "step": 8343, "training_step_time": 0.11050820350646973 }, { "epoch": 1.273193359375e-05, "model_forward_time": 0.02622532844543457, "step": 8344 }, { "epoch": 1.273193359375e-05, "step": 8344, "training_step_time": 0.10818195343017578 }, { "epoch": 1.273345947265625e-05, "model_forward_time": 0.02520275115966797, "step": 8345 }, { "epoch": 1.273345947265625e-05, "step": 8345, "training_step_time": 0.10703587532043457 }, { "epoch": 1.27349853515625e-05, "model_forward_time": 0.02508831024169922, "step": 8346 }, { "epoch": 1.27349853515625e-05, "step": 8346, "training_step_time": 0.10923075675964355 }, { "epoch": 1.273651123046875e-05, "model_forward_time": 0.026125431060791016, "step": 8347 }, { "epoch": 1.273651123046875e-05, "step": 8347, "training_step_time": 0.10979175567626953 }, { "epoch": 1.2738037109375e-05, "model_forward_time": 0.025430679321289062, "step": 8348 }, { "epoch": 1.2738037109375e-05, "step": 8348, "training_step_time": 0.1694927215576172 }, { "epoch": 1.273956298828125e-05, "model_forward_time": 0.024853229522705078, "step": 8349 }, { "epoch": 1.273956298828125e-05, "step": 8349, "training_step_time": 0.1736891269683838 }, { "epoch": 1.27410888671875e-05, "grad_norm": 0.3819833993911743, "learning_rate": 8.641067996862311e-05, "loss": 0.0439, "step": 8350 }, { "epoch": 1.27410888671875e-05, "model_forward_time": 0.024979591369628906, "step": 8350 }, { "epoch": 1.27410888671875e-05, "step": 8350, "training_step_time": 0.10771965980529785 }, { "epoch": 1.274261474609375e-05, "model_forward_time": 0.024687528610229492, "step": 8351 }, { "epoch": 1.274261474609375e-05, "step": 8351, "training_step_time": 0.21317529678344727 }, { "epoch": 1.2744140625e-05, "model_forward_time": 0.024732112884521484, "step": 8352 }, { "epoch": 1.2744140625e-05, "step": 8352, "training_step_time": 0.11307048797607422 }, { "epoch": 1.274566650390625e-05, "model_forward_time": 0.024814605712890625, "step": 8353 }, { "epoch": 1.274566650390625e-05, "step": 8353, "training_step_time": 0.10328817367553711 }, { "epoch": 1.27471923828125e-05, "model_forward_time": 0.025638818740844727, "step": 8354 }, { "epoch": 1.27471923828125e-05, "step": 8354, "training_step_time": 0.10732221603393555 }, { "epoch": 1.274871826171875e-05, "model_forward_time": 0.024724483489990234, "step": 8355 }, { "epoch": 1.274871826171875e-05, "step": 8355, "training_step_time": 0.10698437690734863 }, { "epoch": 1.2750244140625e-05, "model_forward_time": 0.025170564651489258, "step": 8356 }, { "epoch": 1.2750244140625e-05, "step": 8356, "training_step_time": 0.10966038703918457 }, { "epoch": 1.275177001953125e-05, "model_forward_time": 0.02550983428955078, "step": 8357 }, { "epoch": 1.275177001953125e-05, "step": 8357, "training_step_time": 0.11053800582885742 }, { "epoch": 1.27532958984375e-05, "model_forward_time": 0.024966716766357422, "step": 8358 }, { "epoch": 1.27532958984375e-05, "step": 8358, "training_step_time": 0.15204310417175293 }, { "epoch": 1.275482177734375e-05, "model_forward_time": 0.025038480758666992, "step": 8359 }, { "epoch": 1.275482177734375e-05, "step": 8359, "training_step_time": 0.11052274703979492 }, { "epoch": 1.275634765625e-05, "grad_norm": 0.6609170436859131, "learning_rate": 8.637288432376618e-05, "loss": 0.0348, "step": 8360 }, { "epoch": 1.275634765625e-05, "model_forward_time": 0.024238109588623047, "step": 8360 }, { "epoch": 1.275634765625e-05, "step": 8360, "training_step_time": 0.10915398597717285 }, { "epoch": 1.275787353515625e-05, "model_forward_time": 0.025017261505126953, "step": 8361 }, { "epoch": 1.275787353515625e-05, "step": 8361, "training_step_time": 0.12468624114990234 }, { "epoch": 1.27593994140625e-05, "model_forward_time": 0.024975061416625977, "step": 8362 }, { "epoch": 1.27593994140625e-05, "step": 8362, "training_step_time": 0.12370729446411133 }, { "epoch": 1.276092529296875e-05, "model_forward_time": 0.025257349014282227, "step": 8363 }, { "epoch": 1.276092529296875e-05, "step": 8363, "training_step_time": 0.11098742485046387 }, { "epoch": 1.2762451171875e-05, "model_forward_time": 0.025055646896362305, "step": 8364 }, { "epoch": 1.2762451171875e-05, "step": 8364, "training_step_time": 0.1190338134765625 }, { "epoch": 1.276397705078125e-05, "model_forward_time": 0.02498650550842285, "step": 8365 }, { "epoch": 1.276397705078125e-05, "step": 8365, "training_step_time": 0.19570350646972656 }, { "epoch": 1.27655029296875e-05, "model_forward_time": 0.02420806884765625, "step": 8366 }, { "epoch": 1.27655029296875e-05, "step": 8366, "training_step_time": 0.12624669075012207 }, { "epoch": 1.276702880859375e-05, "model_forward_time": 0.0243532657623291, "step": 8367 }, { "epoch": 1.276702880859375e-05, "step": 8367, "training_step_time": 0.10639429092407227 }, { "epoch": 1.27685546875e-05, "model_forward_time": 0.0251007080078125, "step": 8368 }, { "epoch": 1.27685546875e-05, "step": 8368, "training_step_time": 0.12567138671875 }, { "epoch": 1.277008056640625e-05, "model_forward_time": 0.024992942810058594, "step": 8369 }, { "epoch": 1.277008056640625e-05, "step": 8369, "training_step_time": 0.1861555576324463 }, { "epoch": 1.27716064453125e-05, "grad_norm": 0.4028451144695282, "learning_rate": 8.633504448242505e-05, "loss": 0.0316, "step": 8370 }, { "epoch": 1.27716064453125e-05, "model_forward_time": 0.02384805679321289, "step": 8370 }, { "epoch": 1.27716064453125e-05, "step": 8370, "training_step_time": 0.18284320831298828 }, { "epoch": 1.277313232421875e-05, "model_forward_time": 0.025268077850341797, "step": 8371 }, { "epoch": 1.277313232421875e-05, "step": 8371, "training_step_time": 0.11041736602783203 }, { "epoch": 1.2774658203125e-05, "model_forward_time": 0.024677515029907227, "step": 8372 }, { "epoch": 1.2774658203125e-05, "step": 8372, "training_step_time": 0.1602156162261963 }, { "epoch": 1.277618408203125e-05, "model_forward_time": 0.02453160285949707, "step": 8373 }, { "epoch": 1.277618408203125e-05, "step": 8373, "training_step_time": 0.10850644111633301 }, { "epoch": 1.27777099609375e-05, "model_forward_time": 0.02485966682434082, "step": 8374 }, { "epoch": 1.27777099609375e-05, "step": 8374, "training_step_time": 0.11056017875671387 }, { "epoch": 1.277923583984375e-05, "model_forward_time": 0.025166034698486328, "step": 8375 }, { "epoch": 1.277923583984375e-05, "step": 8375, "training_step_time": 0.11009383201599121 }, { "epoch": 1.278076171875e-05, "model_forward_time": 0.025252103805541992, "step": 8376 }, { "epoch": 1.278076171875e-05, "step": 8376, "training_step_time": 0.10729479789733887 }, { "epoch": 1.278228759765625e-05, "model_forward_time": 0.025304079055786133, "step": 8377 }, { "epoch": 1.278228759765625e-05, "step": 8377, "training_step_time": 0.10678935050964355 }, { "epoch": 1.27838134765625e-05, "model_forward_time": 0.025885343551635742, "step": 8378 }, { "epoch": 1.27838134765625e-05, "step": 8378, "training_step_time": 0.11346578598022461 }, { "epoch": 1.278533935546875e-05, "model_forward_time": 0.02542734146118164, "step": 8379 }, { "epoch": 1.278533935546875e-05, "step": 8379, "training_step_time": 0.10952210426330566 }, { "epoch": 1.2786865234375e-05, "grad_norm": 0.44128119945526123, "learning_rate": 8.629716049057872e-05, "loss": 0.0422, "step": 8380 }, { "epoch": 1.2786865234375e-05, "model_forward_time": 0.02500295639038086, "step": 8380 }, { "epoch": 1.2786865234375e-05, "step": 8380, "training_step_time": 0.10754776000976562 }, { "epoch": 1.278839111328125e-05, "model_forward_time": 0.02545166015625, "step": 8381 }, { "epoch": 1.278839111328125e-05, "step": 8381, "training_step_time": 0.10729312896728516 }, { "epoch": 1.27899169921875e-05, "model_forward_time": 0.024972200393676758, "step": 8382 }, { "epoch": 1.27899169921875e-05, "step": 8382, "training_step_time": 0.11115312576293945 }, { "epoch": 1.279144287109375e-05, "model_forward_time": 0.025452852249145508, "step": 8383 }, { "epoch": 1.279144287109375e-05, "step": 8383, "training_step_time": 0.10815024375915527 }, { "epoch": 1.279296875e-05, "model_forward_time": 0.025129079818725586, "step": 8384 }, { "epoch": 1.279296875e-05, "step": 8384, "training_step_time": 0.1091454029083252 }, { "epoch": 1.279449462890625e-05, "model_forward_time": 0.025385618209838867, "step": 8385 }, { "epoch": 1.279449462890625e-05, "step": 8385, "training_step_time": 0.10782790184020996 }, { "epoch": 1.27960205078125e-05, "model_forward_time": 0.025218486785888672, "step": 8386 }, { "epoch": 1.27960205078125e-05, "step": 8386, "training_step_time": 0.10896658897399902 }, { "epoch": 1.279754638671875e-05, "model_forward_time": 0.02565145492553711, "step": 8387 }, { "epoch": 1.279754638671875e-05, "step": 8387, "training_step_time": 0.11694836616516113 }, { "epoch": 1.2799072265625e-05, "model_forward_time": 0.02463507652282715, "step": 8388 }, { "epoch": 1.2799072265625e-05, "step": 8388, "training_step_time": 0.1114206314086914 }, { "epoch": 1.280059814453125e-05, "model_forward_time": 0.025370359420776367, "step": 8389 }, { "epoch": 1.280059814453125e-05, "step": 8389, "training_step_time": 0.10836124420166016 }, { "epoch": 1.28021240234375e-05, "grad_norm": 0.24307404458522797, "learning_rate": 8.625923239425978e-05, "loss": 0.032, "step": 8390 }, { "epoch": 1.28021240234375e-05, "model_forward_time": 0.025161027908325195, "step": 8390 }, { "epoch": 1.28021240234375e-05, "step": 8390, "training_step_time": 0.11198854446411133 }, { "epoch": 1.280364990234375e-05, "model_forward_time": 0.025131940841674805, "step": 8391 }, { "epoch": 1.280364990234375e-05, "step": 8391, "training_step_time": 0.10939908027648926 }, { "epoch": 1.280517578125e-05, "model_forward_time": 0.025220394134521484, "step": 8392 }, { "epoch": 1.280517578125e-05, "step": 8392, "training_step_time": 0.11527609825134277 }, { "epoch": 1.280670166015625e-05, "model_forward_time": 0.025374889373779297, "step": 8393 }, { "epoch": 1.280670166015625e-05, "step": 8393, "training_step_time": 0.19013166427612305 }, { "epoch": 1.28082275390625e-05, "model_forward_time": 0.024819374084472656, "step": 8394 }, { "epoch": 1.28082275390625e-05, "step": 8394, "training_step_time": 0.13388395309448242 }, { "epoch": 1.280975341796875e-05, "model_forward_time": 0.02500772476196289, "step": 8395 }, { "epoch": 1.280975341796875e-05, "step": 8395, "training_step_time": 0.1089029312133789 }, { "epoch": 1.2811279296875e-05, "model_forward_time": 0.025564908981323242, "step": 8396 }, { "epoch": 1.2811279296875e-05, "step": 8396, "training_step_time": 0.11120963096618652 }, { "epoch": 1.281280517578125e-05, "model_forward_time": 0.025641679763793945, "step": 8397 }, { "epoch": 1.281280517578125e-05, "step": 8397, "training_step_time": 0.1466817855834961 }, { "epoch": 1.28143310546875e-05, "model_forward_time": 0.025205612182617188, "step": 8398 }, { "epoch": 1.28143310546875e-05, "step": 8398, "training_step_time": 0.1926419734954834 }, { "epoch": 1.281585693359375e-05, "model_forward_time": 0.024611949920654297, "step": 8399 }, { "epoch": 1.281585693359375e-05, "step": 8399, "training_step_time": 0.10730814933776855 }, { "epoch": 1.28173828125e-05, "grad_norm": 0.41981056332588196, "learning_rate": 8.622126023955446e-05, "loss": 0.0345, "step": 8400 }, { "epoch": 1.28173828125e-05, "model_forward_time": 0.024862051010131836, "step": 8400 }, { "epoch": 1.28173828125e-05, "step": 8400, "training_step_time": 0.10840082168579102 }, { "epoch": 1.281890869140625e-05, "model_forward_time": 0.02514362335205078, "step": 8401 }, { "epoch": 1.281890869140625e-05, "step": 8401, "training_step_time": 0.10918140411376953 }, { "epoch": 1.28204345703125e-05, "model_forward_time": 0.025333404541015625, "step": 8402 }, { "epoch": 1.28204345703125e-05, "step": 8402, "training_step_time": 0.11403083801269531 }, { "epoch": 1.282196044921875e-05, "model_forward_time": 0.025063037872314453, "step": 8403 }, { "epoch": 1.282196044921875e-05, "step": 8403, "training_step_time": 0.10716056823730469 }, { "epoch": 1.2823486328125e-05, "model_forward_time": 0.025291919708251953, "step": 8404 }, { "epoch": 1.2823486328125e-05, "step": 8404, "training_step_time": 0.18341279029846191 }, { "epoch": 1.282501220703125e-05, "model_forward_time": 0.0243070125579834, "step": 8405 }, { "epoch": 1.282501220703125e-05, "step": 8405, "training_step_time": 0.10787129402160645 }, { "epoch": 1.28265380859375e-05, "model_forward_time": 0.02421879768371582, "step": 8406 }, { "epoch": 1.28265380859375e-05, "step": 8406, "training_step_time": 0.11003899574279785 }, { "epoch": 1.282806396484375e-05, "model_forward_time": 0.025466203689575195, "step": 8407 }, { "epoch": 1.282806396484375e-05, "step": 8407, "training_step_time": 0.12634658813476562 }, { "epoch": 1.282958984375e-05, "model_forward_time": 0.02593207359313965, "step": 8408 }, { "epoch": 1.282958984375e-05, "step": 8408, "training_step_time": 0.127305269241333 }, { "epoch": 1.283111572265625e-05, "model_forward_time": 0.025074481964111328, "step": 8409 }, { "epoch": 1.283111572265625e-05, "step": 8409, "training_step_time": 0.11005449295043945 }, { "epoch": 1.28326416015625e-05, "grad_norm": 0.44348084926605225, "learning_rate": 8.61832440726025e-05, "loss": 0.0366, "step": 8410 }, { "epoch": 1.28326416015625e-05, "model_forward_time": 0.02497553825378418, "step": 8410 }, { "epoch": 1.28326416015625e-05, "step": 8410, "training_step_time": 0.11715388298034668 }, { "epoch": 1.283416748046875e-05, "model_forward_time": 0.024999618530273438, "step": 8411 }, { "epoch": 1.283416748046875e-05, "step": 8411, "training_step_time": 0.15167570114135742 }, { "epoch": 1.2835693359375e-05, "model_forward_time": 0.024689674377441406, "step": 8412 }, { "epoch": 1.2835693359375e-05, "step": 8412, "training_step_time": 0.11267805099487305 }, { "epoch": 1.283721923828125e-05, "model_forward_time": 0.02488994598388672, "step": 8413 }, { "epoch": 1.283721923828125e-05, "step": 8413, "training_step_time": 0.2178962230682373 }, { "epoch": 1.28387451171875e-05, "model_forward_time": 0.024739503860473633, "step": 8414 }, { "epoch": 1.28387451171875e-05, "step": 8414, "training_step_time": 0.1516273021697998 }, { "epoch": 1.284027099609375e-05, "model_forward_time": 0.024057388305664062, "step": 8415 }, { "epoch": 1.284027099609375e-05, "step": 8415, "training_step_time": 0.1739046573638916 }, { "epoch": 1.2841796875e-05, "model_forward_time": 0.02478790283203125, "step": 8416 }, { "epoch": 1.2841796875e-05, "step": 8416, "training_step_time": 0.16681957244873047 }, { "epoch": 1.284332275390625e-05, "model_forward_time": 0.024401426315307617, "step": 8417 }, { "epoch": 1.284332275390625e-05, "step": 8417, "training_step_time": 0.1191091537475586 }, { "epoch": 1.28448486328125e-05, "model_forward_time": 0.024442672729492188, "step": 8418 }, { "epoch": 1.28448486328125e-05, "step": 8418, "training_step_time": 0.1895139217376709 }, { "epoch": 1.284637451171875e-05, "model_forward_time": 0.02458930015563965, "step": 8419 }, { "epoch": 1.284637451171875e-05, "step": 8419, "training_step_time": 0.11278343200683594 }, { "epoch": 1.2847900390625e-05, "grad_norm": 0.3509846031665802, "learning_rate": 8.614518393959714e-05, "loss": 0.0304, "step": 8420 }, { "epoch": 1.2847900390625e-05, "model_forward_time": 0.024610042572021484, "step": 8420 }, { "epoch": 1.2847900390625e-05, "step": 8420, "training_step_time": 0.11339807510375977 }, { "epoch": 1.284942626953125e-05, "model_forward_time": 0.024471521377563477, "step": 8421 }, { "epoch": 1.284942626953125e-05, "step": 8421, "training_step_time": 0.11230754852294922 }, { "epoch": 1.28509521484375e-05, "model_forward_time": 0.024585723876953125, "step": 8422 }, { "epoch": 1.28509521484375e-05, "step": 8422, "training_step_time": 0.11299920082092285 }, { "epoch": 1.285247802734375e-05, "model_forward_time": 0.025350093841552734, "step": 8423 }, { "epoch": 1.285247802734375e-05, "step": 8423, "training_step_time": 0.1131901741027832 }, { "epoch": 1.285400390625e-05, "model_forward_time": 0.025278568267822266, "step": 8424 }, { "epoch": 1.285400390625e-05, "step": 8424, "training_step_time": 0.11283087730407715 }, { "epoch": 1.285552978515625e-05, "model_forward_time": 0.02487659454345703, "step": 8425 }, { "epoch": 1.285552978515625e-05, "step": 8425, "training_step_time": 0.11358189582824707 }, { "epoch": 1.28570556640625e-05, "model_forward_time": 0.02543926239013672, "step": 8426 }, { "epoch": 1.28570556640625e-05, "step": 8426, "training_step_time": 0.10893869400024414 }, { "epoch": 1.285858154296875e-05, "model_forward_time": 0.02559661865234375, "step": 8427 }, { "epoch": 1.285858154296875e-05, "step": 8427, "training_step_time": 0.1089789867401123 }, { "epoch": 1.2860107421875e-05, "model_forward_time": 0.025310039520263672, "step": 8428 }, { "epoch": 1.2860107421875e-05, "step": 8428, "training_step_time": 0.11014556884765625 }, { "epoch": 1.286163330078125e-05, "model_forward_time": 0.02503824234008789, "step": 8429 }, { "epoch": 1.286163330078125e-05, "step": 8429, "training_step_time": 0.1107332706451416 }, { "epoch": 1.28631591796875e-05, "grad_norm": 0.5693546533584595, "learning_rate": 8.610707988678503e-05, "loss": 0.039, "step": 8430 }, { "epoch": 1.28631591796875e-05, "model_forward_time": 0.0253448486328125, "step": 8430 }, { "epoch": 1.28631591796875e-05, "step": 8430, "training_step_time": 0.10806560516357422 }, { "epoch": 1.286468505859375e-05, "model_forward_time": 0.025567293167114258, "step": 8431 }, { "epoch": 1.286468505859375e-05, "step": 8431, "training_step_time": 0.11466693878173828 }, { "epoch": 1.28662109375e-05, "model_forward_time": 0.02409648895263672, "step": 8432 }, { "epoch": 1.28662109375e-05, "step": 8432, "training_step_time": 0.1093759536743164 }, { "epoch": 1.286773681640625e-05, "model_forward_time": 0.02469158172607422, "step": 8433 }, { "epoch": 1.286773681640625e-05, "step": 8433, "training_step_time": 0.10927915573120117 }, { "epoch": 1.28692626953125e-05, "model_forward_time": 0.025231599807739258, "step": 8434 }, { "epoch": 1.28692626953125e-05, "step": 8434, "training_step_time": 0.10896134376525879 }, { "epoch": 1.287078857421875e-05, "model_forward_time": 0.02518630027770996, "step": 8435 }, { "epoch": 1.287078857421875e-05, "step": 8435, "training_step_time": 0.10616827011108398 }, { "epoch": 1.2872314453125e-05, "model_forward_time": 0.025454044342041016, "step": 8436 }, { "epoch": 1.2872314453125e-05, "step": 8436, "training_step_time": 0.11049771308898926 }, { "epoch": 1.287384033203125e-05, "model_forward_time": 0.025554180145263672, "step": 8437 }, { "epoch": 1.287384033203125e-05, "step": 8437, "training_step_time": 0.10774898529052734 }, { "epoch": 1.28753662109375e-05, "model_forward_time": 0.025630474090576172, "step": 8438 }, { "epoch": 1.28753662109375e-05, "step": 8438, "training_step_time": 0.10767579078674316 }, { "epoch": 1.287689208984375e-05, "model_forward_time": 0.02550816535949707, "step": 8439 }, { "epoch": 1.287689208984375e-05, "step": 8439, "training_step_time": 0.2196333408355713 }, { "epoch": 1.287841796875e-05, "grad_norm": 0.3627856969833374, "learning_rate": 8.606893196046619e-05, "loss": 0.0378, "step": 8440 }, { "epoch": 1.287841796875e-05, "model_forward_time": 0.0250399112701416, "step": 8440 }, { "epoch": 1.287841796875e-05, "step": 8440, "training_step_time": 0.1106717586517334 }, { "epoch": 1.287994384765625e-05, "model_forward_time": 0.02515411376953125, "step": 8441 }, { "epoch": 1.287994384765625e-05, "step": 8441, "training_step_time": 0.1080620288848877 }, { "epoch": 1.28814697265625e-05, "model_forward_time": 0.025762081146240234, "step": 8442 }, { "epoch": 1.28814697265625e-05, "step": 8442, "training_step_time": 0.1706404685974121 }, { "epoch": 1.288299560546875e-05, "model_forward_time": 0.02474498748779297, "step": 8443 }, { "epoch": 1.288299560546875e-05, "step": 8443, "training_step_time": 0.16864013671875 }, { "epoch": 1.2884521484375e-05, "model_forward_time": 0.02462148666381836, "step": 8444 }, { "epoch": 1.2884521484375e-05, "step": 8444, "training_step_time": 0.10453200340270996 }, { "epoch": 1.288604736328125e-05, "model_forward_time": 0.02480602264404297, "step": 8445 }, { "epoch": 1.288604736328125e-05, "step": 8445, "training_step_time": 0.10568714141845703 }, { "epoch": 1.28875732421875e-05, "model_forward_time": 0.025746822357177734, "step": 8446 }, { "epoch": 1.28875732421875e-05, "step": 8446, "training_step_time": 0.11220526695251465 }, { "epoch": 1.288909912109375e-05, "model_forward_time": 0.02534770965576172, "step": 8447 }, { "epoch": 1.288909912109375e-05, "step": 8447, "training_step_time": 0.1049954891204834 }, { "epoch": 1.2890625e-05, "model_forward_time": 0.025272607803344727, "step": 8448 }, { "epoch": 1.2890625e-05, "step": 8448, "training_step_time": 0.10831928253173828 }, { "epoch": 1.289215087890625e-05, "model_forward_time": 0.025092363357543945, "step": 8449 }, { "epoch": 1.289215087890625e-05, "step": 8449, "training_step_time": 0.10623335838317871 }, { "epoch": 1.28936767578125e-05, "grad_norm": 0.3822075128555298, "learning_rate": 8.603074020699393e-05, "loss": 0.0424, "step": 8450 }, { "epoch": 1.28936767578125e-05, "model_forward_time": 0.025623559951782227, "step": 8450 }, { "epoch": 1.28936767578125e-05, "step": 8450, "training_step_time": 0.10686588287353516 }, { "epoch": 1.289520263671875e-05, "model_forward_time": 0.025102615356445312, "step": 8451 }, { "epoch": 1.289520263671875e-05, "step": 8451, "training_step_time": 0.14095354080200195 }, { "epoch": 1.2896728515625e-05, "model_forward_time": 0.025460243225097656, "step": 8452 }, { "epoch": 1.2896728515625e-05, "step": 8452, "training_step_time": 0.10820674896240234 }, { "epoch": 1.289825439453125e-05, "model_forward_time": 0.024969100952148438, "step": 8453 }, { "epoch": 1.289825439453125e-05, "step": 8453, "training_step_time": 0.11278438568115234 }, { "epoch": 1.28997802734375e-05, "model_forward_time": 0.025583267211914062, "step": 8454 }, { "epoch": 1.28997802734375e-05, "step": 8454, "training_step_time": 0.12532830238342285 }, { "epoch": 1.290130615234375e-05, "model_forward_time": 0.025507450103759766, "step": 8455 }, { "epoch": 1.290130615234375e-05, "step": 8455, "training_step_time": 0.1235501766204834 }, { "epoch": 1.290283203125e-05, "model_forward_time": 0.025557279586791992, "step": 8456 }, { "epoch": 1.290283203125e-05, "step": 8456, "training_step_time": 0.11019229888916016 }, { "epoch": 1.290435791015625e-05, "model_forward_time": 0.025714874267578125, "step": 8457 }, { "epoch": 1.290435791015625e-05, "step": 8457, "training_step_time": 0.1983785629272461 }, { "epoch": 1.29058837890625e-05, "model_forward_time": 0.02407550811767578, "step": 8458 }, { "epoch": 1.29058837890625e-05, "step": 8458, "training_step_time": 0.2275409698486328 }, { "epoch": 1.290740966796875e-05, "model_forward_time": 0.02306675910949707, "step": 8459 }, { "epoch": 1.290740966796875e-05, "step": 8459, "training_step_time": 0.22449660301208496 }, { "epoch": 1.2908935546875e-05, "grad_norm": 0.36279141902923584, "learning_rate": 8.599250467277483e-05, "loss": 0.0347, "step": 8460 }, { "epoch": 1.2908935546875e-05, "model_forward_time": 0.02484583854675293, "step": 8460 }, { "epoch": 1.2908935546875e-05, "step": 8460, "training_step_time": 0.23877835273742676 }, { "epoch": 1.291046142578125e-05, "model_forward_time": 0.0248565673828125, "step": 8461 }, { "epoch": 1.291046142578125e-05, "step": 8461, "training_step_time": 0.19716477394104004 }, { "epoch": 1.29119873046875e-05, "model_forward_time": 0.024669408798217773, "step": 8462 }, { "epoch": 1.29119873046875e-05, "step": 8462, "training_step_time": 0.13439655303955078 }, { "epoch": 1.291351318359375e-05, "model_forward_time": 0.024426937103271484, "step": 8463 }, { "epoch": 1.291351318359375e-05, "step": 8463, "training_step_time": 0.10390996932983398 }, { "epoch": 1.29150390625e-05, "model_forward_time": 0.025187969207763672, "step": 8464 }, { "epoch": 1.29150390625e-05, "step": 8464, "training_step_time": 0.10418057441711426 }, { "epoch": 1.291656494140625e-05, "model_forward_time": 0.025497913360595703, "step": 8465 }, { "epoch": 1.291656494140625e-05, "step": 8465, "training_step_time": 0.10616683959960938 }, { "epoch": 1.29180908203125e-05, "model_forward_time": 0.025522947311401367, "step": 8466 }, { "epoch": 1.29180908203125e-05, "step": 8466, "training_step_time": 0.11188364028930664 }, { "epoch": 1.291961669921875e-05, "model_forward_time": 0.025270700454711914, "step": 8467 }, { "epoch": 1.291961669921875e-05, "step": 8467, "training_step_time": 0.10875916481018066 }, { "epoch": 1.2921142578125e-05, "model_forward_time": 0.025496482849121094, "step": 8468 }, { "epoch": 1.2921142578125e-05, "step": 8468, "training_step_time": 0.11177349090576172 }, { "epoch": 1.292266845703125e-05, "model_forward_time": 0.025326967239379883, "step": 8469 }, { "epoch": 1.292266845703125e-05, "step": 8469, "training_step_time": 0.10746955871582031 }, { "epoch": 1.29241943359375e-05, "grad_norm": 0.6637995839118958, "learning_rate": 8.595422540426869e-05, "loss": 0.0331, "step": 8470 }, { "epoch": 1.29241943359375e-05, "model_forward_time": 0.02515864372253418, "step": 8470 }, { "epoch": 1.29241943359375e-05, "step": 8470, "training_step_time": 0.10781717300415039 }, { "epoch": 1.292572021484375e-05, "model_forward_time": 0.025058507919311523, "step": 8471 }, { "epoch": 1.292572021484375e-05, "step": 8471, "training_step_time": 0.11310625076293945 }, { "epoch": 1.292724609375e-05, "model_forward_time": 0.02503204345703125, "step": 8472 }, { "epoch": 1.292724609375e-05, "step": 8472, "training_step_time": 0.11124205589294434 }, { "epoch": 1.292877197265625e-05, "model_forward_time": 0.025274276733398438, "step": 8473 }, { "epoch": 1.292877197265625e-05, "step": 8473, "training_step_time": 0.1098930835723877 }, { "epoch": 1.29302978515625e-05, "model_forward_time": 0.025301218032836914, "step": 8474 }, { "epoch": 1.29302978515625e-05, "step": 8474, "training_step_time": 0.1124880313873291 }, { "epoch": 1.293182373046875e-05, "model_forward_time": 0.025021076202392578, "step": 8475 }, { "epoch": 1.293182373046875e-05, "step": 8475, "training_step_time": 0.11137151718139648 }, { "epoch": 1.2933349609375e-05, "model_forward_time": 0.025391101837158203, "step": 8476 }, { "epoch": 1.2933349609375e-05, "step": 8476, "training_step_time": 0.11142683029174805 }, { "epoch": 1.293487548828125e-05, "model_forward_time": 0.025598526000976562, "step": 8477 }, { "epoch": 1.293487548828125e-05, "step": 8477, "training_step_time": 0.1415262222290039 }, { "epoch": 1.29364013671875e-05, "model_forward_time": 0.024309635162353516, "step": 8478 }, { "epoch": 1.29364013671875e-05, "step": 8478, "training_step_time": 0.20037031173706055 }, { "epoch": 1.293792724609375e-05, "model_forward_time": 0.023020267486572266, "step": 8479 }, { "epoch": 1.293792724609375e-05, "step": 8479, "training_step_time": 0.19777727127075195 }, { "epoch": 1.2939453125e-05, "grad_norm": 0.3210027515888214, "learning_rate": 8.591590244798844e-05, "loss": 0.0318, "step": 8480 }, { "epoch": 1.2939453125e-05, "model_forward_time": 0.023561477661132812, "step": 8480 }, { "epoch": 1.2939453125e-05, "step": 8480, "training_step_time": 0.18262767791748047 }, { "epoch": 1.294097900390625e-05, "model_forward_time": 0.02459430694580078, "step": 8481 }, { "epoch": 1.294097900390625e-05, "step": 8481, "training_step_time": 0.17492151260375977 }, { "epoch": 1.29425048828125e-05, "model_forward_time": 0.02434682846069336, "step": 8482 }, { "epoch": 1.29425048828125e-05, "step": 8482, "training_step_time": 0.18710875511169434 }, { "epoch": 1.294403076171875e-05, "model_forward_time": 0.024454116821289062, "step": 8483 }, { "epoch": 1.294403076171875e-05, "step": 8483, "training_step_time": 0.11809563636779785 }, { "epoch": 1.2945556640625e-05, "model_forward_time": 0.02505326271057129, "step": 8484 }, { "epoch": 1.2945556640625e-05, "step": 8484, "training_step_time": 0.10655808448791504 }, { "epoch": 1.294708251953125e-05, "model_forward_time": 0.027289867401123047, "step": 8485 }, { "epoch": 1.294708251953125e-05, "step": 8485, "training_step_time": 0.1136162281036377 }, { "epoch": 1.29486083984375e-05, "model_forward_time": 0.02543330192565918, "step": 8486 }, { "epoch": 1.29486083984375e-05, "step": 8486, "training_step_time": 0.1173396110534668 }, { "epoch": 1.295013427734375e-05, "model_forward_time": 0.025449275970458984, "step": 8487 }, { "epoch": 1.295013427734375e-05, "step": 8487, "training_step_time": 0.10909271240234375 }, { "epoch": 1.295166015625e-05, "model_forward_time": 0.025208711624145508, "step": 8488 }, { "epoch": 1.295166015625e-05, "step": 8488, "training_step_time": 0.11129260063171387 }, { "epoch": 1.295318603515625e-05, "model_forward_time": 0.02517533302307129, "step": 8489 }, { "epoch": 1.295318603515625e-05, "step": 8489, "training_step_time": 0.10646653175354004 }, { "epoch": 1.29547119140625e-05, "grad_norm": 0.6224812269210815, "learning_rate": 8.587753585050004e-05, "loss": 0.0403, "step": 8490 }, { "epoch": 1.29547119140625e-05, "model_forward_time": 0.026526689529418945, "step": 8490 }, { "epoch": 1.29547119140625e-05, "step": 8490, "training_step_time": 0.10977602005004883 }, { "epoch": 1.295623779296875e-05, "model_forward_time": 0.025548696517944336, "step": 8491 }, { "epoch": 1.295623779296875e-05, "step": 8491, "training_step_time": 0.10744476318359375 }, { "epoch": 1.2957763671875e-05, "model_forward_time": 0.025397539138793945, "step": 8492 }, { "epoch": 1.2957763671875e-05, "step": 8492, "training_step_time": 0.10970592498779297 }, { "epoch": 1.295928955078125e-05, "model_forward_time": 0.025183916091918945, "step": 8493 }, { "epoch": 1.295928955078125e-05, "step": 8493, "training_step_time": 0.11104726791381836 }, { "epoch": 1.29608154296875e-05, "model_forward_time": 0.02545905113220215, "step": 8494 }, { "epoch": 1.29608154296875e-05, "step": 8494, "training_step_time": 0.1938161849975586 }, { "epoch": 1.296234130859375e-05, "model_forward_time": 0.024188995361328125, "step": 8495 }, { "epoch": 1.296234130859375e-05, "step": 8495, "training_step_time": 0.21196365356445312 }, { "epoch": 1.29638671875e-05, "model_forward_time": 0.024163246154785156, "step": 8496 }, { "epoch": 1.29638671875e-05, "step": 8496, "training_step_time": 0.24666523933410645 }, { "epoch": 1.296539306640625e-05, "model_forward_time": 0.024107694625854492, "step": 8497 }, { "epoch": 1.296539306640625e-05, "step": 8497, "training_step_time": 0.2194833755493164 }, { "epoch": 1.29669189453125e-05, "model_forward_time": 0.023825645446777344, "step": 8498 }, { "epoch": 1.29669189453125e-05, "step": 8498, "training_step_time": 0.19492840766906738 }, { "epoch": 1.296844482421875e-05, "model_forward_time": 0.024234294891357422, "step": 8499 }, { "epoch": 1.296844482421875e-05, "step": 8499, "training_step_time": 0.1612870693206787 }, { "epoch": 1.2969970703125e-05, "grad_norm": 0.7341414093971252, "learning_rate": 8.583912565842257e-05, "loss": 0.0487, "step": 8500 }, { "epoch": 1.2969970703125e-05, "model_forward_time": 0.02478194236755371, "step": 8500 }, { "epoch": 1.2969970703125e-05, "step": 8500, "training_step_time": 0.1288461685180664 }, { "epoch": 1.297149658203125e-05, "model_forward_time": 0.024066448211669922, "step": 8501 }, { "epoch": 1.297149658203125e-05, "step": 8501, "training_step_time": 0.19371390342712402 }, { "epoch": 1.29730224609375e-05, "model_forward_time": 0.02440786361694336, "step": 8502 }, { "epoch": 1.29730224609375e-05, "step": 8502, "training_step_time": 0.10483098030090332 }, { "epoch": 1.297454833984375e-05, "model_forward_time": 0.024338483810424805, "step": 8503 }, { "epoch": 1.297454833984375e-05, "step": 8503, "training_step_time": 0.15780210494995117 }, { "epoch": 1.297607421875e-05, "model_forward_time": 0.02457714080810547, "step": 8504 }, { "epoch": 1.297607421875e-05, "step": 8504, "training_step_time": 0.11320710182189941 }, { "epoch": 1.297760009765625e-05, "model_forward_time": 0.024333477020263672, "step": 8505 }, { "epoch": 1.297760009765625e-05, "step": 8505, "training_step_time": 0.10988450050354004 }, { "epoch": 1.29791259765625e-05, "model_forward_time": 0.02524089813232422, "step": 8506 }, { "epoch": 1.29791259765625e-05, "step": 8506, "training_step_time": 0.1080312728881836 }, { "epoch": 1.298065185546875e-05, "model_forward_time": 0.025478363037109375, "step": 8507 }, { "epoch": 1.298065185546875e-05, "step": 8507, "training_step_time": 0.10982155799865723 }, { "epoch": 1.2982177734375e-05, "model_forward_time": 0.025667190551757812, "step": 8508 }, { "epoch": 1.2982177734375e-05, "step": 8508, "training_step_time": 0.10718989372253418 }, { "epoch": 1.298370361328125e-05, "model_forward_time": 0.025009870529174805, "step": 8509 }, { "epoch": 1.298370361328125e-05, "step": 8509, "training_step_time": 0.10791826248168945 }, { "epoch": 1.29852294921875e-05, "grad_norm": 0.3781100809574127, "learning_rate": 8.5800671918428e-05, "loss": 0.0376, "step": 8510 }, { "epoch": 1.29852294921875e-05, "model_forward_time": 0.02417469024658203, "step": 8510 }, { "epoch": 1.29852294921875e-05, "step": 8510, "training_step_time": 0.10970616340637207 }, { "epoch": 1.298675537109375e-05, "model_forward_time": 0.024345874786376953, "step": 8511 }, { "epoch": 1.298675537109375e-05, "step": 8511, "training_step_time": 0.10900688171386719 }, { "epoch": 1.298828125e-05, "model_forward_time": 0.025331974029541016, "step": 8512 }, { "epoch": 1.298828125e-05, "step": 8512, "training_step_time": 0.1087641716003418 }, { "epoch": 1.298980712890625e-05, "model_forward_time": 0.025189638137817383, "step": 8513 }, { "epoch": 1.298980712890625e-05, "step": 8513, "training_step_time": 0.10828614234924316 }, { "epoch": 1.29913330078125e-05, "model_forward_time": 0.0250089168548584, "step": 8514 }, { "epoch": 1.29913330078125e-05, "step": 8514, "training_step_time": 0.11053824424743652 }, { "epoch": 1.299285888671875e-05, "model_forward_time": 0.0253143310546875, "step": 8515 }, { "epoch": 1.299285888671875e-05, "step": 8515, "training_step_time": 0.11462187767028809 }, { "epoch": 1.2994384765625e-05, "model_forward_time": 0.025487184524536133, "step": 8516 }, { "epoch": 1.2994384765625e-05, "step": 8516, "training_step_time": 0.11500740051269531 }, { "epoch": 1.299591064453125e-05, "model_forward_time": 0.025350570678710938, "step": 8517 }, { "epoch": 1.299591064453125e-05, "step": 8517, "training_step_time": 0.1097707748413086 }, { "epoch": 1.29974365234375e-05, "model_forward_time": 0.02521038055419922, "step": 8518 }, { "epoch": 1.29974365234375e-05, "step": 8518, "training_step_time": 0.1079108715057373 }, { "epoch": 1.299896240234375e-05, "model_forward_time": 0.025005102157592773, "step": 8519 }, { "epoch": 1.299896240234375e-05, "step": 8519, "training_step_time": 0.10697531700134277 }, { "epoch": 1.300048828125e-05, "grad_norm": 0.572238564491272, "learning_rate": 8.576217467724128e-05, "loss": 0.027, "step": 8520 }, { "epoch": 1.300048828125e-05, "model_forward_time": 0.024984121322631836, "step": 8520 }, { "epoch": 1.300048828125e-05, "step": 8520, "training_step_time": 0.10987472534179688 }, { "epoch": 1.300201416015625e-05, "model_forward_time": 0.025002479553222656, "step": 8521 }, { "epoch": 1.300201416015625e-05, "step": 8521, "training_step_time": 0.108306884765625 }, { "epoch": 1.30035400390625e-05, "model_forward_time": 0.02526235580444336, "step": 8522 }, { "epoch": 1.30035400390625e-05, "step": 8522, "training_step_time": 0.10857057571411133 }, { "epoch": 1.300506591796875e-05, "model_forward_time": 0.025405406951904297, "step": 8523 }, { "epoch": 1.300506591796875e-05, "step": 8523, "training_step_time": 0.11063265800476074 }, { "epoch": 1.3006591796875e-05, "model_forward_time": 0.026181936264038086, "step": 8524 }, { "epoch": 1.3006591796875e-05, "step": 8524, "training_step_time": 0.1075129508972168 }, { "epoch": 1.300811767578125e-05, "model_forward_time": 0.025140047073364258, "step": 8525 }, { "epoch": 1.300811767578125e-05, "step": 8525, "training_step_time": 0.10638809204101562 }, { "epoch": 1.30096435546875e-05, "model_forward_time": 0.025313615798950195, "step": 8526 }, { "epoch": 1.30096435546875e-05, "step": 8526, "training_step_time": 0.11937713623046875 }, { "epoch": 1.301116943359375e-05, "model_forward_time": 0.02593517303466797, "step": 8527 }, { "epoch": 1.301116943359375e-05, "step": 8527, "training_step_time": 0.10895133018493652 }, { "epoch": 1.30126953125e-05, "model_forward_time": 0.02526116371154785, "step": 8528 }, { "epoch": 1.30126953125e-05, "step": 8528, "training_step_time": 0.10902953147888184 }, { "epoch": 1.301422119140625e-05, "model_forward_time": 0.02546834945678711, "step": 8529 }, { "epoch": 1.301422119140625e-05, "step": 8529, "training_step_time": 0.2134406566619873 }, { "epoch": 1.30157470703125e-05, "grad_norm": 0.36905935406684875, "learning_rate": 8.572363398164017e-05, "loss": 0.0383, "step": 8530 }, { "epoch": 1.30157470703125e-05, "model_forward_time": 0.024584531784057617, "step": 8530 }, { "epoch": 1.30157470703125e-05, "step": 8530, "training_step_time": 0.12221026420593262 }, { "epoch": 1.301727294921875e-05, "model_forward_time": 0.024863481521606445, "step": 8531 }, { "epoch": 1.301727294921875e-05, "step": 8531, "training_step_time": 0.10729432106018066 }, { "epoch": 1.3018798828125e-05, "model_forward_time": 0.025046825408935547, "step": 8532 }, { "epoch": 1.3018798828125e-05, "step": 8532, "training_step_time": 0.10649347305297852 }, { "epoch": 1.302032470703125e-05, "model_forward_time": 0.024453401565551758, "step": 8533 }, { "epoch": 1.302032470703125e-05, "step": 8533, "training_step_time": 0.10722231864929199 }, { "epoch": 1.30218505859375e-05, "model_forward_time": 0.025127172470092773, "step": 8534 }, { "epoch": 1.30218505859375e-05, "step": 8534, "training_step_time": 0.10629034042358398 }, { "epoch": 1.302337646484375e-05, "model_forward_time": 0.02499103546142578, "step": 8535 }, { "epoch": 1.302337646484375e-05, "step": 8535, "training_step_time": 0.10706973075866699 }, { "epoch": 1.302490234375e-05, "model_forward_time": 0.025224685668945312, "step": 8536 }, { "epoch": 1.302490234375e-05, "step": 8536, "training_step_time": 0.11256575584411621 }, { "epoch": 1.302642822265625e-05, "model_forward_time": 0.024209260940551758, "step": 8537 }, { "epoch": 1.302642822265625e-05, "step": 8537, "training_step_time": 0.10873985290527344 }, { "epoch": 1.30279541015625e-05, "model_forward_time": 0.025450706481933594, "step": 8538 }, { "epoch": 1.30279541015625e-05, "step": 8538, "training_step_time": 0.14252734184265137 }, { "epoch": 1.302947998046875e-05, "model_forward_time": 0.025411128997802734, "step": 8539 }, { "epoch": 1.302947998046875e-05, "step": 8539, "training_step_time": 0.11145663261413574 }, { "epoch": 1.3031005859375e-05, "grad_norm": 0.5095164179801941, "learning_rate": 8.568504987845525e-05, "loss": 0.0406, "step": 8540 }, { "epoch": 1.3031005859375e-05, "model_forward_time": 0.024390697479248047, "step": 8540 }, { "epoch": 1.3031005859375e-05, "step": 8540, "training_step_time": 0.17826604843139648 }, { "epoch": 1.303253173828125e-05, "model_forward_time": 0.02462172508239746, "step": 8541 }, { "epoch": 1.303253173828125e-05, "step": 8541, "training_step_time": 0.1736743450164795 }, { "epoch": 1.30340576171875e-05, "model_forward_time": 0.025616884231567383, "step": 8542 }, { "epoch": 1.30340576171875e-05, "step": 8542, "training_step_time": 0.15473222732543945 }, { "epoch": 1.303558349609375e-05, "model_forward_time": 0.02454996109008789, "step": 8543 }, { "epoch": 1.303558349609375e-05, "step": 8543, "training_step_time": 0.1759488582611084 }, { "epoch": 1.3037109375e-05, "model_forward_time": 0.024289369583129883, "step": 8544 }, { "epoch": 1.3037109375e-05, "step": 8544, "training_step_time": 0.12207674980163574 }, { "epoch": 1.303863525390625e-05, "model_forward_time": 0.025026559829711914, "step": 8545 }, { "epoch": 1.303863525390625e-05, "step": 8545, "training_step_time": 0.148789644241333 }, { "epoch": 1.30401611328125e-05, "model_forward_time": 0.024718046188354492, "step": 8546 }, { "epoch": 1.30401611328125e-05, "step": 8546, "training_step_time": 0.13233160972595215 }, { "epoch": 1.304168701171875e-05, "model_forward_time": 0.02458024024963379, "step": 8547 }, { "epoch": 1.304168701171875e-05, "step": 8547, "training_step_time": 0.1948089599609375 }, { "epoch": 1.3043212890625e-05, "model_forward_time": 0.024645090103149414, "step": 8548 }, { "epoch": 1.3043212890625e-05, "step": 8548, "training_step_time": 0.10531258583068848 }, { "epoch": 1.304473876953125e-05, "model_forward_time": 0.02452230453491211, "step": 8549 }, { "epoch": 1.304473876953125e-05, "step": 8549, "training_step_time": 0.10472559928894043 }, { "epoch": 1.30462646484375e-05, "grad_norm": 0.2935931980609894, "learning_rate": 8.564642241456986e-05, "loss": 0.0378, "step": 8550 }, { "epoch": 1.30462646484375e-05, "model_forward_time": 0.025105714797973633, "step": 8550 }, { "epoch": 1.30462646484375e-05, "step": 8550, "training_step_time": 0.10613441467285156 }, { "epoch": 1.304779052734375e-05, "model_forward_time": 0.024910449981689453, "step": 8551 }, { "epoch": 1.304779052734375e-05, "step": 8551, "training_step_time": 0.10725283622741699 }, { "epoch": 1.304931640625e-05, "model_forward_time": 0.02523493766784668, "step": 8552 }, { "epoch": 1.304931640625e-05, "step": 8552, "training_step_time": 0.11110925674438477 }, { "epoch": 1.305084228515625e-05, "model_forward_time": 0.025808334350585938, "step": 8553 }, { "epoch": 1.305084228515625e-05, "step": 8553, "training_step_time": 0.10963058471679688 }, { "epoch": 1.30523681640625e-05, "model_forward_time": 0.026088953018188477, "step": 8554 }, { "epoch": 1.30523681640625e-05, "step": 8554, "training_step_time": 0.10961222648620605 }, { "epoch": 1.305389404296875e-05, "model_forward_time": 0.025322437286376953, "step": 8555 }, { "epoch": 1.305389404296875e-05, "step": 8555, "training_step_time": 0.11128902435302734 }, { "epoch": 1.3055419921875e-05, "model_forward_time": 0.02516913414001465, "step": 8556 }, { "epoch": 1.3055419921875e-05, "step": 8556, "training_step_time": 0.11033964157104492 }, { "epoch": 1.305694580078125e-05, "model_forward_time": 0.025201797485351562, "step": 8557 }, { "epoch": 1.305694580078125e-05, "step": 8557, "training_step_time": 0.10784649848937988 }, { "epoch": 1.30584716796875e-05, "model_forward_time": 0.025284767150878906, "step": 8558 }, { "epoch": 1.30584716796875e-05, "step": 8558, "training_step_time": 0.11011981964111328 }, { "epoch": 1.305999755859375e-05, "model_forward_time": 0.024981260299682617, "step": 8559 }, { "epoch": 1.305999755859375e-05, "step": 8559, "training_step_time": 0.10783076286315918 }, { "epoch": 1.30615234375e-05, "grad_norm": 0.7260056734085083, "learning_rate": 8.560775163691999e-05, "loss": 0.0386, "step": 8560 }, { "epoch": 1.30615234375e-05, "model_forward_time": 0.025025606155395508, "step": 8560 }, { "epoch": 1.30615234375e-05, "step": 8560, "training_step_time": 0.11048340797424316 }, { "epoch": 1.306304931640625e-05, "model_forward_time": 0.02541828155517578, "step": 8561 }, { "epoch": 1.306304931640625e-05, "step": 8561, "training_step_time": 0.11759138107299805 }, { "epoch": 1.30645751953125e-05, "model_forward_time": 0.025322914123535156, "step": 8562 }, { "epoch": 1.30645751953125e-05, "step": 8562, "training_step_time": 0.11378288269042969 }, { "epoch": 1.306610107421875e-05, "model_forward_time": 0.025467634201049805, "step": 8563 }, { "epoch": 1.306610107421875e-05, "step": 8563, "training_step_time": 0.11336469650268555 }, { "epoch": 1.3067626953125e-05, "model_forward_time": 0.025119781494140625, "step": 8564 }, { "epoch": 1.3067626953125e-05, "step": 8564, "training_step_time": 0.10680818557739258 }, { "epoch": 1.306915283203125e-05, "model_forward_time": 0.024999141693115234, "step": 8565 }, { "epoch": 1.306915283203125e-05, "step": 8565, "training_step_time": 0.10766458511352539 }, { "epoch": 1.30706787109375e-05, "model_forward_time": 0.025144338607788086, "step": 8566 }, { "epoch": 1.30706787109375e-05, "step": 8566, "training_step_time": 0.11034584045410156 }, { "epoch": 1.307220458984375e-05, "model_forward_time": 0.02532362937927246, "step": 8567 }, { "epoch": 1.307220458984375e-05, "step": 8567, "training_step_time": 0.11063694953918457 }, { "epoch": 1.307373046875e-05, "model_forward_time": 0.02497720718383789, "step": 8568 }, { "epoch": 1.307373046875e-05, "step": 8568, "training_step_time": 0.11074018478393555 }, { "epoch": 1.307525634765625e-05, "model_forward_time": 0.025429487228393555, "step": 8569 }, { "epoch": 1.307525634765625e-05, "step": 8569, "training_step_time": 0.1141209602355957 }, { "epoch": 1.30767822265625e-05, "grad_norm": 0.5504429340362549, "learning_rate": 8.556903759249428e-05, "loss": 0.074, "step": 8570 }, { "epoch": 1.30767822265625e-05, "model_forward_time": 0.025015830993652344, "step": 8570 }, { "epoch": 1.30767822265625e-05, "step": 8570, "training_step_time": 0.10707259178161621 }, { "epoch": 1.307830810546875e-05, "model_forward_time": 0.024809598922729492, "step": 8571 }, { "epoch": 1.307830810546875e-05, "step": 8571, "training_step_time": 0.1060035228729248 }, { "epoch": 1.3079833984375e-05, "model_forward_time": 0.025105953216552734, "step": 8572 }, { "epoch": 1.3079833984375e-05, "step": 8572, "training_step_time": 0.11037945747375488 }, { "epoch": 1.308135986328125e-05, "model_forward_time": 0.025478124618530273, "step": 8573 }, { "epoch": 1.308135986328125e-05, "step": 8573, "training_step_time": 0.11862349510192871 }, { "epoch": 1.30828857421875e-05, "model_forward_time": 0.025417804718017578, "step": 8574 }, { "epoch": 1.30828857421875e-05, "step": 8574, "training_step_time": 0.11163544654846191 }, { "epoch": 1.308441162109375e-05, "model_forward_time": 0.02540898323059082, "step": 8575 }, { "epoch": 1.308441162109375e-05, "step": 8575, "training_step_time": 0.2170274257659912 }, { "epoch": 1.30859375e-05, "model_forward_time": 0.025041580200195312, "step": 8576 }, { "epoch": 1.30859375e-05, "step": 8576, "training_step_time": 0.11631011962890625 }, { "epoch": 1.308746337890625e-05, "model_forward_time": 0.024708032608032227, "step": 8577 }, { "epoch": 1.308746337890625e-05, "step": 8577, "training_step_time": 0.10408782958984375 }, { "epoch": 1.30889892578125e-05, "model_forward_time": 0.025156736373901367, "step": 8578 }, { "epoch": 1.30889892578125e-05, "step": 8578, "training_step_time": 0.10798525810241699 }, { "epoch": 1.309051513671875e-05, "model_forward_time": 0.02511739730834961, "step": 8579 }, { "epoch": 1.309051513671875e-05, "step": 8579, "training_step_time": 0.1102294921875 }, { "epoch": 1.3092041015625e-05, "grad_norm": 0.33122915029525757, "learning_rate": 8.553028032833397e-05, "loss": 0.0478, "step": 8580 }, { "epoch": 1.3092041015625e-05, "model_forward_time": 0.02475738525390625, "step": 8580 }, { "epoch": 1.3092041015625e-05, "step": 8580, "training_step_time": 0.10719585418701172 }, { "epoch": 1.309356689453125e-05, "model_forward_time": 0.025197982788085938, "step": 8581 }, { "epoch": 1.309356689453125e-05, "step": 8581, "training_step_time": 0.10858583450317383 }, { "epoch": 1.30950927734375e-05, "model_forward_time": 0.02564406394958496, "step": 8582 }, { "epoch": 1.30950927734375e-05, "step": 8582, "training_step_time": 0.10618829727172852 }, { "epoch": 1.309661865234375e-05, "model_forward_time": 0.025420665740966797, "step": 8583 }, { "epoch": 1.309661865234375e-05, "step": 8583, "training_step_time": 0.10611605644226074 }, { "epoch": 1.309814453125e-05, "model_forward_time": 0.025449752807617188, "step": 8584 }, { "epoch": 1.309814453125e-05, "step": 8584, "training_step_time": 0.20740389823913574 }, { "epoch": 1.309967041015625e-05, "model_forward_time": 0.024262666702270508, "step": 8585 }, { "epoch": 1.309967041015625e-05, "step": 8585, "training_step_time": 0.11228275299072266 }, { "epoch": 1.31011962890625e-05, "model_forward_time": 0.024262666702270508, "step": 8586 }, { "epoch": 1.31011962890625e-05, "step": 8586, "training_step_time": 0.10965204238891602 }, { "epoch": 1.310272216796875e-05, "model_forward_time": 0.02527642250061035, "step": 8587 }, { "epoch": 1.310272216796875e-05, "step": 8587, "training_step_time": 0.11970233917236328 }, { "epoch": 1.3104248046875e-05, "model_forward_time": 0.025374174118041992, "step": 8588 }, { "epoch": 1.3104248046875e-05, "step": 8588, "training_step_time": 0.126939058303833 }, { "epoch": 1.310577392578125e-05, "model_forward_time": 0.02526569366455078, "step": 8589 }, { "epoch": 1.310577392578125e-05, "step": 8589, "training_step_time": 0.12303614616394043 }, { "epoch": 1.31072998046875e-05, "grad_norm": 0.2752124071121216, "learning_rate": 8.549147989153276e-05, "loss": 0.0354, "step": 8590 }, { "epoch": 1.31072998046875e-05, "model_forward_time": 0.025165319442749023, "step": 8590 }, { "epoch": 1.31072998046875e-05, "step": 8590, "training_step_time": 0.1118309497833252 }, { "epoch": 1.310882568359375e-05, "model_forward_time": 0.025021076202392578, "step": 8591 }, { "epoch": 1.310882568359375e-05, "step": 8591, "training_step_time": 0.1861588954925537 }, { "epoch": 1.31103515625e-05, "model_forward_time": 0.02464151382446289, "step": 8592 }, { "epoch": 1.31103515625e-05, "step": 8592, "training_step_time": 0.14913630485534668 }, { "epoch": 1.311187744140625e-05, "model_forward_time": 0.024680376052856445, "step": 8593 }, { "epoch": 1.311187744140625e-05, "step": 8593, "training_step_time": 0.1892564296722412 }, { "epoch": 1.31134033203125e-05, "model_forward_time": 0.024349212646484375, "step": 8594 }, { "epoch": 1.31134033203125e-05, "step": 8594, "training_step_time": 0.17212247848510742 }, { "epoch": 1.311492919921875e-05, "model_forward_time": 0.024448156356811523, "step": 8595 }, { "epoch": 1.311492919921875e-05, "step": 8595, "training_step_time": 0.10905838012695312 }, { "epoch": 1.3116455078125e-05, "model_forward_time": 0.024903297424316406, "step": 8596 }, { "epoch": 1.3116455078125e-05, "step": 8596, "training_step_time": 0.17607736587524414 }, { "epoch": 1.311798095703125e-05, "model_forward_time": 0.023910999298095703, "step": 8597 }, { "epoch": 1.311798095703125e-05, "step": 8597, "training_step_time": 0.1874864101409912 }, { "epoch": 1.31195068359375e-05, "model_forward_time": 0.024268388748168945, "step": 8598 }, { "epoch": 1.31195068359375e-05, "step": 8598, "training_step_time": 0.1785261631011963 }, { "epoch": 1.312103271484375e-05, "model_forward_time": 0.02351832389831543, "step": 8599 }, { "epoch": 1.312103271484375e-05, "step": 8599, "training_step_time": 0.16787362098693848 }, { "epoch": 1.312255859375e-05, "grad_norm": 0.571994423866272, "learning_rate": 8.545263632923687e-05, "loss": 0.0257, "step": 8600 }, { "epoch": 1.312255859375e-05, "model_forward_time": 0.024059534072875977, "step": 8600 }, { "epoch": 1.312255859375e-05, "step": 8600, "training_step_time": 0.1477503776550293 }, { "epoch": 1.312408447265625e-05, "model_forward_time": 0.02431774139404297, "step": 8601 }, { "epoch": 1.312408447265625e-05, "step": 8601, "training_step_time": 0.14449715614318848 }, { "epoch": 1.31256103515625e-05, "model_forward_time": 0.02480936050415039, "step": 8602 }, { "epoch": 1.31256103515625e-05, "step": 8602, "training_step_time": 0.1284351348876953 }, { "epoch": 1.312713623046875e-05, "model_forward_time": 0.02422308921813965, "step": 8603 }, { "epoch": 1.312713623046875e-05, "step": 8603, "training_step_time": 0.12949728965759277 }, { "epoch": 1.3128662109375e-05, "model_forward_time": 0.024661779403686523, "step": 8604 }, { "epoch": 1.3128662109375e-05, "step": 8604, "training_step_time": 0.12489819526672363 }, { "epoch": 1.313018798828125e-05, "model_forward_time": 0.02506875991821289, "step": 8605 }, { "epoch": 1.313018798828125e-05, "step": 8605, "training_step_time": 0.12144923210144043 }, { "epoch": 1.31317138671875e-05, "model_forward_time": 0.024789810180664062, "step": 8606 }, { "epoch": 1.31317138671875e-05, "step": 8606, "training_step_time": 0.11706876754760742 }, { "epoch": 1.313323974609375e-05, "model_forward_time": 0.025183439254760742, "step": 8607 }, { "epoch": 1.313323974609375e-05, "step": 8607, "training_step_time": 0.11273503303527832 }, { "epoch": 1.3134765625e-05, "model_forward_time": 0.025107145309448242, "step": 8608 }, { "epoch": 1.3134765625e-05, "step": 8608, "training_step_time": 0.11113643646240234 }, { "epoch": 1.313629150390625e-05, "model_forward_time": 0.025447368621826172, "step": 8609 }, { "epoch": 1.313629150390625e-05, "step": 8609, "training_step_time": 0.10822701454162598 }, { "epoch": 1.31378173828125e-05, "grad_norm": 0.6709743142127991, "learning_rate": 8.541374968864487e-05, "loss": 0.0323, "step": 8610 }, { "epoch": 1.31378173828125e-05, "model_forward_time": 0.025504350662231445, "step": 8610 }, { "epoch": 1.31378173828125e-05, "step": 8610, "training_step_time": 0.11101531982421875 }, { "epoch": 1.313934326171875e-05, "model_forward_time": 0.02473759651184082, "step": 8611 }, { "epoch": 1.313934326171875e-05, "step": 8611, "training_step_time": 0.11205029487609863 }, { "epoch": 1.3140869140625e-05, "model_forward_time": 0.02497267723083496, "step": 8612 }, { "epoch": 1.3140869140625e-05, "step": 8612, "training_step_time": 0.10936331748962402 }, { "epoch": 1.314239501953125e-05, "model_forward_time": 0.024730920791625977, "step": 8613 }, { "epoch": 1.314239501953125e-05, "step": 8613, "training_step_time": 0.10367202758789062 }, { "epoch": 1.31439208984375e-05, "model_forward_time": 0.024790048599243164, "step": 8614 }, { "epoch": 1.31439208984375e-05, "step": 8614, "training_step_time": 0.1121358871459961 }, { "epoch": 1.314544677734375e-05, "model_forward_time": 0.024821043014526367, "step": 8615 }, { "epoch": 1.314544677734375e-05, "step": 8615, "training_step_time": 0.11941695213317871 }, { "epoch": 1.314697265625e-05, "model_forward_time": 0.025316953659057617, "step": 8616 }, { "epoch": 1.314697265625e-05, "step": 8616, "training_step_time": 0.13284873962402344 }, { "epoch": 1.314849853515625e-05, "model_forward_time": 0.024974346160888672, "step": 8617 }, { "epoch": 1.314849853515625e-05, "step": 8617, "training_step_time": 0.18481779098510742 }, { "epoch": 1.31500244140625e-05, "model_forward_time": 0.024972915649414062, "step": 8618 }, { "epoch": 1.31500244140625e-05, "step": 8618, "training_step_time": 0.13886356353759766 }, { "epoch": 1.315155029296875e-05, "model_forward_time": 0.025105714797973633, "step": 8619 }, { "epoch": 1.315155029296875e-05, "step": 8619, "training_step_time": 0.12329864501953125 }, { "epoch": 1.3153076171875e-05, "grad_norm": 0.6751372218132019, "learning_rate": 8.537482001700769e-05, "loss": 0.0437, "step": 8620 }, { "epoch": 1.3153076171875e-05, "model_forward_time": 0.024887800216674805, "step": 8620 }, { "epoch": 1.3153076171875e-05, "step": 8620, "training_step_time": 0.11719608306884766 }, { "epoch": 1.315460205078125e-05, "model_forward_time": 0.025247573852539062, "step": 8621 }, { "epoch": 1.315460205078125e-05, "step": 8621, "training_step_time": 0.11867380142211914 }, { "epoch": 1.31561279296875e-05, "model_forward_time": 0.025392532348632812, "step": 8622 }, { "epoch": 1.31561279296875e-05, "step": 8622, "training_step_time": 0.11298704147338867 }, { "epoch": 1.315765380859375e-05, "model_forward_time": 0.025484085083007812, "step": 8623 }, { "epoch": 1.315765380859375e-05, "step": 8623, "training_step_time": 0.11469030380249023 }, { "epoch": 1.31591796875e-05, "model_forward_time": 0.025179147720336914, "step": 8624 }, { "epoch": 1.31591796875e-05, "step": 8624, "training_step_time": 0.1128082275390625 }, { "epoch": 1.316070556640625e-05, "model_forward_time": 0.02524280548095703, "step": 8625 }, { "epoch": 1.316070556640625e-05, "step": 8625, "training_step_time": 0.10920953750610352 }, { "epoch": 1.31622314453125e-05, "model_forward_time": 0.02540898323059082, "step": 8626 }, { "epoch": 1.31622314453125e-05, "step": 8626, "training_step_time": 0.11580705642700195 }, { "epoch": 1.316375732421875e-05, "model_forward_time": 0.025336027145385742, "step": 8627 }, { "epoch": 1.316375732421875e-05, "step": 8627, "training_step_time": 0.14510464668273926 }, { "epoch": 1.3165283203125e-05, "model_forward_time": 0.024894237518310547, "step": 8628 }, { "epoch": 1.3165283203125e-05, "step": 8628, "training_step_time": 0.11376333236694336 }, { "epoch": 1.316680908203125e-05, "model_forward_time": 0.02507162094116211, "step": 8629 }, { "epoch": 1.316680908203125e-05, "step": 8629, "training_step_time": 0.11668038368225098 }, { "epoch": 1.31683349609375e-05, "grad_norm": 0.5390626788139343, "learning_rate": 8.533584736162857e-05, "loss": 0.0297, "step": 8630 }, { "epoch": 1.31683349609375e-05, "model_forward_time": 0.025188207626342773, "step": 8630 }, { "epoch": 1.31683349609375e-05, "step": 8630, "training_step_time": 0.11632895469665527 }, { "epoch": 1.316986083984375e-05, "model_forward_time": 0.025362491607666016, "step": 8631 }, { "epoch": 1.316986083984375e-05, "step": 8631, "training_step_time": 0.12700271606445312 }, { "epoch": 1.317138671875e-05, "model_forward_time": 0.025690793991088867, "step": 8632 }, { "epoch": 1.317138671875e-05, "step": 8632, "training_step_time": 0.14475631713867188 }, { "epoch": 1.317291259765625e-05, "model_forward_time": 0.025162458419799805, "step": 8633 }, { "epoch": 1.317291259765625e-05, "step": 8633, "training_step_time": 0.18711352348327637 }, { "epoch": 1.31744384765625e-05, "model_forward_time": 0.02428150177001953, "step": 8634 }, { "epoch": 1.31744384765625e-05, "step": 8634, "training_step_time": 0.19015979766845703 }, { "epoch": 1.317596435546875e-05, "model_forward_time": 0.02457880973815918, "step": 8635 }, { "epoch": 1.317596435546875e-05, "step": 8635, "training_step_time": 0.17269039154052734 }, { "epoch": 1.3177490234375e-05, "model_forward_time": 0.024191856384277344, "step": 8636 }, { "epoch": 1.3177490234375e-05, "step": 8636, "training_step_time": 0.15909171104431152 }, { "epoch": 1.317901611328125e-05, "model_forward_time": 0.024516820907592773, "step": 8637 }, { "epoch": 1.317901611328125e-05, "step": 8637, "training_step_time": 0.10775113105773926 }, { "epoch": 1.31805419921875e-05, "model_forward_time": 0.02461385726928711, "step": 8638 }, { "epoch": 1.31805419921875e-05, "step": 8638, "training_step_time": 0.10728979110717773 }, { "epoch": 1.318206787109375e-05, "model_forward_time": 0.024155378341674805, "step": 8639 }, { "epoch": 1.318206787109375e-05, "step": 8639, "training_step_time": 0.1101679801940918 }, { "epoch": 1.318359375e-05, "grad_norm": 0.37394291162490845, "learning_rate": 8.529683176986295e-05, "loss": 0.0397, "step": 8640 }, { "epoch": 1.318359375e-05, "model_forward_time": 0.0252838134765625, "step": 8640 }, { "epoch": 1.318359375e-05, "step": 8640, "training_step_time": 0.10839533805847168 }, { "epoch": 1.318511962890625e-05, "model_forward_time": 0.025204181671142578, "step": 8641 }, { "epoch": 1.318511962890625e-05, "step": 8641, "training_step_time": 0.10919642448425293 }, { "epoch": 1.31866455078125e-05, "model_forward_time": 0.025541305541992188, "step": 8642 }, { "epoch": 1.31866455078125e-05, "step": 8642, "training_step_time": 0.10925555229187012 }, { "epoch": 1.318817138671875e-05, "model_forward_time": 0.024201393127441406, "step": 8643 }, { "epoch": 1.318817138671875e-05, "step": 8643, "training_step_time": 0.10751581192016602 }, { "epoch": 1.3189697265625e-05, "model_forward_time": 0.025599002838134766, "step": 8644 }, { "epoch": 1.3189697265625e-05, "step": 8644, "training_step_time": 0.1433415412902832 }, { "epoch": 1.319122314453125e-05, "model_forward_time": 0.024764060974121094, "step": 8645 }, { "epoch": 1.319122314453125e-05, "step": 8645, "training_step_time": 0.16957402229309082 }, { "epoch": 1.31927490234375e-05, "model_forward_time": 0.025166988372802734, "step": 8646 }, { "epoch": 1.31927490234375e-05, "step": 8646, "training_step_time": 0.15799665451049805 }, { "epoch": 1.319427490234375e-05, "model_forward_time": 0.024077177047729492, "step": 8647 }, { "epoch": 1.319427490234375e-05, "step": 8647, "training_step_time": 0.14330744743347168 }, { "epoch": 1.319580078125e-05, "model_forward_time": 0.0247042179107666, "step": 8648 }, { "epoch": 1.319580078125e-05, "step": 8648, "training_step_time": 0.1289200782775879 }, { "epoch": 1.319732666015625e-05, "model_forward_time": 0.02465033531188965, "step": 8649 }, { "epoch": 1.319732666015625e-05, "step": 8649, "training_step_time": 0.12702298164367676 }, { "epoch": 1.31988525390625e-05, "grad_norm": 0.40195232629776, "learning_rate": 8.525777328911846e-05, "loss": 0.0419, "step": 8650 }, { "epoch": 1.31988525390625e-05, "model_forward_time": 0.025113821029663086, "step": 8650 }, { "epoch": 1.31988525390625e-05, "step": 8650, "training_step_time": 0.12198662757873535 }, { "epoch": 1.320037841796875e-05, "model_forward_time": 0.025211572647094727, "step": 8651 }, { "epoch": 1.320037841796875e-05, "step": 8651, "training_step_time": 0.12036871910095215 }, { "epoch": 1.3201904296875e-05, "model_forward_time": 0.02510857582092285, "step": 8652 }, { "epoch": 1.3201904296875e-05, "step": 8652, "training_step_time": 0.11340117454528809 }, { "epoch": 1.320343017578125e-05, "model_forward_time": 0.025313377380371094, "step": 8653 }, { "epoch": 1.320343017578125e-05, "step": 8653, "training_step_time": 0.11277365684509277 }, { "epoch": 1.32049560546875e-05, "model_forward_time": 0.02543807029724121, "step": 8654 }, { "epoch": 1.32049560546875e-05, "step": 8654, "training_step_time": 0.11357522010803223 }, { "epoch": 1.320648193359375e-05, "model_forward_time": 0.02534794807434082, "step": 8655 }, { "epoch": 1.320648193359375e-05, "step": 8655, "training_step_time": 0.1105504035949707 }, { "epoch": 1.32080078125e-05, "model_forward_time": 0.0254976749420166, "step": 8656 }, { "epoch": 1.32080078125e-05, "step": 8656, "training_step_time": 0.10862159729003906 }, { "epoch": 1.320953369140625e-05, "model_forward_time": 0.025475263595581055, "step": 8657 }, { "epoch": 1.320953369140625e-05, "step": 8657, "training_step_time": 0.10529112815856934 }, { "epoch": 1.32110595703125e-05, "model_forward_time": 0.024819612503051758, "step": 8658 }, { "epoch": 1.32110595703125e-05, "step": 8658, "training_step_time": 0.10994410514831543 }, { "epoch": 1.321258544921875e-05, "model_forward_time": 0.02486133575439453, "step": 8659 }, { "epoch": 1.321258544921875e-05, "step": 8659, "training_step_time": 0.11730790138244629 }, { "epoch": 1.3214111328125e-05, "grad_norm": 0.42142099142074585, "learning_rate": 8.521867196685482e-05, "loss": 0.0327, "step": 8660 }, { "epoch": 1.3214111328125e-05, "model_forward_time": 0.025101184844970703, "step": 8660 }, { "epoch": 1.3214111328125e-05, "step": 8660, "training_step_time": 0.11071372032165527 }, { "epoch": 1.321563720703125e-05, "model_forward_time": 0.025377750396728516, "step": 8661 }, { "epoch": 1.321563720703125e-05, "step": 8661, "training_step_time": 0.10681867599487305 }, { "epoch": 1.32171630859375e-05, "model_forward_time": 0.025609731674194336, "step": 8662 }, { "epoch": 1.32171630859375e-05, "step": 8662, "training_step_time": 0.17490720748901367 }, { "epoch": 1.321868896484375e-05, "model_forward_time": 0.024233341217041016, "step": 8663 }, { "epoch": 1.321868896484375e-05, "step": 8663, "training_step_time": 0.16441917419433594 }, { "epoch": 1.322021484375e-05, "model_forward_time": 0.024694442749023438, "step": 8664 }, { "epoch": 1.322021484375e-05, "step": 8664, "training_step_time": 0.10715794563293457 }, { "epoch": 1.322174072265625e-05, "model_forward_time": 0.02450084686279297, "step": 8665 }, { "epoch": 1.322174072265625e-05, "step": 8665, "training_step_time": 0.10627007484436035 }, { "epoch": 1.32232666015625e-05, "model_forward_time": 0.025232553482055664, "step": 8666 }, { "epoch": 1.32232666015625e-05, "step": 8666, "training_step_time": 0.11317014694213867 }, { "epoch": 1.322479248046875e-05, "model_forward_time": 0.025261640548706055, "step": 8667 }, { "epoch": 1.322479248046875e-05, "step": 8667, "training_step_time": 0.10953426361083984 }, { "epoch": 1.3226318359375e-05, "model_forward_time": 0.025219202041625977, "step": 8668 }, { "epoch": 1.3226318359375e-05, "step": 8668, "training_step_time": 0.11009407043457031 }, { "epoch": 1.322784423828125e-05, "model_forward_time": 0.02515721321105957, "step": 8669 }, { "epoch": 1.322784423828125e-05, "step": 8669, "training_step_time": 0.10850119590759277 }, { "epoch": 1.32293701171875e-05, "grad_norm": 0.41516929864883423, "learning_rate": 8.517952785058385e-05, "loss": 0.0286, "step": 8670 }, { "epoch": 1.32293701171875e-05, "model_forward_time": 0.02478957176208496, "step": 8670 }, { "epoch": 1.32293701171875e-05, "step": 8670, "training_step_time": 0.10839557647705078 }, { "epoch": 1.323089599609375e-05, "model_forward_time": 0.02549576759338379, "step": 8671 }, { "epoch": 1.323089599609375e-05, "step": 8671, "training_step_time": 0.11282062530517578 }, { "epoch": 1.3232421875e-05, "model_forward_time": 0.02573084831237793, "step": 8672 }, { "epoch": 1.3232421875e-05, "step": 8672, "training_step_time": 0.15232276916503906 }, { "epoch": 1.323394775390625e-05, "model_forward_time": 0.025122880935668945, "step": 8673 }, { "epoch": 1.323394775390625e-05, "step": 8673, "training_step_time": 0.11089658737182617 }, { "epoch": 1.32354736328125e-05, "model_forward_time": 0.025050878524780273, "step": 8674 }, { "epoch": 1.32354736328125e-05, "step": 8674, "training_step_time": 0.11160063743591309 }, { "epoch": 1.323699951171875e-05, "model_forward_time": 0.02529430389404297, "step": 8675 }, { "epoch": 1.323699951171875e-05, "step": 8675, "training_step_time": 0.11767864227294922 }, { "epoch": 1.3238525390625e-05, "model_forward_time": 0.025238513946533203, "step": 8676 }, { "epoch": 1.3238525390625e-05, "step": 8676, "training_step_time": 0.15393757820129395 }, { "epoch": 1.324005126953125e-05, "model_forward_time": 0.025004148483276367, "step": 8677 }, { "epoch": 1.324005126953125e-05, "step": 8677, "training_step_time": 0.21161937713623047 }, { "epoch": 1.32415771484375e-05, "model_forward_time": 0.024626731872558594, "step": 8678 }, { "epoch": 1.32415771484375e-05, "step": 8678, "training_step_time": 0.14476418495178223 }, { "epoch": 1.324310302734375e-05, "model_forward_time": 0.024337291717529297, "step": 8679 }, { "epoch": 1.324310302734375e-05, "step": 8679, "training_step_time": 0.15688753128051758 }, { "epoch": 1.324462890625e-05, "grad_norm": 0.3715316653251648, "learning_rate": 8.514034098786933e-05, "loss": 0.0215, "step": 8680 }, { "epoch": 1.324462890625e-05, "model_forward_time": 0.02440333366394043, "step": 8680 }, { "epoch": 1.324462890625e-05, "step": 8680, "training_step_time": 0.21669411659240723 }, { "epoch": 1.324615478515625e-05, "model_forward_time": 0.024186134338378906, "step": 8681 }, { "epoch": 1.324615478515625e-05, "step": 8681, "training_step_time": 0.16753268241882324 }, { "epoch": 1.32476806640625e-05, "model_forward_time": 0.02419304847717285, "step": 8682 }, { "epoch": 1.32476806640625e-05, "step": 8682, "training_step_time": 0.10979366302490234 }, { "epoch": 1.324920654296875e-05, "model_forward_time": 0.024457931518554688, "step": 8683 }, { "epoch": 1.324920654296875e-05, "step": 8683, "training_step_time": 0.10829734802246094 }, { "epoch": 1.3250732421875e-05, "model_forward_time": 0.02453303337097168, "step": 8684 }, { "epoch": 1.3250732421875e-05, "step": 8684, "training_step_time": 0.10763359069824219 }, { "epoch": 1.325225830078125e-05, "model_forward_time": 0.026613712310791016, "step": 8685 }, { "epoch": 1.325225830078125e-05, "step": 8685, "training_step_time": 0.11093807220458984 }, { "epoch": 1.32537841796875e-05, "model_forward_time": 0.02528095245361328, "step": 8686 }, { "epoch": 1.32537841796875e-05, "step": 8686, "training_step_time": 0.11257052421569824 }, { "epoch": 1.325531005859375e-05, "model_forward_time": 0.025196552276611328, "step": 8687 }, { "epoch": 1.325531005859375e-05, "step": 8687, "training_step_time": 0.11330842971801758 }, { "epoch": 1.32568359375e-05, "model_forward_time": 0.026841402053833008, "step": 8688 }, { "epoch": 1.32568359375e-05, "step": 8688, "training_step_time": 0.11173033714294434 }, { "epoch": 1.325836181640625e-05, "model_forward_time": 0.025418996810913086, "step": 8689 }, { "epoch": 1.325836181640625e-05, "step": 8689, "training_step_time": 0.11455345153808594 }, { "epoch": 1.32598876953125e-05, "grad_norm": 0.34135934710502625, "learning_rate": 8.510111142632698e-05, "loss": 0.0306, "step": 8690 }, { "epoch": 1.32598876953125e-05, "model_forward_time": 0.025343894958496094, "step": 8690 }, { "epoch": 1.32598876953125e-05, "step": 8690, "training_step_time": 0.11251282691955566 }, { "epoch": 1.326141357421875e-05, "model_forward_time": 0.025675058364868164, "step": 8691 }, { "epoch": 1.326141357421875e-05, "step": 8691, "training_step_time": 0.11121821403503418 }, { "epoch": 1.3262939453125e-05, "model_forward_time": 0.02627873420715332, "step": 8692 }, { "epoch": 1.3262939453125e-05, "step": 8692, "training_step_time": 0.10932326316833496 }, { "epoch": 1.326446533203125e-05, "model_forward_time": 0.025389671325683594, "step": 8693 }, { "epoch": 1.326446533203125e-05, "step": 8693, "training_step_time": 0.10909795761108398 }, { "epoch": 1.32659912109375e-05, "model_forward_time": 0.025283336639404297, "step": 8694 }, { "epoch": 1.32659912109375e-05, "step": 8694, "training_step_time": 0.1109457015991211 }, { "epoch": 1.326751708984375e-05, "model_forward_time": 0.025147676467895508, "step": 8695 }, { "epoch": 1.326751708984375e-05, "step": 8695, "training_step_time": 0.11028432846069336 }, { "epoch": 1.326904296875e-05, "model_forward_time": 0.025182247161865234, "step": 8696 }, { "epoch": 1.326904296875e-05, "step": 8696, "training_step_time": 0.11253476142883301 }, { "epoch": 1.327056884765625e-05, "model_forward_time": 0.0254361629486084, "step": 8697 }, { "epoch": 1.327056884765625e-05, "step": 8697, "training_step_time": 0.11026787757873535 }, { "epoch": 1.32720947265625e-05, "model_forward_time": 0.02513408660888672, "step": 8698 }, { "epoch": 1.32720947265625e-05, "step": 8698, "training_step_time": 0.10847854614257812 }, { "epoch": 1.327362060546875e-05, "model_forward_time": 0.025403976440429688, "step": 8699 }, { "epoch": 1.327362060546875e-05, "step": 8699, "training_step_time": 0.11688709259033203 }, { "epoch": 1.3275146484375e-05, "grad_norm": 0.37964928150177, "learning_rate": 8.506183921362443e-05, "loss": 0.0327, "step": 8700 }, { "epoch": 1.3275146484375e-05, "model_forward_time": 0.024176597595214844, "step": 8700 }, { "epoch": 1.3275146484375e-05, "step": 8700, "training_step_time": 0.1116485595703125 }, { "epoch": 1.327667236328125e-05, "model_forward_time": 0.0249631404876709, "step": 8701 }, { "epoch": 1.327667236328125e-05, "step": 8701, "training_step_time": 0.11012601852416992 }, { "epoch": 1.32781982421875e-05, "model_forward_time": 0.025253772735595703, "step": 8702 }, { "epoch": 1.32781982421875e-05, "step": 8702, "training_step_time": 0.11392855644226074 }, { "epoch": 1.327972412109375e-05, "model_forward_time": 0.02556443214416504, "step": 8703 }, { "epoch": 1.327972412109375e-05, "step": 8703, "training_step_time": 0.10684514045715332 }, { "epoch": 1.328125e-05, "model_forward_time": 0.02507495880126953, "step": 8704 }, { "epoch": 1.328125e-05, "step": 8704, "training_step_time": 0.1119847297668457 }, { "epoch": 1.328277587890625e-05, "model_forward_time": 0.025295019149780273, "step": 8705 }, { "epoch": 1.328277587890625e-05, "step": 8705, "training_step_time": 0.10932540893554688 }, { "epoch": 1.32843017578125e-05, "model_forward_time": 0.025536060333251953, "step": 8706 }, { "epoch": 1.32843017578125e-05, "step": 8706, "training_step_time": 0.11809372901916504 }, { "epoch": 1.328582763671875e-05, "model_forward_time": 0.02560710906982422, "step": 8707 }, { "epoch": 1.328582763671875e-05, "step": 8707, "training_step_time": 0.11478495597839355 }, { "epoch": 1.3287353515625e-05, "model_forward_time": 0.025247812271118164, "step": 8708 }, { "epoch": 1.3287353515625e-05, "step": 8708, "training_step_time": 0.2127077579498291 }, { "epoch": 1.328887939453125e-05, "model_forward_time": 0.024817466735839844, "step": 8709 }, { "epoch": 1.328887939453125e-05, "step": 8709, "training_step_time": 0.11666011810302734 }, { "epoch": 1.32904052734375e-05, "grad_norm": 0.3208135664463043, "learning_rate": 8.502252439748113e-05, "loss": 0.0414, "step": 8710 }, { "epoch": 1.32904052734375e-05, "model_forward_time": 0.024654626846313477, "step": 8710 }, { "epoch": 1.32904052734375e-05, "step": 8710, "training_step_time": 0.10544872283935547 }, { "epoch": 1.329193115234375e-05, "model_forward_time": 0.025427579879760742, "step": 8711 }, { "epoch": 1.329193115234375e-05, "step": 8711, "training_step_time": 0.10595250129699707 }, { "epoch": 1.329345703125e-05, "model_forward_time": 0.024982690811157227, "step": 8712 }, { "epoch": 1.329345703125e-05, "step": 8712, "training_step_time": 0.10770392417907715 }, { "epoch": 1.329498291015625e-05, "model_forward_time": 0.02541828155517578, "step": 8713 }, { "epoch": 1.329498291015625e-05, "step": 8713, "training_step_time": 0.11406874656677246 }, { "epoch": 1.32965087890625e-05, "model_forward_time": 0.024962902069091797, "step": 8714 }, { "epoch": 1.32965087890625e-05, "step": 8714, "training_step_time": 0.10871672630310059 }, { "epoch": 1.329803466796875e-05, "model_forward_time": 0.02542710304260254, "step": 8715 }, { "epoch": 1.329803466796875e-05, "step": 8715, "training_step_time": 0.10850667953491211 }, { "epoch": 1.3299560546875e-05, "model_forward_time": 0.025145769119262695, "step": 8716 }, { "epoch": 1.3299560546875e-05, "step": 8716, "training_step_time": 0.1073160171508789 }, { "epoch": 1.330108642578125e-05, "model_forward_time": 0.02538895606994629, "step": 8717 }, { "epoch": 1.330108642578125e-05, "step": 8717, "training_step_time": 0.10846948623657227 }, { "epoch": 1.33026123046875e-05, "model_forward_time": 0.02521681785583496, "step": 8718 }, { "epoch": 1.33026123046875e-05, "step": 8718, "training_step_time": 0.15985345840454102 }, { "epoch": 1.330413818359375e-05, "model_forward_time": 0.02490091323852539, "step": 8719 }, { "epoch": 1.330413818359375e-05, "step": 8719, "training_step_time": 0.10992693901062012 }, { "epoch": 1.33056640625e-05, "grad_norm": 0.6753371953964233, "learning_rate": 8.498316702566828e-05, "loss": 0.0525, "step": 8720 }, { "epoch": 1.33056640625e-05, "model_forward_time": 0.023951292037963867, "step": 8720 }, { "epoch": 1.33056640625e-05, "step": 8720, "training_step_time": 0.11025524139404297 }, { "epoch": 1.330718994140625e-05, "model_forward_time": 0.025206565856933594, "step": 8721 }, { "epoch": 1.330718994140625e-05, "step": 8721, "training_step_time": 0.11939597129821777 }, { "epoch": 1.33087158203125e-05, "model_forward_time": 0.02513599395751953, "step": 8722 }, { "epoch": 1.33087158203125e-05, "step": 8722, "training_step_time": 0.20740532875061035 }, { "epoch": 1.331024169921875e-05, "model_forward_time": 0.024218320846557617, "step": 8723 }, { "epoch": 1.331024169921875e-05, "step": 8723, "training_step_time": 0.15556645393371582 }, { "epoch": 1.3311767578125e-05, "model_forward_time": 0.024419069290161133, "step": 8724 }, { "epoch": 1.3311767578125e-05, "step": 8724, "training_step_time": 0.19742417335510254 }, { "epoch": 1.331329345703125e-05, "model_forward_time": 0.024473190307617188, "step": 8725 }, { "epoch": 1.331329345703125e-05, "step": 8725, "training_step_time": 0.14482855796813965 }, { "epoch": 1.33148193359375e-05, "model_forward_time": 0.02490520477294922, "step": 8726 }, { "epoch": 1.33148193359375e-05, "step": 8726, "training_step_time": 0.1974935531616211 }, { "epoch": 1.331634521484375e-05, "model_forward_time": 0.02417445182800293, "step": 8727 }, { "epoch": 1.331634521484375e-05, "step": 8727, "training_step_time": 0.11752486228942871 }, { "epoch": 1.331787109375e-05, "model_forward_time": 0.024597644805908203, "step": 8728 }, { "epoch": 1.331787109375e-05, "step": 8728, "training_step_time": 0.11045217514038086 }, { "epoch": 1.331939697265625e-05, "model_forward_time": 0.025583982467651367, "step": 8729 }, { "epoch": 1.331939697265625e-05, "step": 8729, "training_step_time": 0.12935280799865723 }, { "epoch": 1.33209228515625e-05, "grad_norm": 0.5615624785423279, "learning_rate": 8.494376714600878e-05, "loss": 0.0281, "step": 8730 }, { "epoch": 1.33209228515625e-05, "model_forward_time": 0.025447607040405273, "step": 8730 }, { "epoch": 1.33209228515625e-05, "step": 8730, "training_step_time": 0.10720276832580566 }, { "epoch": 1.332244873046875e-05, "model_forward_time": 0.02550816535949707, "step": 8731 }, { "epoch": 1.332244873046875e-05, "step": 8731, "training_step_time": 0.10784912109375 }, { "epoch": 1.3323974609375e-05, "model_forward_time": 0.02492690086364746, "step": 8732 }, { "epoch": 1.3323974609375e-05, "step": 8732, "training_step_time": 0.10740041732788086 }, { "epoch": 1.332550048828125e-05, "model_forward_time": 0.025301694869995117, "step": 8733 }, { "epoch": 1.332550048828125e-05, "step": 8733, "training_step_time": 0.10611891746520996 }, { "epoch": 1.33270263671875e-05, "model_forward_time": 0.025489330291748047, "step": 8734 }, { "epoch": 1.33270263671875e-05, "step": 8734, "training_step_time": 0.10845470428466797 }, { "epoch": 1.332855224609375e-05, "model_forward_time": 0.02501225471496582, "step": 8735 }, { "epoch": 1.332855224609375e-05, "step": 8735, "training_step_time": 0.10717320442199707 }, { "epoch": 1.3330078125e-05, "model_forward_time": 0.025678157806396484, "step": 8736 }, { "epoch": 1.3330078125e-05, "step": 8736, "training_step_time": 0.11244511604309082 }, { "epoch": 1.333160400390625e-05, "model_forward_time": 0.02494645118713379, "step": 8737 }, { "epoch": 1.333160400390625e-05, "step": 8737, "training_step_time": 0.10598158836364746 }, { "epoch": 1.33331298828125e-05, "model_forward_time": 0.024924039840698242, "step": 8738 }, { "epoch": 1.33331298828125e-05, "step": 8738, "training_step_time": 0.10685133934020996 }, { "epoch": 1.333465576171875e-05, "model_forward_time": 0.025328874588012695, "step": 8739 }, { "epoch": 1.333465576171875e-05, "step": 8739, "training_step_time": 0.10676169395446777 }, { "epoch": 1.3336181640625e-05, "grad_norm": 0.405076801776886, "learning_rate": 8.490432480637723e-05, "loss": 0.0296, "step": 8740 }, { "epoch": 1.3336181640625e-05, "model_forward_time": 0.025174617767333984, "step": 8740 }, { "epoch": 1.3336181640625e-05, "step": 8740, "training_step_time": 0.10813426971435547 }, { "epoch": 1.333770751953125e-05, "model_forward_time": 0.02494978904724121, "step": 8741 }, { "epoch": 1.333770751953125e-05, "step": 8741, "training_step_time": 0.10765790939331055 }, { "epoch": 1.33392333984375e-05, "model_forward_time": 0.024982452392578125, "step": 8742 }, { "epoch": 1.33392333984375e-05, "step": 8742, "training_step_time": 0.10985565185546875 }, { "epoch": 1.334075927734375e-05, "model_forward_time": 0.024590015411376953, "step": 8743 }, { "epoch": 1.334075927734375e-05, "step": 8743, "training_step_time": 0.10851693153381348 }, { "epoch": 1.334228515625e-05, "model_forward_time": 0.02555394172668457, "step": 8744 }, { "epoch": 1.334228515625e-05, "step": 8744, "training_step_time": 0.11751031875610352 }, { "epoch": 1.334381103515625e-05, "model_forward_time": 0.02538919448852539, "step": 8745 }, { "epoch": 1.334381103515625e-05, "step": 8745, "training_step_time": 0.10754060745239258 }, { "epoch": 1.33453369140625e-05, "model_forward_time": 0.025147199630737305, "step": 8746 }, { "epoch": 1.33453369140625e-05, "step": 8746, "training_step_time": 0.10901141166687012 }, { "epoch": 1.334686279296875e-05, "model_forward_time": 0.025341033935546875, "step": 8747 }, { "epoch": 1.334686279296875e-05, "step": 8747, "training_step_time": 0.10875320434570312 }, { "epoch": 1.3348388671875e-05, "model_forward_time": 0.02519392967224121, "step": 8748 }, { "epoch": 1.3348388671875e-05, "step": 8748, "training_step_time": 0.10841608047485352 }, { "epoch": 1.334991455078125e-05, "model_forward_time": 0.025355815887451172, "step": 8749 }, { "epoch": 1.334991455078125e-05, "step": 8749, "training_step_time": 0.10985255241394043 }, { "epoch": 1.33514404296875e-05, "grad_norm": 0.5718039274215698, "learning_rate": 8.486484005469977e-05, "loss": 0.0406, "step": 8750 }, { "epoch": 1.33514404296875e-05, "model_forward_time": 0.02520751953125, "step": 8750 }, { "epoch": 1.33514404296875e-05, "step": 8750, "training_step_time": 0.11177849769592285 }, { "epoch": 1.335296630859375e-05, "model_forward_time": 0.025536537170410156, "step": 8751 }, { "epoch": 1.335296630859375e-05, "step": 8751, "training_step_time": 0.10800457000732422 }, { "epoch": 1.33544921875e-05, "model_forward_time": 0.02536749839782715, "step": 8752 }, { "epoch": 1.33544921875e-05, "step": 8752, "training_step_time": 0.11539387702941895 }, { "epoch": 1.335601806640625e-05, "model_forward_time": 0.025501728057861328, "step": 8753 }, { "epoch": 1.335601806640625e-05, "step": 8753, "training_step_time": 0.10890793800354004 }, { "epoch": 1.33575439453125e-05, "model_forward_time": 0.025465726852416992, "step": 8754 }, { "epoch": 1.33575439453125e-05, "step": 8754, "training_step_time": 0.11148905754089355 }, { "epoch": 1.335906982421875e-05, "model_forward_time": 0.02530384063720703, "step": 8755 }, { "epoch": 1.335906982421875e-05, "step": 8755, "training_step_time": 0.17321157455444336 }, { "epoch": 1.3360595703125e-05, "model_forward_time": 0.02442145347595215, "step": 8756 }, { "epoch": 1.3360595703125e-05, "step": 8756, "training_step_time": 0.1709728240966797 }, { "epoch": 1.336212158203125e-05, "model_forward_time": 0.02507185935974121, "step": 8757 }, { "epoch": 1.336212158203125e-05, "step": 8757, "training_step_time": 0.10523271560668945 }, { "epoch": 1.33636474609375e-05, "model_forward_time": 0.024764060974121094, "step": 8758 }, { "epoch": 1.33636474609375e-05, "step": 8758, "training_step_time": 0.10849165916442871 }, { "epoch": 1.336517333984375e-05, "model_forward_time": 0.0257565975189209, "step": 8759 }, { "epoch": 1.336517333984375e-05, "step": 8759, "training_step_time": 0.10713338851928711 }, { "epoch": 1.336669921875e-05, "grad_norm": 0.5659478306770325, "learning_rate": 8.482531293895412e-05, "loss": 0.0322, "step": 8760 }, { "epoch": 1.336669921875e-05, "model_forward_time": 0.026951074600219727, "step": 8760 }, { "epoch": 1.336669921875e-05, "step": 8760, "training_step_time": 0.11285281181335449 }, { "epoch": 1.336822509765625e-05, "model_forward_time": 0.025627613067626953, "step": 8761 }, { "epoch": 1.336822509765625e-05, "step": 8761, "training_step_time": 0.11042428016662598 }, { "epoch": 1.33697509765625e-05, "model_forward_time": 0.02534198760986328, "step": 8762 }, { "epoch": 1.33697509765625e-05, "step": 8762, "training_step_time": 0.10664033889770508 }, { "epoch": 1.337127685546875e-05, "model_forward_time": 0.025066137313842773, "step": 8763 }, { "epoch": 1.337127685546875e-05, "step": 8763, "training_step_time": 0.10854625701904297 }, { "epoch": 1.3372802734375e-05, "model_forward_time": 0.02540898323059082, "step": 8764 }, { "epoch": 1.3372802734375e-05, "step": 8764, "training_step_time": 0.10821223258972168 }, { "epoch": 1.337432861328125e-05, "model_forward_time": 0.0286865234375, "step": 8765 }, { "epoch": 1.337432861328125e-05, "step": 8765, "training_step_time": 0.11848688125610352 }, { "epoch": 1.33758544921875e-05, "model_forward_time": 0.024961471557617188, "step": 8766 }, { "epoch": 1.33758544921875e-05, "step": 8766, "training_step_time": 0.11111664772033691 }, { "epoch": 1.337738037109375e-05, "model_forward_time": 0.024529695510864258, "step": 8767 }, { "epoch": 1.337738037109375e-05, "step": 8767, "training_step_time": 0.10783553123474121 }, { "epoch": 1.337890625e-05, "model_forward_time": 0.02606511116027832, "step": 8768 }, { "epoch": 1.337890625e-05, "step": 8768, "training_step_time": 0.1223287582397461 }, { "epoch": 1.338043212890625e-05, "model_forward_time": 0.02485513687133789, "step": 8769 }, { "epoch": 1.338043212890625e-05, "step": 8769, "training_step_time": 0.12665843963623047 }, { "epoch": 1.33819580078125e-05, "grad_norm": 0.5869755744934082, "learning_rate": 8.478574350716941e-05, "loss": 0.0364, "step": 8770 }, { "epoch": 1.33819580078125e-05, "model_forward_time": 0.024556875228881836, "step": 8770 }, { "epoch": 1.33819580078125e-05, "step": 8770, "training_step_time": 0.11711621284484863 }, { "epoch": 1.338348388671875e-05, "model_forward_time": 0.024651288986206055, "step": 8771 }, { "epoch": 1.338348388671875e-05, "step": 8771, "training_step_time": 0.12674331665039062 }, { "epoch": 1.3385009765625e-05, "model_forward_time": 0.025304317474365234, "step": 8772 }, { "epoch": 1.3385009765625e-05, "step": 8772, "training_step_time": 0.13030409812927246 }, { "epoch": 1.338653564453125e-05, "model_forward_time": 0.024782896041870117, "step": 8773 }, { "epoch": 1.338653564453125e-05, "step": 8773, "training_step_time": 0.20790362358093262 }, { "epoch": 1.33880615234375e-05, "model_forward_time": 0.024371623992919922, "step": 8774 }, { "epoch": 1.33880615234375e-05, "step": 8774, "training_step_time": 0.10860562324523926 }, { "epoch": 1.338958740234375e-05, "model_forward_time": 0.024336814880371094, "step": 8775 }, { "epoch": 1.338958740234375e-05, "step": 8775, "training_step_time": 0.1091773509979248 }, { "epoch": 1.339111328125e-05, "model_forward_time": 0.02565598487854004, "step": 8776 }, { "epoch": 1.339111328125e-05, "step": 8776, "training_step_time": 0.12937617301940918 }, { "epoch": 1.339263916015625e-05, "model_forward_time": 0.025231599807739258, "step": 8777 }, { "epoch": 1.339263916015625e-05, "step": 8777, "training_step_time": 0.11984109878540039 }, { "epoch": 1.33941650390625e-05, "model_forward_time": 0.025063037872314453, "step": 8778 }, { "epoch": 1.33941650390625e-05, "step": 8778, "training_step_time": 0.12134313583374023 }, { "epoch": 1.339569091796875e-05, "model_forward_time": 0.025151729583740234, "step": 8779 }, { "epoch": 1.339569091796875e-05, "step": 8779, "training_step_time": 0.12621593475341797 }, { "epoch": 1.3397216796875e-05, "grad_norm": 0.333095908164978, "learning_rate": 8.474613180742628e-05, "loss": 0.0382, "step": 8780 }, { "epoch": 1.3397216796875e-05, "model_forward_time": 0.024985313415527344, "step": 8780 }, { "epoch": 1.3397216796875e-05, "step": 8780, "training_step_time": 0.1232759952545166 }, { "epoch": 1.339874267578125e-05, "model_forward_time": 0.025370359420776367, "step": 8781 }, { "epoch": 1.339874267578125e-05, "step": 8781, "training_step_time": 0.1264786720275879 }, { "epoch": 1.34002685546875e-05, "model_forward_time": 0.024039745330810547, "step": 8782 }, { "epoch": 1.34002685546875e-05, "step": 8782, "training_step_time": 0.12488174438476562 }, { "epoch": 1.340179443359375e-05, "model_forward_time": 0.02418661117553711, "step": 8783 }, { "epoch": 1.340179443359375e-05, "step": 8783, "training_step_time": 0.12580490112304688 }, { "epoch": 1.34033203125e-05, "model_forward_time": 0.026244401931762695, "step": 8784 }, { "epoch": 1.34033203125e-05, "step": 8784, "training_step_time": 0.11731886863708496 }, { "epoch": 1.340484619140625e-05, "model_forward_time": 0.02543044090270996, "step": 8785 }, { "epoch": 1.340484619140625e-05, "step": 8785, "training_step_time": 0.11612439155578613 }, { "epoch": 1.34063720703125e-05, "model_forward_time": 0.025148391723632812, "step": 8786 }, { "epoch": 1.34063720703125e-05, "step": 8786, "training_step_time": 0.1104285717010498 }, { "epoch": 1.340789794921875e-05, "model_forward_time": 0.025269746780395508, "step": 8787 }, { "epoch": 1.340789794921875e-05, "step": 8787, "training_step_time": 0.10896492004394531 }, { "epoch": 1.3409423828125e-05, "model_forward_time": 0.02523326873779297, "step": 8788 }, { "epoch": 1.3409423828125e-05, "step": 8788, "training_step_time": 0.10929012298583984 }, { "epoch": 1.341094970703125e-05, "model_forward_time": 0.025089502334594727, "step": 8789 }, { "epoch": 1.341094970703125e-05, "step": 8789, "training_step_time": 0.11003828048706055 }, { "epoch": 1.34124755859375e-05, "grad_norm": 0.3114985227584839, "learning_rate": 8.470647788785665e-05, "loss": 0.0287, "step": 8790 }, { "epoch": 1.34124755859375e-05, "model_forward_time": 0.025006532669067383, "step": 8790 }, { "epoch": 1.34124755859375e-05, "step": 8790, "training_step_time": 0.10836076736450195 }, { "epoch": 1.341400146484375e-05, "model_forward_time": 0.025517940521240234, "step": 8791 }, { "epoch": 1.341400146484375e-05, "step": 8791, "training_step_time": 0.11234045028686523 }, { "epoch": 1.341552734375e-05, "model_forward_time": 0.025081157684326172, "step": 8792 }, { "epoch": 1.341552734375e-05, "step": 8792, "training_step_time": 0.10860323905944824 }, { "epoch": 1.341705322265625e-05, "model_forward_time": 0.025372028350830078, "step": 8793 }, { "epoch": 1.341705322265625e-05, "step": 8793, "training_step_time": 0.10774827003479004 }, { "epoch": 1.34185791015625e-05, "model_forward_time": 0.025311946868896484, "step": 8794 }, { "epoch": 1.34185791015625e-05, "step": 8794, "training_step_time": 0.10869193077087402 }, { "epoch": 1.342010498046875e-05, "model_forward_time": 0.025477170944213867, "step": 8795 }, { "epoch": 1.342010498046875e-05, "step": 8795, "training_step_time": 0.10699176788330078 }, { "epoch": 1.3421630859375e-05, "model_forward_time": 0.025287866592407227, "step": 8796 }, { "epoch": 1.3421630859375e-05, "step": 8796, "training_step_time": 0.11208748817443848 }, { "epoch": 1.342315673828125e-05, "model_forward_time": 0.02572774887084961, "step": 8797 }, { "epoch": 1.342315673828125e-05, "step": 8797, "training_step_time": 0.10788488388061523 }, { "epoch": 1.34246826171875e-05, "model_forward_time": 0.025348663330078125, "step": 8798 }, { "epoch": 1.34246826171875e-05, "step": 8798, "training_step_time": 0.10809516906738281 }, { "epoch": 1.342620849609375e-05, "model_forward_time": 0.025090932846069336, "step": 8799 }, { "epoch": 1.342620849609375e-05, "step": 8799, "training_step_time": 0.1138010025024414 }, { "epoch": 1.3427734375e-05, "grad_norm": 0.3685428202152252, "learning_rate": 8.466678179664379e-05, "loss": 0.0232, "step": 8800 }, { "epoch": 1.3427734375e-05, "model_forward_time": 0.025508403778076172, "step": 8800 }, { "epoch": 1.3427734375e-05, "step": 8800, "training_step_time": 0.10789299011230469 }, { "epoch": 1.342926025390625e-05, "model_forward_time": 0.025765419006347656, "step": 8801 }, { "epoch": 1.342926025390625e-05, "step": 8801, "training_step_time": 0.11353731155395508 }, { "epoch": 1.34307861328125e-05, "model_forward_time": 0.025545120239257812, "step": 8802 }, { "epoch": 1.34307861328125e-05, "step": 8802, "training_step_time": 0.17339706420898438 }, { "epoch": 1.343231201171875e-05, "model_forward_time": 0.024440288543701172, "step": 8803 }, { "epoch": 1.343231201171875e-05, "step": 8803, "training_step_time": 0.1670207977294922 }, { "epoch": 1.3433837890625e-05, "model_forward_time": 0.024464845657348633, "step": 8804 }, { "epoch": 1.3433837890625e-05, "step": 8804, "training_step_time": 0.10442662239074707 }, { "epoch": 1.343536376953125e-05, "model_forward_time": 0.025066852569580078, "step": 8805 }, { "epoch": 1.343536376953125e-05, "step": 8805, "training_step_time": 0.10520458221435547 }, { "epoch": 1.34368896484375e-05, "model_forward_time": 0.025597333908081055, "step": 8806 }, { "epoch": 1.34368896484375e-05, "step": 8806, "training_step_time": 0.11063241958618164 }, { "epoch": 1.343841552734375e-05, "model_forward_time": 0.025333881378173828, "step": 8807 }, { "epoch": 1.343841552734375e-05, "step": 8807, "training_step_time": 0.10731196403503418 }, { "epoch": 1.343994140625e-05, "model_forward_time": 0.025368213653564453, "step": 8808 }, { "epoch": 1.343994140625e-05, "step": 8808, "training_step_time": 0.10878777503967285 }, { "epoch": 1.344146728515625e-05, "model_forward_time": 0.02531743049621582, "step": 8809 }, { "epoch": 1.344146728515625e-05, "step": 8809, "training_step_time": 0.1060178279876709 }, { "epoch": 1.34429931640625e-05, "grad_norm": 0.2750084102153778, "learning_rate": 8.462704358202216e-05, "loss": 0.031, "step": 8810 }, { "epoch": 1.34429931640625e-05, "model_forward_time": 0.025397539138793945, "step": 8810 }, { "epoch": 1.34429931640625e-05, "step": 8810, "training_step_time": 0.11011457443237305 }, { "epoch": 1.344451904296875e-05, "model_forward_time": 0.02520895004272461, "step": 8811 }, { "epoch": 1.344451904296875e-05, "step": 8811, "training_step_time": 0.1070108413696289 }, { "epoch": 1.3446044921875e-05, "model_forward_time": 0.025388002395629883, "step": 8812 }, { "epoch": 1.3446044921875e-05, "step": 8812, "training_step_time": 0.14338254928588867 }, { "epoch": 1.344757080078125e-05, "model_forward_time": 0.02521681785583496, "step": 8813 }, { "epoch": 1.344757080078125e-05, "step": 8813, "training_step_time": 0.11064887046813965 }, { "epoch": 1.34490966796875e-05, "model_forward_time": 0.02514195442199707, "step": 8814 }, { "epoch": 1.34490966796875e-05, "step": 8814, "training_step_time": 0.11353230476379395 }, { "epoch": 1.345062255859375e-05, "model_forward_time": 0.0254514217376709, "step": 8815 }, { "epoch": 1.345062255859375e-05, "step": 8815, "training_step_time": 0.11924171447753906 }, { "epoch": 1.34521484375e-05, "model_forward_time": 0.025285005569458008, "step": 8816 }, { "epoch": 1.34521484375e-05, "step": 8816, "training_step_time": 0.13036465644836426 }, { "epoch": 1.345367431640625e-05, "model_forward_time": 0.02561783790588379, "step": 8817 }, { "epoch": 1.345367431640625e-05, "step": 8817, "training_step_time": 0.12407898902893066 }, { "epoch": 1.34552001953125e-05, "model_forward_time": 0.02499532699584961, "step": 8818 }, { "epoch": 1.34552001953125e-05, "step": 8818, "training_step_time": 0.15128612518310547 }, { "epoch": 1.345672607421875e-05, "model_forward_time": 0.02417159080505371, "step": 8819 }, { "epoch": 1.345672607421875e-05, "step": 8819, "training_step_time": 0.11753034591674805 }, { "epoch": 1.3458251953125e-05, "grad_norm": 0.5003789067268372, "learning_rate": 8.458726329227747e-05, "loss": 0.0308, "step": 8820 }, { "epoch": 1.3458251953125e-05, "model_forward_time": 0.02455902099609375, "step": 8820 }, { "epoch": 1.3458251953125e-05, "step": 8820, "training_step_time": 0.1599719524383545 }, { "epoch": 1.345977783203125e-05, "model_forward_time": 0.024831295013427734, "step": 8821 }, { "epoch": 1.345977783203125e-05, "step": 8821, "training_step_time": 0.18172764778137207 }, { "epoch": 1.34613037109375e-05, "model_forward_time": 0.024753332138061523, "step": 8822 }, { "epoch": 1.34613037109375e-05, "step": 8822, "training_step_time": 0.20117568969726562 }, { "epoch": 1.346282958984375e-05, "model_forward_time": 0.024095773696899414, "step": 8823 }, { "epoch": 1.346282958984375e-05, "step": 8823, "training_step_time": 0.12165379524230957 }, { "epoch": 1.346435546875e-05, "model_forward_time": 0.024319887161254883, "step": 8824 }, { "epoch": 1.346435546875e-05, "step": 8824, "training_step_time": 0.10489177703857422 }, { "epoch": 1.346588134765625e-05, "model_forward_time": 0.025298118591308594, "step": 8825 }, { "epoch": 1.346588134765625e-05, "step": 8825, "training_step_time": 0.1065514087677002 }, { "epoch": 1.34674072265625e-05, "model_forward_time": 0.025423765182495117, "step": 8826 }, { "epoch": 1.34674072265625e-05, "step": 8826, "training_step_time": 0.10610079765319824 }, { "epoch": 1.346893310546875e-05, "model_forward_time": 0.024874448776245117, "step": 8827 }, { "epoch": 1.346893310546875e-05, "step": 8827, "training_step_time": 0.1057438850402832 }, { "epoch": 1.3470458984375e-05, "model_forward_time": 0.025505542755126953, "step": 8828 }, { "epoch": 1.3470458984375e-05, "step": 8828, "training_step_time": 0.11060285568237305 }, { "epoch": 1.347198486328125e-05, "model_forward_time": 0.024786949157714844, "step": 8829 }, { "epoch": 1.347198486328125e-05, "step": 8829, "training_step_time": 0.11215806007385254 }, { "epoch": 1.34735107421875e-05, "grad_norm": 0.40606626868247986, "learning_rate": 8.454744097574652e-05, "loss": 0.0321, "step": 8830 }, { "epoch": 1.34735107421875e-05, "model_forward_time": 0.025208473205566406, "step": 8830 }, { "epoch": 1.34735107421875e-05, "step": 8830, "training_step_time": 0.10625576972961426 }, { "epoch": 1.347503662109375e-05, "model_forward_time": 0.02485203742980957, "step": 8831 }, { "epoch": 1.347503662109375e-05, "step": 8831, "training_step_time": 0.1052711009979248 }, { "epoch": 1.34765625e-05, "model_forward_time": 0.02641606330871582, "step": 8832 }, { "epoch": 1.34765625e-05, "step": 8832, "training_step_time": 0.10939502716064453 }, { "epoch": 1.347808837890625e-05, "model_forward_time": 0.024899005889892578, "step": 8833 }, { "epoch": 1.347808837890625e-05, "step": 8833, "training_step_time": 0.11361575126647949 }, { "epoch": 1.34796142578125e-05, "model_forward_time": 0.02507781982421875, "step": 8834 }, { "epoch": 1.34796142578125e-05, "step": 8834, "training_step_time": 0.11864209175109863 }, { "epoch": 1.348114013671875e-05, "model_forward_time": 0.025072336196899414, "step": 8835 }, { "epoch": 1.348114013671875e-05, "step": 8835, "training_step_time": 0.12011456489562988 }, { "epoch": 1.3482666015625e-05, "model_forward_time": 0.025199174880981445, "step": 8836 }, { "epoch": 1.3482666015625e-05, "step": 8836, "training_step_time": 0.12028288841247559 }, { "epoch": 1.348419189453125e-05, "model_forward_time": 0.025249481201171875, "step": 8837 }, { "epoch": 1.348419189453125e-05, "step": 8837, "training_step_time": 0.11735177040100098 }, { "epoch": 1.34857177734375e-05, "model_forward_time": 0.025351762771606445, "step": 8838 }, { "epoch": 1.34857177734375e-05, "step": 8838, "training_step_time": 0.11881470680236816 }, { "epoch": 1.348724365234375e-05, "model_forward_time": 0.025122880935668945, "step": 8839 }, { "epoch": 1.348724365234375e-05, "step": 8839, "training_step_time": 0.1134951114654541 }, { "epoch": 1.348876953125e-05, "grad_norm": 0.5772373080253601, "learning_rate": 8.450757668081716e-05, "loss": 0.0312, "step": 8840 }, { "epoch": 1.348876953125e-05, "model_forward_time": 0.024771928787231445, "step": 8840 }, { "epoch": 1.348876953125e-05, "step": 8840, "training_step_time": 0.11152887344360352 }, { "epoch": 1.349029541015625e-05, "model_forward_time": 0.024884939193725586, "step": 8841 }, { "epoch": 1.349029541015625e-05, "step": 8841, "training_step_time": 0.11375761032104492 }, { "epoch": 1.34918212890625e-05, "model_forward_time": 0.024939775466918945, "step": 8842 }, { "epoch": 1.34918212890625e-05, "step": 8842, "training_step_time": 0.11223506927490234 }, { "epoch": 1.349334716796875e-05, "model_forward_time": 0.025085926055908203, "step": 8843 }, { "epoch": 1.349334716796875e-05, "step": 8843, "training_step_time": 0.10805225372314453 }, { "epoch": 1.3494873046875e-05, "model_forward_time": 0.025293827056884766, "step": 8844 }, { "epoch": 1.3494873046875e-05, "step": 8844, "training_step_time": 0.11306881904602051 }, { "epoch": 1.349639892578125e-05, "model_forward_time": 0.025015592575073242, "step": 8845 }, { "epoch": 1.349639892578125e-05, "step": 8845, "training_step_time": 0.16548633575439453 }, { "epoch": 1.34979248046875e-05, "model_forward_time": 0.024149179458618164, "step": 8846 }, { "epoch": 1.34979248046875e-05, "step": 8846, "training_step_time": 0.16495609283447266 }, { "epoch": 1.349945068359375e-05, "model_forward_time": 0.02623605728149414, "step": 8847 }, { "epoch": 1.349945068359375e-05, "step": 8847, "training_step_time": 0.11311841011047363 }, { "epoch": 1.35009765625e-05, "model_forward_time": 0.024779796600341797, "step": 8848 }, { "epoch": 1.35009765625e-05, "step": 8848, "training_step_time": 0.17114758491516113 }, { "epoch": 1.350250244140625e-05, "model_forward_time": 0.024367570877075195, "step": 8849 }, { "epoch": 1.350250244140625e-05, "step": 8849, "training_step_time": 0.17211055755615234 }, { "epoch": 1.35040283203125e-05, "grad_norm": 0.4923705458641052, "learning_rate": 8.44676704559283e-05, "loss": 0.0257, "step": 8850 }, { "epoch": 1.35040283203125e-05, "model_forward_time": 0.02602100372314453, "step": 8850 }, { "epoch": 1.35040283203125e-05, "step": 8850, "training_step_time": 0.11019659042358398 }, { "epoch": 1.350555419921875e-05, "model_forward_time": 0.02504277229309082, "step": 8851 }, { "epoch": 1.350555419921875e-05, "step": 8851, "training_step_time": 0.10940194129943848 }, { "epoch": 1.3507080078125e-05, "model_forward_time": 0.025532007217407227, "step": 8852 }, { "epoch": 1.3507080078125e-05, "step": 8852, "training_step_time": 0.1074824333190918 }, { "epoch": 1.350860595703125e-05, "model_forward_time": 0.02534031867980957, "step": 8853 }, { "epoch": 1.350860595703125e-05, "step": 8853, "training_step_time": 0.10865926742553711 }, { "epoch": 1.35101318359375e-05, "model_forward_time": 0.02662515640258789, "step": 8854 }, { "epoch": 1.35101318359375e-05, "step": 8854, "training_step_time": 0.10818910598754883 }, { "epoch": 1.351165771484375e-05, "model_forward_time": 0.025313854217529297, "step": 8855 }, { "epoch": 1.351165771484375e-05, "step": 8855, "training_step_time": 0.1082310676574707 }, { "epoch": 1.351318359375e-05, "model_forward_time": 0.025217533111572266, "step": 8856 }, { "epoch": 1.351318359375e-05, "step": 8856, "training_step_time": 0.10806703567504883 }, { "epoch": 1.351470947265625e-05, "model_forward_time": 0.024977445602416992, "step": 8857 }, { "epoch": 1.351470947265625e-05, "step": 8857, "training_step_time": 0.11097955703735352 }, { "epoch": 1.35162353515625e-05, "model_forward_time": 0.02490520477294922, "step": 8858 }, { "epoch": 1.35162353515625e-05, "step": 8858, "training_step_time": 0.11985993385314941 }, { "epoch": 1.351776123046875e-05, "model_forward_time": 0.026998519897460938, "step": 8859 }, { "epoch": 1.351776123046875e-05, "step": 8859, "training_step_time": 0.11333155632019043 }, { "epoch": 1.3519287109375e-05, "grad_norm": 0.3777911365032196, "learning_rate": 8.442772234956972e-05, "loss": 0.0313, "step": 8860 }, { "epoch": 1.3519287109375e-05, "model_forward_time": 0.025001049041748047, "step": 8860 }, { "epoch": 1.3519287109375e-05, "step": 8860, "training_step_time": 0.21588969230651855 }, { "epoch": 1.352081298828125e-05, "model_forward_time": 0.024205923080444336, "step": 8861 }, { "epoch": 1.352081298828125e-05, "step": 8861, "training_step_time": 0.1356363296508789 }, { "epoch": 1.35223388671875e-05, "model_forward_time": 0.024274349212646484, "step": 8862 }, { "epoch": 1.35223388671875e-05, "step": 8862, "training_step_time": 0.11793041229248047 }, { "epoch": 1.352386474609375e-05, "model_forward_time": 0.024878978729248047, "step": 8863 }, { "epoch": 1.352386474609375e-05, "step": 8863, "training_step_time": 0.11830258369445801 }, { "epoch": 1.3525390625e-05, "model_forward_time": 0.02496194839477539, "step": 8864 }, { "epoch": 1.3525390625e-05, "step": 8864, "training_step_time": 0.19593119621276855 }, { "epoch": 1.352691650390625e-05, "model_forward_time": 0.02435016632080078, "step": 8865 }, { "epoch": 1.352691650390625e-05, "step": 8865, "training_step_time": 0.1910707950592041 }, { "epoch": 1.35284423828125e-05, "model_forward_time": 0.024585485458374023, "step": 8866 }, { "epoch": 1.35284423828125e-05, "step": 8866, "training_step_time": 0.18648099899291992 }, { "epoch": 1.352996826171875e-05, "model_forward_time": 0.024266958236694336, "step": 8867 }, { "epoch": 1.352996826171875e-05, "step": 8867, "training_step_time": 0.10962033271789551 }, { "epoch": 1.3531494140625e-05, "model_forward_time": 0.024217844009399414, "step": 8868 }, { "epoch": 1.3531494140625e-05, "step": 8868, "training_step_time": 0.1070561408996582 }, { "epoch": 1.353302001953125e-05, "model_forward_time": 0.02538013458251953, "step": 8869 }, { "epoch": 1.353302001953125e-05, "step": 8869, "training_step_time": 0.10833454132080078 }, { "epoch": 1.35345458984375e-05, "grad_norm": 0.2980242073535919, "learning_rate": 8.438773241028219e-05, "loss": 0.031, "step": 8870 }, { "epoch": 1.35345458984375e-05, "model_forward_time": 0.025416851043701172, "step": 8870 }, { "epoch": 1.35345458984375e-05, "step": 8870, "training_step_time": 0.10670590400695801 }, { "epoch": 1.353607177734375e-05, "model_forward_time": 0.02506732940673828, "step": 8871 }, { "epoch": 1.353607177734375e-05, "step": 8871, "training_step_time": 0.10593295097351074 }, { "epoch": 1.353759765625e-05, "model_forward_time": 0.024930715560913086, "step": 8872 }, { "epoch": 1.353759765625e-05, "step": 8872, "training_step_time": 0.11065030097961426 }, { "epoch": 1.353912353515625e-05, "model_forward_time": 0.025054216384887695, "step": 8873 }, { "epoch": 1.353912353515625e-05, "step": 8873, "training_step_time": 0.1139369010925293 }, { "epoch": 1.35406494140625e-05, "model_forward_time": 0.0253293514251709, "step": 8874 }, { "epoch": 1.35406494140625e-05, "step": 8874, "training_step_time": 0.1081547737121582 }, { "epoch": 1.354217529296875e-05, "model_forward_time": 0.024956703186035156, "step": 8875 }, { "epoch": 1.354217529296875e-05, "step": 8875, "training_step_time": 0.10578417778015137 }, { "epoch": 1.3543701171875e-05, "model_forward_time": 0.025588274002075195, "step": 8876 }, { "epoch": 1.3543701171875e-05, "step": 8876, "training_step_time": 0.10856103897094727 }, { "epoch": 1.354522705078125e-05, "model_forward_time": 0.02507615089416504, "step": 8877 }, { "epoch": 1.354522705078125e-05, "step": 8877, "training_step_time": 0.11995530128479004 }, { "epoch": 1.35467529296875e-05, "model_forward_time": 0.025243520736694336, "step": 8878 }, { "epoch": 1.35467529296875e-05, "step": 8878, "training_step_time": 0.11559224128723145 }, { "epoch": 1.354827880859375e-05, "model_forward_time": 0.024836063385009766, "step": 8879 }, { "epoch": 1.354827880859375e-05, "step": 8879, "training_step_time": 0.1065518856048584 }, { "epoch": 1.35498046875e-05, "grad_norm": 0.20807787775993347, "learning_rate": 8.434770068665723e-05, "loss": 0.037, "step": 8880 }, { "epoch": 1.35498046875e-05, "model_forward_time": 0.02470111846923828, "step": 8880 }, { "epoch": 1.35498046875e-05, "step": 8880, "training_step_time": 0.1107339859008789 }, { "epoch": 1.355133056640625e-05, "model_forward_time": 0.025808095932006836, "step": 8881 }, { "epoch": 1.355133056640625e-05, "step": 8881, "training_step_time": 0.10991358757019043 }, { "epoch": 1.35528564453125e-05, "model_forward_time": 0.024822711944580078, "step": 8882 }, { "epoch": 1.35528564453125e-05, "step": 8882, "training_step_time": 0.10769248008728027 }, { "epoch": 1.355438232421875e-05, "model_forward_time": 0.02543020248413086, "step": 8883 }, { "epoch": 1.355438232421875e-05, "step": 8883, "training_step_time": 0.10801458358764648 }, { "epoch": 1.3555908203125e-05, "model_forward_time": 0.0252230167388916, "step": 8884 }, { "epoch": 1.3555908203125e-05, "step": 8884, "training_step_time": 0.10470938682556152 }, { "epoch": 1.355743408203125e-05, "model_forward_time": 0.024959564208984375, "step": 8885 }, { "epoch": 1.355743408203125e-05, "step": 8885, "training_step_time": 0.10763239860534668 }, { "epoch": 1.35589599609375e-05, "model_forward_time": 0.02584075927734375, "step": 8886 }, { "epoch": 1.35589599609375e-05, "step": 8886, "training_step_time": 0.11389970779418945 }, { "epoch": 1.356048583984375e-05, "model_forward_time": 0.025709152221679688, "step": 8887 }, { "epoch": 1.356048583984375e-05, "step": 8887, "training_step_time": 0.14470815658569336 }, { "epoch": 1.356201171875e-05, "model_forward_time": 0.024810314178466797, "step": 8888 }, { "epoch": 1.356201171875e-05, "step": 8888, "training_step_time": 0.14928317070007324 }, { "epoch": 1.356353759765625e-05, "model_forward_time": 0.025288105010986328, "step": 8889 }, { "epoch": 1.356353759765625e-05, "step": 8889, "training_step_time": 0.11434555053710938 }, { "epoch": 1.35650634765625e-05, "grad_norm": 0.411801815032959, "learning_rate": 8.430762722733714e-05, "loss": 0.0329, "step": 8890 }, { "epoch": 1.35650634765625e-05, "model_forward_time": 0.02453756332397461, "step": 8890 }, { "epoch": 1.35650634765625e-05, "step": 8890, "training_step_time": 0.21364188194274902 }, { "epoch": 1.356658935546875e-05, "model_forward_time": 0.02632451057434082, "step": 8891 }, { "epoch": 1.356658935546875e-05, "step": 8891, "training_step_time": 0.13043761253356934 }, { "epoch": 1.3568115234375e-05, "model_forward_time": 0.024518966674804688, "step": 8892 }, { "epoch": 1.3568115234375e-05, "step": 8892, "training_step_time": 0.18149518966674805 }, { "epoch": 1.356964111328125e-05, "model_forward_time": 0.024816036224365234, "step": 8893 }, { "epoch": 1.356964111328125e-05, "step": 8893, "training_step_time": 0.1370687484741211 }, { "epoch": 1.35711669921875e-05, "model_forward_time": 0.0244596004486084, "step": 8894 }, { "epoch": 1.35711669921875e-05, "step": 8894, "training_step_time": 0.11090683937072754 }, { "epoch": 1.357269287109375e-05, "model_forward_time": 0.02552318572998047, "step": 8895 }, { "epoch": 1.357269287109375e-05, "step": 8895, "training_step_time": 0.1131746768951416 }, { "epoch": 1.357421875e-05, "model_forward_time": 0.025241374969482422, "step": 8896 }, { "epoch": 1.357421875e-05, "step": 8896, "training_step_time": 0.10974407196044922 }, { "epoch": 1.357574462890625e-05, "model_forward_time": 0.02513599395751953, "step": 8897 }, { "epoch": 1.357574462890625e-05, "step": 8897, "training_step_time": 0.10878872871398926 }, { "epoch": 1.35772705078125e-05, "model_forward_time": 0.025089740753173828, "step": 8898 }, { "epoch": 1.35772705078125e-05, "step": 8898, "training_step_time": 0.10956001281738281 }, { "epoch": 1.357879638671875e-05, "model_forward_time": 0.024848222732543945, "step": 8899 }, { "epoch": 1.357879638671875e-05, "step": 8899, "training_step_time": 0.10918354988098145 }, { "epoch": 1.3580322265625e-05, "grad_norm": 0.37221142649650574, "learning_rate": 8.4267512081015e-05, "loss": 0.031, "step": 8900 }, { "epoch": 1.3580322265625e-05, "model_forward_time": 0.02458977699279785, "step": 8900 }, { "epoch": 1.3580322265625e-05, "step": 8900, "training_step_time": 0.11006402969360352 }, { "epoch": 1.358184814453125e-05, "model_forward_time": 0.025192975997924805, "step": 8901 }, { "epoch": 1.358184814453125e-05, "step": 8901, "training_step_time": 0.1093902587890625 }, { "epoch": 1.35833740234375e-05, "model_forward_time": 0.02514815330505371, "step": 8902 }, { "epoch": 1.35833740234375e-05, "step": 8902, "training_step_time": 0.10878324508666992 }, { "epoch": 1.358489990234375e-05, "model_forward_time": 0.029021024703979492, "step": 8903 }, { "epoch": 1.358489990234375e-05, "step": 8903, "training_step_time": 0.14989256858825684 }, { "epoch": 1.358642578125e-05, "model_forward_time": 0.025372743606567383, "step": 8904 }, { "epoch": 1.358642578125e-05, "step": 8904, "training_step_time": 0.11182427406311035 }, { "epoch": 1.358795166015625e-05, "model_forward_time": 0.02456974983215332, "step": 8905 }, { "epoch": 1.358795166015625e-05, "step": 8905, "training_step_time": 0.2236781120300293 }, { "epoch": 1.35894775390625e-05, "model_forward_time": 0.024309873580932617, "step": 8906 }, { "epoch": 1.35894775390625e-05, "step": 8906, "training_step_time": 0.12743830680847168 }, { "epoch": 1.359100341796875e-05, "model_forward_time": 0.02399921417236328, "step": 8907 }, { "epoch": 1.359100341796875e-05, "step": 8907, "training_step_time": 0.10874700546264648 }, { "epoch": 1.3592529296875e-05, "model_forward_time": 0.025083303451538086, "step": 8908 }, { "epoch": 1.3592529296875e-05, "step": 8908, "training_step_time": 0.12138795852661133 }, { "epoch": 1.359405517578125e-05, "model_forward_time": 0.024996519088745117, "step": 8909 }, { "epoch": 1.359405517578125e-05, "step": 8909, "training_step_time": 0.1862947940826416 }, { "epoch": 1.35955810546875e-05, "grad_norm": 0.3871256411075592, "learning_rate": 8.422735529643444e-05, "loss": 0.0292, "step": 8910 }, { "epoch": 1.35955810546875e-05, "model_forward_time": 0.024939298629760742, "step": 8910 }, { "epoch": 1.35955810546875e-05, "step": 8910, "training_step_time": 0.18158173561096191 }, { "epoch": 1.359710693359375e-05, "model_forward_time": 0.02435135841369629, "step": 8911 }, { "epoch": 1.359710693359375e-05, "step": 8911, "training_step_time": 0.15028619766235352 }, { "epoch": 1.35986328125e-05, "model_forward_time": 0.024403095245361328, "step": 8912 }, { "epoch": 1.35986328125e-05, "step": 8912, "training_step_time": 0.11498665809631348 }, { "epoch": 1.360015869140625e-05, "model_forward_time": 0.024422168731689453, "step": 8913 }, { "epoch": 1.360015869140625e-05, "step": 8913, "training_step_time": 0.10341835021972656 }, { "epoch": 1.36016845703125e-05, "model_forward_time": 0.025068283081054688, "step": 8914 }, { "epoch": 1.36016845703125e-05, "step": 8914, "training_step_time": 0.10617995262145996 }, { "epoch": 1.360321044921875e-05, "model_forward_time": 0.025139570236206055, "step": 8915 }, { "epoch": 1.360321044921875e-05, "step": 8915, "training_step_time": 0.10662174224853516 }, { "epoch": 1.3604736328125e-05, "model_forward_time": 0.025115013122558594, "step": 8916 }, { "epoch": 1.3604736328125e-05, "step": 8916, "training_step_time": 0.10822749137878418 }, { "epoch": 1.360626220703125e-05, "model_forward_time": 0.025452375411987305, "step": 8917 }, { "epoch": 1.360626220703125e-05, "step": 8917, "training_step_time": 0.11053967475891113 }, { "epoch": 1.36077880859375e-05, "model_forward_time": 0.025655746459960938, "step": 8918 }, { "epoch": 1.36077880859375e-05, "step": 8918, "training_step_time": 0.16332650184631348 }, { "epoch": 1.360931396484375e-05, "model_forward_time": 0.023495912551879883, "step": 8919 }, { "epoch": 1.360931396484375e-05, "step": 8919, "training_step_time": 0.19013428688049316 }, { "epoch": 1.361083984375e-05, "grad_norm": 0.7622633576393127, "learning_rate": 8.418715692238978e-05, "loss": 0.0412, "step": 8920 }, { "epoch": 1.361083984375e-05, "model_forward_time": 0.02314591407775879, "step": 8920 }, { "epoch": 1.361083984375e-05, "step": 8920, "training_step_time": 0.17504215240478516 }, { "epoch": 1.361236572265625e-05, "model_forward_time": 0.023323535919189453, "step": 8921 }, { "epoch": 1.361236572265625e-05, "step": 8921, "training_step_time": 0.17570209503173828 }, { "epoch": 1.36138916015625e-05, "model_forward_time": 0.02346968650817871, "step": 8922 }, { "epoch": 1.36138916015625e-05, "step": 8922, "training_step_time": 0.1582016944885254 }, { "epoch": 1.361541748046875e-05, "model_forward_time": 0.023576021194458008, "step": 8923 }, { "epoch": 1.361541748046875e-05, "step": 8923, "training_step_time": 0.1454155445098877 }, { "epoch": 1.3616943359375e-05, "model_forward_time": 0.02311229705810547, "step": 8924 }, { "epoch": 1.3616943359375e-05, "step": 8924, "training_step_time": 0.13324904441833496 }, { "epoch": 1.361846923828125e-05, "model_forward_time": 0.023470640182495117, "step": 8925 }, { "epoch": 1.361846923828125e-05, "step": 8925, "training_step_time": 0.12807035446166992 }, { "epoch": 1.36199951171875e-05, "model_forward_time": 0.02347731590270996, "step": 8926 }, { "epoch": 1.36199951171875e-05, "step": 8926, "training_step_time": 0.12607288360595703 }, { "epoch": 1.362152099609375e-05, "model_forward_time": 0.02404475212097168, "step": 8927 }, { "epoch": 1.362152099609375e-05, "step": 8927, "training_step_time": 0.11951565742492676 }, { "epoch": 1.3623046875e-05, "model_forward_time": 0.024280548095703125, "step": 8928 }, { "epoch": 1.3623046875e-05, "step": 8928, "training_step_time": 0.11611819267272949 }, { "epoch": 1.362457275390625e-05, "model_forward_time": 0.02507328987121582, "step": 8929 }, { "epoch": 1.362457275390625e-05, "step": 8929, "training_step_time": 0.11864995956420898 }, { "epoch": 1.36260986328125e-05, "grad_norm": 0.6880233883857727, "learning_rate": 8.41469170077258e-05, "loss": 0.0331, "step": 8930 }, { "epoch": 1.36260986328125e-05, "model_forward_time": 0.02788519859313965, "step": 8930 }, { "epoch": 1.36260986328125e-05, "step": 8930, "training_step_time": 0.11542701721191406 }, { "epoch": 1.362762451171875e-05, "model_forward_time": 0.024332284927368164, "step": 8931 }, { "epoch": 1.362762451171875e-05, "step": 8931, "training_step_time": 0.10256576538085938 }, { "epoch": 1.3629150390625e-05, "model_forward_time": 0.024483203887939453, "step": 8932 }, { "epoch": 1.3629150390625e-05, "step": 8932, "training_step_time": 0.10550951957702637 }, { "epoch": 1.363067626953125e-05, "model_forward_time": 0.024417638778686523, "step": 8933 }, { "epoch": 1.363067626953125e-05, "step": 8933, "training_step_time": 0.12224030494689941 }, { "epoch": 1.36322021484375e-05, "model_forward_time": 0.024944543838500977, "step": 8934 }, { "epoch": 1.36322021484375e-05, "step": 8934, "training_step_time": 0.10565376281738281 }, { "epoch": 1.363372802734375e-05, "model_forward_time": 0.02512955665588379, "step": 8935 }, { "epoch": 1.363372802734375e-05, "step": 8935, "training_step_time": 0.11670923233032227 }, { "epoch": 1.363525390625e-05, "model_forward_time": 0.025147199630737305, "step": 8936 }, { "epoch": 1.363525390625e-05, "step": 8936, "training_step_time": 0.16999244689941406 }, { "epoch": 1.363677978515625e-05, "model_forward_time": 0.026273012161254883, "step": 8937 }, { "epoch": 1.363677978515625e-05, "step": 8937, "training_step_time": 0.16988635063171387 }, { "epoch": 1.36383056640625e-05, "model_forward_time": 0.024311304092407227, "step": 8938 }, { "epoch": 1.36383056640625e-05, "step": 8938, "training_step_time": 0.10423874855041504 }, { "epoch": 1.363983154296875e-05, "model_forward_time": 0.024471521377563477, "step": 8939 }, { "epoch": 1.363983154296875e-05, "step": 8939, "training_step_time": 0.10640621185302734 }, { "epoch": 1.3641357421875e-05, "grad_norm": 0.35701218247413635, "learning_rate": 8.410663560133784e-05, "loss": 0.0357, "step": 8940 }, { "epoch": 1.3641357421875e-05, "model_forward_time": 0.024985551834106445, "step": 8940 }, { "epoch": 1.3641357421875e-05, "step": 8940, "training_step_time": 0.10749626159667969 }, { "epoch": 1.364288330078125e-05, "model_forward_time": 0.02533245086669922, "step": 8941 }, { "epoch": 1.364288330078125e-05, "step": 8941, "training_step_time": 0.10916972160339355 }, { "epoch": 1.36444091796875e-05, "model_forward_time": 0.02486419677734375, "step": 8942 }, { "epoch": 1.36444091796875e-05, "step": 8942, "training_step_time": 0.10698938369750977 }, { "epoch": 1.364593505859375e-05, "model_forward_time": 0.02512335777282715, "step": 8943 }, { "epoch": 1.364593505859375e-05, "step": 8943, "training_step_time": 0.10818767547607422 }, { "epoch": 1.36474609375e-05, "model_forward_time": 0.024863719940185547, "step": 8944 }, { "epoch": 1.36474609375e-05, "step": 8944, "training_step_time": 0.10978889465332031 }, { "epoch": 1.364898681640625e-05, "model_forward_time": 0.027097463607788086, "step": 8945 }, { "epoch": 1.364898681640625e-05, "step": 8945, "training_step_time": 0.11330389976501465 }, { "epoch": 1.36505126953125e-05, "model_forward_time": 0.02515244483947754, "step": 8946 }, { "epoch": 1.36505126953125e-05, "step": 8946, "training_step_time": 0.13869380950927734 }, { "epoch": 1.365203857421875e-05, "model_forward_time": 0.02506566047668457, "step": 8947 }, { "epoch": 1.365203857421875e-05, "step": 8947, "training_step_time": 0.11571550369262695 }, { "epoch": 1.3653564453125e-05, "model_forward_time": 0.025188922882080078, "step": 8948 }, { "epoch": 1.3653564453125e-05, "step": 8948, "training_step_time": 0.1754453182220459 }, { "epoch": 1.365509033203125e-05, "model_forward_time": 0.024492263793945312, "step": 8949 }, { "epoch": 1.365509033203125e-05, "step": 8949, "training_step_time": 0.18662500381469727 }, { "epoch": 1.36566162109375e-05, "grad_norm": 0.4007699191570282, "learning_rate": 8.406631275217156e-05, "loss": 0.0303, "step": 8950 }, { "epoch": 1.36566162109375e-05, "model_forward_time": 0.02387237548828125, "step": 8950 }, { "epoch": 1.36566162109375e-05, "step": 8950, "training_step_time": 0.11038994789123535 }, { "epoch": 1.365814208984375e-05, "model_forward_time": 0.024508953094482422, "step": 8951 }, { "epoch": 1.365814208984375e-05, "step": 8951, "training_step_time": 0.12679624557495117 }, { "epoch": 1.365966796875e-05, "model_forward_time": 0.02622532844543457, "step": 8952 }, { "epoch": 1.365966796875e-05, "step": 8952, "training_step_time": 0.11607217788696289 }, { "epoch": 1.366119384765625e-05, "model_forward_time": 0.025133848190307617, "step": 8953 }, { "epoch": 1.366119384765625e-05, "step": 8953, "training_step_time": 0.22102761268615723 }, { "epoch": 1.36627197265625e-05, "model_forward_time": 0.02434563636779785, "step": 8954 }, { "epoch": 1.36627197265625e-05, "step": 8954, "training_step_time": 0.23151087760925293 }, { "epoch": 1.366424560546875e-05, "model_forward_time": 0.024414539337158203, "step": 8955 }, { "epoch": 1.366424560546875e-05, "step": 8955, "training_step_time": 0.19629907608032227 }, { "epoch": 1.3665771484375e-05, "model_forward_time": 0.024863243103027344, "step": 8956 }, { "epoch": 1.3665771484375e-05, "step": 8956, "training_step_time": 0.18976831436157227 }, { "epoch": 1.366729736328125e-05, "model_forward_time": 0.0267641544342041, "step": 8957 }, { "epoch": 1.366729736328125e-05, "step": 8957, "training_step_time": 0.18341946601867676 }, { "epoch": 1.36688232421875e-05, "model_forward_time": 0.024255990982055664, "step": 8958 }, { "epoch": 1.36688232421875e-05, "step": 8958, "training_step_time": 0.16764545440673828 }, { "epoch": 1.367034912109375e-05, "model_forward_time": 0.024435997009277344, "step": 8959 }, { "epoch": 1.367034912109375e-05, "step": 8959, "training_step_time": 0.1043086051940918 }, { "epoch": 1.3671875e-05, "grad_norm": 0.30645623803138733, "learning_rate": 8.402594850922305e-05, "loss": 0.0256, "step": 8960 }, { "epoch": 1.3671875e-05, "model_forward_time": 0.024538040161132812, "step": 8960 }, { "epoch": 1.3671875e-05, "step": 8960, "training_step_time": 0.10386252403259277 }, { "epoch": 1.367340087890625e-05, "model_forward_time": 0.025150775909423828, "step": 8961 }, { "epoch": 1.367340087890625e-05, "step": 8961, "training_step_time": 0.11051154136657715 }, { "epoch": 1.36749267578125e-05, "model_forward_time": 0.02490997314453125, "step": 8962 }, { "epoch": 1.36749267578125e-05, "step": 8962, "training_step_time": 0.11093902587890625 }, { "epoch": 1.367645263671875e-05, "model_forward_time": 0.025046586990356445, "step": 8963 }, { "epoch": 1.367645263671875e-05, "step": 8963, "training_step_time": 0.10650134086608887 }, { "epoch": 1.3677978515625e-05, "model_forward_time": 0.02500462532043457, "step": 8964 }, { "epoch": 1.3677978515625e-05, "step": 8964, "training_step_time": 0.109222412109375 }, { "epoch": 1.367950439453125e-05, "model_forward_time": 0.025159120559692383, "step": 8965 }, { "epoch": 1.367950439453125e-05, "step": 8965, "training_step_time": 0.10845494270324707 }, { "epoch": 1.36810302734375e-05, "model_forward_time": 0.024912595748901367, "step": 8966 }, { "epoch": 1.36810302734375e-05, "step": 8966, "training_step_time": 0.1050107479095459 }, { "epoch": 1.368255615234375e-05, "model_forward_time": 0.025229454040527344, "step": 8967 }, { "epoch": 1.368255615234375e-05, "step": 8967, "training_step_time": 0.10692524909973145 }, { "epoch": 1.368408203125e-05, "model_forward_time": 0.02490973472595215, "step": 8968 }, { "epoch": 1.368408203125e-05, "step": 8968, "training_step_time": 0.10834336280822754 }, { "epoch": 1.368560791015625e-05, "model_forward_time": 0.024918079376220703, "step": 8969 }, { "epoch": 1.368560791015625e-05, "step": 8969, "training_step_time": 0.1759345531463623 }, { "epoch": 1.36871337890625e-05, "grad_norm": 0.5372781753540039, "learning_rate": 8.398554292153866e-05, "loss": 0.024, "step": 8970 }, { "epoch": 1.36871337890625e-05, "model_forward_time": 0.0234677791595459, "step": 8970 }, { "epoch": 1.36871337890625e-05, "step": 8970, "training_step_time": 0.18744730949401855 }, { "epoch": 1.368865966796875e-05, "model_forward_time": 0.023607492446899414, "step": 8971 }, { "epoch": 1.368865966796875e-05, "step": 8971, "training_step_time": 0.1913130283355713 }, { "epoch": 1.3690185546875e-05, "model_forward_time": 0.02420830726623535, "step": 8972 }, { "epoch": 1.3690185546875e-05, "step": 8972, "training_step_time": 0.17948675155639648 }, { "epoch": 1.369171142578125e-05, "model_forward_time": 0.024532556533813477, "step": 8973 }, { "epoch": 1.369171142578125e-05, "step": 8973, "training_step_time": 0.1673755645751953 }, { "epoch": 1.36932373046875e-05, "model_forward_time": 0.024538516998291016, "step": 8974 }, { "epoch": 1.36932373046875e-05, "step": 8974, "training_step_time": 0.17450714111328125 }, { "epoch": 1.369476318359375e-05, "model_forward_time": 0.024251937866210938, "step": 8975 }, { "epoch": 1.369476318359375e-05, "step": 8975, "training_step_time": 0.17681360244750977 }, { "epoch": 1.36962890625e-05, "model_forward_time": 0.02441859245300293, "step": 8976 }, { "epoch": 1.36962890625e-05, "step": 8976, "training_step_time": 0.1400141716003418 }, { "epoch": 1.369781494140625e-05, "model_forward_time": 0.028722763061523438, "step": 8977 }, { "epoch": 1.369781494140625e-05, "step": 8977, "training_step_time": 0.1135709285736084 }, { "epoch": 1.36993408203125e-05, "model_forward_time": 0.026620864868164062, "step": 8978 }, { "epoch": 1.36993408203125e-05, "step": 8978, "training_step_time": 0.10544180870056152 }, { "epoch": 1.370086669921875e-05, "model_forward_time": 0.02524733543395996, "step": 8979 }, { "epoch": 1.370086669921875e-05, "step": 8979, "training_step_time": 0.10431694984436035 }, { "epoch": 1.3702392578125e-05, "grad_norm": 0.24594636261463165, "learning_rate": 8.394509603821499e-05, "loss": 0.0368, "step": 8980 }, { "epoch": 1.3702392578125e-05, "model_forward_time": 0.02498602867126465, "step": 8980 }, { "epoch": 1.3702392578125e-05, "step": 8980, "training_step_time": 0.10901856422424316 }, { "epoch": 1.370391845703125e-05, "model_forward_time": 0.025427579879760742, "step": 8981 }, { "epoch": 1.370391845703125e-05, "step": 8981, "training_step_time": 0.10844302177429199 }, { "epoch": 1.37054443359375e-05, "model_forward_time": 0.025048017501831055, "step": 8982 }, { "epoch": 1.37054443359375e-05, "step": 8982, "training_step_time": 0.10976195335388184 }, { "epoch": 1.370697021484375e-05, "model_forward_time": 0.025348901748657227, "step": 8983 }, { "epoch": 1.370697021484375e-05, "step": 8983, "training_step_time": 0.10935449600219727 }, { "epoch": 1.370849609375e-05, "model_forward_time": 0.024932861328125, "step": 8984 }, { "epoch": 1.370849609375e-05, "step": 8984, "training_step_time": 0.10789275169372559 }, { "epoch": 1.371002197265625e-05, "model_forward_time": 0.025131702423095703, "step": 8985 }, { "epoch": 1.371002197265625e-05, "step": 8985, "training_step_time": 0.11817669868469238 }, { "epoch": 1.37115478515625e-05, "model_forward_time": 0.024858474731445312, "step": 8986 }, { "epoch": 1.37115478515625e-05, "step": 8986, "training_step_time": 0.12210369110107422 }, { "epoch": 1.371307373046875e-05, "model_forward_time": 0.025006532669067383, "step": 8987 }, { "epoch": 1.371307373046875e-05, "step": 8987, "training_step_time": 0.1212015151977539 }, { "epoch": 1.3714599609375e-05, "model_forward_time": 0.02530956268310547, "step": 8988 }, { "epoch": 1.3714599609375e-05, "step": 8988, "training_step_time": 0.147416353225708 }, { "epoch": 1.371612548828125e-05, "model_forward_time": 0.02468132972717285, "step": 8989 }, { "epoch": 1.371612548828125e-05, "step": 8989, "training_step_time": 0.11413049697875977 }, { "epoch": 1.37176513671875e-05, "grad_norm": 0.7974529266357422, "learning_rate": 8.390460790839882e-05, "loss": 0.0594, "step": 8990 }, { "epoch": 1.37176513671875e-05, "model_forward_time": 0.026628732681274414, "step": 8990 }, { "epoch": 1.37176513671875e-05, "step": 8990, "training_step_time": 0.21802783012390137 }, { "epoch": 1.371917724609375e-05, "model_forward_time": 0.02472090721130371, "step": 8991 }, { "epoch": 1.371917724609375e-05, "step": 8991, "training_step_time": 0.13001418113708496 }, { "epoch": 1.3720703125e-05, "model_forward_time": 0.02873969078063965, "step": 8992 }, { "epoch": 1.3720703125e-05, "step": 8992, "training_step_time": 0.17467117309570312 }, { "epoch": 1.372222900390625e-05, "model_forward_time": 0.024420976638793945, "step": 8993 }, { "epoch": 1.372222900390625e-05, "step": 8993, "training_step_time": 0.16365504264831543 }, { "epoch": 1.37237548828125e-05, "model_forward_time": 0.024140119552612305, "step": 8994 }, { "epoch": 1.37237548828125e-05, "step": 8994, "training_step_time": 0.14179706573486328 }, { "epoch": 1.372528076171875e-05, "model_forward_time": 0.02414226531982422, "step": 8995 }, { "epoch": 1.372528076171875e-05, "step": 8995, "training_step_time": 0.18182706832885742 }, { "epoch": 1.3726806640625e-05, "model_forward_time": 0.024464130401611328, "step": 8996 }, { "epoch": 1.3726806640625e-05, "step": 8996, "training_step_time": 0.10683941841125488 }, { "epoch": 1.372833251953125e-05, "model_forward_time": 0.025491952896118164, "step": 8997 }, { "epoch": 1.372833251953125e-05, "step": 8997, "training_step_time": 0.10645198822021484 }, { "epoch": 1.37298583984375e-05, "model_forward_time": 0.025068998336791992, "step": 8998 }, { "epoch": 1.37298583984375e-05, "step": 8998, "training_step_time": 0.10979843139648438 }, { "epoch": 1.373138427734375e-05, "model_forward_time": 0.025546789169311523, "step": 8999 }, { "epoch": 1.373138427734375e-05, "step": 8999, "training_step_time": 0.11343121528625488 }, { "epoch": 1.373291015625e-05, "grad_norm": 0.5483483672142029, "learning_rate": 8.386407858128706e-05, "loss": 0.037, "step": 9000 }, { "epoch": 1.373291015625e-05, "model_forward_time": 0.0243685245513916, "step": 9000 }, { "epoch": 1.373291015625e-05, "step": 9000, "training_step_time": 0.1080172061920166 }, { "epoch": 1.373443603515625e-05, "model_forward_time": 0.023291826248168945, "step": 9001 }, { "epoch": 1.373443603515625e-05, "step": 9001, "training_step_time": 0.20213937759399414 }, { "epoch": 1.37359619140625e-05, "model_forward_time": 0.02447366714477539, "step": 9002 }, { "epoch": 1.37359619140625e-05, "step": 9002, "training_step_time": 0.18469715118408203 }, { "epoch": 1.373748779296875e-05, "model_forward_time": 0.024559736251831055, "step": 9003 }, { "epoch": 1.373748779296875e-05, "step": 9003, "training_step_time": 0.12102532386779785 }, { "epoch": 1.3739013671875e-05, "model_forward_time": 0.026051998138427734, "step": 9004 }, { "epoch": 1.3739013671875e-05, "step": 9004, "training_step_time": 0.10467028617858887 }, { "epoch": 1.374053955078125e-05, "model_forward_time": 0.0249025821685791, "step": 9005 }, { "epoch": 1.374053955078125e-05, "step": 9005, "training_step_time": 0.11005520820617676 }, { "epoch": 1.37420654296875e-05, "model_forward_time": 0.0251157283782959, "step": 9006 }, { "epoch": 1.37420654296875e-05, "step": 9006, "training_step_time": 0.11157417297363281 }, { "epoch": 1.374359130859375e-05, "model_forward_time": 0.025456666946411133, "step": 9007 }, { "epoch": 1.374359130859375e-05, "step": 9007, "training_step_time": 0.10640835762023926 }, { "epoch": 1.37451171875e-05, "model_forward_time": 0.025034427642822266, "step": 9008 }, { "epoch": 1.37451171875e-05, "step": 9008, "training_step_time": 0.10537266731262207 }, { "epoch": 1.374664306640625e-05, "model_forward_time": 0.025076627731323242, "step": 9009 }, { "epoch": 1.374664306640625e-05, "step": 9009, "training_step_time": 0.10428094863891602 }, { "epoch": 1.37481689453125e-05, "grad_norm": 0.5794000625610352, "learning_rate": 8.382350810612663e-05, "loss": 0.0313, "step": 9010 }, { "epoch": 1.37481689453125e-05, "model_forward_time": 0.0251922607421875, "step": 9010 }, { "epoch": 1.37481689453125e-05, "step": 9010, "training_step_time": 0.11228704452514648 }, { "epoch": 1.374969482421875e-05, "model_forward_time": 0.025099515914916992, "step": 9011 }, { "epoch": 1.374969482421875e-05, "step": 9011, "training_step_time": 0.10986495018005371 }, { "epoch": 1.3751220703125e-05, "model_forward_time": 0.024966001510620117, "step": 9012 }, { "epoch": 1.3751220703125e-05, "step": 9012, "training_step_time": 0.10871124267578125 }, { "epoch": 1.375274658203125e-05, "model_forward_time": 0.02519845962524414, "step": 9013 }, { "epoch": 1.375274658203125e-05, "step": 9013, "training_step_time": 0.11205339431762695 }, { "epoch": 1.37542724609375e-05, "model_forward_time": 0.025275707244873047, "step": 9014 }, { "epoch": 1.37542724609375e-05, "step": 9014, "training_step_time": 0.11888885498046875 }, { "epoch": 1.375579833984375e-05, "model_forward_time": 0.026001453399658203, "step": 9015 }, { "epoch": 1.375579833984375e-05, "step": 9015, "training_step_time": 0.10770583152770996 }, { "epoch": 1.375732421875e-05, "model_forward_time": 0.02490091323852539, "step": 9016 }, { "epoch": 1.375732421875e-05, "step": 9016, "training_step_time": 0.10969948768615723 }, { "epoch": 1.375885009765625e-05, "model_forward_time": 0.025013446807861328, "step": 9017 }, { "epoch": 1.375885009765625e-05, "step": 9017, "training_step_time": 0.11002993583679199 }, { "epoch": 1.37603759765625e-05, "model_forward_time": 0.025025367736816406, "step": 9018 }, { "epoch": 1.37603759765625e-05, "step": 9018, "training_step_time": 0.10908889770507812 }, { "epoch": 1.376190185546875e-05, "model_forward_time": 0.02509284019470215, "step": 9019 }, { "epoch": 1.376190185546875e-05, "step": 9019, "training_step_time": 0.10874104499816895 }, { "epoch": 1.3763427734375e-05, "grad_norm": 0.5082548260688782, "learning_rate": 8.378289653221452e-05, "loss": 0.035, "step": 9020 }, { "epoch": 1.3763427734375e-05, "model_forward_time": 0.025090932846069336, "step": 9020 }, { "epoch": 1.3763427734375e-05, "step": 9020, "training_step_time": 0.10783910751342773 }, { "epoch": 1.376495361328125e-05, "model_forward_time": 0.025123119354248047, "step": 9021 }, { "epoch": 1.376495361328125e-05, "step": 9021, "training_step_time": 0.1098184585571289 }, { "epoch": 1.37664794921875e-05, "model_forward_time": 0.027651548385620117, "step": 9022 }, { "epoch": 1.37664794921875e-05, "step": 9022, "training_step_time": 0.11696720123291016 }, { "epoch": 1.376800537109375e-05, "model_forward_time": 0.02509760856628418, "step": 9023 }, { "epoch": 1.376800537109375e-05, "step": 9023, "training_step_time": 0.11575770378112793 }, { "epoch": 1.376953125e-05, "model_forward_time": 0.0249483585357666, "step": 9024 }, { "epoch": 1.376953125e-05, "step": 9024, "training_step_time": 0.1095728874206543 }, { "epoch": 1.377105712890625e-05, "model_forward_time": 0.025665998458862305, "step": 9025 }, { "epoch": 1.377105712890625e-05, "step": 9025, "training_step_time": 0.1091766357421875 }, { "epoch": 1.37725830078125e-05, "model_forward_time": 0.024926185607910156, "step": 9026 }, { "epoch": 1.37725830078125e-05, "step": 9026, "training_step_time": 0.16724371910095215 }, { "epoch": 1.377410888671875e-05, "model_forward_time": 0.02477264404296875, "step": 9027 }, { "epoch": 1.377410888671875e-05, "step": 9027, "training_step_time": 0.16097402572631836 }, { "epoch": 1.3775634765625e-05, "model_forward_time": 0.02511882781982422, "step": 9028 }, { "epoch": 1.3775634765625e-05, "step": 9028, "training_step_time": 0.1185603141784668 }, { "epoch": 1.377716064453125e-05, "model_forward_time": 0.02625727653503418, "step": 9029 }, { "epoch": 1.377716064453125e-05, "step": 9029, "training_step_time": 0.17354798316955566 }, { "epoch": 1.37786865234375e-05, "grad_norm": 0.3936881721019745, "learning_rate": 8.37422439088976e-05, "loss": 0.0439, "step": 9030 }, { "epoch": 1.37786865234375e-05, "model_forward_time": 0.024529218673706055, "step": 9030 }, { "epoch": 1.37786865234375e-05, "step": 9030, "training_step_time": 0.1577596664428711 }, { "epoch": 1.378021240234375e-05, "model_forward_time": 0.024654150009155273, "step": 9031 }, { "epoch": 1.378021240234375e-05, "step": 9031, "training_step_time": 0.10467743873596191 }, { "epoch": 1.378173828125e-05, "model_forward_time": 0.025350093841552734, "step": 9032 }, { "epoch": 1.378173828125e-05, "step": 9032, "training_step_time": 0.10571956634521484 }, { "epoch": 1.378326416015625e-05, "model_forward_time": 0.025458097457885742, "step": 9033 }, { "epoch": 1.378326416015625e-05, "step": 9033, "training_step_time": 0.10661530494689941 }, { "epoch": 1.37847900390625e-05, "model_forward_time": 0.025179147720336914, "step": 9034 }, { "epoch": 1.37847900390625e-05, "step": 9034, "training_step_time": 0.10761809349060059 }, { "epoch": 1.378631591796875e-05, "model_forward_time": 0.02529621124267578, "step": 9035 }, { "epoch": 1.378631591796875e-05, "step": 9035, "training_step_time": 0.1101384162902832 }, { "epoch": 1.3787841796875e-05, "model_forward_time": 0.02881479263305664, "step": 9036 }, { "epoch": 1.3787841796875e-05, "step": 9036, "training_step_time": 0.115692138671875 }, { "epoch": 1.378936767578125e-05, "model_forward_time": 0.02627873420715332, "step": 9037 }, { "epoch": 1.378936767578125e-05, "step": 9037, "training_step_time": 0.10987401008605957 }, { "epoch": 1.37908935546875e-05, "model_forward_time": 0.026580333709716797, "step": 9038 }, { "epoch": 1.37908935546875e-05, "step": 9038, "training_step_time": 0.10758852958679199 }, { "epoch": 1.379241943359375e-05, "model_forward_time": 0.025876998901367188, "step": 9039 }, { "epoch": 1.379241943359375e-05, "step": 9039, "training_step_time": 0.10732293128967285 }, { "epoch": 1.37939453125e-05, "grad_norm": 0.414567768573761, "learning_rate": 8.370155028557265e-05, "loss": 0.04, "step": 9040 }, { "epoch": 1.37939453125e-05, "model_forward_time": 0.025490283966064453, "step": 9040 }, { "epoch": 1.37939453125e-05, "step": 9040, "training_step_time": 0.10637068748474121 }, { "epoch": 1.379547119140625e-05, "model_forward_time": 0.02532196044921875, "step": 9041 }, { "epoch": 1.379547119140625e-05, "step": 9041, "training_step_time": 0.15054082870483398 }, { "epoch": 1.37969970703125e-05, "model_forward_time": 0.02528524398803711, "step": 9042 }, { "epoch": 1.37969970703125e-05, "step": 9042, "training_step_time": 0.10934734344482422 }, { "epoch": 1.379852294921875e-05, "model_forward_time": 0.025161027908325195, "step": 9043 }, { "epoch": 1.379852294921875e-05, "step": 9043, "training_step_time": 0.10867977142333984 }, { "epoch": 1.3800048828125e-05, "model_forward_time": 0.02981734275817871, "step": 9044 }, { "epoch": 1.3800048828125e-05, "step": 9044, "training_step_time": 0.20467734336853027 }, { "epoch": 1.380157470703125e-05, "model_forward_time": 0.02487921714782715, "step": 9045 }, { "epoch": 1.380157470703125e-05, "step": 9045, "training_step_time": 0.17493224143981934 }, { "epoch": 1.38031005859375e-05, "model_forward_time": 0.024866342544555664, "step": 9046 }, { "epoch": 1.38031005859375e-05, "step": 9046, "training_step_time": 0.18550753593444824 }, { "epoch": 1.380462646484375e-05, "model_forward_time": 0.024600982666015625, "step": 9047 }, { "epoch": 1.380462646484375e-05, "step": 9047, "training_step_time": 0.1880788803100586 }, { "epoch": 1.380615234375e-05, "model_forward_time": 0.024859905242919922, "step": 9048 }, { "epoch": 1.380615234375e-05, "step": 9048, "training_step_time": 0.16760730743408203 }, { "epoch": 1.380767822265625e-05, "model_forward_time": 0.02560257911682129, "step": 9049 }, { "epoch": 1.380767822265625e-05, "step": 9049, "training_step_time": 0.10809445381164551 }, { "epoch": 1.38092041015625e-05, "grad_norm": 0.29646000266075134, "learning_rate": 8.366081571168625e-05, "loss": 0.0324, "step": 9050 }, { "epoch": 1.38092041015625e-05, "model_forward_time": 0.02561783790588379, "step": 9050 }, { "epoch": 1.38092041015625e-05, "step": 9050, "training_step_time": 0.10949993133544922 }, { "epoch": 1.381072998046875e-05, "model_forward_time": 0.025775671005249023, "step": 9051 }, { "epoch": 1.381072998046875e-05, "step": 9051, "training_step_time": 0.1080939769744873 }, { "epoch": 1.3812255859375e-05, "model_forward_time": 0.025256633758544922, "step": 9052 }, { "epoch": 1.3812255859375e-05, "step": 9052, "training_step_time": 0.10902214050292969 }, { "epoch": 1.381378173828125e-05, "model_forward_time": 0.02559494972229004, "step": 9053 }, { "epoch": 1.381378173828125e-05, "step": 9053, "training_step_time": 0.10812759399414062 }, { "epoch": 1.38153076171875e-05, "model_forward_time": 0.02541947364807129, "step": 9054 }, { "epoch": 1.38153076171875e-05, "step": 9054, "training_step_time": 0.17336249351501465 }, { "epoch": 1.381683349609375e-05, "model_forward_time": 0.02556443214416504, "step": 9055 }, { "epoch": 1.381683349609375e-05, "step": 9055, "training_step_time": 0.16871213912963867 }, { "epoch": 1.3818359375e-05, "model_forward_time": 0.024761676788330078, "step": 9056 }, { "epoch": 1.3818359375e-05, "step": 9056, "training_step_time": 0.14667391777038574 }, { "epoch": 1.381988525390625e-05, "model_forward_time": 0.024607419967651367, "step": 9057 }, { "epoch": 1.381988525390625e-05, "step": 9057, "training_step_time": 0.14133715629577637 }, { "epoch": 1.38214111328125e-05, "model_forward_time": 0.024799823760986328, "step": 9058 }, { "epoch": 1.38214111328125e-05, "step": 9058, "training_step_time": 0.1506960391998291 }, { "epoch": 1.382293701171875e-05, "model_forward_time": 0.025743961334228516, "step": 9059 }, { "epoch": 1.382293701171875e-05, "step": 9059, "training_step_time": 0.14345669746398926 }, { "epoch": 1.3824462890625e-05, "grad_norm": 0.2952696979045868, "learning_rate": 8.362004023673474e-05, "loss": 0.0431, "step": 9060 }, { "epoch": 1.3824462890625e-05, "model_forward_time": 0.02445816993713379, "step": 9060 }, { "epoch": 1.3824462890625e-05, "step": 9060, "training_step_time": 0.12764620780944824 }, { "epoch": 1.382598876953125e-05, "model_forward_time": 0.024692773818969727, "step": 9061 }, { "epoch": 1.382598876953125e-05, "step": 9061, "training_step_time": 0.1258397102355957 }, { "epoch": 1.38275146484375e-05, "model_forward_time": 0.025930166244506836, "step": 9062 }, { "epoch": 1.38275146484375e-05, "step": 9062, "training_step_time": 0.12148118019104004 }, { "epoch": 1.382904052734375e-05, "model_forward_time": 0.025757551193237305, "step": 9063 }, { "epoch": 1.382904052734375e-05, "step": 9063, "training_step_time": 0.1168966293334961 }, { "epoch": 1.383056640625e-05, "model_forward_time": 0.025410890579223633, "step": 9064 }, { "epoch": 1.383056640625e-05, "step": 9064, "training_step_time": 0.11532807350158691 }, { "epoch": 1.383209228515625e-05, "model_forward_time": 0.025713682174682617, "step": 9065 }, { "epoch": 1.383209228515625e-05, "step": 9065, "training_step_time": 0.11672115325927734 }, { "epoch": 1.38336181640625e-05, "model_forward_time": 0.025488615036010742, "step": 9066 }, { "epoch": 1.38336181640625e-05, "step": 9066, "training_step_time": 0.11321759223937988 }, { "epoch": 1.383514404296875e-05, "model_forward_time": 0.025453567504882812, "step": 9067 }, { "epoch": 1.383514404296875e-05, "step": 9067, "training_step_time": 0.10818338394165039 }, { "epoch": 1.3836669921875e-05, "model_forward_time": 0.02543354034423828, "step": 9068 }, { "epoch": 1.3836669921875e-05, "step": 9068, "training_step_time": 0.10792803764343262 }, { "epoch": 1.383819580078125e-05, "model_forward_time": 0.02462148666381836, "step": 9069 }, { "epoch": 1.383819580078125e-05, "step": 9069, "training_step_time": 0.1068258285522461 }, { "epoch": 1.38397216796875e-05, "grad_norm": 0.44075703620910645, "learning_rate": 8.357922391026418e-05, "loss": 0.0259, "step": 9070 }, { "epoch": 1.38397216796875e-05, "model_forward_time": 0.02488565444946289, "step": 9070 }, { "epoch": 1.38397216796875e-05, "step": 9070, "training_step_time": 0.11024069786071777 }, { "epoch": 1.384124755859375e-05, "model_forward_time": 0.0266876220703125, "step": 9071 }, { "epoch": 1.384124755859375e-05, "step": 9071, "training_step_time": 0.11408400535583496 }, { "epoch": 1.38427734375e-05, "model_forward_time": 0.026940584182739258, "step": 9072 }, { "epoch": 1.38427734375e-05, "step": 9072, "training_step_time": 0.11243510246276855 }, { "epoch": 1.384429931640625e-05, "model_forward_time": 0.02596449851989746, "step": 9073 }, { "epoch": 1.384429931640625e-05, "step": 9073, "training_step_time": 0.1717221736907959 }, { "epoch": 1.38458251953125e-05, "model_forward_time": 0.024836301803588867, "step": 9074 }, { "epoch": 1.38458251953125e-05, "step": 9074, "training_step_time": 0.16862154006958008 }, { "epoch": 1.384735107421875e-05, "model_forward_time": 0.024775266647338867, "step": 9075 }, { "epoch": 1.384735107421875e-05, "step": 9075, "training_step_time": 0.1068415641784668 }, { "epoch": 1.3848876953125e-05, "model_forward_time": 0.025150775909423828, "step": 9076 }, { "epoch": 1.3848876953125e-05, "step": 9076, "training_step_time": 0.10566973686218262 }, { "epoch": 1.385040283203125e-05, "model_forward_time": 0.025618553161621094, "step": 9077 }, { "epoch": 1.385040283203125e-05, "step": 9077, "training_step_time": 0.10737299919128418 }, { "epoch": 1.38519287109375e-05, "model_forward_time": 0.0252382755279541, "step": 9078 }, { "epoch": 1.38519287109375e-05, "step": 9078, "training_step_time": 0.11012148857116699 }, { "epoch": 1.385345458984375e-05, "model_forward_time": 0.02565598487854004, "step": 9079 }, { "epoch": 1.385345458984375e-05, "step": 9079, "training_step_time": 0.11248564720153809 }, { "epoch": 1.385498046875e-05, "grad_norm": 0.3671765923500061, "learning_rate": 8.353836678187027e-05, "loss": 0.0294, "step": 9080 }, { "epoch": 1.385498046875e-05, "model_forward_time": 0.025854825973510742, "step": 9080 }, { "epoch": 1.385498046875e-05, "step": 9080, "training_step_time": 0.10753488540649414 }, { "epoch": 1.385650634765625e-05, "model_forward_time": 0.0267331600189209, "step": 9081 }, { "epoch": 1.385650634765625e-05, "step": 9081, "training_step_time": 0.10996675491333008 }, { "epoch": 1.38580322265625e-05, "model_forward_time": 0.025294065475463867, "step": 9082 }, { "epoch": 1.38580322265625e-05, "step": 9082, "training_step_time": 0.10984110832214355 }, { "epoch": 1.385955810546875e-05, "model_forward_time": 0.025162935256958008, "step": 9083 }, { "epoch": 1.385955810546875e-05, "step": 9083, "training_step_time": 0.1065518856048584 }, { "epoch": 1.3861083984375e-05, "model_forward_time": 0.025650501251220703, "step": 9084 }, { "epoch": 1.3861083984375e-05, "step": 9084, "training_step_time": 0.1081686019897461 }, { "epoch": 1.386260986328125e-05, "model_forward_time": 0.02553391456604004, "step": 9085 }, { "epoch": 1.386260986328125e-05, "step": 9085, "training_step_time": 0.14331364631652832 }, { "epoch": 1.38641357421875e-05, "model_forward_time": 0.025303125381469727, "step": 9086 }, { "epoch": 1.38641357421875e-05, "step": 9086, "training_step_time": 0.1101534366607666 }, { "epoch": 1.386566162109375e-05, "model_forward_time": 0.026425838470458984, "step": 9087 }, { "epoch": 1.386566162109375e-05, "step": 9087, "training_step_time": 0.10869574546813965 }, { "epoch": 1.38671875e-05, "model_forward_time": 0.02586507797241211, "step": 9088 }, { "epoch": 1.38671875e-05, "step": 9088, "training_step_time": 0.1996002197265625 }, { "epoch": 1.386871337890625e-05, "model_forward_time": 0.025615453720092773, "step": 9089 }, { "epoch": 1.386871337890625e-05, "step": 9089, "training_step_time": 0.16156959533691406 }, { "epoch": 1.38702392578125e-05, "grad_norm": 0.40770334005355835, "learning_rate": 8.349746890119826e-05, "loss": 0.0403, "step": 9090 }, { "epoch": 1.38702392578125e-05, "model_forward_time": 0.024502992630004883, "step": 9090 }, { "epoch": 1.38702392578125e-05, "step": 9090, "training_step_time": 0.17998361587524414 }, { "epoch": 1.387176513671875e-05, "model_forward_time": 0.025048494338989258, "step": 9091 }, { "epoch": 1.387176513671875e-05, "step": 9091, "training_step_time": 0.16448688507080078 }, { "epoch": 1.3873291015625e-05, "model_forward_time": 0.02584075927734375, "step": 9092 }, { "epoch": 1.3873291015625e-05, "step": 9092, "training_step_time": 0.20310330390930176 }, { "epoch": 1.387481689453125e-05, "model_forward_time": 0.02554035186767578, "step": 9093 }, { "epoch": 1.387481689453125e-05, "step": 9093, "training_step_time": 0.10418057441711426 }, { "epoch": 1.38763427734375e-05, "model_forward_time": 0.025744915008544922, "step": 9094 }, { "epoch": 1.38763427734375e-05, "step": 9094, "training_step_time": 0.10464262962341309 }, { "epoch": 1.387786865234375e-05, "model_forward_time": 0.026983022689819336, "step": 9095 }, { "epoch": 1.387786865234375e-05, "step": 9095, "training_step_time": 0.10904455184936523 }, { "epoch": 1.387939453125e-05, "model_forward_time": 0.025606870651245117, "step": 9096 }, { "epoch": 1.387939453125e-05, "step": 9096, "training_step_time": 0.10742735862731934 }, { "epoch": 1.388092041015625e-05, "model_forward_time": 0.025597333908081055, "step": 9097 }, { "epoch": 1.388092041015625e-05, "step": 9097, "training_step_time": 0.10663414001464844 }, { "epoch": 1.38824462890625e-05, "model_forward_time": 0.025463104248046875, "step": 9098 }, { "epoch": 1.38824462890625e-05, "step": 9098, "training_step_time": 0.10827159881591797 }, { "epoch": 1.388397216796875e-05, "model_forward_time": 0.025330543518066406, "step": 9099 }, { "epoch": 1.388397216796875e-05, "step": 9099, "training_step_time": 0.10693573951721191 }, { "epoch": 1.3885498046875e-05, "grad_norm": 0.626854658126831, "learning_rate": 8.345653031794292e-05, "loss": 0.0435, "step": 9100 }, { "epoch": 1.3885498046875e-05, "model_forward_time": 0.025407075881958008, "step": 9100 }, { "epoch": 1.3885498046875e-05, "step": 9100, "training_step_time": 0.1093299388885498 }, { "epoch": 1.388702392578125e-05, "model_forward_time": 0.02592754364013672, "step": 9101 }, { "epoch": 1.388702392578125e-05, "step": 9101, "training_step_time": 0.10992121696472168 }, { "epoch": 1.38885498046875e-05, "model_forward_time": 0.025316953659057617, "step": 9102 }, { "epoch": 1.38885498046875e-05, "step": 9102, "training_step_time": 0.11553335189819336 }, { "epoch": 1.389007568359375e-05, "model_forward_time": 0.025146007537841797, "step": 9103 }, { "epoch": 1.389007568359375e-05, "step": 9103, "training_step_time": 0.12148523330688477 }, { "epoch": 1.38916015625e-05, "model_forward_time": 0.02622389793395996, "step": 9104 }, { "epoch": 1.38916015625e-05, "step": 9104, "training_step_time": 0.11942172050476074 }, { "epoch": 1.389312744140625e-05, "model_forward_time": 0.025426387786865234, "step": 9105 }, { "epoch": 1.389312744140625e-05, "step": 9105, "training_step_time": 0.11870241165161133 }, { "epoch": 1.38946533203125e-05, "model_forward_time": 0.025638580322265625, "step": 9106 }, { "epoch": 1.38946533203125e-05, "step": 9106, "training_step_time": 0.11709070205688477 }, { "epoch": 1.389617919921875e-05, "model_forward_time": 0.025388002395629883, "step": 9107 }, { "epoch": 1.389617919921875e-05, "step": 9107, "training_step_time": 0.11936521530151367 }, { "epoch": 1.3897705078125e-05, "model_forward_time": 0.02653026580810547, "step": 9108 }, { "epoch": 1.3897705078125e-05, "step": 9108, "training_step_time": 0.11499786376953125 }, { "epoch": 1.389923095703125e-05, "model_forward_time": 0.026556968688964844, "step": 9109 }, { "epoch": 1.389923095703125e-05, "step": 9109, "training_step_time": 0.1176598072052002 }, { "epoch": 1.39007568359375e-05, "grad_norm": 0.405610591173172, "learning_rate": 8.34155510818485e-05, "loss": 0.0362, "step": 9110 }, { "epoch": 1.39007568359375e-05, "model_forward_time": 0.02557063102722168, "step": 9110 }, { "epoch": 1.39007568359375e-05, "step": 9110, "training_step_time": 0.10902833938598633 }, { "epoch": 1.390228271484375e-05, "model_forward_time": 0.02564859390258789, "step": 9111 }, { "epoch": 1.390228271484375e-05, "step": 9111, "training_step_time": 0.10829710960388184 }, { "epoch": 1.390380859375e-05, "model_forward_time": 0.025366783142089844, "step": 9112 }, { "epoch": 1.390380859375e-05, "step": 9112, "training_step_time": 0.10773205757141113 }, { "epoch": 1.390533447265625e-05, "model_forward_time": 0.025246143341064453, "step": 9113 }, { "epoch": 1.390533447265625e-05, "step": 9113, "training_step_time": 0.10864996910095215 }, { "epoch": 1.39068603515625e-05, "model_forward_time": 0.025406837463378906, "step": 9114 }, { "epoch": 1.39068603515625e-05, "step": 9114, "training_step_time": 0.11008143424987793 }, { "epoch": 1.390838623046875e-05, "model_forward_time": 0.026132822036743164, "step": 9115 }, { "epoch": 1.390838623046875e-05, "step": 9115, "training_step_time": 0.10822796821594238 }, { "epoch": 1.3909912109375e-05, "model_forward_time": 0.02559804916381836, "step": 9116 }, { "epoch": 1.3909912109375e-05, "step": 9116, "training_step_time": 0.10969376564025879 }, { "epoch": 1.391143798828125e-05, "model_forward_time": 0.02891993522644043, "step": 9117 }, { "epoch": 1.391143798828125e-05, "step": 9117, "training_step_time": 0.11565017700195312 }, { "epoch": 1.39129638671875e-05, "model_forward_time": 0.026072263717651367, "step": 9118 }, { "epoch": 1.39129638671875e-05, "step": 9118, "training_step_time": 0.10712265968322754 }, { "epoch": 1.391448974609375e-05, "model_forward_time": 0.02549600601196289, "step": 9119 }, { "epoch": 1.391448974609375e-05, "step": 9119, "training_step_time": 0.10980057716369629 }, { "epoch": 1.3916015625e-05, "grad_norm": 0.6752818822860718, "learning_rate": 8.337453124270863e-05, "loss": 0.0364, "step": 9120 }, { "epoch": 1.3916015625e-05, "model_forward_time": 0.025489330291748047, "step": 9120 }, { "epoch": 1.3916015625e-05, "step": 9120, "training_step_time": 0.21808648109436035 }, { "epoch": 1.391754150390625e-05, "model_forward_time": 0.02497124671936035, "step": 9121 }, { "epoch": 1.391754150390625e-05, "step": 9121, "training_step_time": 0.11492919921875 }, { "epoch": 1.39190673828125e-05, "model_forward_time": 0.02472519874572754, "step": 9122 }, { "epoch": 1.39190673828125e-05, "step": 9122, "training_step_time": 0.10494709014892578 }, { "epoch": 1.392059326171875e-05, "model_forward_time": 0.025079727172851562, "step": 9123 }, { "epoch": 1.392059326171875e-05, "step": 9123, "training_step_time": 0.10689520835876465 }, { "epoch": 1.3922119140625e-05, "model_forward_time": 0.02539205551147461, "step": 9124 }, { "epoch": 1.3922119140625e-05, "step": 9124, "training_step_time": 0.11005568504333496 }, { "epoch": 1.392364501953125e-05, "model_forward_time": 0.026664257049560547, "step": 9125 }, { "epoch": 1.392364501953125e-05, "step": 9125, "training_step_time": 0.10702753067016602 }, { "epoch": 1.39251708984375e-05, "model_forward_time": 0.026546001434326172, "step": 9126 }, { "epoch": 1.39251708984375e-05, "step": 9126, "training_step_time": 0.11172747611999512 }, { "epoch": 1.392669677734375e-05, "model_forward_time": 0.026352405548095703, "step": 9127 }, { "epoch": 1.392669677734375e-05, "step": 9127, "training_step_time": 0.10736536979675293 }, { "epoch": 1.392822265625e-05, "model_forward_time": 0.026894330978393555, "step": 9128 }, { "epoch": 1.392822265625e-05, "step": 9128, "training_step_time": 0.10813117027282715 }, { "epoch": 1.392974853515625e-05, "model_forward_time": 0.02512502670288086, "step": 9129 }, { "epoch": 1.392974853515625e-05, "step": 9129, "training_step_time": 0.10827517509460449 }, { "epoch": 1.39312744140625e-05, "grad_norm": 0.6920375823974609, "learning_rate": 8.33334708503663e-05, "loss": 0.0353, "step": 9130 }, { "epoch": 1.39312744140625e-05, "model_forward_time": 0.024845123291015625, "step": 9130 }, { "epoch": 1.39312744140625e-05, "step": 9130, "training_step_time": 0.10716629028320312 }, { "epoch": 1.393280029296875e-05, "model_forward_time": 0.025761127471923828, "step": 9131 }, { "epoch": 1.393280029296875e-05, "step": 9131, "training_step_time": 0.10730528831481934 }, { "epoch": 1.3934326171875e-05, "model_forward_time": 0.024785280227661133, "step": 9132 }, { "epoch": 1.3934326171875e-05, "step": 9132, "training_step_time": 0.12429213523864746 }, { "epoch": 1.393585205078125e-05, "model_forward_time": 0.025729894638061523, "step": 9133 }, { "epoch": 1.393585205078125e-05, "step": 9133, "training_step_time": 0.11242985725402832 }, { "epoch": 1.39373779296875e-05, "model_forward_time": 0.02620387077331543, "step": 9134 }, { "epoch": 1.39373779296875e-05, "step": 9134, "training_step_time": 0.18211984634399414 }, { "epoch": 1.393890380859375e-05, "model_forward_time": 0.025114774703979492, "step": 9135 }, { "epoch": 1.393890380859375e-05, "step": 9135, "training_step_time": 0.1727735996246338 }, { "epoch": 1.39404296875e-05, "model_forward_time": 0.024020671844482422, "step": 9136 }, { "epoch": 1.39404296875e-05, "step": 9136, "training_step_time": 0.20041346549987793 }, { "epoch": 1.394195556640625e-05, "model_forward_time": 0.024845600128173828, "step": 9137 }, { "epoch": 1.394195556640625e-05, "step": 9137, "training_step_time": 0.14423918724060059 }, { "epoch": 1.39434814453125e-05, "model_forward_time": 0.02481365203857422, "step": 9138 }, { "epoch": 1.39434814453125e-05, "step": 9138, "training_step_time": 0.22405028343200684 }, { "epoch": 1.394500732421875e-05, "model_forward_time": 0.02446913719177246, "step": 9139 }, { "epoch": 1.394500732421875e-05, "step": 9139, "training_step_time": 0.11087870597839355 }, { "epoch": 1.3946533203125e-05, "grad_norm": 0.41101816296577454, "learning_rate": 8.329236995471373e-05, "loss": 0.026, "step": 9140 }, { "epoch": 1.3946533203125e-05, "model_forward_time": 0.025381088256835938, "step": 9140 }, { "epoch": 1.3946533203125e-05, "step": 9140, "training_step_time": 0.10801339149475098 }, { "epoch": 1.394805908203125e-05, "model_forward_time": 0.0258638858795166, "step": 9141 }, { "epoch": 1.394805908203125e-05, "step": 9141, "training_step_time": 0.10876870155334473 }, { "epoch": 1.39495849609375e-05, "model_forward_time": 0.025592327117919922, "step": 9142 }, { "epoch": 1.39495849609375e-05, "step": 9142, "training_step_time": 0.10953545570373535 }, { "epoch": 1.395111083984375e-05, "model_forward_time": 0.024933338165283203, "step": 9143 }, { "epoch": 1.395111083984375e-05, "step": 9143, "training_step_time": 0.1128995418548584 }, { "epoch": 1.395263671875e-05, "model_forward_time": 0.025464296340942383, "step": 9144 }, { "epoch": 1.395263671875e-05, "step": 9144, "training_step_time": 0.11025452613830566 }, { "epoch": 1.395416259765625e-05, "model_forward_time": 0.025427579879760742, "step": 9145 }, { "epoch": 1.395416259765625e-05, "step": 9145, "training_step_time": 0.11137962341308594 }, { "epoch": 1.39556884765625e-05, "model_forward_time": 0.025498628616333008, "step": 9146 }, { "epoch": 1.39556884765625e-05, "step": 9146, "training_step_time": 0.1098165512084961 }, { "epoch": 1.395721435546875e-05, "model_forward_time": 0.025551557540893555, "step": 9147 }, { "epoch": 1.395721435546875e-05, "step": 9147, "training_step_time": 0.11008858680725098 }, { "epoch": 1.3958740234375e-05, "model_forward_time": 0.02558588981628418, "step": 9148 }, { "epoch": 1.3958740234375e-05, "step": 9148, "training_step_time": 0.10902762413024902 }, { "epoch": 1.396026611328125e-05, "model_forward_time": 0.02511119842529297, "step": 9149 }, { "epoch": 1.396026611328125e-05, "step": 9149, "training_step_time": 0.10747361183166504 }, { "epoch": 1.39617919921875e-05, "grad_norm": 0.3406887948513031, "learning_rate": 8.32512286056924e-05, "loss": 0.0415, "step": 9150 }, { "epoch": 1.39617919921875e-05, "model_forward_time": 0.026964902877807617, "step": 9150 }, { "epoch": 1.39617919921875e-05, "step": 9150, "training_step_time": 0.11081171035766602 }, { "epoch": 1.396331787109375e-05, "model_forward_time": 0.026320219039916992, "step": 9151 }, { "epoch": 1.396331787109375e-05, "step": 9151, "training_step_time": 0.11258196830749512 }, { "epoch": 1.396484375e-05, "model_forward_time": 0.025126218795776367, "step": 9152 }, { "epoch": 1.396484375e-05, "step": 9152, "training_step_time": 0.10746884346008301 }, { "epoch": 1.396636962890625e-05, "model_forward_time": 0.025794267654418945, "step": 9153 }, { "epoch": 1.396636962890625e-05, "step": 9153, "training_step_time": 0.10755252838134766 }, { "epoch": 1.39678955078125e-05, "model_forward_time": 0.025263309478759766, "step": 9154 }, { "epoch": 1.39678955078125e-05, "step": 9154, "training_step_time": 0.11017203330993652 }, { "epoch": 1.396942138671875e-05, "model_forward_time": 0.025174379348754883, "step": 9155 }, { "epoch": 1.396942138671875e-05, "step": 9155, "training_step_time": 0.10856223106384277 }, { "epoch": 1.3970947265625e-05, "model_forward_time": 0.025234460830688477, "step": 9156 }, { "epoch": 1.3970947265625e-05, "step": 9156, "training_step_time": 0.10779070854187012 }, { "epoch": 1.397247314453125e-05, "model_forward_time": 0.024880409240722656, "step": 9157 }, { "epoch": 1.397247314453125e-05, "step": 9157, "training_step_time": 0.10769534111022949 }, { "epoch": 1.39739990234375e-05, "model_forward_time": 0.024944782257080078, "step": 9158 }, { "epoch": 1.39739990234375e-05, "step": 9158, "training_step_time": 0.11530113220214844 }, { "epoch": 1.397552490234375e-05, "model_forward_time": 0.02593851089477539, "step": 9159 }, { "epoch": 1.397552490234375e-05, "step": 9159, "training_step_time": 0.10800385475158691 }, { "epoch": 1.397705078125e-05, "grad_norm": 0.5496638417243958, "learning_rate": 8.321004685329296e-05, "loss": 0.0275, "step": 9160 }, { "epoch": 1.397705078125e-05, "model_forward_time": 0.02482914924621582, "step": 9160 }, { "epoch": 1.397705078125e-05, "step": 9160, "training_step_time": 0.11044430732727051 }, { "epoch": 1.397857666015625e-05, "model_forward_time": 0.0247955322265625, "step": 9161 }, { "epoch": 1.397857666015625e-05, "step": 9161, "training_step_time": 0.10543060302734375 }, { "epoch": 1.39801025390625e-05, "model_forward_time": 0.02610611915588379, "step": 9162 }, { "epoch": 1.39801025390625e-05, "step": 9162, "training_step_time": 0.10873651504516602 }, { "epoch": 1.398162841796875e-05, "model_forward_time": 0.02532482147216797, "step": 9163 }, { "epoch": 1.398162841796875e-05, "step": 9163, "training_step_time": 0.1094520092010498 }, { "epoch": 1.3983154296875e-05, "model_forward_time": 0.024999618530273438, "step": 9164 }, { "epoch": 1.3983154296875e-05, "step": 9164, "training_step_time": 0.12070369720458984 }, { "epoch": 1.398468017578125e-05, "model_forward_time": 0.025365114212036133, "step": 9165 }, { "epoch": 1.398468017578125e-05, "step": 9165, "training_step_time": 0.1388697624206543 }, { "epoch": 1.39862060546875e-05, "model_forward_time": 0.025928974151611328, "step": 9166 }, { "epoch": 1.39862060546875e-05, "step": 9166, "training_step_time": 0.1851944923400879 }, { "epoch": 1.398773193359375e-05, "model_forward_time": 0.024860858917236328, "step": 9167 }, { "epoch": 1.398773193359375e-05, "step": 9167, "training_step_time": 0.13228583335876465 }, { "epoch": 1.39892578125e-05, "model_forward_time": 0.024890661239624023, "step": 9168 }, { "epoch": 1.39892578125e-05, "step": 9168, "training_step_time": 0.12163019180297852 }, { "epoch": 1.399078369140625e-05, "model_forward_time": 0.0252230167388916, "step": 9169 }, { "epoch": 1.399078369140625e-05, "step": 9169, "training_step_time": 0.11821198463439941 }, { "epoch": 1.39923095703125e-05, "grad_norm": 0.3352798521518707, "learning_rate": 8.316882474755507e-05, "loss": 0.0271, "step": 9170 }, { "epoch": 1.39923095703125e-05, "model_forward_time": 0.026145219802856445, "step": 9170 }, { "epoch": 1.39923095703125e-05, "step": 9170, "training_step_time": 0.1144561767578125 }, { "epoch": 1.399383544921875e-05, "model_forward_time": 0.025437355041503906, "step": 9171 }, { "epoch": 1.399383544921875e-05, "step": 9171, "training_step_time": 0.11360597610473633 }, { "epoch": 1.3995361328125e-05, "model_forward_time": 0.026082754135131836, "step": 9172 }, { "epoch": 1.3995361328125e-05, "step": 9172, "training_step_time": 0.11205363273620605 }, { "epoch": 1.399688720703125e-05, "model_forward_time": 0.026355981826782227, "step": 9173 }, { "epoch": 1.399688720703125e-05, "step": 9173, "training_step_time": 0.11150479316711426 }, { "epoch": 1.39984130859375e-05, "model_forward_time": 0.025547027587890625, "step": 9174 }, { "epoch": 1.39984130859375e-05, "step": 9174, "training_step_time": 0.10934567451477051 }, { "epoch": 1.399993896484375e-05, "model_forward_time": 0.025652170181274414, "step": 9175 }, { "epoch": 1.399993896484375e-05, "step": 9175, "training_step_time": 0.11099910736083984 }, { "epoch": 1.400146484375e-05, "model_forward_time": 0.025305509567260742, "step": 9176 }, { "epoch": 1.400146484375e-05, "step": 9176, "training_step_time": 0.11101102828979492 }, { "epoch": 1.400299072265625e-05, "model_forward_time": 0.025686979293823242, "step": 9177 }, { "epoch": 1.400299072265625e-05, "step": 9177, "training_step_time": 0.10844206809997559 }, { "epoch": 1.40045166015625e-05, "model_forward_time": 0.029117345809936523, "step": 9178 }, { "epoch": 1.40045166015625e-05, "step": 9178, "training_step_time": 0.17886686325073242 }, { "epoch": 1.400604248046875e-05, "model_forward_time": 0.024958372116088867, "step": 9179 }, { "epoch": 1.400604248046875e-05, "step": 9179, "training_step_time": 0.11199951171875 }, { "epoch": 1.4007568359375e-05, "grad_norm": 0.4306095540523529, "learning_rate": 8.31275623385675e-05, "loss": 0.0285, "step": 9180 }, { "epoch": 1.4007568359375e-05, "model_forward_time": 0.024013280868530273, "step": 9180 }, { "epoch": 1.4007568359375e-05, "step": 9180, "training_step_time": 0.17786574363708496 }, { "epoch": 1.400909423828125e-05, "model_forward_time": 0.025191307067871094, "step": 9181 }, { "epoch": 1.400909423828125e-05, "step": 9181, "training_step_time": 0.18233585357666016 }, { "epoch": 1.40106201171875e-05, "model_forward_time": 0.02554178237915039, "step": 9182 }, { "epoch": 1.40106201171875e-05, "step": 9182, "training_step_time": 0.17758464813232422 }, { "epoch": 1.401214599609375e-05, "model_forward_time": 0.025226354598999023, "step": 9183 }, { "epoch": 1.401214599609375e-05, "step": 9183, "training_step_time": 0.14825940132141113 }, { "epoch": 1.4013671875e-05, "model_forward_time": 0.025041580200195312, "step": 9184 }, { "epoch": 1.4013671875e-05, "step": 9184, "training_step_time": 0.17253994941711426 }, { "epoch": 1.401519775390625e-05, "model_forward_time": 0.02605891227722168, "step": 9185 }, { "epoch": 1.401519775390625e-05, "step": 9185, "training_step_time": 0.16543936729431152 }, { "epoch": 1.40167236328125e-05, "model_forward_time": 0.024085521697998047, "step": 9186 }, { "epoch": 1.40167236328125e-05, "step": 9186, "training_step_time": 0.10842394828796387 }, { "epoch": 1.401824951171875e-05, "model_forward_time": 0.02480602264404297, "step": 9187 }, { "epoch": 1.401824951171875e-05, "step": 9187, "training_step_time": 0.10920119285583496 }, { "epoch": 1.4019775390625e-05, "model_forward_time": 0.025591611862182617, "step": 9188 }, { "epoch": 1.4019775390625e-05, "step": 9188, "training_step_time": 0.10972285270690918 }, { "epoch": 1.402130126953125e-05, "model_forward_time": 0.026254653930664062, "step": 9189 }, { "epoch": 1.402130126953125e-05, "step": 9189, "training_step_time": 0.11072182655334473 }, { "epoch": 1.40228271484375e-05, "grad_norm": 0.28492471575737, "learning_rate": 8.308625967646795e-05, "loss": 0.0214, "step": 9190 }, { "epoch": 1.40228271484375e-05, "model_forward_time": 0.027051925659179688, "step": 9190 }, { "epoch": 1.40228271484375e-05, "step": 9190, "training_step_time": 0.11478042602539062 }, { "epoch": 1.402435302734375e-05, "model_forward_time": 0.026002168655395508, "step": 9191 }, { "epoch": 1.402435302734375e-05, "step": 9191, "training_step_time": 0.11104536056518555 }, { "epoch": 1.402587890625e-05, "model_forward_time": 0.025592327117919922, "step": 9192 }, { "epoch": 1.402587890625e-05, "step": 9192, "training_step_time": 0.10969018936157227 }, { "epoch": 1.402740478515625e-05, "model_forward_time": 0.025146007537841797, "step": 9193 }, { "epoch": 1.402740478515625e-05, "step": 9193, "training_step_time": 0.1098945140838623 }, { "epoch": 1.40289306640625e-05, "model_forward_time": 0.025857210159301758, "step": 9194 }, { "epoch": 1.40289306640625e-05, "step": 9194, "training_step_time": 0.11042046546936035 }, { "epoch": 1.403045654296875e-05, "model_forward_time": 0.02577948570251465, "step": 9195 }, { "epoch": 1.403045654296875e-05, "step": 9195, "training_step_time": 0.11013197898864746 }, { "epoch": 1.4031982421875e-05, "model_forward_time": 0.02564406394958496, "step": 9196 }, { "epoch": 1.4031982421875e-05, "step": 9196, "training_step_time": 0.11328887939453125 }, { "epoch": 1.403350830078125e-05, "model_forward_time": 0.025844573974609375, "step": 9197 }, { "epoch": 1.403350830078125e-05, "step": 9197, "training_step_time": 0.10879087448120117 }, { "epoch": 1.40350341796875e-05, "model_forward_time": 0.0251462459564209, "step": 9198 }, { "epoch": 1.40350341796875e-05, "step": 9198, "training_step_time": 0.1086575984954834 }, { "epoch": 1.403656005859375e-05, "model_forward_time": 0.02576446533203125, "step": 9199 }, { "epoch": 1.403656005859375e-05, "step": 9199, "training_step_time": 0.11162900924682617 }, { "epoch": 1.40380859375e-05, "grad_norm": 0.32366159558296204, "learning_rate": 8.304491681144306e-05, "loss": 0.0342, "step": 9200 }, { "epoch": 1.40380859375e-05, "model_forward_time": 0.02669692039489746, "step": 9200 }, { "epoch": 1.40380859375e-05, "step": 9200, "training_step_time": 0.11670875549316406 }, { "epoch": 1.403961181640625e-05, "model_forward_time": 0.025883913040161133, "step": 9201 }, { "epoch": 1.403961181640625e-05, "step": 9201, "training_step_time": 0.11293911933898926 }, { "epoch": 1.40411376953125e-05, "model_forward_time": 0.0256350040435791, "step": 9202 }, { "epoch": 1.40411376953125e-05, "step": 9202, "training_step_time": 0.1109156608581543 }, { "epoch": 1.404266357421875e-05, "model_forward_time": 0.02554631233215332, "step": 9203 }, { "epoch": 1.404266357421875e-05, "step": 9203, "training_step_time": 0.11634659767150879 }, { "epoch": 1.4044189453125e-05, "model_forward_time": 0.025773048400878906, "step": 9204 }, { "epoch": 1.4044189453125e-05, "step": 9204, "training_step_time": 0.1144571304321289 }, { "epoch": 1.404571533203125e-05, "model_forward_time": 0.025533199310302734, "step": 9205 }, { "epoch": 1.404571533203125e-05, "step": 9205, "training_step_time": 0.12031698226928711 }, { "epoch": 1.40472412109375e-05, "model_forward_time": 0.02633833885192871, "step": 9206 }, { "epoch": 1.40472412109375e-05, "step": 9206, "training_step_time": 0.11600995063781738 }, { "epoch": 1.404876708984375e-05, "model_forward_time": 0.02688765525817871, "step": 9207 }, { "epoch": 1.404876708984375e-05, "step": 9207, "training_step_time": 0.1140131950378418 }, { "epoch": 1.405029296875e-05, "model_forward_time": 0.026072978973388672, "step": 9208 }, { "epoch": 1.405029296875e-05, "step": 9208, "training_step_time": 0.19865822792053223 }, { "epoch": 1.405181884765625e-05, "model_forward_time": 0.025192737579345703, "step": 9209 }, { "epoch": 1.405181884765625e-05, "step": 9209, "training_step_time": 0.12128019332885742 }, { "epoch": 1.40533447265625e-05, "grad_norm": 0.44884684681892395, "learning_rate": 8.300353379372834e-05, "loss": 0.0404, "step": 9210 }, { "epoch": 1.40533447265625e-05, "model_forward_time": 0.024949073791503906, "step": 9210 }, { "epoch": 1.40533447265625e-05, "step": 9210, "training_step_time": 0.11033391952514648 }, { "epoch": 1.405487060546875e-05, "model_forward_time": 0.025771617889404297, "step": 9211 }, { "epoch": 1.405487060546875e-05, "step": 9211, "training_step_time": 0.11068487167358398 }, { "epoch": 1.4056396484375e-05, "model_forward_time": 0.025538921356201172, "step": 9212 }, { "epoch": 1.4056396484375e-05, "step": 9212, "training_step_time": 0.1698305606842041 }, { "epoch": 1.405792236328125e-05, "model_forward_time": 0.025423049926757812, "step": 9213 }, { "epoch": 1.405792236328125e-05, "step": 9213, "training_step_time": 0.16509389877319336 }, { "epoch": 1.40594482421875e-05, "model_forward_time": 0.026980161666870117, "step": 9214 }, { "epoch": 1.40594482421875e-05, "step": 9214, "training_step_time": 0.11147308349609375 }, { "epoch": 1.406097412109375e-05, "model_forward_time": 0.025206804275512695, "step": 9215 }, { "epoch": 1.406097412109375e-05, "step": 9215, "training_step_time": 0.10766005516052246 }, { "epoch": 1.40625e-05, "model_forward_time": 0.02593231201171875, "step": 9216 }, { "epoch": 1.40625e-05, "step": 9216, "training_step_time": 0.11008024215698242 }, { "epoch": 1.406402587890625e-05, "model_forward_time": 0.026243925094604492, "step": 9217 }, { "epoch": 1.406402587890625e-05, "step": 9217, "training_step_time": 0.11051106452941895 }, { "epoch": 1.40655517578125e-05, "model_forward_time": 0.025405168533325195, "step": 9218 }, { "epoch": 1.40655517578125e-05, "step": 9218, "training_step_time": 0.11015629768371582 }, { "epoch": 1.406707763671875e-05, "model_forward_time": 0.02550363540649414, "step": 9219 }, { "epoch": 1.406707763671875e-05, "step": 9219, "training_step_time": 0.11301898956298828 }, { "epoch": 1.4068603515625e-05, "grad_norm": 0.4171225428581238, "learning_rate": 8.2962110673608e-05, "loss": 0.0387, "step": 9220 }, { "epoch": 1.4068603515625e-05, "model_forward_time": 0.02526092529296875, "step": 9220 }, { "epoch": 1.4068603515625e-05, "step": 9220, "training_step_time": 0.11416840553283691 }, { "epoch": 1.407012939453125e-05, "model_forward_time": 0.025185585021972656, "step": 9221 }, { "epoch": 1.407012939453125e-05, "step": 9221, "training_step_time": 0.11016988754272461 }, { "epoch": 1.40716552734375e-05, "model_forward_time": 0.025459766387939453, "step": 9222 }, { "epoch": 1.40716552734375e-05, "step": 9222, "training_step_time": 0.11258459091186523 }, { "epoch": 1.407318115234375e-05, "model_forward_time": 0.024973630905151367, "step": 9223 }, { "epoch": 1.407318115234375e-05, "step": 9223, "training_step_time": 0.11520266532897949 }, { "epoch": 1.407470703125e-05, "model_forward_time": 0.024943113327026367, "step": 9224 }, { "epoch": 1.407470703125e-05, "step": 9224, "training_step_time": 0.14126276969909668 }, { "epoch": 1.407623291015625e-05, "model_forward_time": 0.024735689163208008, "step": 9225 }, { "epoch": 1.407623291015625e-05, "step": 9225, "training_step_time": 0.18017315864562988 }, { "epoch": 1.40777587890625e-05, "model_forward_time": 0.025815963745117188, "step": 9226 }, { "epoch": 1.40777587890625e-05, "step": 9226, "training_step_time": 0.16603803634643555 }, { "epoch": 1.407928466796875e-05, "model_forward_time": 0.02499246597290039, "step": 9227 }, { "epoch": 1.407928466796875e-05, "step": 9227, "training_step_time": 0.14215469360351562 }, { "epoch": 1.4080810546875e-05, "model_forward_time": 0.024719953536987305, "step": 9228 }, { "epoch": 1.4080810546875e-05, "step": 9228, "training_step_time": 0.17731165885925293 }, { "epoch": 1.408233642578125e-05, "model_forward_time": 0.02483057975769043, "step": 9229 }, { "epoch": 1.408233642578125e-05, "step": 9229, "training_step_time": 0.17722320556640625 }, { "epoch": 1.40838623046875e-05, "grad_norm": 0.32784372568130493, "learning_rate": 8.292064750141509e-05, "loss": 0.0331, "step": 9230 }, { "epoch": 1.40838623046875e-05, "model_forward_time": 0.024706363677978516, "step": 9230 }, { "epoch": 1.40838623046875e-05, "step": 9230, "training_step_time": 0.1236112117767334 }, { "epoch": 1.408538818359375e-05, "model_forward_time": 0.024143457412719727, "step": 9231 }, { "epoch": 1.408538818359375e-05, "step": 9231, "training_step_time": 0.11300969123840332 }, { "epoch": 1.40869140625e-05, "model_forward_time": 0.02503824234008789, "step": 9232 }, { "epoch": 1.40869140625e-05, "step": 9232, "training_step_time": 0.11308169364929199 }, { "epoch": 1.408843994140625e-05, "model_forward_time": 0.02443528175354004, "step": 9233 }, { "epoch": 1.408843994140625e-05, "step": 9233, "training_step_time": 0.12836909294128418 }, { "epoch": 1.40899658203125e-05, "model_forward_time": 0.02539229393005371, "step": 9234 }, { "epoch": 1.40899658203125e-05, "step": 9234, "training_step_time": 0.1230771541595459 }, { "epoch": 1.409149169921875e-05, "model_forward_time": 0.02460312843322754, "step": 9235 }, { "epoch": 1.409149169921875e-05, "step": 9235, "training_step_time": 0.11778998374938965 }, { "epoch": 1.4093017578125e-05, "model_forward_time": 0.02858901023864746, "step": 9236 }, { "epoch": 1.4093017578125e-05, "step": 9236, "training_step_time": 0.1146688461303711 }, { "epoch": 1.409454345703125e-05, "model_forward_time": 0.025128602981567383, "step": 9237 }, { "epoch": 1.409454345703125e-05, "step": 9237, "training_step_time": 0.11722755432128906 }, { "epoch": 1.40960693359375e-05, "model_forward_time": 0.025204181671142578, "step": 9238 }, { "epoch": 1.40960693359375e-05, "step": 9238, "training_step_time": 0.11149072647094727 }, { "epoch": 1.409759521484375e-05, "model_forward_time": 0.02494192123413086, "step": 9239 }, { "epoch": 1.409759521484375e-05, "step": 9239, "training_step_time": 0.10841727256774902 }, { "epoch": 1.409912109375e-05, "grad_norm": 0.25738635659217834, "learning_rate": 8.287914432753123e-05, "loss": 0.0373, "step": 9240 }, { "epoch": 1.409912109375e-05, "model_forward_time": 0.02593207359313965, "step": 9240 }, { "epoch": 1.409912109375e-05, "step": 9240, "training_step_time": 0.10759234428405762 }, { "epoch": 1.410064697265625e-05, "model_forward_time": 0.025211334228515625, "step": 9241 }, { "epoch": 1.410064697265625e-05, "step": 9241, "training_step_time": 0.11351203918457031 }, { "epoch": 1.41021728515625e-05, "model_forward_time": 0.025452375411987305, "step": 9242 }, { "epoch": 1.41021728515625e-05, "step": 9242, "training_step_time": 0.10903167724609375 }, { "epoch": 1.410369873046875e-05, "model_forward_time": 0.02634739875793457, "step": 9243 }, { "epoch": 1.410369873046875e-05, "step": 9243, "training_step_time": 0.11479735374450684 }, { "epoch": 1.4105224609375e-05, "model_forward_time": 0.02508378028869629, "step": 9244 }, { "epoch": 1.4105224609375e-05, "step": 9244, "training_step_time": 0.10703349113464355 }, { "epoch": 1.410675048828125e-05, "model_forward_time": 0.025347232818603516, "step": 9245 }, { "epoch": 1.410675048828125e-05, "step": 9245, "training_step_time": 0.1081242561340332 }, { "epoch": 1.41082763671875e-05, "model_forward_time": 0.025745391845703125, "step": 9246 }, { "epoch": 1.41082763671875e-05, "step": 9246, "training_step_time": 0.10690927505493164 }, { "epoch": 1.410980224609375e-05, "model_forward_time": 0.025300979614257812, "step": 9247 }, { "epoch": 1.410980224609375e-05, "step": 9247, "training_step_time": 0.10891103744506836 }, { "epoch": 1.4111328125e-05, "model_forward_time": 0.025121212005615234, "step": 9248 }, { "epoch": 1.4111328125e-05, "step": 9248, "training_step_time": 0.10979747772216797 }, { "epoch": 1.411285400390625e-05, "model_forward_time": 0.026063919067382812, "step": 9249 }, { "epoch": 1.411285400390625e-05, "step": 9249, "training_step_time": 0.10776376724243164 }, { "epoch": 1.41143798828125e-05, "grad_norm": 0.28596746921539307, "learning_rate": 8.283760120238672e-05, "loss": 0.0208, "step": 9250 }, { "epoch": 1.41143798828125e-05, "model_forward_time": 0.0256345272064209, "step": 9250 }, { "epoch": 1.41143798828125e-05, "step": 9250, "training_step_time": 0.10863256454467773 }, { "epoch": 1.411590576171875e-05, "model_forward_time": 0.025503873825073242, "step": 9251 }, { "epoch": 1.411590576171875e-05, "step": 9251, "training_step_time": 0.11173129081726074 }, { "epoch": 1.4117431640625e-05, "model_forward_time": 0.025211811065673828, "step": 9252 }, { "epoch": 1.4117431640625e-05, "step": 9252, "training_step_time": 0.1117558479309082 }, { "epoch": 1.411895751953125e-05, "model_forward_time": 0.02545022964477539, "step": 9253 }, { "epoch": 1.411895751953125e-05, "step": 9253, "training_step_time": 0.11340069770812988 }, { "epoch": 1.41204833984375e-05, "model_forward_time": 0.025596141815185547, "step": 9254 }, { "epoch": 1.41204833984375e-05, "step": 9254, "training_step_time": 0.10758757591247559 }, { "epoch": 1.412200927734375e-05, "model_forward_time": 0.025137901306152344, "step": 9255 }, { "epoch": 1.412200927734375e-05, "step": 9255, "training_step_time": 0.16773009300231934 }, { "epoch": 1.412353515625e-05, "model_forward_time": 0.025700092315673828, "step": 9256 }, { "epoch": 1.412353515625e-05, "step": 9256, "training_step_time": 0.16127872467041016 }, { "epoch": 1.412506103515625e-05, "model_forward_time": 0.02477860450744629, "step": 9257 }, { "epoch": 1.412506103515625e-05, "step": 9257, "training_step_time": 0.11895465850830078 }, { "epoch": 1.41265869140625e-05, "model_forward_time": 0.02507495880126953, "step": 9258 }, { "epoch": 1.41265869140625e-05, "step": 9258, "training_step_time": 0.16961288452148438 }, { "epoch": 1.412811279296875e-05, "model_forward_time": 0.024747610092163086, "step": 9259 }, { "epoch": 1.412811279296875e-05, "step": 9259, "training_step_time": 0.17305254936218262 }, { "epoch": 1.4129638671875e-05, "grad_norm": 0.5188223123550415, "learning_rate": 8.279601817646036e-05, "loss": 0.0239, "step": 9260 }, { "epoch": 1.4129638671875e-05, "model_forward_time": 0.02499985694885254, "step": 9260 }, { "epoch": 1.4129638671875e-05, "step": 9260, "training_step_time": 0.10708951950073242 }, { "epoch": 1.413116455078125e-05, "model_forward_time": 0.024985074996948242, "step": 9261 }, { "epoch": 1.413116455078125e-05, "step": 9261, "training_step_time": 0.10597848892211914 }, { "epoch": 1.41326904296875e-05, "model_forward_time": 0.025522947311401367, "step": 9262 }, { "epoch": 1.41326904296875e-05, "step": 9262, "training_step_time": 0.10741066932678223 }, { "epoch": 1.413421630859375e-05, "model_forward_time": 0.02537369728088379, "step": 9263 }, { "epoch": 1.413421630859375e-05, "step": 9263, "training_step_time": 0.10847926139831543 }, { "epoch": 1.41357421875e-05, "model_forward_time": 0.02527141571044922, "step": 9264 }, { "epoch": 1.41357421875e-05, "step": 9264, "training_step_time": 0.10844779014587402 }, { "epoch": 1.413726806640625e-05, "model_forward_time": 0.025139808654785156, "step": 9265 }, { "epoch": 1.413726806640625e-05, "step": 9265, "training_step_time": 0.10712218284606934 }, { "epoch": 1.41387939453125e-05, "model_forward_time": 0.02492547035217285, "step": 9266 }, { "epoch": 1.41387939453125e-05, "step": 9266, "training_step_time": 0.10891890525817871 }, { "epoch": 1.414031982421875e-05, "model_forward_time": 0.025429725646972656, "step": 9267 }, { "epoch": 1.414031982421875e-05, "step": 9267, "training_step_time": 0.11018657684326172 }, { "epoch": 1.4141845703125e-05, "model_forward_time": 0.025316715240478516, "step": 9268 }, { "epoch": 1.4141845703125e-05, "step": 9268, "training_step_time": 0.11097121238708496 }, { "epoch": 1.414337158203125e-05, "model_forward_time": 0.02517247200012207, "step": 9269 }, { "epoch": 1.414337158203125e-05, "step": 9269, "training_step_time": 0.11122965812683105 }, { "epoch": 1.41448974609375e-05, "grad_norm": 0.32652047276496887, "learning_rate": 8.275439530027948e-05, "loss": 0.0294, "step": 9270 }, { "epoch": 1.41448974609375e-05, "model_forward_time": 0.025018930435180664, "step": 9270 }, { "epoch": 1.41448974609375e-05, "step": 9270, "training_step_time": 0.1684269905090332 }, { "epoch": 1.414642333984375e-05, "model_forward_time": 0.024736881256103516, "step": 9271 }, { "epoch": 1.414642333984375e-05, "step": 9271, "training_step_time": 0.18860697746276855 }, { "epoch": 1.414794921875e-05, "model_forward_time": 0.02544856071472168, "step": 9272 }, { "epoch": 1.414794921875e-05, "step": 9272, "training_step_time": 0.12715482711791992 }, { "epoch": 1.414947509765625e-05, "model_forward_time": 0.02449941635131836, "step": 9273 }, { "epoch": 1.414947509765625e-05, "step": 9273, "training_step_time": 0.17458891868591309 }, { "epoch": 1.41510009765625e-05, "model_forward_time": 0.027659177780151367, "step": 9274 }, { "epoch": 1.41510009765625e-05, "step": 9274, "training_step_time": 0.19277024269104004 }, { "epoch": 1.415252685546875e-05, "model_forward_time": 0.02462458610534668, "step": 9275 }, { "epoch": 1.415252685546875e-05, "step": 9275, "training_step_time": 0.12173748016357422 }, { "epoch": 1.4154052734375e-05, "model_forward_time": 0.024233341217041016, "step": 9276 }, { "epoch": 1.4154052734375e-05, "step": 9276, "training_step_time": 0.1326615810394287 }, { "epoch": 1.415557861328125e-05, "model_forward_time": 0.025336027145385742, "step": 9277 }, { "epoch": 1.415557861328125e-05, "step": 9277, "training_step_time": 0.10873579978942871 }, { "epoch": 1.41571044921875e-05, "model_forward_time": 0.02552032470703125, "step": 9278 }, { "epoch": 1.41571044921875e-05, "step": 9278, "training_step_time": 0.10592365264892578 }, { "epoch": 1.415863037109375e-05, "model_forward_time": 0.025714397430419922, "step": 9279 }, { "epoch": 1.415863037109375e-05, "step": 9279, "training_step_time": 0.10602569580078125 }, { "epoch": 1.416015625e-05, "grad_norm": 0.46416255831718445, "learning_rate": 8.271273262441975e-05, "loss": 0.0323, "step": 9280 }, { "epoch": 1.416015625e-05, "model_forward_time": 0.025510072708129883, "step": 9280 }, { "epoch": 1.416015625e-05, "step": 9280, "training_step_time": 0.11364531517028809 }, { "epoch": 1.416168212890625e-05, "model_forward_time": 0.02494359016418457, "step": 9281 }, { "epoch": 1.416168212890625e-05, "step": 9281, "training_step_time": 0.12385916709899902 }, { "epoch": 1.41632080078125e-05, "model_forward_time": 0.025298595428466797, "step": 9282 }, { "epoch": 1.41632080078125e-05, "step": 9282, "training_step_time": 0.11015081405639648 }, { "epoch": 1.416473388671875e-05, "model_forward_time": 0.02552175521850586, "step": 9283 }, { "epoch": 1.416473388671875e-05, "step": 9283, "training_step_time": 0.11682796478271484 }, { "epoch": 1.4166259765625e-05, "model_forward_time": 0.025438547134399414, "step": 9284 }, { "epoch": 1.4166259765625e-05, "step": 9284, "training_step_time": 0.1071324348449707 }, { "epoch": 1.416778564453125e-05, "model_forward_time": 0.025322914123535156, "step": 9285 }, { "epoch": 1.416778564453125e-05, "step": 9285, "training_step_time": 0.10787367820739746 }, { "epoch": 1.41693115234375e-05, "model_forward_time": 0.02552175521850586, "step": 9286 }, { "epoch": 1.41693115234375e-05, "step": 9286, "training_step_time": 0.10880208015441895 }, { "epoch": 1.417083740234375e-05, "model_forward_time": 0.02536606788635254, "step": 9287 }, { "epoch": 1.417083740234375e-05, "step": 9287, "training_step_time": 0.10973405838012695 }, { "epoch": 1.417236328125e-05, "model_forward_time": 0.025511503219604492, "step": 9288 }, { "epoch": 1.417236328125e-05, "step": 9288, "training_step_time": 0.10735821723937988 }, { "epoch": 1.417388916015625e-05, "model_forward_time": 0.02530694007873535, "step": 9289 }, { "epoch": 1.417388916015625e-05, "step": 9289, "training_step_time": 0.1080026626586914 }, { "epoch": 1.41754150390625e-05, "grad_norm": 0.46193185448646545, "learning_rate": 8.267103019950529e-05, "loss": 0.041, "step": 9290 }, { "epoch": 1.41754150390625e-05, "model_forward_time": 0.025723934173583984, "step": 9290 }, { "epoch": 1.41754150390625e-05, "step": 9290, "training_step_time": 0.11262655258178711 }, { "epoch": 1.417694091796875e-05, "model_forward_time": 0.025437593460083008, "step": 9291 }, { "epoch": 1.417694091796875e-05, "step": 9291, "training_step_time": 0.10833311080932617 }, { "epoch": 1.4178466796875e-05, "model_forward_time": 0.025084495544433594, "step": 9292 }, { "epoch": 1.4178466796875e-05, "step": 9292, "training_step_time": 0.10884284973144531 }, { "epoch": 1.417999267578125e-05, "model_forward_time": 0.02580428123474121, "step": 9293 }, { "epoch": 1.417999267578125e-05, "step": 9293, "training_step_time": 0.11286258697509766 }, { "epoch": 1.41815185546875e-05, "model_forward_time": 0.0252532958984375, "step": 9294 }, { "epoch": 1.41815185546875e-05, "step": 9294, "training_step_time": 0.10817098617553711 }, { "epoch": 1.418304443359375e-05, "model_forward_time": 0.025254011154174805, "step": 9295 }, { "epoch": 1.418304443359375e-05, "step": 9295, "training_step_time": 0.10746407508850098 }, { "epoch": 1.41845703125e-05, "model_forward_time": 0.025269269943237305, "step": 9296 }, { "epoch": 1.41845703125e-05, "step": 9296, "training_step_time": 0.1064143180847168 }, { "epoch": 1.418609619140625e-05, "model_forward_time": 0.02565765380859375, "step": 9297 }, { "epoch": 1.418609619140625e-05, "step": 9297, "training_step_time": 0.1084902286529541 }, { "epoch": 1.41876220703125e-05, "model_forward_time": 0.026873350143432617, "step": 9298 }, { "epoch": 1.41876220703125e-05, "step": 9298, "training_step_time": 0.11151838302612305 }, { "epoch": 1.418914794921875e-05, "model_forward_time": 0.02538013458251953, "step": 9299 }, { "epoch": 1.418914794921875e-05, "step": 9299, "training_step_time": 0.10770058631896973 }, { "epoch": 1.4190673828125e-05, "grad_norm": 0.31563517451286316, "learning_rate": 8.262928807620843e-05, "loss": 0.0414, "step": 9300 }, { "epoch": 1.4190673828125e-05, "model_forward_time": 0.025341510772705078, "step": 9300 }, { "epoch": 1.4190673828125e-05, "step": 9300, "training_step_time": 0.10755300521850586 }, { "epoch": 1.419219970703125e-05, "model_forward_time": 0.024741172790527344, "step": 9301 }, { "epoch": 1.419219970703125e-05, "step": 9301, "training_step_time": 0.1060018539428711 }, { "epoch": 1.41937255859375e-05, "model_forward_time": 0.02494335174560547, "step": 9302 }, { "epoch": 1.41937255859375e-05, "step": 9302, "training_step_time": 0.11619067192077637 }, { "epoch": 1.419525146484375e-05, "model_forward_time": 0.025476694107055664, "step": 9303 }, { "epoch": 1.419525146484375e-05, "step": 9303, "training_step_time": 0.11117291450500488 }, { "epoch": 1.419677734375e-05, "model_forward_time": 0.024979114532470703, "step": 9304 }, { "epoch": 1.419677734375e-05, "step": 9304, "training_step_time": 0.1080925464630127 }, { "epoch": 1.419830322265625e-05, "model_forward_time": 0.025685787200927734, "step": 9305 }, { "epoch": 1.419830322265625e-05, "step": 9305, "training_step_time": 0.1720445156097412 }, { "epoch": 1.41998291015625e-05, "model_forward_time": 0.024773120880126953, "step": 9306 }, { "epoch": 1.41998291015625e-05, "step": 9306, "training_step_time": 0.17114877700805664 }, { "epoch": 1.420135498046875e-05, "model_forward_time": 0.024457454681396484, "step": 9307 }, { "epoch": 1.420135498046875e-05, "step": 9307, "training_step_time": 0.10494470596313477 }, { "epoch": 1.4202880859375e-05, "model_forward_time": 0.0251007080078125, "step": 9308 }, { "epoch": 1.4202880859375e-05, "step": 9308, "training_step_time": 0.10710597038269043 }, { "epoch": 1.420440673828125e-05, "model_forward_time": 0.02538585662841797, "step": 9309 }, { "epoch": 1.420440673828125e-05, "step": 9309, "training_step_time": 0.10929417610168457 }, { "epoch": 1.42059326171875e-05, "grad_norm": 0.6427388191223145, "learning_rate": 8.258750630524984e-05, "loss": 0.0316, "step": 9310 }, { "epoch": 1.42059326171875e-05, "model_forward_time": 0.025500059127807617, "step": 9310 }, { "epoch": 1.42059326171875e-05, "step": 9310, "training_step_time": 0.10837960243225098 }, { "epoch": 1.420745849609375e-05, "model_forward_time": 0.02531147003173828, "step": 9311 }, { "epoch": 1.420745849609375e-05, "step": 9311, "training_step_time": 0.10727143287658691 }, { "epoch": 1.4208984375e-05, "model_forward_time": 0.025129079818725586, "step": 9312 }, { "epoch": 1.4208984375e-05, "step": 9312, "training_step_time": 0.11274552345275879 }, { "epoch": 1.421051025390625e-05, "model_forward_time": 0.027581453323364258, "step": 9313 }, { "epoch": 1.421051025390625e-05, "step": 9313, "training_step_time": 0.10983538627624512 }, { "epoch": 1.42120361328125e-05, "model_forward_time": 0.024992704391479492, "step": 9314 }, { "epoch": 1.42120361328125e-05, "step": 9314, "training_step_time": 0.11027073860168457 }, { "epoch": 1.421356201171875e-05, "model_forward_time": 0.02499079704284668, "step": 9315 }, { "epoch": 1.421356201171875e-05, "step": 9315, "training_step_time": 0.10888934135437012 }, { "epoch": 1.4215087890625e-05, "model_forward_time": 0.02524089813232422, "step": 9316 }, { "epoch": 1.4215087890625e-05, "step": 9316, "training_step_time": 0.1559295654296875 }, { "epoch": 1.421661376953125e-05, "model_forward_time": 0.024919509887695312, "step": 9317 }, { "epoch": 1.421661376953125e-05, "step": 9317, "training_step_time": 0.17152619361877441 }, { "epoch": 1.42181396484375e-05, "model_forward_time": 0.02431011199951172, "step": 9318 }, { "epoch": 1.42181396484375e-05, "step": 9318, "training_step_time": 0.19185280799865723 }, { "epoch": 1.421966552734375e-05, "model_forward_time": 0.024252891540527344, "step": 9319 }, { "epoch": 1.421966552734375e-05, "step": 9319, "training_step_time": 0.19882965087890625 }, { "epoch": 1.422119140625e-05, "grad_norm": 0.3493986129760742, "learning_rate": 8.254568493739828e-05, "loss": 0.0503, "step": 9320 }, { "epoch": 1.422119140625e-05, "model_forward_time": 0.023731231689453125, "step": 9320 }, { "epoch": 1.422119140625e-05, "step": 9320, "training_step_time": 0.1493396759033203 }, { "epoch": 1.422271728515625e-05, "model_forward_time": 0.024673938751220703, "step": 9321 }, { "epoch": 1.422271728515625e-05, "step": 9321, "training_step_time": 0.178023099899292 }, { "epoch": 1.42242431640625e-05, "model_forward_time": 0.024322509765625, "step": 9322 }, { "epoch": 1.42242431640625e-05, "step": 9322, "training_step_time": 0.15357208251953125 }, { "epoch": 1.422576904296875e-05, "model_forward_time": 0.02444744110107422, "step": 9323 }, { "epoch": 1.422576904296875e-05, "step": 9323, "training_step_time": 0.12020182609558105 }, { "epoch": 1.4227294921875e-05, "model_forward_time": 0.02468132972717285, "step": 9324 }, { "epoch": 1.4227294921875e-05, "step": 9324, "training_step_time": 0.10423636436462402 }, { "epoch": 1.422882080078125e-05, "model_forward_time": 0.02575063705444336, "step": 9325 }, { "epoch": 1.422882080078125e-05, "step": 9325, "training_step_time": 0.10520267486572266 }, { "epoch": 1.42303466796875e-05, "model_forward_time": 0.02524876594543457, "step": 9326 }, { "epoch": 1.42303466796875e-05, "step": 9326, "training_step_time": 0.10900163650512695 }, { "epoch": 1.423187255859375e-05, "model_forward_time": 0.0251312255859375, "step": 9327 }, { "epoch": 1.423187255859375e-05, "step": 9327, "training_step_time": 0.11357593536376953 }, { "epoch": 1.42333984375e-05, "model_forward_time": 0.02510833740234375, "step": 9328 }, { "epoch": 1.42333984375e-05, "step": 9328, "training_step_time": 0.10618352890014648 }, { "epoch": 1.423492431640625e-05, "model_forward_time": 0.0253140926361084, "step": 9329 }, { "epoch": 1.423492431640625e-05, "step": 9329, "training_step_time": 0.13569021224975586 }, { "epoch": 1.42364501953125e-05, "grad_norm": 0.4366268813610077, "learning_rate": 8.250382402347065e-05, "loss": 0.03, "step": 9330 }, { "epoch": 1.42364501953125e-05, "model_forward_time": 0.024549245834350586, "step": 9330 }, { "epoch": 1.42364501953125e-05, "step": 9330, "training_step_time": 0.1672680377960205 }, { "epoch": 1.423797607421875e-05, "model_forward_time": 0.025560855865478516, "step": 9331 }, { "epoch": 1.423797607421875e-05, "step": 9331, "training_step_time": 0.17728209495544434 }, { "epoch": 1.4239501953125e-05, "model_forward_time": 0.024358749389648438, "step": 9332 }, { "epoch": 1.4239501953125e-05, "step": 9332, "training_step_time": 0.15977144241333008 }, { "epoch": 1.424102783203125e-05, "model_forward_time": 0.027455568313598633, "step": 9333 }, { "epoch": 1.424102783203125e-05, "step": 9333, "training_step_time": 0.139298677444458 }, { "epoch": 1.42425537109375e-05, "model_forward_time": 0.024326086044311523, "step": 9334 }, { "epoch": 1.42425537109375e-05, "step": 9334, "training_step_time": 0.14159107208251953 }, { "epoch": 1.424407958984375e-05, "model_forward_time": 0.024331331253051758, "step": 9335 }, { "epoch": 1.424407958984375e-05, "step": 9335, "training_step_time": 0.12559103965759277 }, { "epoch": 1.424560546875e-05, "model_forward_time": 0.02417588233947754, "step": 9336 }, { "epoch": 1.424560546875e-05, "step": 9336, "training_step_time": 0.1267712116241455 }, { "epoch": 1.424713134765625e-05, "model_forward_time": 0.024806499481201172, "step": 9337 }, { "epoch": 1.424713134765625e-05, "step": 9337, "training_step_time": 0.12349319458007812 }, { "epoch": 1.42486572265625e-05, "model_forward_time": 0.024956464767456055, "step": 9338 }, { "epoch": 1.42486572265625e-05, "step": 9338, "training_step_time": 0.11702919006347656 }, { "epoch": 1.425018310546875e-05, "model_forward_time": 0.025315284729003906, "step": 9339 }, { "epoch": 1.425018310546875e-05, "step": 9339, "training_step_time": 0.11564302444458008 }, { "epoch": 1.4251708984375e-05, "grad_norm": 0.38389018177986145, "learning_rate": 8.246192361433196e-05, "loss": 0.0361, "step": 9340 }, { "epoch": 1.4251708984375e-05, "model_forward_time": 0.025294065475463867, "step": 9340 }, { "epoch": 1.4251708984375e-05, "step": 9340, "training_step_time": 0.1131129264831543 }, { "epoch": 1.425323486328125e-05, "model_forward_time": 0.02499079704284668, "step": 9341 }, { "epoch": 1.425323486328125e-05, "step": 9341, "training_step_time": 0.11035776138305664 }, { "epoch": 1.42547607421875e-05, "model_forward_time": 0.025301218032836914, "step": 9342 }, { "epoch": 1.42547607421875e-05, "step": 9342, "training_step_time": 0.10878896713256836 }, { "epoch": 1.425628662109375e-05, "model_forward_time": 0.025458097457885742, "step": 9343 }, { "epoch": 1.425628662109375e-05, "step": 9343, "training_step_time": 0.11203670501708984 }, { "epoch": 1.42578125e-05, "model_forward_time": 0.025565385818481445, "step": 9344 }, { "epoch": 1.42578125e-05, "step": 9344, "training_step_time": 0.10824418067932129 }, { "epoch": 1.425933837890625e-05, "model_forward_time": 0.02601933479309082, "step": 9345 }, { "epoch": 1.425933837890625e-05, "step": 9345, "training_step_time": 0.10953688621520996 }, { "epoch": 1.42608642578125e-05, "model_forward_time": 0.025738000869750977, "step": 9346 }, { "epoch": 1.42608642578125e-05, "step": 9346, "training_step_time": 0.11192870140075684 }, { "epoch": 1.426239013671875e-05, "model_forward_time": 0.025590181350708008, "step": 9347 }, { "epoch": 1.426239013671875e-05, "step": 9347, "training_step_time": 0.11047792434692383 }, { "epoch": 1.4263916015625e-05, "model_forward_time": 0.0254209041595459, "step": 9348 }, { "epoch": 1.4263916015625e-05, "step": 9348, "training_step_time": 0.11213278770446777 }, { "epoch": 1.426544189453125e-05, "model_forward_time": 0.02570486068725586, "step": 9349 }, { "epoch": 1.426544189453125e-05, "step": 9349, "training_step_time": 0.1730644702911377 }, { "epoch": 1.42669677734375e-05, "grad_norm": 0.41978979110717773, "learning_rate": 8.241998376089508e-05, "loss": 0.0325, "step": 9350 }, { "epoch": 1.42669677734375e-05, "model_forward_time": 0.02469658851623535, "step": 9350 }, { "epoch": 1.42669677734375e-05, "step": 9350, "training_step_time": 0.161177396774292 }, { "epoch": 1.426849365234375e-05, "model_forward_time": 0.02526092529296875, "step": 9351 }, { "epoch": 1.426849365234375e-05, "step": 9351, "training_step_time": 0.10650777816772461 }, { "epoch": 1.427001953125e-05, "model_forward_time": 0.02555704116821289, "step": 9352 }, { "epoch": 1.427001953125e-05, "step": 9352, "training_step_time": 0.10751914978027344 }, { "epoch": 1.427154541015625e-05, "model_forward_time": 0.02543163299560547, "step": 9353 }, { "epoch": 1.427154541015625e-05, "step": 9353, "training_step_time": 0.10830354690551758 }, { "epoch": 1.42730712890625e-05, "model_forward_time": 0.024722576141357422, "step": 9354 }, { "epoch": 1.42730712890625e-05, "step": 9354, "training_step_time": 0.10813260078430176 }, { "epoch": 1.427459716796875e-05, "model_forward_time": 0.025714635848999023, "step": 9355 }, { "epoch": 1.427459716796875e-05, "step": 9355, "training_step_time": 0.10796904563903809 }, { "epoch": 1.4276123046875e-05, "model_forward_time": 0.025572776794433594, "step": 9356 }, { "epoch": 1.4276123046875e-05, "step": 9356, "training_step_time": 0.10874414443969727 }, { "epoch": 1.427764892578125e-05, "model_forward_time": 0.025202274322509766, "step": 9357 }, { "epoch": 1.427764892578125e-05, "step": 9357, "training_step_time": 0.10853314399719238 }, { "epoch": 1.42791748046875e-05, "model_forward_time": 0.0253293514251709, "step": 9358 }, { "epoch": 1.42791748046875e-05, "step": 9358, "training_step_time": 0.1074666976928711 }, { "epoch": 1.428070068359375e-05, "model_forward_time": 0.025534391403198242, "step": 9359 }, { "epoch": 1.428070068359375e-05, "step": 9359, "training_step_time": 0.1083376407623291 }, { "epoch": 1.42822265625e-05, "grad_norm": 0.34948891401290894, "learning_rate": 8.237800451412095e-05, "loss": 0.0298, "step": 9360 }, { "epoch": 1.42822265625e-05, "model_forward_time": 0.025350093841552734, "step": 9360 }, { "epoch": 1.42822265625e-05, "step": 9360, "training_step_time": 0.12054991722106934 }, { "epoch": 1.428375244140625e-05, "model_forward_time": 0.02513599395751953, "step": 9361 }, { "epoch": 1.428375244140625e-05, "step": 9361, "training_step_time": 0.20360779762268066 }, { "epoch": 1.42852783203125e-05, "model_forward_time": 0.024605274200439453, "step": 9362 }, { "epoch": 1.42852783203125e-05, "step": 9362, "training_step_time": 0.16749978065490723 }, { "epoch": 1.428680419921875e-05, "model_forward_time": 0.024476289749145508, "step": 9363 }, { "epoch": 1.428680419921875e-05, "step": 9363, "training_step_time": 0.18401813507080078 }, { "epoch": 1.4288330078125e-05, "model_forward_time": 0.02448582649230957, "step": 9364 }, { "epoch": 1.4288330078125e-05, "step": 9364, "training_step_time": 0.17475533485412598 }, { "epoch": 1.428985595703125e-05, "model_forward_time": 0.024755001068115234, "step": 9365 }, { "epoch": 1.428985595703125e-05, "step": 9365, "training_step_time": 0.17977619171142578 }, { "epoch": 1.42913818359375e-05, "model_forward_time": 0.024433612823486328, "step": 9366 }, { "epoch": 1.42913818359375e-05, "step": 9366, "training_step_time": 0.1424570083618164 }, { "epoch": 1.429290771484375e-05, "model_forward_time": 0.024227619171142578, "step": 9367 }, { "epoch": 1.429290771484375e-05, "step": 9367, "training_step_time": 0.1109161376953125 }, { "epoch": 1.429443359375e-05, "model_forward_time": 0.024853944778442383, "step": 9368 }, { "epoch": 1.429443359375e-05, "step": 9368, "training_step_time": 0.1168217658996582 }, { "epoch": 1.429595947265625e-05, "model_forward_time": 0.02489781379699707, "step": 9369 }, { "epoch": 1.429595947265625e-05, "step": 9369, "training_step_time": 0.11917328834533691 }, { "epoch": 1.42974853515625e-05, "grad_norm": 0.33603987097740173, "learning_rate": 8.233598592501828e-05, "loss": 0.0455, "step": 9370 }, { "epoch": 1.42974853515625e-05, "model_forward_time": 0.025384187698364258, "step": 9370 }, { "epoch": 1.42974853515625e-05, "step": 9370, "training_step_time": 0.11890053749084473 }, { "epoch": 1.429901123046875e-05, "model_forward_time": 0.025571107864379883, "step": 9371 }, { "epoch": 1.429901123046875e-05, "step": 9371, "training_step_time": 0.11262965202331543 }, { "epoch": 1.4300537109375e-05, "model_forward_time": 0.02538585662841797, "step": 9372 }, { "epoch": 1.4300537109375e-05, "step": 9372, "training_step_time": 0.11399149894714355 }, { "epoch": 1.430206298828125e-05, "model_forward_time": 0.025331735610961914, "step": 9373 }, { "epoch": 1.430206298828125e-05, "step": 9373, "training_step_time": 0.11225509643554688 }, { "epoch": 1.43035888671875e-05, "model_forward_time": 0.024239778518676758, "step": 9374 }, { "epoch": 1.43035888671875e-05, "step": 9374, "training_step_time": 0.10890555381774902 }, { "epoch": 1.430511474609375e-05, "model_forward_time": 0.024962186813354492, "step": 9375 }, { "epoch": 1.430511474609375e-05, "step": 9375, "training_step_time": 0.10840606689453125 }, { "epoch": 1.4306640625e-05, "model_forward_time": 0.025429725646972656, "step": 9376 }, { "epoch": 1.4306640625e-05, "step": 9376, "training_step_time": 0.11031770706176758 }, { "epoch": 1.430816650390625e-05, "model_forward_time": 0.02509927749633789, "step": 9377 }, { "epoch": 1.430816650390625e-05, "step": 9377, "training_step_time": 0.11021065711975098 }, { "epoch": 1.43096923828125e-05, "model_forward_time": 0.02535414695739746, "step": 9378 }, { "epoch": 1.43096923828125e-05, "step": 9378, "training_step_time": 0.11138367652893066 }, { "epoch": 1.431121826171875e-05, "model_forward_time": 0.025476932525634766, "step": 9379 }, { "epoch": 1.431121826171875e-05, "step": 9379, "training_step_time": 0.11031198501586914 }, { "epoch": 1.4312744140625e-05, "grad_norm": 0.3461342751979828, "learning_rate": 8.229392804464362e-05, "loss": 0.0308, "step": 9380 }, { "epoch": 1.4312744140625e-05, "model_forward_time": 0.025185346603393555, "step": 9380 }, { "epoch": 1.4312744140625e-05, "step": 9380, "training_step_time": 0.11371135711669922 }, { "epoch": 1.431427001953125e-05, "model_forward_time": 0.024500608444213867, "step": 9381 }, { "epoch": 1.431427001953125e-05, "step": 9381, "training_step_time": 0.10595202445983887 }, { "epoch": 1.43157958984375e-05, "model_forward_time": 0.02512836456298828, "step": 9382 }, { "epoch": 1.43157958984375e-05, "step": 9382, "training_step_time": 0.1128995418548584 }, { "epoch": 1.431732177734375e-05, "model_forward_time": 0.027281522750854492, "step": 9383 }, { "epoch": 1.431732177734375e-05, "step": 9383, "training_step_time": 0.11076545715332031 }, { "epoch": 1.431884765625e-05, "model_forward_time": 0.025717496871948242, "step": 9384 }, { "epoch": 1.431884765625e-05, "step": 9384, "training_step_time": 0.10793733596801758 }, { "epoch": 1.432037353515625e-05, "model_forward_time": 0.02528095245361328, "step": 9385 }, { "epoch": 1.432037353515625e-05, "step": 9385, "training_step_time": 0.1065375804901123 }, { "epoch": 1.43218994140625e-05, "model_forward_time": 0.028439760208129883, "step": 9386 }, { "epoch": 1.43218994140625e-05, "step": 9386, "training_step_time": 0.10967803001403809 }, { "epoch": 1.432342529296875e-05, "model_forward_time": 0.025116920471191406, "step": 9387 }, { "epoch": 1.432342529296875e-05, "step": 9387, "training_step_time": 0.10731315612792969 }, { "epoch": 1.4324951171875e-05, "model_forward_time": 0.025457143783569336, "step": 9388 }, { "epoch": 1.4324951171875e-05, "step": 9388, "training_step_time": 0.10696244239807129 }, { "epoch": 1.432647705078125e-05, "model_forward_time": 0.025299549102783203, "step": 9389 }, { "epoch": 1.432647705078125e-05, "step": 9389, "training_step_time": 0.10823822021484375 }, { "epoch": 1.43280029296875e-05, "grad_norm": 0.37236952781677246, "learning_rate": 8.225183092410128e-05, "loss": 0.0302, "step": 9390 }, { "epoch": 1.43280029296875e-05, "model_forward_time": 0.025687456130981445, "step": 9390 }, { "epoch": 1.43280029296875e-05, "step": 9390, "training_step_time": 0.10878229141235352 }, { "epoch": 1.432952880859375e-05, "model_forward_time": 0.02568960189819336, "step": 9391 }, { "epoch": 1.432952880859375e-05, "step": 9391, "training_step_time": 0.10934019088745117 }, { "epoch": 1.43310546875e-05, "model_forward_time": 0.025292634963989258, "step": 9392 }, { "epoch": 1.43310546875e-05, "step": 9392, "training_step_time": 0.1673116683959961 }, { "epoch": 1.433258056640625e-05, "model_forward_time": 0.02449512481689453, "step": 9393 }, { "epoch": 1.433258056640625e-05, "step": 9393, "training_step_time": 0.16326141357421875 }, { "epoch": 1.43341064453125e-05, "model_forward_time": 0.024966716766357422, "step": 9394 }, { "epoch": 1.43341064453125e-05, "step": 9394, "training_step_time": 0.11365890502929688 }, { "epoch": 1.433563232421875e-05, "model_forward_time": 0.025084495544433594, "step": 9395 }, { "epoch": 1.433563232421875e-05, "step": 9395, "training_step_time": 0.168412446975708 }, { "epoch": 1.4337158203125e-05, "model_forward_time": 0.024068832397460938, "step": 9396 }, { "epoch": 1.4337158203125e-05, "step": 9396, "training_step_time": 0.17074203491210938 }, { "epoch": 1.433868408203125e-05, "model_forward_time": 0.02477884292602539, "step": 9397 }, { "epoch": 1.433868408203125e-05, "step": 9397, "training_step_time": 0.11169052124023438 }, { "epoch": 1.43402099609375e-05, "model_forward_time": 0.024664878845214844, "step": 9398 }, { "epoch": 1.43402099609375e-05, "step": 9398, "training_step_time": 0.10529661178588867 }, { "epoch": 1.434173583984375e-05, "model_forward_time": 0.025512218475341797, "step": 9399 }, { "epoch": 1.434173583984375e-05, "step": 9399, "training_step_time": 0.10719561576843262 }, { "epoch": 1.434326171875e-05, "grad_norm": 0.42937326431274414, "learning_rate": 8.220969461454322e-05, "loss": 0.0448, "step": 9400 }, { "epoch": 1.434326171875e-05, "model_forward_time": 0.02642822265625, "step": 9400 }, { "epoch": 1.434326171875e-05, "step": 9400, "training_step_time": 0.10843586921691895 }, { "epoch": 1.434478759765625e-05, "model_forward_time": 0.025964021682739258, "step": 9401 }, { "epoch": 1.434478759765625e-05, "step": 9401, "training_step_time": 0.11408257484436035 }, { "epoch": 1.43463134765625e-05, "model_forward_time": 0.025191545486450195, "step": 9402 }, { "epoch": 1.43463134765625e-05, "step": 9402, "training_step_time": 0.14751529693603516 }, { "epoch": 1.434783935546875e-05, "model_forward_time": 0.025240182876586914, "step": 9403 }, { "epoch": 1.434783935546875e-05, "step": 9403, "training_step_time": 0.1714925765991211 }, { "epoch": 1.4349365234375e-05, "model_forward_time": 0.024412870407104492, "step": 9404 }, { "epoch": 1.4349365234375e-05, "step": 9404, "training_step_time": 0.18913531303405762 }, { "epoch": 1.435089111328125e-05, "model_forward_time": 0.0242154598236084, "step": 9405 }, { "epoch": 1.435089111328125e-05, "step": 9405, "training_step_time": 0.14969229698181152 }, { "epoch": 1.43524169921875e-05, "model_forward_time": 0.024442434310913086, "step": 9406 }, { "epoch": 1.43524169921875e-05, "step": 9406, "training_step_time": 0.19721150398254395 }, { "epoch": 1.435394287109375e-05, "model_forward_time": 0.02468562126159668, "step": 9407 }, { "epoch": 1.435394287109375e-05, "step": 9407, "training_step_time": 0.1781773567199707 }, { "epoch": 1.435546875e-05, "model_forward_time": 0.024179935455322266, "step": 9408 }, { "epoch": 1.435546875e-05, "step": 9408, "training_step_time": 0.1791691780090332 }, { "epoch": 1.435699462890625e-05, "model_forward_time": 0.024804353713989258, "step": 9409 }, { "epoch": 1.435699462890625e-05, "step": 9409, "training_step_time": 0.15016937255859375 }, { "epoch": 1.43585205078125e-05, "grad_norm": 0.5000482201576233, "learning_rate": 8.2167519167169e-05, "loss": 0.0389, "step": 9410 }, { "epoch": 1.43585205078125e-05, "model_forward_time": 0.02443671226501465, "step": 9410 }, { "epoch": 1.43585205078125e-05, "step": 9410, "training_step_time": 0.17367863655090332 }, { "epoch": 1.436004638671875e-05, "model_forward_time": 0.02401280403137207, "step": 9411 }, { "epoch": 1.436004638671875e-05, "step": 9411, "training_step_time": 0.17651033401489258 }, { "epoch": 1.4361572265625e-05, "model_forward_time": 0.024470806121826172, "step": 9412 }, { "epoch": 1.4361572265625e-05, "step": 9412, "training_step_time": 0.12769865989685059 }, { "epoch": 1.436309814453125e-05, "model_forward_time": 0.024325132369995117, "step": 9413 }, { "epoch": 1.436309814453125e-05, "step": 9413, "training_step_time": 0.10736441612243652 }, { "epoch": 1.43646240234375e-05, "model_forward_time": 0.025475025177001953, "step": 9414 }, { "epoch": 1.43646240234375e-05, "step": 9414, "training_step_time": 0.10654568672180176 }, { "epoch": 1.436614990234375e-05, "model_forward_time": 0.025160551071166992, "step": 9415 }, { "epoch": 1.436614990234375e-05, "step": 9415, "training_step_time": 0.11472940444946289 }, { "epoch": 1.436767578125e-05, "model_forward_time": 0.02504420280456543, "step": 9416 }, { "epoch": 1.436767578125e-05, "step": 9416, "training_step_time": 0.1070094108581543 }, { "epoch": 1.436920166015625e-05, "model_forward_time": 0.02505207061767578, "step": 9417 }, { "epoch": 1.436920166015625e-05, "step": 9417, "training_step_time": 0.15564656257629395 }, { "epoch": 1.43707275390625e-05, "model_forward_time": 0.025227069854736328, "step": 9418 }, { "epoch": 1.43707275390625e-05, "step": 9418, "training_step_time": 0.1701970100402832 }, { "epoch": 1.437225341796875e-05, "model_forward_time": 0.02423238754272461, "step": 9419 }, { "epoch": 1.437225341796875e-05, "step": 9419, "training_step_time": 0.1639697551727295 }, { "epoch": 1.4373779296875e-05, "grad_norm": 0.7076467871665955, "learning_rate": 8.212530463322583e-05, "loss": 0.0343, "step": 9420 }, { "epoch": 1.4373779296875e-05, "model_forward_time": 0.02458977699279785, "step": 9420 }, { "epoch": 1.4373779296875e-05, "step": 9420, "training_step_time": 0.1418919563293457 }, { "epoch": 1.437530517578125e-05, "model_forward_time": 0.024471282958984375, "step": 9421 }, { "epoch": 1.437530517578125e-05, "step": 9421, "training_step_time": 0.14858531951904297 }, { "epoch": 1.43768310546875e-05, "model_forward_time": 0.024564743041992188, "step": 9422 }, { "epoch": 1.43768310546875e-05, "step": 9422, "training_step_time": 0.13172292709350586 }, { "epoch": 1.437835693359375e-05, "model_forward_time": 0.023899078369140625, "step": 9423 }, { "epoch": 1.437835693359375e-05, "step": 9423, "training_step_time": 0.13115763664245605 }, { "epoch": 1.43798828125e-05, "model_forward_time": 0.024674415588378906, "step": 9424 }, { "epoch": 1.43798828125e-05, "step": 9424, "training_step_time": 0.12712359428405762 }, { "epoch": 1.438140869140625e-05, "model_forward_time": 0.024802446365356445, "step": 9425 }, { "epoch": 1.438140869140625e-05, "step": 9425, "training_step_time": 0.12287735939025879 }, { "epoch": 1.43829345703125e-05, "model_forward_time": 0.02494072914123535, "step": 9426 }, { "epoch": 1.43829345703125e-05, "step": 9426, "training_step_time": 0.1213235855102539 }, { "epoch": 1.438446044921875e-05, "model_forward_time": 0.025444507598876953, "step": 9427 }, { "epoch": 1.438446044921875e-05, "step": 9427, "training_step_time": 0.11888933181762695 }, { "epoch": 1.4385986328125e-05, "model_forward_time": 0.025269508361816406, "step": 9428 }, { "epoch": 1.4385986328125e-05, "step": 9428, "training_step_time": 0.10966730117797852 }, { "epoch": 1.438751220703125e-05, "model_forward_time": 0.02518749237060547, "step": 9429 }, { "epoch": 1.438751220703125e-05, "step": 9429, "training_step_time": 0.11363387107849121 }, { "epoch": 1.43890380859375e-05, "grad_norm": 0.4879451394081116, "learning_rate": 8.20830510640083e-05, "loss": 0.0283, "step": 9430 }, { "epoch": 1.43890380859375e-05, "model_forward_time": 0.025077342987060547, "step": 9430 }, { "epoch": 1.43890380859375e-05, "step": 9430, "training_step_time": 0.1096804141998291 }, { "epoch": 1.439056396484375e-05, "model_forward_time": 0.025364398956298828, "step": 9431 }, { "epoch": 1.439056396484375e-05, "step": 9431, "training_step_time": 0.11052846908569336 }, { "epoch": 1.439208984375e-05, "model_forward_time": 0.025980472564697266, "step": 9432 }, { "epoch": 1.439208984375e-05, "step": 9432, "training_step_time": 0.11096501350402832 }, { "epoch": 1.439361572265625e-05, "model_forward_time": 0.025103330612182617, "step": 9433 }, { "epoch": 1.439361572265625e-05, "step": 9433, "training_step_time": 0.1758592128753662 }, { "epoch": 1.43951416015625e-05, "model_forward_time": 0.02556014060974121, "step": 9434 }, { "epoch": 1.43951416015625e-05, "step": 9434, "training_step_time": 0.16201496124267578 }, { "epoch": 1.439666748046875e-05, "model_forward_time": 0.02410578727722168, "step": 9435 }, { "epoch": 1.439666748046875e-05, "step": 9435, "training_step_time": 0.10678410530090332 }, { "epoch": 1.4398193359375e-05, "model_forward_time": 0.024822235107421875, "step": 9436 }, { "epoch": 1.4398193359375e-05, "step": 9436, "training_step_time": 0.10857462882995605 }, { "epoch": 1.439971923828125e-05, "model_forward_time": 0.02538156509399414, "step": 9437 }, { "epoch": 1.439971923828125e-05, "step": 9437, "training_step_time": 0.11333513259887695 }, { "epoch": 1.44012451171875e-05, "model_forward_time": 0.025501012802124023, "step": 9438 }, { "epoch": 1.44012451171875e-05, "step": 9438, "training_step_time": 0.11481881141662598 }, { "epoch": 1.440277099609375e-05, "model_forward_time": 0.025145769119262695, "step": 9439 }, { "epoch": 1.440277099609375e-05, "step": 9439, "training_step_time": 0.11007142066955566 }, { "epoch": 1.4404296875e-05, "grad_norm": 0.6473632454872131, "learning_rate": 8.204075851085849e-05, "loss": 0.0311, "step": 9440 }, { "epoch": 1.4404296875e-05, "model_forward_time": 0.025249481201171875, "step": 9440 }, { "epoch": 1.4404296875e-05, "step": 9440, "training_step_time": 0.10944151878356934 }, { "epoch": 1.440582275390625e-05, "model_forward_time": 0.025234222412109375, "step": 9441 }, { "epoch": 1.440582275390625e-05, "step": 9441, "training_step_time": 0.1064310073852539 }, { "epoch": 1.44073486328125e-05, "model_forward_time": 0.028178691864013672, "step": 9442 }, { "epoch": 1.44073486328125e-05, "step": 9442, "training_step_time": 0.1155390739440918 }, { "epoch": 1.440887451171875e-05, "model_forward_time": 0.024820804595947266, "step": 9443 }, { "epoch": 1.440887451171875e-05, "step": 9443, "training_step_time": 0.11186695098876953 }, { "epoch": 1.4410400390625e-05, "model_forward_time": 0.025270700454711914, "step": 9444 }, { "epoch": 1.4410400390625e-05, "step": 9444, "training_step_time": 0.10855865478515625 }, { "epoch": 1.441192626953125e-05, "model_forward_time": 0.025063514709472656, "step": 9445 }, { "epoch": 1.441192626953125e-05, "step": 9445, "training_step_time": 0.10862016677856445 }, { "epoch": 1.44134521484375e-05, "model_forward_time": 0.024934053421020508, "step": 9446 }, { "epoch": 1.44134521484375e-05, "step": 9446, "training_step_time": 0.10694766044616699 }, { "epoch": 1.441497802734375e-05, "model_forward_time": 0.025220870971679688, "step": 9447 }, { "epoch": 1.441497802734375e-05, "step": 9447, "training_step_time": 0.1406717300415039 }, { "epoch": 1.441650390625e-05, "model_forward_time": 0.0249941349029541, "step": 9448 }, { "epoch": 1.441650390625e-05, "step": 9448, "training_step_time": 0.14068269729614258 }, { "epoch": 1.441802978515625e-05, "model_forward_time": 0.024707794189453125, "step": 9449 }, { "epoch": 1.441802978515625e-05, "step": 9449, "training_step_time": 0.18294644355773926 }, { "epoch": 1.44195556640625e-05, "grad_norm": 0.567092776298523, "learning_rate": 8.199842702516583e-05, "loss": 0.0267, "step": 9450 }, { "epoch": 1.44195556640625e-05, "model_forward_time": 0.02435922622680664, "step": 9450 }, { "epoch": 1.44195556640625e-05, "step": 9450, "training_step_time": 0.1739346981048584 }, { "epoch": 1.442108154296875e-05, "model_forward_time": 0.024892091751098633, "step": 9451 }, { "epoch": 1.442108154296875e-05, "step": 9451, "training_step_time": 0.1849069595336914 }, { "epoch": 1.4422607421875e-05, "model_forward_time": 0.024327754974365234, "step": 9452 }, { "epoch": 1.4422607421875e-05, "step": 9452, "training_step_time": 0.2120833396911621 }, { "epoch": 1.442413330078125e-05, "model_forward_time": 0.024076223373413086, "step": 9453 }, { "epoch": 1.442413330078125e-05, "step": 9453, "training_step_time": 0.13730549812316895 }, { "epoch": 1.44256591796875e-05, "model_forward_time": 0.02444624900817871, "step": 9454 }, { "epoch": 1.44256591796875e-05, "step": 9454, "training_step_time": 0.15231609344482422 }, { "epoch": 1.442718505859375e-05, "model_forward_time": 0.025484085083007812, "step": 9455 }, { "epoch": 1.442718505859375e-05, "step": 9455, "training_step_time": 0.12475156784057617 }, { "epoch": 1.44287109375e-05, "model_forward_time": 0.024611234664916992, "step": 9456 }, { "epoch": 1.44287109375e-05, "step": 9456, "training_step_time": 0.11552882194519043 }, { "epoch": 1.443023681640625e-05, "model_forward_time": 0.0254366397857666, "step": 9457 }, { "epoch": 1.443023681640625e-05, "step": 9457, "training_step_time": 0.10818004608154297 }, { "epoch": 1.44317626953125e-05, "model_forward_time": 0.02500462532043457, "step": 9458 }, { "epoch": 1.44317626953125e-05, "step": 9458, "training_step_time": 0.1084434986114502 }, { "epoch": 1.443328857421875e-05, "model_forward_time": 0.02553582191467285, "step": 9459 }, { "epoch": 1.443328857421875e-05, "step": 9459, "training_step_time": 0.10947966575622559 }, { "epoch": 1.4434814453125e-05, "grad_norm": 0.2936333417892456, "learning_rate": 8.19560566583671e-05, "loss": 0.029, "step": 9460 }, { "epoch": 1.4434814453125e-05, "model_forward_time": 0.025308609008789062, "step": 9460 }, { "epoch": 1.4434814453125e-05, "step": 9460, "training_step_time": 0.11976122856140137 }, { "epoch": 1.443634033203125e-05, "model_forward_time": 0.02499842643737793, "step": 9461 }, { "epoch": 1.443634033203125e-05, "step": 9461, "training_step_time": 0.10711407661437988 }, { "epoch": 1.44378662109375e-05, "model_forward_time": 0.025167465209960938, "step": 9462 }, { "epoch": 1.44378662109375e-05, "step": 9462, "training_step_time": 0.10757589340209961 }, { "epoch": 1.443939208984375e-05, "model_forward_time": 0.025046110153198242, "step": 9463 }, { "epoch": 1.443939208984375e-05, "step": 9463, "training_step_time": 0.10794234275817871 }, { "epoch": 1.444091796875e-05, "model_forward_time": 0.025148630142211914, "step": 9464 }, { "epoch": 1.444091796875e-05, "step": 9464, "training_step_time": 0.10971498489379883 }, { "epoch": 1.444244384765625e-05, "model_forward_time": 0.026518583297729492, "step": 9465 }, { "epoch": 1.444244384765625e-05, "step": 9465, "training_step_time": 0.11096501350402832 }, { "epoch": 1.44439697265625e-05, "model_forward_time": 0.025455474853515625, "step": 9466 }, { "epoch": 1.44439697265625e-05, "step": 9466, "training_step_time": 0.11070609092712402 }, { "epoch": 1.444549560546875e-05, "model_forward_time": 0.025563955307006836, "step": 9467 }, { "epoch": 1.444549560546875e-05, "step": 9467, "training_step_time": 0.1109166145324707 }, { "epoch": 1.4447021484375e-05, "model_forward_time": 0.02547168731689453, "step": 9468 }, { "epoch": 1.4447021484375e-05, "step": 9468, "training_step_time": 0.11037540435791016 }, { "epoch": 1.444854736328125e-05, "model_forward_time": 0.024895191192626953, "step": 9469 }, { "epoch": 1.444854736328125e-05, "step": 9469, "training_step_time": 0.1079864501953125 }, { "epoch": 1.44500732421875e-05, "grad_norm": 0.46716660261154175, "learning_rate": 8.191364746194625e-05, "loss": 0.0505, "step": 9470 }, { "epoch": 1.44500732421875e-05, "model_forward_time": 0.025061368942260742, "step": 9470 }, { "epoch": 1.44500732421875e-05, "step": 9470, "training_step_time": 0.10871458053588867 }, { "epoch": 1.445159912109375e-05, "model_forward_time": 0.02486395835876465, "step": 9471 }, { "epoch": 1.445159912109375e-05, "step": 9471, "training_step_time": 0.11480283737182617 }, { "epoch": 1.4453125e-05, "model_forward_time": 0.025530099868774414, "step": 9472 }, { "epoch": 1.4453125e-05, "step": 9472, "training_step_time": 0.11725187301635742 }, { "epoch": 1.445465087890625e-05, "model_forward_time": 0.025233983993530273, "step": 9473 }, { "epoch": 1.445465087890625e-05, "step": 9473, "training_step_time": 0.10801315307617188 }, { "epoch": 1.44561767578125e-05, "model_forward_time": 0.02501535415649414, "step": 9474 }, { "epoch": 1.44561767578125e-05, "step": 9474, "training_step_time": 0.10945701599121094 }, { "epoch": 1.445770263671875e-05, "model_forward_time": 0.025026559829711914, "step": 9475 }, { "epoch": 1.445770263671875e-05, "step": 9475, "training_step_time": 0.10894036293029785 }, { "epoch": 1.4459228515625e-05, "model_forward_time": 0.02521204948425293, "step": 9476 }, { "epoch": 1.4459228515625e-05, "step": 9476, "training_step_time": 0.10709834098815918 }, { "epoch": 1.446075439453125e-05, "model_forward_time": 0.025342702865600586, "step": 9477 }, { "epoch": 1.446075439453125e-05, "step": 9477, "training_step_time": 0.10789132118225098 }, { "epoch": 1.44622802734375e-05, "model_forward_time": 0.02583003044128418, "step": 9478 }, { "epoch": 1.44622802734375e-05, "step": 9478, "training_step_time": 0.10968708992004395 }, { "epoch": 1.446380615234375e-05, "model_forward_time": 0.025195598602294922, "step": 9479 }, { "epoch": 1.446380615234375e-05, "step": 9479, "training_step_time": 0.1731727123260498 }, { "epoch": 1.446533203125e-05, "grad_norm": 0.2792838215827942, "learning_rate": 8.18711994874345e-05, "loss": 0.0281, "step": 9480 }, { "epoch": 1.446533203125e-05, "model_forward_time": 0.024681806564331055, "step": 9480 }, { "epoch": 1.446533203125e-05, "step": 9480, "training_step_time": 0.16242027282714844 }, { "epoch": 1.446685791015625e-05, "model_forward_time": 0.02485823631286621, "step": 9481 }, { "epoch": 1.446685791015625e-05, "step": 9481, "training_step_time": 0.10384654998779297 }, { "epoch": 1.44683837890625e-05, "model_forward_time": 0.025029420852661133, "step": 9482 }, { "epoch": 1.44683837890625e-05, "step": 9482, "training_step_time": 0.10553216934204102 }, { "epoch": 1.446990966796875e-05, "model_forward_time": 0.025967836380004883, "step": 9483 }, { "epoch": 1.446990966796875e-05, "step": 9483, "training_step_time": 0.12078142166137695 }, { "epoch": 1.4471435546875e-05, "model_forward_time": 0.025418996810913086, "step": 9484 }, { "epoch": 1.4471435546875e-05, "step": 9484, "training_step_time": 0.11324691772460938 }, { "epoch": 1.447296142578125e-05, "model_forward_time": 0.025324344635009766, "step": 9485 }, { "epoch": 1.447296142578125e-05, "step": 9485, "training_step_time": 0.10676264762878418 }, { "epoch": 1.44744873046875e-05, "model_forward_time": 0.026190757751464844, "step": 9486 }, { "epoch": 1.44744873046875e-05, "step": 9486, "training_step_time": 0.1081688404083252 }, { "epoch": 1.447601318359375e-05, "model_forward_time": 0.025905132293701172, "step": 9487 }, { "epoch": 1.447601318359375e-05, "step": 9487, "training_step_time": 0.10705804824829102 }, { "epoch": 1.44775390625e-05, "model_forward_time": 0.02496957778930664, "step": 9488 }, { "epoch": 1.44775390625e-05, "step": 9488, "training_step_time": 0.1114494800567627 }, { "epoch": 1.447906494140625e-05, "model_forward_time": 0.025019407272338867, "step": 9489 }, { "epoch": 1.447906494140625e-05, "step": 9489, "training_step_time": 0.10798192024230957 }, { "epoch": 1.44805908203125e-05, "grad_norm": 0.3415721356868744, "learning_rate": 8.182871278641009e-05, "loss": 0.0259, "step": 9490 }, { "epoch": 1.44805908203125e-05, "model_forward_time": 0.02524852752685547, "step": 9490 }, { "epoch": 1.44805908203125e-05, "step": 9490, "training_step_time": 0.10957932472229004 }, { "epoch": 1.448211669921875e-05, "model_forward_time": 0.025458335876464844, "step": 9491 }, { "epoch": 1.448211669921875e-05, "step": 9491, "training_step_time": 0.1059732437133789 }, { "epoch": 1.4483642578125e-05, "model_forward_time": 0.025094985961914062, "step": 9492 }, { "epoch": 1.4483642578125e-05, "step": 9492, "training_step_time": 0.10744810104370117 }, { "epoch": 1.448516845703125e-05, "model_forward_time": 0.025411605834960938, "step": 9493 }, { "epoch": 1.448516845703125e-05, "step": 9493, "training_step_time": 0.1339278221130371 }, { "epoch": 1.44866943359375e-05, "model_forward_time": 0.025409221649169922, "step": 9494 }, { "epoch": 1.44866943359375e-05, "step": 9494, "training_step_time": 0.13221406936645508 }, { "epoch": 1.448822021484375e-05, "model_forward_time": 0.02527904510498047, "step": 9495 }, { "epoch": 1.448822021484375e-05, "step": 9495, "training_step_time": 0.10785627365112305 }, { "epoch": 1.448974609375e-05, "model_forward_time": 0.02569437026977539, "step": 9496 }, { "epoch": 1.448974609375e-05, "step": 9496, "training_step_time": 0.13548731803894043 }, { "epoch": 1.449127197265625e-05, "model_forward_time": 0.026509523391723633, "step": 9497 }, { "epoch": 1.449127197265625e-05, "step": 9497, "training_step_time": 0.17113208770751953 }, { "epoch": 1.44927978515625e-05, "model_forward_time": 0.02646946907043457, "step": 9498 }, { "epoch": 1.44927978515625e-05, "step": 9498, "training_step_time": 0.18021655082702637 }, { "epoch": 1.449432373046875e-05, "model_forward_time": 0.024465560913085938, "step": 9499 }, { "epoch": 1.449432373046875e-05, "step": 9499, "training_step_time": 0.19385623931884766 }, { "epoch": 1.4495849609375e-05, "grad_norm": 0.2710087299346924, "learning_rate": 8.178618741049842e-05, "loss": 0.0226, "step": 9500 }, { "epoch": 1.4495849609375e-05, "model_forward_time": 0.024242877960205078, "step": 9500 }, { "epoch": 1.4495849609375e-05, "step": 9500, "training_step_time": 0.16071033477783203 }, { "epoch": 1.449737548828125e-05, "model_forward_time": 0.024139404296875, "step": 9501 }, { "epoch": 1.449737548828125e-05, "step": 9501, "training_step_time": 0.21726179122924805 }, { "epoch": 1.44989013671875e-05, "model_forward_time": 0.024756431579589844, "step": 9502 }, { "epoch": 1.44989013671875e-05, "step": 9502, "training_step_time": 0.11127328872680664 }, { "epoch": 1.450042724609375e-05, "model_forward_time": 0.02484607696533203, "step": 9503 }, { "epoch": 1.450042724609375e-05, "step": 9503, "training_step_time": 0.10360264778137207 }, { "epoch": 1.4501953125e-05, "model_forward_time": 0.025338411331176758, "step": 9504 }, { "epoch": 1.4501953125e-05, "step": 9504, "training_step_time": 0.10797262191772461 }, { "epoch": 1.450347900390625e-05, "model_forward_time": 0.02516627311706543, "step": 9505 }, { "epoch": 1.450347900390625e-05, "step": 9505, "training_step_time": 0.10799360275268555 }, { "epoch": 1.45050048828125e-05, "model_forward_time": 0.025120019912719727, "step": 9506 }, { "epoch": 1.45050048828125e-05, "step": 9506, "training_step_time": 0.11000633239746094 }, { "epoch": 1.450653076171875e-05, "model_forward_time": 0.024922609329223633, "step": 9507 }, { "epoch": 1.450653076171875e-05, "step": 9507, "training_step_time": 0.10812830924987793 }, { "epoch": 1.4508056640625e-05, "model_forward_time": 0.025143146514892578, "step": 9508 }, { "epoch": 1.4508056640625e-05, "step": 9508, "training_step_time": 0.11268377304077148 }, { "epoch": 1.450958251953125e-05, "model_forward_time": 0.02468419075012207, "step": 9509 }, { "epoch": 1.450958251953125e-05, "step": 9509, "training_step_time": 0.11316180229187012 }, { "epoch": 1.45111083984375e-05, "grad_norm": 0.21017690002918243, "learning_rate": 8.174362341137177e-05, "loss": 0.0256, "step": 9510 }, { "epoch": 1.45111083984375e-05, "model_forward_time": 0.023894786834716797, "step": 9510 }, { "epoch": 1.45111083984375e-05, "step": 9510, "training_step_time": 0.1072230339050293 }, { "epoch": 1.451263427734375e-05, "model_forward_time": 0.023804664611816406, "step": 9511 }, { "epoch": 1.451263427734375e-05, "step": 9511, "training_step_time": 0.11113691329956055 }, { "epoch": 1.451416015625e-05, "model_forward_time": 0.02505660057067871, "step": 9512 }, { "epoch": 1.451416015625e-05, "step": 9512, "training_step_time": 0.10786795616149902 }, { "epoch": 1.451568603515625e-05, "model_forward_time": 0.025088071823120117, "step": 9513 }, { "epoch": 1.451568603515625e-05, "step": 9513, "training_step_time": 0.1077268123626709 }, { "epoch": 1.45172119140625e-05, "model_forward_time": 0.02546215057373047, "step": 9514 }, { "epoch": 1.45172119140625e-05, "step": 9514, "training_step_time": 0.10877370834350586 }, { "epoch": 1.451873779296875e-05, "model_forward_time": 0.02508997917175293, "step": 9515 }, { "epoch": 1.451873779296875e-05, "step": 9515, "training_step_time": 0.15111684799194336 }, { "epoch": 1.4520263671875e-05, "model_forward_time": 0.02387261390686035, "step": 9516 }, { "epoch": 1.4520263671875e-05, "step": 9516, "training_step_time": 0.17554235458374023 }, { "epoch": 1.452178955078125e-05, "model_forward_time": 0.024065732955932617, "step": 9517 }, { "epoch": 1.452178955078125e-05, "step": 9517, "training_step_time": 0.16174054145812988 }, { "epoch": 1.45233154296875e-05, "model_forward_time": 0.024174213409423828, "step": 9518 }, { "epoch": 1.45233154296875e-05, "step": 9518, "training_step_time": 0.14461040496826172 }, { "epoch": 1.452484130859375e-05, "model_forward_time": 0.024128198623657227, "step": 9519 }, { "epoch": 1.452484130859375e-05, "step": 9519, "training_step_time": 0.13492989540100098 }, { "epoch": 1.45263671875e-05, "grad_norm": 0.3300616443157196, "learning_rate": 8.170102084074946e-05, "loss": 0.0289, "step": 9520 }, { "epoch": 1.45263671875e-05, "model_forward_time": 0.02454686164855957, "step": 9520 }, { "epoch": 1.45263671875e-05, "step": 9520, "training_step_time": 0.12767696380615234 }, { "epoch": 1.452789306640625e-05, "model_forward_time": 0.024207353591918945, "step": 9521 }, { "epoch": 1.452789306640625e-05, "step": 9521, "training_step_time": 0.1253831386566162 }, { "epoch": 1.45294189453125e-05, "model_forward_time": 0.024947166442871094, "step": 9522 }, { "epoch": 1.45294189453125e-05, "step": 9522, "training_step_time": 0.10431671142578125 }, { "epoch": 1.453094482421875e-05, "model_forward_time": 0.024843454360961914, "step": 9523 }, { "epoch": 1.453094482421875e-05, "step": 9523, "training_step_time": 0.11184525489807129 }, { "epoch": 1.4532470703125e-05, "model_forward_time": 0.024613618850708008, "step": 9524 }, { "epoch": 1.4532470703125e-05, "step": 9524, "training_step_time": 0.10435128211975098 }, { "epoch": 1.453399658203125e-05, "model_forward_time": 0.0252227783203125, "step": 9525 }, { "epoch": 1.453399658203125e-05, "step": 9525, "training_step_time": 0.1284794807434082 }, { "epoch": 1.45355224609375e-05, "model_forward_time": 0.02527022361755371, "step": 9526 }, { "epoch": 1.45355224609375e-05, "step": 9526, "training_step_time": 0.11169075965881348 }, { "epoch": 1.453704833984375e-05, "model_forward_time": 0.025441408157348633, "step": 9527 }, { "epoch": 1.453704833984375e-05, "step": 9527, "training_step_time": 0.2080228328704834 }, { "epoch": 1.453857421875e-05, "model_forward_time": 0.02463984489440918, "step": 9528 }, { "epoch": 1.453857421875e-05, "step": 9528, "training_step_time": 0.1180112361907959 }, { "epoch": 1.454010009765625e-05, "model_forward_time": 0.02492547035217285, "step": 9529 }, { "epoch": 1.454010009765625e-05, "step": 9529, "training_step_time": 0.10512661933898926 }, { "epoch": 1.45416259765625e-05, "grad_norm": 0.3115748465061188, "learning_rate": 8.165837975039763e-05, "loss": 0.031, "step": 9530 }, { "epoch": 1.45416259765625e-05, "model_forward_time": 0.0252840518951416, "step": 9530 }, { "epoch": 1.45416259765625e-05, "step": 9530, "training_step_time": 0.10770344734191895 }, { "epoch": 1.454315185546875e-05, "model_forward_time": 0.0253448486328125, "step": 9531 }, { "epoch": 1.454315185546875e-05, "step": 9531, "training_step_time": 0.10718703269958496 }, { "epoch": 1.4544677734375e-05, "model_forward_time": 0.025614261627197266, "step": 9532 }, { "epoch": 1.4544677734375e-05, "step": 9532, "training_step_time": 0.11180472373962402 }, { "epoch": 1.454620361328125e-05, "model_forward_time": 0.02585291862487793, "step": 9533 }, { "epoch": 1.454620361328125e-05, "step": 9533, "training_step_time": 0.1879589557647705 }, { "epoch": 1.45477294921875e-05, "model_forward_time": 0.023278236389160156, "step": 9534 }, { "epoch": 1.45477294921875e-05, "step": 9534, "training_step_time": 0.20203685760498047 }, { "epoch": 1.454925537109375e-05, "model_forward_time": 0.02315831184387207, "step": 9535 }, { "epoch": 1.454925537109375e-05, "step": 9535, "training_step_time": 0.20372414588928223 }, { "epoch": 1.455078125e-05, "model_forward_time": 0.023410797119140625, "step": 9536 }, { "epoch": 1.455078125e-05, "step": 9536, "training_step_time": 0.21012592315673828 }, { "epoch": 1.455230712890625e-05, "model_forward_time": 0.02411794662475586, "step": 9537 }, { "epoch": 1.455230712890625e-05, "step": 9537, "training_step_time": 0.20641613006591797 }, { "epoch": 1.45538330078125e-05, "model_forward_time": 0.024140357971191406, "step": 9538 }, { "epoch": 1.45538330078125e-05, "step": 9538, "training_step_time": 0.22237777709960938 }, { "epoch": 1.455535888671875e-05, "model_forward_time": 0.02439117431640625, "step": 9539 }, { "epoch": 1.455535888671875e-05, "step": 9539, "training_step_time": 0.12183237075805664 }, { "epoch": 1.4556884765625e-05, "grad_norm": 0.6548312306404114, "learning_rate": 8.161570019212921e-05, "loss": 0.0393, "step": 9540 }, { "epoch": 1.4556884765625e-05, "model_forward_time": 0.0243532657623291, "step": 9540 }, { "epoch": 1.4556884765625e-05, "step": 9540, "training_step_time": 0.1226193904876709 }, { "epoch": 1.455841064453125e-05, "model_forward_time": 0.02480626106262207, "step": 9541 }, { "epoch": 1.455841064453125e-05, "step": 9541, "training_step_time": 0.14112520217895508 }, { "epoch": 1.45599365234375e-05, "model_forward_time": 0.0285947322845459, "step": 9542 }, { "epoch": 1.45599365234375e-05, "step": 9542, "training_step_time": 0.17905378341674805 }, { "epoch": 1.456146240234375e-05, "model_forward_time": 0.024629831314086914, "step": 9543 }, { "epoch": 1.456146240234375e-05, "step": 9543, "training_step_time": 0.17879033088684082 }, { "epoch": 1.456298828125e-05, "model_forward_time": 0.024370193481445312, "step": 9544 }, { "epoch": 1.456298828125e-05, "step": 9544, "training_step_time": 0.10547280311584473 }, { "epoch": 1.456451416015625e-05, "model_forward_time": 0.024445056915283203, "step": 9545 }, { "epoch": 1.456451416015625e-05, "step": 9545, "training_step_time": 0.10920095443725586 }, { "epoch": 1.45660400390625e-05, "model_forward_time": 0.02520895004272461, "step": 9546 }, { "epoch": 1.45660400390625e-05, "step": 9546, "training_step_time": 0.11049532890319824 }, { "epoch": 1.456756591796875e-05, "model_forward_time": 0.025287866592407227, "step": 9547 }, { "epoch": 1.456756591796875e-05, "step": 9547, "training_step_time": 0.11164546012878418 }, { "epoch": 1.4569091796875e-05, "model_forward_time": 0.02517247200012207, "step": 9548 }, { "epoch": 1.4569091796875e-05, "step": 9548, "training_step_time": 0.11104512214660645 }, { "epoch": 1.457061767578125e-05, "model_forward_time": 0.025051355361938477, "step": 9549 }, { "epoch": 1.457061767578125e-05, "step": 9549, "training_step_time": 0.11538076400756836 }, { "epoch": 1.45721435546875e-05, "grad_norm": 0.331142783164978, "learning_rate": 8.157298221780389e-05, "loss": 0.0299, "step": 9550 }, { "epoch": 1.45721435546875e-05, "model_forward_time": 0.025533676147460938, "step": 9550 }, { "epoch": 1.45721435546875e-05, "step": 9550, "training_step_time": 0.1122429370880127 }, { "epoch": 1.457366943359375e-05, "model_forward_time": 0.025920867919921875, "step": 9551 }, { "epoch": 1.457366943359375e-05, "step": 9551, "training_step_time": 0.11168408393859863 }, { "epoch": 1.45751953125e-05, "model_forward_time": 0.024968862533569336, "step": 9552 }, { "epoch": 1.45751953125e-05, "step": 9552, "training_step_time": 0.11013317108154297 }, { "epoch": 1.457672119140625e-05, "model_forward_time": 0.025202035903930664, "step": 9553 }, { "epoch": 1.457672119140625e-05, "step": 9553, "training_step_time": 0.1139991283416748 }, { "epoch": 1.45782470703125e-05, "model_forward_time": 0.026732444763183594, "step": 9554 }, { "epoch": 1.45782470703125e-05, "step": 9554, "training_step_time": 0.11099481582641602 }, { "epoch": 1.457977294921875e-05, "model_forward_time": 0.02541351318359375, "step": 9555 }, { "epoch": 1.457977294921875e-05, "step": 9555, "training_step_time": 0.10876035690307617 }, { "epoch": 1.4581298828125e-05, "model_forward_time": 0.025623559951782227, "step": 9556 }, { "epoch": 1.4581298828125e-05, "step": 9556, "training_step_time": 0.10907530784606934 }, { "epoch": 1.458282470703125e-05, "model_forward_time": 0.02522587776184082, "step": 9557 }, { "epoch": 1.458282470703125e-05, "step": 9557, "training_step_time": 0.11230945587158203 }, { "epoch": 1.45843505859375e-05, "model_forward_time": 0.024865150451660156, "step": 9558 }, { "epoch": 1.45843505859375e-05, "step": 9558, "training_step_time": 0.11005592346191406 }, { "epoch": 1.458587646484375e-05, "model_forward_time": 0.025716781616210938, "step": 9559 }, { "epoch": 1.458587646484375e-05, "step": 9559, "training_step_time": 0.11000323295593262 }, { "epoch": 1.458740234375e-05, "grad_norm": 0.5295901894569397, "learning_rate": 8.153022587932803e-05, "loss": 0.0347, "step": 9560 }, { "epoch": 1.458740234375e-05, "model_forward_time": 0.02482891082763672, "step": 9560 }, { "epoch": 1.458740234375e-05, "step": 9560, "training_step_time": 0.11299443244934082 }, { "epoch": 1.458892822265625e-05, "model_forward_time": 0.025539636611938477, "step": 9561 }, { "epoch": 1.458892822265625e-05, "step": 9561, "training_step_time": 0.10791587829589844 }, { "epoch": 1.45904541015625e-05, "model_forward_time": 0.02529740333557129, "step": 9562 }, { "epoch": 1.45904541015625e-05, "step": 9562, "training_step_time": 0.1103065013885498 }, { "epoch": 1.459197998046875e-05, "model_forward_time": 0.025145530700683594, "step": 9563 }, { "epoch": 1.459197998046875e-05, "step": 9563, "training_step_time": 0.10874509811401367 }, { "epoch": 1.4593505859375e-05, "model_forward_time": 0.025287628173828125, "step": 9564 }, { "epoch": 1.4593505859375e-05, "step": 9564, "training_step_time": 0.10816621780395508 }, { "epoch": 1.459503173828125e-05, "model_forward_time": 0.025239229202270508, "step": 9565 }, { "epoch": 1.459503173828125e-05, "step": 9565, "training_step_time": 0.10697746276855469 }, { "epoch": 1.45965576171875e-05, "model_forward_time": 0.026006460189819336, "step": 9566 }, { "epoch": 1.45965576171875e-05, "step": 9566, "training_step_time": 0.10965323448181152 }, { "epoch": 1.459808349609375e-05, "model_forward_time": 0.025601625442504883, "step": 9567 }, { "epoch": 1.459808349609375e-05, "step": 9567, "training_step_time": 0.10748767852783203 }, { "epoch": 1.4599609375e-05, "model_forward_time": 0.02563023567199707, "step": 9568 }, { "epoch": 1.4599609375e-05, "step": 9568, "training_step_time": 0.10876321792602539 }, { "epoch": 1.460113525390625e-05, "model_forward_time": 0.02503347396850586, "step": 9569 }, { "epoch": 1.460113525390625e-05, "step": 9569, "training_step_time": 0.11268496513366699 }, { "epoch": 1.46026611328125e-05, "grad_norm": 0.5119488835334778, "learning_rate": 8.148743122865463e-05, "loss": 0.0484, "step": 9570 }, { "epoch": 1.46026611328125e-05, "model_forward_time": 0.02516794204711914, "step": 9570 }, { "epoch": 1.46026611328125e-05, "step": 9570, "training_step_time": 0.10786795616149902 }, { "epoch": 1.460418701171875e-05, "model_forward_time": 0.02553105354309082, "step": 9571 }, { "epoch": 1.460418701171875e-05, "step": 9571, "training_step_time": 0.21245646476745605 }, { "epoch": 1.4605712890625e-05, "model_forward_time": 0.024172067642211914, "step": 9572 }, { "epoch": 1.4605712890625e-05, "step": 9572, "training_step_time": 0.1259140968322754 }, { "epoch": 1.460723876953125e-05, "model_forward_time": 0.02753901481628418, "step": 9573 }, { "epoch": 1.460723876953125e-05, "step": 9573, "training_step_time": 0.11209988594055176 }, { "epoch": 1.46087646484375e-05, "model_forward_time": 0.025592565536499023, "step": 9574 }, { "epoch": 1.46087646484375e-05, "step": 9574, "training_step_time": 0.1690821647644043 }, { "epoch": 1.461029052734375e-05, "model_forward_time": 0.02434682846069336, "step": 9575 }, { "epoch": 1.461029052734375e-05, "step": 9575, "training_step_time": 0.19556570053100586 }, { "epoch": 1.461181640625e-05, "model_forward_time": 0.0247499942779541, "step": 9576 }, { "epoch": 1.461181640625e-05, "step": 9576, "training_step_time": 0.19040489196777344 }, { "epoch": 1.461334228515625e-05, "model_forward_time": 0.02502751350402832, "step": 9577 }, { "epoch": 1.461334228515625e-05, "step": 9577, "training_step_time": 0.18045425415039062 }, { "epoch": 1.46148681640625e-05, "model_forward_time": 0.024462223052978516, "step": 9578 }, { "epoch": 1.46148681640625e-05, "step": 9578, "training_step_time": 0.2007603645324707 }, { "epoch": 1.461639404296875e-05, "model_forward_time": 0.024204254150390625, "step": 9579 }, { "epoch": 1.461639404296875e-05, "step": 9579, "training_step_time": 0.15765070915222168 }, { "epoch": 1.4617919921875e-05, "grad_norm": 0.45523786544799805, "learning_rate": 8.14445983177832e-05, "loss": 0.0373, "step": 9580 }, { "epoch": 1.4617919921875e-05, "model_forward_time": 0.022441625595092773, "step": 9580 }, { "epoch": 1.4617919921875e-05, "step": 9580, "training_step_time": 0.18658185005187988 }, { "epoch": 1.461944580078125e-05, "model_forward_time": 0.02386331558227539, "step": 9581 }, { "epoch": 1.461944580078125e-05, "step": 9581, "training_step_time": 0.1789553165435791 }, { "epoch": 1.46209716796875e-05, "model_forward_time": 0.024242639541625977, "step": 9582 }, { "epoch": 1.46209716796875e-05, "step": 9582, "training_step_time": 0.16912627220153809 }, { "epoch": 1.462249755859375e-05, "model_forward_time": 0.024147748947143555, "step": 9583 }, { "epoch": 1.462249755859375e-05, "step": 9583, "training_step_time": 0.18231987953186035 }, { "epoch": 1.46240234375e-05, "model_forward_time": 0.027846813201904297, "step": 9584 }, { "epoch": 1.46240234375e-05, "step": 9584, "training_step_time": 0.12413930892944336 }, { "epoch": 1.462554931640625e-05, "model_forward_time": 0.024219751358032227, "step": 9585 }, { "epoch": 1.462554931640625e-05, "step": 9585, "training_step_time": 0.20951390266418457 }, { "epoch": 1.46270751953125e-05, "model_forward_time": 0.024255037307739258, "step": 9586 }, { "epoch": 1.46270751953125e-05, "step": 9586, "training_step_time": 0.11900210380554199 }, { "epoch": 1.462860107421875e-05, "model_forward_time": 0.02521204948425293, "step": 9587 }, { "epoch": 1.462860107421875e-05, "step": 9587, "training_step_time": 0.10596251487731934 }, { "epoch": 1.4630126953125e-05, "model_forward_time": 0.026239395141601562, "step": 9588 }, { "epoch": 1.4630126953125e-05, "step": 9588, "training_step_time": 0.10923075675964355 }, { "epoch": 1.463165283203125e-05, "model_forward_time": 0.025093555450439453, "step": 9589 }, { "epoch": 1.463165283203125e-05, "step": 9589, "training_step_time": 0.13231730461120605 }, { "epoch": 1.46331787109375e-05, "grad_norm": 0.46255525946617126, "learning_rate": 8.140172719875979e-05, "loss": 0.0298, "step": 9590 }, { "epoch": 1.46331787109375e-05, "model_forward_time": 0.024947404861450195, "step": 9590 }, { "epoch": 1.46331787109375e-05, "step": 9590, "training_step_time": 0.14930367469787598 }, { "epoch": 1.463470458984375e-05, "model_forward_time": 0.024849414825439453, "step": 9591 }, { "epoch": 1.463470458984375e-05, "step": 9591, "training_step_time": 0.15892410278320312 }, { "epoch": 1.463623046875e-05, "model_forward_time": 0.024472951889038086, "step": 9592 }, { "epoch": 1.463623046875e-05, "step": 9592, "training_step_time": 0.1548452377319336 }, { "epoch": 1.463775634765625e-05, "model_forward_time": 0.02400827407836914, "step": 9593 }, { "epoch": 1.463775634765625e-05, "step": 9593, "training_step_time": 0.13833308219909668 }, { "epoch": 1.46392822265625e-05, "model_forward_time": 0.024338245391845703, "step": 9594 }, { "epoch": 1.46392822265625e-05, "step": 9594, "training_step_time": 0.1295173168182373 }, { "epoch": 1.464080810546875e-05, "model_forward_time": 0.024268150329589844, "step": 9595 }, { "epoch": 1.464080810546875e-05, "step": 9595, "training_step_time": 0.1271040439605713 }, { "epoch": 1.4642333984375e-05, "model_forward_time": 0.024587631225585938, "step": 9596 }, { "epoch": 1.4642333984375e-05, "step": 9596, "training_step_time": 0.12419867515563965 }, { "epoch": 1.464385986328125e-05, "model_forward_time": 0.025130748748779297, "step": 9597 }, { "epoch": 1.464385986328125e-05, "step": 9597, "training_step_time": 0.12128448486328125 }, { "epoch": 1.46453857421875e-05, "model_forward_time": 0.02537679672241211, "step": 9598 }, { "epoch": 1.46453857421875e-05, "step": 9598, "training_step_time": 0.1133875846862793 }, { "epoch": 1.464691162109375e-05, "model_forward_time": 0.02492356300354004, "step": 9599 }, { "epoch": 1.464691162109375e-05, "step": 9599, "training_step_time": 0.11612200736999512 }, { "epoch": 1.46484375e-05, "grad_norm": 0.3734428286552429, "learning_rate": 8.135881792367686e-05, "loss": 0.0259, "step": 9600 }, { "epoch": 1.46484375e-05, "model_forward_time": 0.025065898895263672, "step": 9600 }, { "epoch": 1.46484375e-05, "step": 9600, "training_step_time": 0.11228609085083008 }, { "epoch": 1.464996337890625e-05, "model_forward_time": 0.025210857391357422, "step": 9601 }, { "epoch": 1.464996337890625e-05, "step": 9601, "training_step_time": 0.10897493362426758 }, { "epoch": 1.46514892578125e-05, "model_forward_time": 0.02472853660583496, "step": 9602 }, { "epoch": 1.46514892578125e-05, "step": 9602, "training_step_time": 0.11083149909973145 }, { "epoch": 1.465301513671875e-05, "model_forward_time": 0.025244951248168945, "step": 9603 }, { "epoch": 1.465301513671875e-05, "step": 9603, "training_step_time": 0.10868978500366211 }, { "epoch": 1.4654541015625e-05, "model_forward_time": 0.024953842163085938, "step": 9604 }, { "epoch": 1.4654541015625e-05, "step": 9604, "training_step_time": 0.10859179496765137 }, { "epoch": 1.465606689453125e-05, "model_forward_time": 0.0256350040435791, "step": 9605 }, { "epoch": 1.465606689453125e-05, "step": 9605, "training_step_time": 0.11030006408691406 }, { "epoch": 1.46575927734375e-05, "model_forward_time": 0.027234554290771484, "step": 9606 }, { "epoch": 1.46575927734375e-05, "step": 9606, "training_step_time": 0.1144266128540039 }, { "epoch": 1.465911865234375e-05, "model_forward_time": 0.025156259536743164, "step": 9607 }, { "epoch": 1.465911865234375e-05, "step": 9607, "training_step_time": 0.1060640811920166 }, { "epoch": 1.466064453125e-05, "model_forward_time": 0.024080276489257812, "step": 9608 }, { "epoch": 1.466064453125e-05, "step": 9608, "training_step_time": 0.14950919151306152 }, { "epoch": 1.466217041015625e-05, "model_forward_time": 0.025336742401123047, "step": 9609 }, { "epoch": 1.466217041015625e-05, "step": 9609, "training_step_time": 0.11146688461303711 }, { "epoch": 1.46636962890625e-05, "grad_norm": 0.3587009906768799, "learning_rate": 8.13158705446732e-05, "loss": 0.0266, "step": 9610 }, { "epoch": 1.46636962890625e-05, "model_forward_time": 0.0251314640045166, "step": 9610 }, { "epoch": 1.46636962890625e-05, "step": 9610, "training_step_time": 0.20196771621704102 }, { "epoch": 1.466522216796875e-05, "model_forward_time": 0.02415013313293457, "step": 9611 }, { "epoch": 1.466522216796875e-05, "step": 9611, "training_step_time": 0.13425207138061523 }, { "epoch": 1.4666748046875e-05, "model_forward_time": 0.02660679817199707, "step": 9612 }, { "epoch": 1.4666748046875e-05, "step": 9612, "training_step_time": 0.11210155487060547 }, { "epoch": 1.466827392578125e-05, "model_forward_time": 0.025350570678710938, "step": 9613 }, { "epoch": 1.466827392578125e-05, "step": 9613, "training_step_time": 0.10577201843261719 }, { "epoch": 1.46697998046875e-05, "model_forward_time": 0.025360822677612305, "step": 9614 }, { "epoch": 1.46697998046875e-05, "step": 9614, "training_step_time": 0.10686516761779785 }, { "epoch": 1.467132568359375e-05, "model_forward_time": 0.025519132614135742, "step": 9615 }, { "epoch": 1.467132568359375e-05, "step": 9615, "training_step_time": 0.10672879219055176 }, { "epoch": 1.46728515625e-05, "model_forward_time": 0.025675058364868164, "step": 9616 }, { "epoch": 1.46728515625e-05, "step": 9616, "training_step_time": 0.10918760299682617 }, { "epoch": 1.467437744140625e-05, "model_forward_time": 0.02524590492248535, "step": 9617 }, { "epoch": 1.467437744140625e-05, "step": 9617, "training_step_time": 0.10837078094482422 }, { "epoch": 1.46759033203125e-05, "model_forward_time": 0.025150060653686523, "step": 9618 }, { "epoch": 1.46759033203125e-05, "step": 9618, "training_step_time": 0.10574865341186523 }, { "epoch": 1.467742919921875e-05, "model_forward_time": 0.02521800994873047, "step": 9619 }, { "epoch": 1.467742919921875e-05, "step": 9619, "training_step_time": 0.10458683967590332 }, { "epoch": 1.4678955078125e-05, "grad_norm": 0.283431738615036, "learning_rate": 8.127288511393392e-05, "loss": 0.0295, "step": 9620 }, { "epoch": 1.4678955078125e-05, "model_forward_time": 0.02683424949645996, "step": 9620 }, { "epoch": 1.4678955078125e-05, "step": 9620, "training_step_time": 0.13709187507629395 }, { "epoch": 1.468048095703125e-05, "model_forward_time": 0.025423526763916016, "step": 9621 }, { "epoch": 1.468048095703125e-05, "step": 9621, "training_step_time": 0.13492822647094727 }, { "epoch": 1.46820068359375e-05, "model_forward_time": 0.024751901626586914, "step": 9622 }, { "epoch": 1.46820068359375e-05, "step": 9622, "training_step_time": 0.11292171478271484 }, { "epoch": 1.468353271484375e-05, "model_forward_time": 0.025397300720214844, "step": 9623 }, { "epoch": 1.468353271484375e-05, "step": 9623, "training_step_time": 0.10853195190429688 }, { "epoch": 1.468505859375e-05, "model_forward_time": 0.02549123764038086, "step": 9624 }, { "epoch": 1.468505859375e-05, "step": 9624, "training_step_time": 0.11530613899230957 }, { "epoch": 1.468658447265625e-05, "model_forward_time": 0.02539992332458496, "step": 9625 }, { "epoch": 1.468658447265625e-05, "step": 9625, "training_step_time": 0.22533321380615234 }, { "epoch": 1.46881103515625e-05, "model_forward_time": 0.024422407150268555, "step": 9626 }, { "epoch": 1.46881103515625e-05, "step": 9626, "training_step_time": 0.13031411170959473 }, { "epoch": 1.468963623046875e-05, "model_forward_time": 0.024100303649902344, "step": 9627 }, { "epoch": 1.468963623046875e-05, "step": 9627, "training_step_time": 0.13034868240356445 }, { "epoch": 1.4691162109375e-05, "model_forward_time": 0.024877071380615234, "step": 9628 }, { "epoch": 1.4691162109375e-05, "step": 9628, "training_step_time": 0.14666152000427246 }, { "epoch": 1.469268798828125e-05, "model_forward_time": 0.024692773818969727, "step": 9629 }, { "epoch": 1.469268798828125e-05, "step": 9629, "training_step_time": 0.21528410911560059 }, { "epoch": 1.46942138671875e-05, "grad_norm": 0.3566807210445404, "learning_rate": 8.12298616836904e-05, "loss": 0.0272, "step": 9630 }, { "epoch": 1.46942138671875e-05, "model_forward_time": 0.024663448333740234, "step": 9630 }, { "epoch": 1.46942138671875e-05, "step": 9630, "training_step_time": 0.10884881019592285 }, { "epoch": 1.469573974609375e-05, "model_forward_time": 0.024942398071289062, "step": 9631 }, { "epoch": 1.469573974609375e-05, "step": 9631, "training_step_time": 0.10837674140930176 }, { "epoch": 1.4697265625e-05, "model_forward_time": 0.025676727294921875, "step": 9632 }, { "epoch": 1.4697265625e-05, "step": 9632, "training_step_time": 0.10961794853210449 }, { "epoch": 1.469879150390625e-05, "model_forward_time": 0.025337696075439453, "step": 9633 }, { "epoch": 1.469879150390625e-05, "step": 9633, "training_step_time": 0.11236691474914551 }, { "epoch": 1.47003173828125e-05, "model_forward_time": 0.02465367317199707, "step": 9634 }, { "epoch": 1.47003173828125e-05, "step": 9634, "training_step_time": 0.11215806007385254 }, { "epoch": 1.470184326171875e-05, "model_forward_time": 0.025501728057861328, "step": 9635 }, { "epoch": 1.470184326171875e-05, "step": 9635, "training_step_time": 0.1109151840209961 }, { "epoch": 1.4703369140625e-05, "model_forward_time": 0.025271177291870117, "step": 9636 }, { "epoch": 1.4703369140625e-05, "step": 9636, "training_step_time": 0.10974717140197754 }, { "epoch": 1.470489501953125e-05, "model_forward_time": 0.025774240493774414, "step": 9637 }, { "epoch": 1.470489501953125e-05, "step": 9637, "training_step_time": 0.10982036590576172 }, { "epoch": 1.47064208984375e-05, "model_forward_time": 0.0253756046295166, "step": 9638 }, { "epoch": 1.47064208984375e-05, "step": 9638, "training_step_time": 0.11546850204467773 }, { "epoch": 1.470794677734375e-05, "model_forward_time": 0.02523970603942871, "step": 9639 }, { "epoch": 1.470794677734375e-05, "step": 9639, "training_step_time": 0.10881662368774414 }, { "epoch": 1.470947265625e-05, "grad_norm": 0.39975979924201965, "learning_rate": 8.118680030622014e-05, "loss": 0.0246, "step": 9640 }, { "epoch": 1.470947265625e-05, "model_forward_time": 0.024846792221069336, "step": 9640 }, { "epoch": 1.470947265625e-05, "step": 9640, "training_step_time": 0.11009716987609863 }, { "epoch": 1.471099853515625e-05, "model_forward_time": 0.02546072006225586, "step": 9641 }, { "epoch": 1.471099853515625e-05, "step": 9641, "training_step_time": 0.10982537269592285 }, { "epoch": 1.47125244140625e-05, "model_forward_time": 0.025167226791381836, "step": 9642 }, { "epoch": 1.47125244140625e-05, "step": 9642, "training_step_time": 0.11650824546813965 }, { "epoch": 1.471405029296875e-05, "model_forward_time": 0.02553534507751465, "step": 9643 }, { "epoch": 1.471405029296875e-05, "step": 9643, "training_step_time": 0.11020517349243164 }, { "epoch": 1.4715576171875e-05, "model_forward_time": 0.025464296340942383, "step": 9644 }, { "epoch": 1.4715576171875e-05, "step": 9644, "training_step_time": 0.10854196548461914 }, { "epoch": 1.471710205078125e-05, "model_forward_time": 0.02610325813293457, "step": 9645 }, { "epoch": 1.471710205078125e-05, "step": 9645, "training_step_time": 0.11237668991088867 }, { "epoch": 1.47186279296875e-05, "model_forward_time": 0.02532792091369629, "step": 9646 }, { "epoch": 1.47186279296875e-05, "step": 9646, "training_step_time": 0.11131787300109863 }, { "epoch": 1.472015380859375e-05, "model_forward_time": 0.02550053596496582, "step": 9647 }, { "epoch": 1.472015380859375e-05, "step": 9647, "training_step_time": 0.11355805397033691 }, { "epoch": 1.47216796875e-05, "model_forward_time": 0.025294065475463867, "step": 9648 }, { "epoch": 1.47216796875e-05, "step": 9648, "training_step_time": 0.11553692817687988 }, { "epoch": 1.472320556640625e-05, "model_forward_time": 0.025770187377929688, "step": 9649 }, { "epoch": 1.472320556640625e-05, "step": 9649, "training_step_time": 0.11240124702453613 }, { "epoch": 1.47247314453125e-05, "grad_norm": 0.4785434305667877, "learning_rate": 8.114370103384681e-05, "loss": 0.0316, "step": 9650 }, { "epoch": 1.47247314453125e-05, "model_forward_time": 0.025299787521362305, "step": 9650 }, { "epoch": 1.47247314453125e-05, "step": 9650, "training_step_time": 0.10789299011230469 }, { "epoch": 1.472625732421875e-05, "model_forward_time": 0.025302886962890625, "step": 9651 }, { "epoch": 1.472625732421875e-05, "step": 9651, "training_step_time": 0.11245298385620117 }, { "epoch": 1.4727783203125e-05, "model_forward_time": 0.024153709411621094, "step": 9652 }, { "epoch": 1.4727783203125e-05, "step": 9652, "training_step_time": 0.16206693649291992 }, { "epoch": 1.472930908203125e-05, "model_forward_time": 0.025659799575805664, "step": 9653 }, { "epoch": 1.472930908203125e-05, "step": 9653, "training_step_time": 0.11801004409790039 }, { "epoch": 1.47308349609375e-05, "model_forward_time": 0.02515411376953125, "step": 9654 }, { "epoch": 1.47308349609375e-05, "step": 9654, "training_step_time": 0.17615747451782227 }, { "epoch": 1.473236083984375e-05, "model_forward_time": 0.02446889877319336, "step": 9655 }, { "epoch": 1.473236083984375e-05, "step": 9655, "training_step_time": 0.20401382446289062 }, { "epoch": 1.473388671875e-05, "model_forward_time": 0.025043249130249023, "step": 9656 }, { "epoch": 1.473388671875e-05, "step": 9656, "training_step_time": 0.11120223999023438 }, { "epoch": 1.473541259765625e-05, "model_forward_time": 0.024876117706298828, "step": 9657 }, { "epoch": 1.473541259765625e-05, "step": 9657, "training_step_time": 0.11959958076477051 }, { "epoch": 1.47369384765625e-05, "model_forward_time": 0.02533698081970215, "step": 9658 }, { "epoch": 1.47369384765625e-05, "step": 9658, "training_step_time": 0.11945939064025879 }, { "epoch": 1.473846435546875e-05, "model_forward_time": 0.025476932525634766, "step": 9659 }, { "epoch": 1.473846435546875e-05, "step": 9659, "training_step_time": 0.12511682510375977 }, { "epoch": 1.4739990234375e-05, "grad_norm": 0.4794352948665619, "learning_rate": 8.110056391894005e-05, "loss": 0.0303, "step": 9660 }, { "epoch": 1.4739990234375e-05, "model_forward_time": 0.025388717651367188, "step": 9660 }, { "epoch": 1.4739990234375e-05, "step": 9660, "training_step_time": 0.10523366928100586 }, { "epoch": 1.474151611328125e-05, "model_forward_time": 0.025391101837158203, "step": 9661 }, { "epoch": 1.474151611328125e-05, "step": 9661, "training_step_time": 0.10731005668640137 }, { "epoch": 1.47430419921875e-05, "model_forward_time": 0.025214195251464844, "step": 9662 }, { "epoch": 1.47430419921875e-05, "step": 9662, "training_step_time": 0.10710453987121582 }, { "epoch": 1.474456787109375e-05, "model_forward_time": 0.02506256103515625, "step": 9663 }, { "epoch": 1.474456787109375e-05, "step": 9663, "training_step_time": 0.10680460929870605 }, { "epoch": 1.474609375e-05, "model_forward_time": 0.026640892028808594, "step": 9664 }, { "epoch": 1.474609375e-05, "step": 9664, "training_step_time": 0.11574983596801758 }, { "epoch": 1.474761962890625e-05, "model_forward_time": 0.02542591094970703, "step": 9665 }, { "epoch": 1.474761962890625e-05, "step": 9665, "training_step_time": 0.11315250396728516 }, { "epoch": 1.47491455078125e-05, "model_forward_time": 0.02511906623840332, "step": 9666 }, { "epoch": 1.47491455078125e-05, "step": 9666, "training_step_time": 0.1337742805480957 }, { "epoch": 1.475067138671875e-05, "model_forward_time": 0.0276944637298584, "step": 9667 }, { "epoch": 1.475067138671875e-05, "step": 9667, "training_step_time": 0.11696386337280273 }, { "epoch": 1.4752197265625e-05, "model_forward_time": 0.02507948875427246, "step": 9668 }, { "epoch": 1.4752197265625e-05, "step": 9668, "training_step_time": 0.10918450355529785 }, { "epoch": 1.475372314453125e-05, "model_forward_time": 0.024968624114990234, "step": 9669 }, { "epoch": 1.475372314453125e-05, "step": 9669, "training_step_time": 0.11141586303710938 }, { "epoch": 1.47552490234375e-05, "grad_norm": 0.5360229015350342, "learning_rate": 8.105738901391552e-05, "loss": 0.0403, "step": 9670 }, { "epoch": 1.47552490234375e-05, "model_forward_time": 0.025995254516601562, "step": 9670 }, { "epoch": 1.47552490234375e-05, "step": 9670, "training_step_time": 0.18514609336853027 }, { "epoch": 1.475677490234375e-05, "model_forward_time": 0.024682998657226562, "step": 9671 }, { "epoch": 1.475677490234375e-05, "step": 9671, "training_step_time": 0.18868637084960938 }, { "epoch": 1.475830078125e-05, "model_forward_time": 0.024445533752441406, "step": 9672 }, { "epoch": 1.475830078125e-05, "step": 9672, "training_step_time": 0.21336054801940918 }, { "epoch": 1.475982666015625e-05, "model_forward_time": 0.025230884552001953, "step": 9673 }, { "epoch": 1.475982666015625e-05, "step": 9673, "training_step_time": 0.2315990924835205 }, { "epoch": 1.47613525390625e-05, "model_forward_time": 0.024277687072753906, "step": 9674 }, { "epoch": 1.47613525390625e-05, "step": 9674, "training_step_time": 0.22087931632995605 }, { "epoch": 1.476287841796875e-05, "model_forward_time": 0.0244596004486084, "step": 9675 }, { "epoch": 1.476287841796875e-05, "step": 9675, "training_step_time": 0.16072559356689453 }, { "epoch": 1.4764404296875e-05, "model_forward_time": 0.024389266967773438, "step": 9676 }, { "epoch": 1.4764404296875e-05, "step": 9676, "training_step_time": 0.13886690139770508 }, { "epoch": 1.476593017578125e-05, "model_forward_time": 0.024599552154541016, "step": 9677 }, { "epoch": 1.476593017578125e-05, "step": 9677, "training_step_time": 0.13137102127075195 }, { "epoch": 1.47674560546875e-05, "model_forward_time": 0.024481534957885742, "step": 9678 }, { "epoch": 1.47674560546875e-05, "step": 9678, "training_step_time": 0.12673735618591309 }, { "epoch": 1.476898193359375e-05, "model_forward_time": 0.024860143661499023, "step": 9679 }, { "epoch": 1.476898193359375e-05, "step": 9679, "training_step_time": 0.1255016326904297 }, { "epoch": 1.47705078125e-05, "grad_norm": 0.46959853172302246, "learning_rate": 8.101417637123484e-05, "loss": 0.0251, "step": 9680 }, { "epoch": 1.47705078125e-05, "model_forward_time": 0.02493429183959961, "step": 9680 }, { "epoch": 1.47705078125e-05, "step": 9680, "training_step_time": 0.1185448169708252 }, { "epoch": 1.477203369140625e-05, "model_forward_time": 0.025189638137817383, "step": 9681 }, { "epoch": 1.477203369140625e-05, "step": 9681, "training_step_time": 0.11478567123413086 }, { "epoch": 1.47735595703125e-05, "model_forward_time": 0.024971485137939453, "step": 9682 }, { "epoch": 1.47735595703125e-05, "step": 9682, "training_step_time": 0.11745119094848633 }, { "epoch": 1.477508544921875e-05, "model_forward_time": 0.025211811065673828, "step": 9683 }, { "epoch": 1.477508544921875e-05, "step": 9683, "training_step_time": 0.11037755012512207 }, { "epoch": 1.4776611328125e-05, "model_forward_time": 0.024907350540161133, "step": 9684 }, { "epoch": 1.4776611328125e-05, "step": 9684, "training_step_time": 0.1110231876373291 }, { "epoch": 1.477813720703125e-05, "model_forward_time": 0.02500605583190918, "step": 9685 }, { "epoch": 1.477813720703125e-05, "step": 9685, "training_step_time": 0.11049795150756836 }, { "epoch": 1.47796630859375e-05, "model_forward_time": 0.025127649307250977, "step": 9686 }, { "epoch": 1.47796630859375e-05, "step": 9686, "training_step_time": 0.11315226554870605 }, { "epoch": 1.478118896484375e-05, "model_forward_time": 0.025789260864257812, "step": 9687 }, { "epoch": 1.478118896484375e-05, "step": 9687, "training_step_time": 0.11287832260131836 }, { "epoch": 1.478271484375e-05, "model_forward_time": 0.023795127868652344, "step": 9688 }, { "epoch": 1.478271484375e-05, "step": 9688, "training_step_time": 0.11110544204711914 }, { "epoch": 1.478424072265625e-05, "model_forward_time": 0.025890588760375977, "step": 9689 }, { "epoch": 1.478424072265625e-05, "step": 9689, "training_step_time": 0.11648321151733398 }, { "epoch": 1.47857666015625e-05, "grad_norm": 0.3298199772834778, "learning_rate": 8.097092604340542e-05, "loss": 0.0286, "step": 9690 }, { "epoch": 1.47857666015625e-05, "model_forward_time": 0.025205612182617188, "step": 9690 }, { "epoch": 1.47857666015625e-05, "step": 9690, "training_step_time": 0.10801434516906738 }, { "epoch": 1.478729248046875e-05, "model_forward_time": 0.02547597885131836, "step": 9691 }, { "epoch": 1.478729248046875e-05, "step": 9691, "training_step_time": 0.11466455459594727 }, { "epoch": 1.4788818359375e-05, "model_forward_time": 0.02488875389099121, "step": 9692 }, { "epoch": 1.4788818359375e-05, "step": 9692, "training_step_time": 0.10933732986450195 }, { "epoch": 1.479034423828125e-05, "model_forward_time": 0.02526116371154785, "step": 9693 }, { "epoch": 1.479034423828125e-05, "step": 9693, "training_step_time": 0.11175751686096191 }, { "epoch": 1.47918701171875e-05, "model_forward_time": 0.025300025939941406, "step": 9694 }, { "epoch": 1.47918701171875e-05, "step": 9694, "training_step_time": 0.11196041107177734 }, { "epoch": 1.479339599609375e-05, "model_forward_time": 0.02659440040588379, "step": 9695 }, { "epoch": 1.479339599609375e-05, "step": 9695, "training_step_time": 0.10761713981628418 }, { "epoch": 1.4794921875e-05, "model_forward_time": 0.025219440460205078, "step": 9696 }, { "epoch": 1.4794921875e-05, "step": 9696, "training_step_time": 0.10684776306152344 }, { "epoch": 1.479644775390625e-05, "model_forward_time": 0.025108814239501953, "step": 9697 }, { "epoch": 1.479644775390625e-05, "step": 9697, "training_step_time": 0.10663199424743652 }, { "epoch": 1.47979736328125e-05, "model_forward_time": 0.024571895599365234, "step": 9698 }, { "epoch": 1.47979736328125e-05, "step": 9698, "training_step_time": 0.11971378326416016 }, { "epoch": 1.479949951171875e-05, "model_forward_time": 0.024518489837646484, "step": 9699 }, { "epoch": 1.479949951171875e-05, "step": 9699, "training_step_time": 0.12796521186828613 }, { "epoch": 1.4801025390625e-05, "grad_norm": 0.41840147972106934, "learning_rate": 8.092763808298048e-05, "loss": 0.0246, "step": 9700 }, { "epoch": 1.4801025390625e-05, "model_forward_time": 0.027907371520996094, "step": 9700 }, { "epoch": 1.4801025390625e-05, "step": 9700, "training_step_time": 0.11349678039550781 }, { "epoch": 1.480255126953125e-05, "model_forward_time": 0.02542710304260254, "step": 9701 }, { "epoch": 1.480255126953125e-05, "step": 9701, "training_step_time": 0.10866808891296387 }, { "epoch": 1.48040771484375e-05, "model_forward_time": 0.0252993106842041, "step": 9702 }, { "epoch": 1.48040771484375e-05, "step": 9702, "training_step_time": 0.22054100036621094 }, { "epoch": 1.480560302734375e-05, "model_forward_time": 0.02471184730529785, "step": 9703 }, { "epoch": 1.480560302734375e-05, "step": 9703, "training_step_time": 0.11343932151794434 }, { "epoch": 1.480712890625e-05, "model_forward_time": 0.024181127548217773, "step": 9704 }, { "epoch": 1.480712890625e-05, "step": 9704, "training_step_time": 0.10850787162780762 }, { "epoch": 1.480865478515625e-05, "model_forward_time": 0.026212453842163086, "step": 9705 }, { "epoch": 1.480865478515625e-05, "step": 9705, "training_step_time": 0.11151647567749023 }, { "epoch": 1.48101806640625e-05, "model_forward_time": 0.02525639533996582, "step": 9706 }, { "epoch": 1.48101806640625e-05, "step": 9706, "training_step_time": 0.10954093933105469 }, { "epoch": 1.481170654296875e-05, "model_forward_time": 0.025243282318115234, "step": 9707 }, { "epoch": 1.481170654296875e-05, "step": 9707, "training_step_time": 0.11170077323913574 }, { "epoch": 1.4813232421875e-05, "model_forward_time": 0.025267601013183594, "step": 9708 }, { "epoch": 1.4813232421875e-05, "step": 9708, "training_step_time": 0.15646743774414062 }, { "epoch": 1.481475830078125e-05, "model_forward_time": 0.0247647762298584, "step": 9709 }, { "epoch": 1.481475830078125e-05, "step": 9709, "training_step_time": 0.13778305053710938 }, { "epoch": 1.48162841796875e-05, "grad_norm": 0.37349286675453186, "learning_rate": 8.088431254255899e-05, "loss": 0.0246, "step": 9710 }, { "epoch": 1.48162841796875e-05, "model_forward_time": 0.024553775787353516, "step": 9710 }, { "epoch": 1.48162841796875e-05, "step": 9710, "training_step_time": 0.10950875282287598 }, { "epoch": 1.481781005859375e-05, "model_forward_time": 0.025505542755126953, "step": 9711 }, { "epoch": 1.481781005859375e-05, "step": 9711, "training_step_time": 0.11762428283691406 }, { "epoch": 1.48193359375e-05, "model_forward_time": 0.02509307861328125, "step": 9712 }, { "epoch": 1.48193359375e-05, "step": 9712, "training_step_time": 0.11009478569030762 }, { "epoch": 1.482086181640625e-05, "model_forward_time": 0.02518153190612793, "step": 9713 }, { "epoch": 1.482086181640625e-05, "step": 9713, "training_step_time": 0.11281967163085938 }, { "epoch": 1.48223876953125e-05, "model_forward_time": 0.025235891342163086, "step": 9714 }, { "epoch": 1.48223876953125e-05, "step": 9714, "training_step_time": 0.19005632400512695 }, { "epoch": 1.482391357421875e-05, "model_forward_time": 0.02453756332397461, "step": 9715 }, { "epoch": 1.482391357421875e-05, "step": 9715, "training_step_time": 0.14233946800231934 }, { "epoch": 1.4825439453125e-05, "model_forward_time": 0.02457141876220703, "step": 9716 }, { "epoch": 1.4825439453125e-05, "step": 9716, "training_step_time": 0.1924452781677246 }, { "epoch": 1.482696533203125e-05, "model_forward_time": 0.024697065353393555, "step": 9717 }, { "epoch": 1.482696533203125e-05, "step": 9717, "training_step_time": 0.19617819786071777 }, { "epoch": 1.48284912109375e-05, "model_forward_time": 0.0245821475982666, "step": 9718 }, { "epoch": 1.48284912109375e-05, "step": 9718, "training_step_time": 0.1259009838104248 }, { "epoch": 1.483001708984375e-05, "model_forward_time": 0.024280786514282227, "step": 9719 }, { "epoch": 1.483001708984375e-05, "step": 9719, "training_step_time": 0.10862994194030762 }, { "epoch": 1.483154296875e-05, "grad_norm": 0.4185222089290619, "learning_rate": 8.084094947478556e-05, "loss": 0.0314, "step": 9720 }, { "epoch": 1.483154296875e-05, "model_forward_time": 0.025447368621826172, "step": 9720 }, { "epoch": 1.483154296875e-05, "step": 9720, "training_step_time": 0.10726070404052734 }, { "epoch": 1.483306884765625e-05, "model_forward_time": 0.02860116958618164, "step": 9721 }, { "epoch": 1.483306884765625e-05, "step": 9721, "training_step_time": 0.11400175094604492 }, { "epoch": 1.48345947265625e-05, "model_forward_time": 0.0255279541015625, "step": 9722 }, { "epoch": 1.48345947265625e-05, "step": 9722, "training_step_time": 0.10703349113464355 }, { "epoch": 1.483612060546875e-05, "model_forward_time": 0.02534770965576172, "step": 9723 }, { "epoch": 1.483612060546875e-05, "step": 9723, "training_step_time": 0.10702657699584961 }, { "epoch": 1.4837646484375e-05, "model_forward_time": 0.025156497955322266, "step": 9724 }, { "epoch": 1.4837646484375e-05, "step": 9724, "training_step_time": 0.10672497749328613 }, { "epoch": 1.483917236328125e-05, "model_forward_time": 0.02500605583190918, "step": 9725 }, { "epoch": 1.483917236328125e-05, "step": 9725, "training_step_time": 0.10639238357543945 }, { "epoch": 1.48406982421875e-05, "model_forward_time": 0.02504730224609375, "step": 9726 }, { "epoch": 1.48406982421875e-05, "step": 9726, "training_step_time": 0.10973453521728516 }, { "epoch": 1.484222412109375e-05, "model_forward_time": 0.02534317970275879, "step": 9727 }, { "epoch": 1.484222412109375e-05, "step": 9727, "training_step_time": 0.10865592956542969 }, { "epoch": 1.484375e-05, "model_forward_time": 0.02507638931274414, "step": 9728 }, { "epoch": 1.484375e-05, "step": 9728, "training_step_time": 0.11304926872253418 }, { "epoch": 1.484527587890625e-05, "model_forward_time": 0.02521800994873047, "step": 9729 }, { "epoch": 1.484527587890625e-05, "step": 9729, "training_step_time": 0.11442923545837402 }, { "epoch": 1.48468017578125e-05, "grad_norm": 0.499977171421051, "learning_rate": 8.07975489323504e-05, "loss": 0.0416, "step": 9730 }, { "epoch": 1.48468017578125e-05, "model_forward_time": 0.024951934814453125, "step": 9730 }, { "epoch": 1.48468017578125e-05, "step": 9730, "training_step_time": 0.11084127426147461 }, { "epoch": 1.484832763671875e-05, "model_forward_time": 0.02566838264465332, "step": 9731 }, { "epoch": 1.484832763671875e-05, "step": 9731, "training_step_time": 0.11111211776733398 }, { "epoch": 1.4849853515625e-05, "model_forward_time": 0.02501225471496582, "step": 9732 }, { "epoch": 1.4849853515625e-05, "step": 9732, "training_step_time": 0.11335420608520508 }, { "epoch": 1.485137939453125e-05, "model_forward_time": 0.02534961700439453, "step": 9733 }, { "epoch": 1.485137939453125e-05, "step": 9733, "training_step_time": 0.10802817344665527 }, { "epoch": 1.48529052734375e-05, "model_forward_time": 0.024985313415527344, "step": 9734 }, { "epoch": 1.48529052734375e-05, "step": 9734, "training_step_time": 0.10816645622253418 }, { "epoch": 1.485443115234375e-05, "model_forward_time": 0.02543950080871582, "step": 9735 }, { "epoch": 1.485443115234375e-05, "step": 9735, "training_step_time": 0.11236882209777832 }, { "epoch": 1.485595703125e-05, "model_forward_time": 0.025061368942260742, "step": 9736 }, { "epoch": 1.485595703125e-05, "step": 9736, "training_step_time": 0.10949158668518066 }, { "epoch": 1.485748291015625e-05, "model_forward_time": 0.025288820266723633, "step": 9737 }, { "epoch": 1.485748291015625e-05, "step": 9737, "training_step_time": 0.1098475456237793 }, { "epoch": 1.48590087890625e-05, "model_forward_time": 0.02564835548400879, "step": 9738 }, { "epoch": 1.48590087890625e-05, "step": 9738, "training_step_time": 0.11377549171447754 }, { "epoch": 1.486053466796875e-05, "model_forward_time": 0.02519392967224121, "step": 9739 }, { "epoch": 1.486053466796875e-05, "step": 9739, "training_step_time": 0.10792064666748047 }, { "epoch": 1.4862060546875e-05, "grad_norm": 0.9022095203399658, "learning_rate": 8.075411096798928e-05, "loss": 0.0329, "step": 9740 }, { "epoch": 1.4862060546875e-05, "model_forward_time": 0.024444103240966797, "step": 9740 }, { "epoch": 1.4862060546875e-05, "step": 9740, "training_step_time": 0.10895538330078125 }, { "epoch": 1.486358642578125e-05, "model_forward_time": 0.025072097778320312, "step": 9741 }, { "epoch": 1.486358642578125e-05, "step": 9741, "training_step_time": 0.1060342788696289 }, { "epoch": 1.48651123046875e-05, "model_forward_time": 0.02538323402404785, "step": 9742 }, { "epoch": 1.48651123046875e-05, "step": 9742, "training_step_time": 0.10661435127258301 }, { "epoch": 1.486663818359375e-05, "model_forward_time": 0.025185346603393555, "step": 9743 }, { "epoch": 1.486663818359375e-05, "step": 9743, "training_step_time": 0.10816645622253418 }, { "epoch": 1.48681640625e-05, "model_forward_time": 0.025403261184692383, "step": 9744 }, { "epoch": 1.48681640625e-05, "step": 9744, "training_step_time": 0.10636615753173828 }, { "epoch": 1.486968994140625e-05, "model_forward_time": 0.025394678115844727, "step": 9745 }, { "epoch": 1.486968994140625e-05, "step": 9745, "training_step_time": 0.1062932014465332 }, { "epoch": 1.48712158203125e-05, "model_forward_time": 0.02556467056274414, "step": 9746 }, { "epoch": 1.48712158203125e-05, "step": 9746, "training_step_time": 0.11341190338134766 }, { "epoch": 1.487274169921875e-05, "model_forward_time": 0.025713682174682617, "step": 9747 }, { "epoch": 1.487274169921875e-05, "step": 9747, "training_step_time": 0.11193251609802246 }, { "epoch": 1.4874267578125e-05, "model_forward_time": 0.02573561668395996, "step": 9748 }, { "epoch": 1.4874267578125e-05, "step": 9748, "training_step_time": 0.10944962501525879 }, { "epoch": 1.487579345703125e-05, "model_forward_time": 0.025519847869873047, "step": 9749 }, { "epoch": 1.487579345703125e-05, "step": 9749, "training_step_time": 0.10676288604736328 }, { "epoch": 1.48773193359375e-05, "grad_norm": 0.564054012298584, "learning_rate": 8.07106356344834e-05, "loss": 0.0277, "step": 9750 }, { "epoch": 1.48773193359375e-05, "model_forward_time": 0.025513172149658203, "step": 9750 }, { "epoch": 1.48773193359375e-05, "step": 9750, "training_step_time": 0.12445688247680664 }, { "epoch": 1.487884521484375e-05, "model_forward_time": 0.02587270736694336, "step": 9751 }, { "epoch": 1.487884521484375e-05, "step": 9751, "training_step_time": 0.10859227180480957 }, { "epoch": 1.488037109375e-05, "model_forward_time": 0.02566218376159668, "step": 9752 }, { "epoch": 1.488037109375e-05, "step": 9752, "training_step_time": 0.10823416709899902 }, { "epoch": 1.488189697265625e-05, "model_forward_time": 0.02594304084777832, "step": 9753 }, { "epoch": 1.488189697265625e-05, "step": 9753, "training_step_time": 0.10813021659851074 }, { "epoch": 1.48834228515625e-05, "model_forward_time": 0.026610374450683594, "step": 9754 }, { "epoch": 1.48834228515625e-05, "step": 9754, "training_step_time": 0.11319208145141602 }, { "epoch": 1.488494873046875e-05, "model_forward_time": 0.025405406951904297, "step": 9755 }, { "epoch": 1.488494873046875e-05, "step": 9755, "training_step_time": 0.10914015769958496 }, { "epoch": 1.4886474609375e-05, "model_forward_time": 0.02536487579345703, "step": 9756 }, { "epoch": 1.4886474609375e-05, "step": 9756, "training_step_time": 0.16846585273742676 }, { "epoch": 1.488800048828125e-05, "model_forward_time": 0.02488994598388672, "step": 9757 }, { "epoch": 1.488800048828125e-05, "step": 9757, "training_step_time": 0.12117958068847656 }, { "epoch": 1.48895263671875e-05, "model_forward_time": 0.024817943572998047, "step": 9758 }, { "epoch": 1.48895263671875e-05, "step": 9758, "training_step_time": 0.10559487342834473 }, { "epoch": 1.489105224609375e-05, "model_forward_time": 0.02567577362060547, "step": 9759 }, { "epoch": 1.489105224609375e-05, "step": 9759, "training_step_time": 0.12911105155944824 }, { "epoch": 1.4892578125e-05, "grad_norm": 0.4195360541343689, "learning_rate": 8.06671229846594e-05, "loss": 0.033, "step": 9760 }, { "epoch": 1.4892578125e-05, "model_forward_time": 0.02538752555847168, "step": 9760 }, { "epoch": 1.4892578125e-05, "step": 9760, "training_step_time": 0.10829472541809082 }, { "epoch": 1.489410400390625e-05, "model_forward_time": 0.025613784790039062, "step": 9761 }, { "epoch": 1.489410400390625e-05, "step": 9761, "training_step_time": 0.22438907623291016 }, { "epoch": 1.48956298828125e-05, "model_forward_time": 0.02418231964111328, "step": 9762 }, { "epoch": 1.48956298828125e-05, "step": 9762, "training_step_time": 0.17682647705078125 }, { "epoch": 1.489715576171875e-05, "model_forward_time": 0.024318456649780273, "step": 9763 }, { "epoch": 1.489715576171875e-05, "step": 9763, "training_step_time": 0.16802334785461426 }, { "epoch": 1.4898681640625e-05, "model_forward_time": 0.024797916412353516, "step": 9764 }, { "epoch": 1.4898681640625e-05, "step": 9764, "training_step_time": 0.13013696670532227 }, { "epoch": 1.490020751953125e-05, "model_forward_time": 0.024611949920654297, "step": 9765 }, { "epoch": 1.490020751953125e-05, "step": 9765, "training_step_time": 0.1895887851715088 }, { "epoch": 1.49017333984375e-05, "model_forward_time": 0.02512359619140625, "step": 9766 }, { "epoch": 1.49017333984375e-05, "step": 9766, "training_step_time": 0.11816024780273438 }, { "epoch": 1.490325927734375e-05, "model_forward_time": 0.02516770362854004, "step": 9767 }, { "epoch": 1.490325927734375e-05, "step": 9767, "training_step_time": 0.10843205451965332 }, { "epoch": 1.490478515625e-05, "model_forward_time": 0.026070833206176758, "step": 9768 }, { "epoch": 1.490478515625e-05, "step": 9768, "training_step_time": 0.10669851303100586 }, { "epoch": 1.490631103515625e-05, "model_forward_time": 0.025387287139892578, "step": 9769 }, { "epoch": 1.490631103515625e-05, "step": 9769, "training_step_time": 0.10902237892150879 }, { "epoch": 1.49078369140625e-05, "grad_norm": 0.4531439542770386, "learning_rate": 8.062357307138926e-05, "loss": 0.0321, "step": 9770 }, { "epoch": 1.49078369140625e-05, "model_forward_time": 0.025746583938598633, "step": 9770 }, { "epoch": 1.49078369140625e-05, "step": 9770, "training_step_time": 0.10598921775817871 }, { "epoch": 1.490936279296875e-05, "model_forward_time": 0.02561497688293457, "step": 9771 }, { "epoch": 1.490936279296875e-05, "step": 9771, "training_step_time": 0.10606884956359863 }, { "epoch": 1.4910888671875e-05, "model_forward_time": 0.025025606155395508, "step": 9772 }, { "epoch": 1.4910888671875e-05, "step": 9772, "training_step_time": 0.10648107528686523 }, { "epoch": 1.491241455078125e-05, "model_forward_time": 0.025859355926513672, "step": 9773 }, { "epoch": 1.491241455078125e-05, "step": 9773, "training_step_time": 0.1109614372253418 }, { "epoch": 1.49139404296875e-05, "model_forward_time": 0.025506019592285156, "step": 9774 }, { "epoch": 1.49139404296875e-05, "step": 9774, "training_step_time": 0.10644173622131348 }, { "epoch": 1.491546630859375e-05, "model_forward_time": 0.024400949478149414, "step": 9775 }, { "epoch": 1.491546630859375e-05, "step": 9775, "training_step_time": 0.10740447044372559 }, { "epoch": 1.49169921875e-05, "model_forward_time": 0.024697542190551758, "step": 9776 }, { "epoch": 1.49169921875e-05, "step": 9776, "training_step_time": 0.10973525047302246 }, { "epoch": 1.491851806640625e-05, "model_forward_time": 0.025323867797851562, "step": 9777 }, { "epoch": 1.491851806640625e-05, "step": 9777, "training_step_time": 0.10542726516723633 }, { "epoch": 1.49200439453125e-05, "model_forward_time": 0.02647089958190918, "step": 9778 }, { "epoch": 1.49200439453125e-05, "step": 9778, "training_step_time": 0.10923576354980469 }, { "epoch": 1.492156982421875e-05, "model_forward_time": 0.02543354034423828, "step": 9779 }, { "epoch": 1.492156982421875e-05, "step": 9779, "training_step_time": 0.1098027229309082 }, { "epoch": 1.4923095703125e-05, "grad_norm": 0.4400675594806671, "learning_rate": 8.057998594759022e-05, "loss": 0.0327, "step": 9780 }, { "epoch": 1.4923095703125e-05, "model_forward_time": 0.025960683822631836, "step": 9780 }, { "epoch": 1.4923095703125e-05, "step": 9780, "training_step_time": 0.11166071891784668 }, { "epoch": 1.492462158203125e-05, "model_forward_time": 0.026118755340576172, "step": 9781 }, { "epoch": 1.492462158203125e-05, "step": 9781, "training_step_time": 0.11455893516540527 }, { "epoch": 1.49261474609375e-05, "model_forward_time": 0.02448105812072754, "step": 9782 }, { "epoch": 1.49261474609375e-05, "step": 9782, "training_step_time": 0.11598944664001465 }, { "epoch": 1.492767333984375e-05, "model_forward_time": 0.024310588836669922, "step": 9783 }, { "epoch": 1.492767333984375e-05, "step": 9783, "training_step_time": 0.11236286163330078 }, { "epoch": 1.492919921875e-05, "model_forward_time": 0.02670598030090332, "step": 9784 }, { "epoch": 1.492919921875e-05, "step": 9784, "training_step_time": 0.11348271369934082 }, { "epoch": 1.493072509765625e-05, "model_forward_time": 0.024506568908691406, "step": 9785 }, { "epoch": 1.493072509765625e-05, "step": 9785, "training_step_time": 0.10960721969604492 }, { "epoch": 1.49322509765625e-05, "model_forward_time": 0.0262601375579834, "step": 9786 }, { "epoch": 1.49322509765625e-05, "step": 9786, "training_step_time": 0.10893511772155762 }, { "epoch": 1.493377685546875e-05, "model_forward_time": 0.025414228439331055, "step": 9787 }, { "epoch": 1.493377685546875e-05, "step": 9787, "training_step_time": 0.11485648155212402 }, { "epoch": 1.4935302734375e-05, "model_forward_time": 0.025536537170410156, "step": 9788 }, { "epoch": 1.4935302734375e-05, "step": 9788, "training_step_time": 0.11267232894897461 }, { "epoch": 1.493682861328125e-05, "model_forward_time": 0.02567768096923828, "step": 9789 }, { "epoch": 1.493682861328125e-05, "step": 9789, "training_step_time": 0.1086418628692627 }, { "epoch": 1.49383544921875e-05, "grad_norm": 0.3012600839138031, "learning_rate": 8.053636166622476e-05, "loss": 0.0253, "step": 9790 }, { "epoch": 1.49383544921875e-05, "model_forward_time": 0.025525331497192383, "step": 9790 }, { "epoch": 1.49383544921875e-05, "step": 9790, "training_step_time": 0.10766363143920898 }, { "epoch": 1.493988037109375e-05, "model_forward_time": 0.02569866180419922, "step": 9791 }, { "epoch": 1.493988037109375e-05, "step": 9791, "training_step_time": 0.10968136787414551 }, { "epoch": 1.494140625e-05, "model_forward_time": 0.024981975555419922, "step": 9792 }, { "epoch": 1.494140625e-05, "step": 9792, "training_step_time": 0.11271810531616211 }, { "epoch": 1.494293212890625e-05, "model_forward_time": 0.02527451515197754, "step": 9793 }, { "epoch": 1.494293212890625e-05, "step": 9793, "training_step_time": 0.10753035545349121 }, { "epoch": 1.49444580078125e-05, "model_forward_time": 0.02565908432006836, "step": 9794 }, { "epoch": 1.49444580078125e-05, "step": 9794, "training_step_time": 0.11359643936157227 }, { "epoch": 1.494598388671875e-05, "model_forward_time": 0.026448965072631836, "step": 9795 }, { "epoch": 1.494598388671875e-05, "step": 9795, "training_step_time": 0.10906648635864258 }, { "epoch": 1.4947509765625e-05, "model_forward_time": 0.025510311126708984, "step": 9796 }, { "epoch": 1.4947509765625e-05, "step": 9796, "training_step_time": 0.2145400047302246 }, { "epoch": 1.494903564453125e-05, "model_forward_time": 0.024912357330322266, "step": 9797 }, { "epoch": 1.494903564453125e-05, "step": 9797, "training_step_time": 0.12426090240478516 }, { "epoch": 1.49505615234375e-05, "model_forward_time": 0.02521538734436035, "step": 9798 }, { "epoch": 1.49505615234375e-05, "step": 9798, "training_step_time": 0.1052711009979248 }, { "epoch": 1.495208740234375e-05, "model_forward_time": 0.025508403778076172, "step": 9799 }, { "epoch": 1.495208740234375e-05, "step": 9799, "training_step_time": 0.10692262649536133 }, { "epoch": 1.495361328125e-05, "grad_norm": 0.3177136480808258, "learning_rate": 8.049270028030046e-05, "loss": 0.0247, "step": 9800 }, { "epoch": 1.495361328125e-05, "model_forward_time": 0.025712251663208008, "step": 9800 }, { "epoch": 1.495361328125e-05, "step": 9800, "training_step_time": 0.1069943904876709 }, { "epoch": 1.495513916015625e-05, "model_forward_time": 0.025146007537841797, "step": 9801 }, { "epoch": 1.495513916015625e-05, "step": 9801, "training_step_time": 0.11007094383239746 }, { "epoch": 1.49566650390625e-05, "model_forward_time": 0.025117874145507812, "step": 9802 }, { "epoch": 1.49566650390625e-05, "step": 9802, "training_step_time": 0.1951737403869629 }, { "epoch": 1.495819091796875e-05, "model_forward_time": 0.024478435516357422, "step": 9803 }, { "epoch": 1.495819091796875e-05, "step": 9803, "training_step_time": 0.14208459854125977 }, { "epoch": 1.4959716796875e-05, "model_forward_time": 0.024687767028808594, "step": 9804 }, { "epoch": 1.4959716796875e-05, "step": 9804, "training_step_time": 0.11280369758605957 }, { "epoch": 1.496124267578125e-05, "model_forward_time": 0.02464151382446289, "step": 9805 }, { "epoch": 1.496124267578125e-05, "step": 9805, "training_step_time": 0.1066131591796875 }, { "epoch": 1.49627685546875e-05, "model_forward_time": 0.02573990821838379, "step": 9806 }, { "epoch": 1.49627685546875e-05, "step": 9806, "training_step_time": 0.12056183815002441 }, { "epoch": 1.496429443359375e-05, "model_forward_time": 0.02540874481201172, "step": 9807 }, { "epoch": 1.496429443359375e-05, "step": 9807, "training_step_time": 0.20915937423706055 }, { "epoch": 1.49658203125e-05, "model_forward_time": 0.025008440017700195, "step": 9808 }, { "epoch": 1.49658203125e-05, "step": 9808, "training_step_time": 0.1241757869720459 }, { "epoch": 1.496734619140625e-05, "model_forward_time": 0.024881601333618164, "step": 9809 }, { "epoch": 1.496734619140625e-05, "step": 9809, "training_step_time": 0.21195316314697266 }, { "epoch": 1.49688720703125e-05, "grad_norm": 0.40076327323913574, "learning_rate": 8.044900184287007e-05, "loss": 0.0336, "step": 9810 }, { "epoch": 1.49688720703125e-05, "model_forward_time": 0.02485489845275879, "step": 9810 }, { "epoch": 1.49688720703125e-05, "step": 9810, "training_step_time": 0.18085384368896484 }, { "epoch": 1.497039794921875e-05, "model_forward_time": 0.024980783462524414, "step": 9811 }, { "epoch": 1.497039794921875e-05, "step": 9811, "training_step_time": 0.17284893989562988 }, { "epoch": 1.4971923828125e-05, "model_forward_time": 0.025893449783325195, "step": 9812 }, { "epoch": 1.4971923828125e-05, "step": 9812, "training_step_time": 0.11108875274658203 }, { "epoch": 1.497344970703125e-05, "model_forward_time": 0.02360248565673828, "step": 9813 }, { "epoch": 1.497344970703125e-05, "step": 9813, "training_step_time": 0.10728788375854492 }, { "epoch": 1.49749755859375e-05, "model_forward_time": 0.025328397750854492, "step": 9814 }, { "epoch": 1.49749755859375e-05, "step": 9814, "training_step_time": 0.10765552520751953 }, { "epoch": 1.497650146484375e-05, "model_forward_time": 0.02597332000732422, "step": 9815 }, { "epoch": 1.497650146484375e-05, "step": 9815, "training_step_time": 0.10791397094726562 }, { "epoch": 1.497802734375e-05, "model_forward_time": 0.025432586669921875, "step": 9816 }, { "epoch": 1.497802734375e-05, "step": 9816, "training_step_time": 0.10934948921203613 }, { "epoch": 1.497955322265625e-05, "model_forward_time": 0.02537822723388672, "step": 9817 }, { "epoch": 1.497955322265625e-05, "step": 9817, "training_step_time": 0.11173439025878906 }, { "epoch": 1.49810791015625e-05, "model_forward_time": 0.025218963623046875, "step": 9818 }, { "epoch": 1.49810791015625e-05, "step": 9818, "training_step_time": 0.11391496658325195 }, { "epoch": 1.498260498046875e-05, "model_forward_time": 0.02529430389404297, "step": 9819 }, { "epoch": 1.498260498046875e-05, "step": 9819, "training_step_time": 0.11739540100097656 }, { "epoch": 1.4984130859375e-05, "grad_norm": 0.4028327465057373, "learning_rate": 8.040526640703128e-05, "loss": 0.0331, "step": 9820 }, { "epoch": 1.4984130859375e-05, "model_forward_time": 0.025072097778320312, "step": 9820 }, { "epoch": 1.4984130859375e-05, "step": 9820, "training_step_time": 0.11196613311767578 }, { "epoch": 1.498565673828125e-05, "model_forward_time": 0.025506258010864258, "step": 9821 }, { "epoch": 1.498565673828125e-05, "step": 9821, "training_step_time": 0.11948871612548828 }, { "epoch": 1.49871826171875e-05, "model_forward_time": 0.024015188217163086, "step": 9822 }, { "epoch": 1.49871826171875e-05, "step": 9822, "training_step_time": 0.11364483833312988 }, { "epoch": 1.498870849609375e-05, "model_forward_time": 0.024483442306518555, "step": 9823 }, { "epoch": 1.498870849609375e-05, "step": 9823, "training_step_time": 0.1119391918182373 }, { "epoch": 1.4990234375e-05, "model_forward_time": 0.02518749237060547, "step": 9824 }, { "epoch": 1.4990234375e-05, "step": 9824, "training_step_time": 0.10871124267578125 }, { "epoch": 1.499176025390625e-05, "model_forward_time": 0.02550530433654785, "step": 9825 }, { "epoch": 1.499176025390625e-05, "step": 9825, "training_step_time": 0.1175835132598877 }, { "epoch": 1.49932861328125e-05, "model_forward_time": 0.02519702911376953, "step": 9826 }, { "epoch": 1.49932861328125e-05, "step": 9826, "training_step_time": 0.10766935348510742 }, { "epoch": 1.499481201171875e-05, "model_forward_time": 0.025609493255615234, "step": 9827 }, { "epoch": 1.499481201171875e-05, "step": 9827, "training_step_time": 0.10742592811584473 }, { "epoch": 1.4996337890625e-05, "model_forward_time": 0.025310516357421875, "step": 9828 }, { "epoch": 1.4996337890625e-05, "step": 9828, "training_step_time": 0.10732388496398926 }, { "epoch": 1.499786376953125e-05, "model_forward_time": 0.025436878204345703, "step": 9829 }, { "epoch": 1.499786376953125e-05, "step": 9829, "training_step_time": 0.11087250709533691 }, { "epoch": 1.49993896484375e-05, "grad_norm": 0.4979908764362335, "learning_rate": 8.036149402592676e-05, "loss": 0.0307, "step": 9830 }, { "epoch": 1.49993896484375e-05, "model_forward_time": 0.025301694869995117, "step": 9830 }, { "epoch": 1.49993896484375e-05, "step": 9830, "training_step_time": 0.1091160774230957 }, { "epoch": 1.500091552734375e-05, "model_forward_time": 0.025782108306884766, "step": 9831 }, { "epoch": 1.500091552734375e-05, "step": 9831, "training_step_time": 0.10886120796203613 }, { "epoch": 1.500244140625e-05, "model_forward_time": 0.02517843246459961, "step": 9832 }, { "epoch": 1.500244140625e-05, "step": 9832, "training_step_time": 0.1075742244720459 }, { "epoch": 1.500396728515625e-05, "model_forward_time": 0.025171279907226562, "step": 9833 }, { "epoch": 1.500396728515625e-05, "step": 9833, "training_step_time": 0.10749101638793945 }, { "epoch": 1.50054931640625e-05, "model_forward_time": 0.028049230575561523, "step": 9834 }, { "epoch": 1.50054931640625e-05, "step": 9834, "training_step_time": 0.11225223541259766 }, { "epoch": 1.500701904296875e-05, "model_forward_time": 0.026309967041015625, "step": 9835 }, { "epoch": 1.500701904296875e-05, "step": 9835, "training_step_time": 0.1086430549621582 }, { "epoch": 1.5008544921875e-05, "model_forward_time": 0.025262117385864258, "step": 9836 }, { "epoch": 1.5008544921875e-05, "step": 9836, "training_step_time": 0.108184814453125 }, { "epoch": 1.501007080078125e-05, "model_forward_time": 0.025929689407348633, "step": 9837 }, { "epoch": 1.501007080078125e-05, "step": 9837, "training_step_time": 0.11485576629638672 }, { "epoch": 1.50115966796875e-05, "model_forward_time": 0.025205373764038086, "step": 9838 }, { "epoch": 1.50115966796875e-05, "step": 9838, "training_step_time": 0.16666173934936523 }, { "epoch": 1.501312255859375e-05, "model_forward_time": 0.024776697158813477, "step": 9839 }, { "epoch": 1.501312255859375e-05, "step": 9839, "training_step_time": 0.16666126251220703 }, { "epoch": 1.50146484375e-05, "grad_norm": 0.2715790271759033, "learning_rate": 8.031768475274413e-05, "loss": 0.0382, "step": 9840 }, { "epoch": 1.50146484375e-05, "model_forward_time": 0.024921417236328125, "step": 9840 }, { "epoch": 1.50146484375e-05, "step": 9840, "training_step_time": 0.1100156307220459 }, { "epoch": 1.501617431640625e-05, "model_forward_time": 0.024936199188232422, "step": 9841 }, { "epoch": 1.501617431640625e-05, "step": 9841, "training_step_time": 0.21661925315856934 }, { "epoch": 1.50177001953125e-05, "model_forward_time": 0.02498340606689453, "step": 9842 }, { "epoch": 1.50177001953125e-05, "step": 9842, "training_step_time": 0.11571121215820312 }, { "epoch": 1.501922607421875e-05, "model_forward_time": 0.026135921478271484, "step": 9843 }, { "epoch": 1.501922607421875e-05, "step": 9843, "training_step_time": 0.10500335693359375 }, { "epoch": 1.5020751953125e-05, "model_forward_time": 0.02542281150817871, "step": 9844 }, { "epoch": 1.5020751953125e-05, "step": 9844, "training_step_time": 0.10891294479370117 }, { "epoch": 1.502227783203125e-05, "model_forward_time": 0.02547001838684082, "step": 9845 }, { "epoch": 1.502227783203125e-05, "step": 9845, "training_step_time": 0.10589766502380371 }, { "epoch": 1.50238037109375e-05, "model_forward_time": 0.025128841400146484, "step": 9846 }, { "epoch": 1.50238037109375e-05, "step": 9846, "training_step_time": 0.10641646385192871 }, { "epoch": 1.502532958984375e-05, "model_forward_time": 0.024691343307495117, "step": 9847 }, { "epoch": 1.502532958984375e-05, "step": 9847, "training_step_time": 0.1967618465423584 }, { "epoch": 1.502685546875e-05, "model_forward_time": 0.024727821350097656, "step": 9848 }, { "epoch": 1.502685546875e-05, "step": 9848, "training_step_time": 0.14198613166809082 }, { "epoch": 1.502838134765625e-05, "model_forward_time": 0.02498149871826172, "step": 9849 }, { "epoch": 1.502838134765625e-05, "step": 9849, "training_step_time": 0.10376548767089844 }, { "epoch": 1.50299072265625e-05, "grad_norm": 0.35556262731552124, "learning_rate": 8.027383864071573e-05, "loss": 0.0358, "step": 9850 }, { "epoch": 1.50299072265625e-05, "model_forward_time": 0.025561809539794922, "step": 9850 }, { "epoch": 1.50299072265625e-05, "step": 9850, "training_step_time": 0.11064934730529785 }, { "epoch": 1.503143310546875e-05, "model_forward_time": 0.025798320770263672, "step": 9851 }, { "epoch": 1.503143310546875e-05, "step": 9851, "training_step_time": 0.11286473274230957 }, { "epoch": 1.5032958984375e-05, "model_forward_time": 0.025142908096313477, "step": 9852 }, { "epoch": 1.5032958984375e-05, "step": 9852, "training_step_time": 0.10771894454956055 }, { "epoch": 1.503448486328125e-05, "model_forward_time": 0.02587151527404785, "step": 9853 }, { "epoch": 1.503448486328125e-05, "step": 9853, "training_step_time": 0.1893303394317627 }, { "epoch": 1.50360107421875e-05, "model_forward_time": 0.02511453628540039, "step": 9854 }, { "epoch": 1.50360107421875e-05, "step": 9854, "training_step_time": 0.20250678062438965 }, { "epoch": 1.503753662109375e-05, "model_forward_time": 0.025141000747680664, "step": 9855 }, { "epoch": 1.503753662109375e-05, "step": 9855, "training_step_time": 0.1935136318206787 }, { "epoch": 1.50390625e-05, "model_forward_time": 0.02452254295349121, "step": 9856 }, { "epoch": 1.50390625e-05, "step": 9856, "training_step_time": 0.19210362434387207 }, { "epoch": 1.504058837890625e-05, "model_forward_time": 0.024485349655151367, "step": 9857 }, { "epoch": 1.504058837890625e-05, "step": 9857, "training_step_time": 0.14561009407043457 }, { "epoch": 1.50421142578125e-05, "model_forward_time": 0.0250241756439209, "step": 9858 }, { "epoch": 1.50421142578125e-05, "step": 9858, "training_step_time": 0.10615849494934082 }, { "epoch": 1.504364013671875e-05, "model_forward_time": 0.024925708770751953, "step": 9859 }, { "epoch": 1.504364013671875e-05, "step": 9859, "training_step_time": 0.10599160194396973 }, { "epoch": 1.5045166015625e-05, "grad_norm": 0.2787579596042633, "learning_rate": 8.022995574311876e-05, "loss": 0.0274, "step": 9860 }, { "epoch": 1.5045166015625e-05, "model_forward_time": 0.025533676147460938, "step": 9860 }, { "epoch": 1.5045166015625e-05, "step": 9860, "training_step_time": 0.1068272590637207 }, { "epoch": 1.504669189453125e-05, "model_forward_time": 0.025585651397705078, "step": 9861 }, { "epoch": 1.504669189453125e-05, "step": 9861, "training_step_time": 0.10719132423400879 }, { "epoch": 1.50482177734375e-05, "model_forward_time": 0.025596141815185547, "step": 9862 }, { "epoch": 1.50482177734375e-05, "step": 9862, "training_step_time": 0.10933804512023926 }, { "epoch": 1.504974365234375e-05, "model_forward_time": 0.024993181228637695, "step": 9863 }, { "epoch": 1.504974365234375e-05, "step": 9863, "training_step_time": 0.10825157165527344 }, { "epoch": 1.505126953125e-05, "model_forward_time": 0.025699853897094727, "step": 9864 }, { "epoch": 1.505126953125e-05, "step": 9864, "training_step_time": 0.1085350513458252 }, { "epoch": 1.505279541015625e-05, "model_forward_time": 0.02520918846130371, "step": 9865 }, { "epoch": 1.505279541015625e-05, "step": 9865, "training_step_time": 0.11288762092590332 }, { "epoch": 1.50543212890625e-05, "model_forward_time": 0.024970054626464844, "step": 9866 }, { "epoch": 1.50543212890625e-05, "step": 9866, "training_step_time": 0.11149358749389648 }, { "epoch": 1.505584716796875e-05, "model_forward_time": 0.025483131408691406, "step": 9867 }, { "epoch": 1.505584716796875e-05, "step": 9867, "training_step_time": 0.18761181831359863 }, { "epoch": 1.5057373046875e-05, "model_forward_time": 0.02482438087463379, "step": 9868 }, { "epoch": 1.5057373046875e-05, "step": 9868, "training_step_time": 0.21346616744995117 }, { "epoch": 1.505889892578125e-05, "model_forward_time": 0.025168895721435547, "step": 9869 }, { "epoch": 1.505889892578125e-05, "step": 9869, "training_step_time": 0.21142888069152832 }, { "epoch": 1.50604248046875e-05, "grad_norm": 0.2760957181453705, "learning_rate": 8.018603611327504e-05, "loss": 0.0258, "step": 9870 }, { "epoch": 1.50604248046875e-05, "model_forward_time": 0.024290800094604492, "step": 9870 }, { "epoch": 1.50604248046875e-05, "step": 9870, "training_step_time": 0.2133183479309082 }, { "epoch": 1.506195068359375e-05, "model_forward_time": 0.024898767471313477, "step": 9871 }, { "epoch": 1.506195068359375e-05, "step": 9871, "training_step_time": 0.20910954475402832 }, { "epoch": 1.50634765625e-05, "model_forward_time": 0.02498173713684082, "step": 9872 }, { "epoch": 1.50634765625e-05, "step": 9872, "training_step_time": 0.19759917259216309 }, { "epoch": 1.506500244140625e-05, "model_forward_time": 0.024593591690063477, "step": 9873 }, { "epoch": 1.506500244140625e-05, "step": 9873, "training_step_time": 0.17523527145385742 }, { "epoch": 1.50665283203125e-05, "model_forward_time": 0.024432897567749023, "step": 9874 }, { "epoch": 1.50665283203125e-05, "step": 9874, "training_step_time": 0.10233092308044434 }, { "epoch": 1.506805419921875e-05, "model_forward_time": 0.024423599243164062, "step": 9875 }, { "epoch": 1.506805419921875e-05, "step": 9875, "training_step_time": 0.10180854797363281 }, { "epoch": 1.5069580078125e-05, "model_forward_time": 0.026668310165405273, "step": 9876 }, { "epoch": 1.5069580078125e-05, "step": 9876, "training_step_time": 0.1063845157623291 }, { "epoch": 1.507110595703125e-05, "model_forward_time": 0.025010108947753906, "step": 9877 }, { "epoch": 1.507110595703125e-05, "step": 9877, "training_step_time": 0.1053462028503418 }, { "epoch": 1.50726318359375e-05, "model_forward_time": 0.02447199821472168, "step": 9878 }, { "epoch": 1.50726318359375e-05, "step": 9878, "training_step_time": 0.11030745506286621 }, { "epoch": 1.507415771484375e-05, "model_forward_time": 0.025578975677490234, "step": 9879 }, { "epoch": 1.507415771484375e-05, "step": 9879, "training_step_time": 0.1108407974243164 }, { "epoch": 1.507568359375e-05, "grad_norm": 0.3437744379043579, "learning_rate": 8.01420798045511e-05, "loss": 0.0353, "step": 9880 }, { "epoch": 1.507568359375e-05, "model_forward_time": 0.02547287940979004, "step": 9880 }, { "epoch": 1.507568359375e-05, "step": 9880, "training_step_time": 0.10824370384216309 }, { "epoch": 1.507720947265625e-05, "model_forward_time": 0.025673866271972656, "step": 9881 }, { "epoch": 1.507720947265625e-05, "step": 9881, "training_step_time": 0.11393046379089355 }, { "epoch": 1.50787353515625e-05, "model_forward_time": 0.025342226028442383, "step": 9882 }, { "epoch": 1.50787353515625e-05, "step": 9882, "training_step_time": 0.1698153018951416 }, { "epoch": 1.508026123046875e-05, "model_forward_time": 0.0241544246673584, "step": 9883 }, { "epoch": 1.508026123046875e-05, "step": 9883, "training_step_time": 0.17233800888061523 }, { "epoch": 1.5081787109375e-05, "model_forward_time": 0.025681257247924805, "step": 9884 }, { "epoch": 1.5081787109375e-05, "step": 9884, "training_step_time": 0.1049811840057373 }, { "epoch": 1.508331298828125e-05, "model_forward_time": 0.02523517608642578, "step": 9885 }, { "epoch": 1.508331298828125e-05, "step": 9885, "training_step_time": 0.10669064521789551 }, { "epoch": 1.50848388671875e-05, "model_forward_time": 0.025859355926513672, "step": 9886 }, { "epoch": 1.50848388671875e-05, "step": 9886, "training_step_time": 0.10920238494873047 }, { "epoch": 1.508636474609375e-05, "model_forward_time": 0.02513909339904785, "step": 9887 }, { "epoch": 1.508636474609375e-05, "step": 9887, "training_step_time": 0.10944366455078125 }, { "epoch": 1.5087890625e-05, "model_forward_time": 0.025278568267822266, "step": 9888 }, { "epoch": 1.5087890625e-05, "step": 9888, "training_step_time": 0.11968684196472168 }, { "epoch": 1.508941650390625e-05, "model_forward_time": 0.025418758392333984, "step": 9889 }, { "epoch": 1.508941650390625e-05, "step": 9889, "training_step_time": 0.13589787483215332 }, { "epoch": 1.50909423828125e-05, "grad_norm": 0.2674311697483063, "learning_rate": 8.009808687035798e-05, "loss": 0.0212, "step": 9890 }, { "epoch": 1.50909423828125e-05, "model_forward_time": 0.02550029754638672, "step": 9890 }, { "epoch": 1.50909423828125e-05, "step": 9890, "training_step_time": 0.11166596412658691 }, { "epoch": 1.509246826171875e-05, "model_forward_time": 0.025786638259887695, "step": 9891 }, { "epoch": 1.509246826171875e-05, "step": 9891, "training_step_time": 0.1151120662689209 }, { "epoch": 1.5093994140625e-05, "model_forward_time": 0.025448083877563477, "step": 9892 }, { "epoch": 1.5093994140625e-05, "step": 9892, "training_step_time": 0.11392068862915039 }, { "epoch": 1.509552001953125e-05, "model_forward_time": 0.02524280548095703, "step": 9893 }, { "epoch": 1.509552001953125e-05, "step": 9893, "training_step_time": 0.15471959114074707 }, { "epoch": 1.50970458984375e-05, "model_forward_time": 0.025466203689575195, "step": 9894 }, { "epoch": 1.50970458984375e-05, "step": 9894, "training_step_time": 0.20530128479003906 }, { "epoch": 1.509857177734375e-05, "model_forward_time": 0.025064706802368164, "step": 9895 }, { "epoch": 1.509857177734375e-05, "step": 9895, "training_step_time": 0.12743377685546875 }, { "epoch": 1.510009765625e-05, "model_forward_time": 0.024439573287963867, "step": 9896 }, { "epoch": 1.510009765625e-05, "step": 9896, "training_step_time": 0.15816116333007812 }, { "epoch": 1.510162353515625e-05, "model_forward_time": 0.024706125259399414, "step": 9897 }, { "epoch": 1.510162353515625e-05, "step": 9897, "training_step_time": 0.20281171798706055 }, { "epoch": 1.51031494140625e-05, "model_forward_time": 0.024506568908691406, "step": 9898 }, { "epoch": 1.51031494140625e-05, "step": 9898, "training_step_time": 0.1391308307647705 }, { "epoch": 1.510467529296875e-05, "model_forward_time": 0.02452540397644043, "step": 9899 }, { "epoch": 1.510467529296875e-05, "step": 9899, "training_step_time": 0.10457706451416016 }, { "epoch": 1.5106201171875e-05, "grad_norm": 0.35864633321762085, "learning_rate": 8.005405736415126e-05, "loss": 0.0309, "step": 9900 }, { "epoch": 1.5106201171875e-05, "model_forward_time": 0.026026010513305664, "step": 9900 }, { "epoch": 1.5106201171875e-05, "step": 9900, "training_step_time": 0.10687041282653809 }, { "epoch": 1.510772705078125e-05, "model_forward_time": 0.025847196578979492, "step": 9901 }, { "epoch": 1.510772705078125e-05, "step": 9901, "training_step_time": 0.10817790031433105 }, { "epoch": 1.51092529296875e-05, "model_forward_time": 0.025614261627197266, "step": 9902 }, { "epoch": 1.51092529296875e-05, "step": 9902, "training_step_time": 0.10854530334472656 }, { "epoch": 1.511077880859375e-05, "model_forward_time": 0.02646040916442871, "step": 9903 }, { "epoch": 1.511077880859375e-05, "step": 9903, "training_step_time": 0.10735607147216797 }, { "epoch": 1.51123046875e-05, "model_forward_time": 0.02680349349975586, "step": 9904 }, { "epoch": 1.51123046875e-05, "step": 9904, "training_step_time": 0.11060953140258789 }, { "epoch": 1.511383056640625e-05, "model_forward_time": 0.026361465454101562, "step": 9905 }, { "epoch": 1.511383056640625e-05, "step": 9905, "training_step_time": 0.11171269416809082 }, { "epoch": 1.51153564453125e-05, "model_forward_time": 0.02573680877685547, "step": 9906 }, { "epoch": 1.51153564453125e-05, "step": 9906, "training_step_time": 0.10826635360717773 }, { "epoch": 1.511688232421875e-05, "model_forward_time": 0.02542853355407715, "step": 9907 }, { "epoch": 1.511688232421875e-05, "step": 9907, "training_step_time": 0.10780572891235352 }, { "epoch": 1.5118408203125e-05, "model_forward_time": 0.025533676147460938, "step": 9908 }, { "epoch": 1.5118408203125e-05, "step": 9908, "training_step_time": 0.10691094398498535 }, { "epoch": 1.511993408203125e-05, "model_forward_time": 0.025153160095214844, "step": 9909 }, { "epoch": 1.511993408203125e-05, "step": 9909, "training_step_time": 0.1094207763671875 }, { "epoch": 1.51214599609375e-05, "grad_norm": 0.36182212829589844, "learning_rate": 8.000999133943093e-05, "loss": 0.0314, "step": 9910 }, { "epoch": 1.51214599609375e-05, "model_forward_time": 0.025544404983520508, "step": 9910 }, { "epoch": 1.51214599609375e-05, "step": 9910, "training_step_time": 0.12115240097045898 }, { "epoch": 1.512298583984375e-05, "model_forward_time": 0.0252838134765625, "step": 9911 }, { "epoch": 1.512298583984375e-05, "step": 9911, "training_step_time": 0.14619112014770508 }, { "epoch": 1.512451171875e-05, "model_forward_time": 0.024916887283325195, "step": 9912 }, { "epoch": 1.512451171875e-05, "step": 9912, "training_step_time": 0.11918830871582031 }, { "epoch": 1.512603759765625e-05, "model_forward_time": 0.025249719619750977, "step": 9913 }, { "epoch": 1.512603759765625e-05, "step": 9913, "training_step_time": 0.11227917671203613 }, { "epoch": 1.51275634765625e-05, "model_forward_time": 0.024380207061767578, "step": 9914 }, { "epoch": 1.51275634765625e-05, "step": 9914, "training_step_time": 0.1116933822631836 }, { "epoch": 1.512908935546875e-05, "model_forward_time": 0.025173187255859375, "step": 9915 }, { "epoch": 1.512908935546875e-05, "step": 9915, "training_step_time": 0.10962867736816406 }, { "epoch": 1.5130615234375e-05, "model_forward_time": 0.02461719512939453, "step": 9916 }, { "epoch": 1.5130615234375e-05, "step": 9916, "training_step_time": 0.1162116527557373 }, { "epoch": 1.513214111328125e-05, "model_forward_time": 0.02581024169921875, "step": 9917 }, { "epoch": 1.513214111328125e-05, "step": 9917, "training_step_time": 0.11242890357971191 }, { "epoch": 1.51336669921875e-05, "model_forward_time": 0.026114702224731445, "step": 9918 }, { "epoch": 1.51336669921875e-05, "step": 9918, "training_step_time": 0.11237239837646484 }, { "epoch": 1.513519287109375e-05, "model_forward_time": 0.02534627914428711, "step": 9919 }, { "epoch": 1.513519287109375e-05, "step": 9919, "training_step_time": 0.1073911190032959 }, { "epoch": 1.513671875e-05, "grad_norm": 0.394562304019928, "learning_rate": 7.996588884974135e-05, "loss": 0.0348, "step": 9920 }, { "epoch": 1.513671875e-05, "model_forward_time": 0.02500319480895996, "step": 9920 }, { "epoch": 1.513671875e-05, "step": 9920, "training_step_time": 0.11501407623291016 }, { "epoch": 1.513824462890625e-05, "model_forward_time": 0.026851415634155273, "step": 9921 }, { "epoch": 1.513824462890625e-05, "step": 9921, "training_step_time": 0.11193370819091797 }, { "epoch": 1.51397705078125e-05, "model_forward_time": 0.025678396224975586, "step": 9922 }, { "epoch": 1.51397705078125e-05, "step": 9922, "training_step_time": 0.11248397827148438 }, { "epoch": 1.514129638671875e-05, "model_forward_time": 0.025379657745361328, "step": 9923 }, { "epoch": 1.514129638671875e-05, "step": 9923, "training_step_time": 0.10933828353881836 }, { "epoch": 1.5142822265625e-05, "model_forward_time": 0.024812936782836914, "step": 9924 }, { "epoch": 1.5142822265625e-05, "step": 9924, "training_step_time": 0.11638903617858887 }, { "epoch": 1.514434814453125e-05, "model_forward_time": 0.025019168853759766, "step": 9925 }, { "epoch": 1.514434814453125e-05, "step": 9925, "training_step_time": 0.12170553207397461 }, { "epoch": 1.51458740234375e-05, "model_forward_time": 0.02584052085876465, "step": 9926 }, { "epoch": 1.51458740234375e-05, "step": 9926, "training_step_time": 0.11509990692138672 }, { "epoch": 1.514739990234375e-05, "model_forward_time": 0.0254364013671875, "step": 9927 }, { "epoch": 1.514739990234375e-05, "step": 9927, "training_step_time": 0.1844475269317627 }, { "epoch": 1.514892578125e-05, "model_forward_time": 0.025149822235107422, "step": 9928 }, { "epoch": 1.514892578125e-05, "step": 9928, "training_step_time": 0.13775134086608887 }, { "epoch": 1.515045166015625e-05, "model_forward_time": 0.024923086166381836, "step": 9929 }, { "epoch": 1.515045166015625e-05, "step": 9929, "training_step_time": 0.1127326488494873 }, { "epoch": 1.51519775390625e-05, "grad_norm": 0.31549686193466187, "learning_rate": 7.992174994867123e-05, "loss": 0.0235, "step": 9930 }, { "epoch": 1.51519775390625e-05, "model_forward_time": 0.02564716339111328, "step": 9930 }, { "epoch": 1.51519775390625e-05, "step": 9930, "training_step_time": 0.10958647727966309 }, { "epoch": 1.515350341796875e-05, "model_forward_time": 0.025803089141845703, "step": 9931 }, { "epoch": 1.515350341796875e-05, "step": 9931, "training_step_time": 0.11190533638000488 }, { "epoch": 1.5155029296875e-05, "model_forward_time": 0.025421619415283203, "step": 9932 }, { "epoch": 1.5155029296875e-05, "step": 9932, "training_step_time": 0.11043334007263184 }, { "epoch": 1.515655517578125e-05, "model_forward_time": 0.0252993106842041, "step": 9933 }, { "epoch": 1.515655517578125e-05, "step": 9933, "training_step_time": 0.16924571990966797 }, { "epoch": 1.51580810546875e-05, "model_forward_time": 0.024580001831054688, "step": 9934 }, { "epoch": 1.51580810546875e-05, "step": 9934, "training_step_time": 0.13338065147399902 }, { "epoch": 1.515960693359375e-05, "model_forward_time": 0.024840354919433594, "step": 9935 }, { "epoch": 1.515960693359375e-05, "step": 9935, "training_step_time": 0.11204123497009277 }, { "epoch": 1.51611328125e-05, "model_forward_time": 0.027257919311523438, "step": 9936 }, { "epoch": 1.51611328125e-05, "step": 9936, "training_step_time": 0.11745691299438477 }, { "epoch": 1.516265869140625e-05, "model_forward_time": 0.025320053100585938, "step": 9937 }, { "epoch": 1.516265869140625e-05, "step": 9937, "training_step_time": 0.10972094535827637 }, { "epoch": 1.51641845703125e-05, "model_forward_time": 0.025059223175048828, "step": 9938 }, { "epoch": 1.51641845703125e-05, "step": 9938, "training_step_time": 0.11083126068115234 }, { "epoch": 1.516571044921875e-05, "model_forward_time": 0.026792287826538086, "step": 9939 }, { "epoch": 1.516571044921875e-05, "step": 9939, "training_step_time": 0.2091670036315918 }, { "epoch": 1.5167236328125e-05, "grad_norm": 0.2688508927822113, "learning_rate": 7.987757468985348e-05, "loss": 0.0269, "step": 9940 }, { "epoch": 1.5167236328125e-05, "model_forward_time": 0.02456188201904297, "step": 9940 }, { "epoch": 1.5167236328125e-05, "step": 9940, "training_step_time": 0.19881248474121094 }, { "epoch": 1.516876220703125e-05, "model_forward_time": 0.02462315559387207, "step": 9941 }, { "epoch": 1.516876220703125e-05, "step": 9941, "training_step_time": 0.1479027271270752 }, { "epoch": 1.51702880859375e-05, "model_forward_time": 0.025420427322387695, "step": 9942 }, { "epoch": 1.51702880859375e-05, "step": 9942, "training_step_time": 0.13161826133728027 }, { "epoch": 1.517181396484375e-05, "model_forward_time": 0.02457284927368164, "step": 9943 }, { "epoch": 1.517181396484375e-05, "step": 9943, "training_step_time": 0.11319947242736816 }, { "epoch": 1.517333984375e-05, "model_forward_time": 0.02525782585144043, "step": 9944 }, { "epoch": 1.517333984375e-05, "step": 9944, "training_step_time": 0.11025524139404297 }, { "epoch": 1.517486572265625e-05, "model_forward_time": 0.02533698081970215, "step": 9945 }, { "epoch": 1.517486572265625e-05, "step": 9945, "training_step_time": 0.10592961311340332 }, { "epoch": 1.51763916015625e-05, "model_forward_time": 0.025657176971435547, "step": 9946 }, { "epoch": 1.51763916015625e-05, "step": 9946, "training_step_time": 0.10996222496032715 }, { "epoch": 1.517791748046875e-05, "model_forward_time": 0.02540135383605957, "step": 9947 }, { "epoch": 1.517791748046875e-05, "step": 9947, "training_step_time": 0.10944628715515137 }, { "epoch": 1.5179443359375e-05, "model_forward_time": 0.025317668914794922, "step": 9948 }, { "epoch": 1.5179443359375e-05, "step": 9948, "training_step_time": 0.1083524227142334 }, { "epoch": 1.518096923828125e-05, "model_forward_time": 0.025221586227416992, "step": 9949 }, { "epoch": 1.518096923828125e-05, "step": 9949, "training_step_time": 0.11153769493103027 }, { "epoch": 1.51824951171875e-05, "grad_norm": 0.2855030596256256, "learning_rate": 7.983336312696522e-05, "loss": 0.02, "step": 9950 }, { "epoch": 1.51824951171875e-05, "model_forward_time": 0.025673389434814453, "step": 9950 }, { "epoch": 1.51824951171875e-05, "step": 9950, "training_step_time": 0.11349177360534668 }, { "epoch": 1.518402099609375e-05, "model_forward_time": 0.02541661262512207, "step": 9951 }, { "epoch": 1.518402099609375e-05, "step": 9951, "training_step_time": 0.10981893539428711 }, { "epoch": 1.5185546875e-05, "model_forward_time": 0.025097131729125977, "step": 9952 }, { "epoch": 1.5185546875e-05, "step": 9952, "training_step_time": 0.10949063301086426 }, { "epoch": 1.518707275390625e-05, "model_forward_time": 0.025618553161621094, "step": 9953 }, { "epoch": 1.518707275390625e-05, "step": 9953, "training_step_time": 0.10845112800598145 }, { "epoch": 1.51885986328125e-05, "model_forward_time": 0.025267839431762695, "step": 9954 }, { "epoch": 1.51885986328125e-05, "step": 9954, "training_step_time": 0.11058163642883301 }, { "epoch": 1.519012451171875e-05, "model_forward_time": 0.02546525001525879, "step": 9955 }, { "epoch": 1.519012451171875e-05, "step": 9955, "training_step_time": 0.10924887657165527 }, { "epoch": 1.5191650390625e-05, "model_forward_time": 0.024658679962158203, "step": 9956 }, { "epoch": 1.5191650390625e-05, "step": 9956, "training_step_time": 0.10893774032592773 }, { "epoch": 1.519317626953125e-05, "model_forward_time": 0.025359153747558594, "step": 9957 }, { "epoch": 1.519317626953125e-05, "step": 9957, "training_step_time": 0.10691499710083008 }, { "epoch": 1.51947021484375e-05, "model_forward_time": 0.025293827056884766, "step": 9958 }, { "epoch": 1.51947021484375e-05, "step": 9958, "training_step_time": 0.10974264144897461 }, { "epoch": 1.519622802734375e-05, "model_forward_time": 0.02467942237854004, "step": 9959 }, { "epoch": 1.519622802734375e-05, "step": 9959, "training_step_time": 0.11054801940917969 }, { "epoch": 1.519775390625e-05, "grad_norm": 0.37216871976852417, "learning_rate": 7.978911531372765e-05, "loss": 0.0228, "step": 9960 }, { "epoch": 1.519775390625e-05, "model_forward_time": 0.025072336196899414, "step": 9960 }, { "epoch": 1.519775390625e-05, "step": 9960, "training_step_time": 0.10569047927856445 }, { "epoch": 1.519927978515625e-05, "model_forward_time": 0.025372028350830078, "step": 9961 }, { "epoch": 1.519927978515625e-05, "step": 9961, "training_step_time": 0.10686922073364258 }, { "epoch": 1.52008056640625e-05, "model_forward_time": 0.02462172508239746, "step": 9962 }, { "epoch": 1.52008056640625e-05, "step": 9962, "training_step_time": 0.10532379150390625 }, { "epoch": 1.520233154296875e-05, "model_forward_time": 0.025685548782348633, "step": 9963 }, { "epoch": 1.520233154296875e-05, "step": 9963, "training_step_time": 0.10824155807495117 }, { "epoch": 1.5203857421875e-05, "model_forward_time": 0.025533676147460938, "step": 9964 }, { "epoch": 1.5203857421875e-05, "step": 9964, "training_step_time": 0.11055803298950195 }, { "epoch": 1.520538330078125e-05, "model_forward_time": 0.025403261184692383, "step": 9965 }, { "epoch": 1.520538330078125e-05, "step": 9965, "training_step_time": 0.10929131507873535 }, { "epoch": 1.52069091796875e-05, "model_forward_time": 0.02533578872680664, "step": 9966 }, { "epoch": 1.52069091796875e-05, "step": 9966, "training_step_time": 0.15046215057373047 }, { "epoch": 1.520843505859375e-05, "model_forward_time": 0.02427840232849121, "step": 9967 }, { "epoch": 1.520843505859375e-05, "step": 9967, "training_step_time": 0.16626191139221191 }, { "epoch": 1.52099609375e-05, "model_forward_time": 0.023575544357299805, "step": 9968 }, { "epoch": 1.52099609375e-05, "step": 9968, "training_step_time": 0.1519632339477539 }, { "epoch": 1.521148681640625e-05, "model_forward_time": 0.02497243881225586, "step": 9969 }, { "epoch": 1.521148681640625e-05, "step": 9969, "training_step_time": 0.10973238945007324 }, { "epoch": 1.52130126953125e-05, "grad_norm": 0.46723583340644836, "learning_rate": 7.974483130390604e-05, "loss": 0.0345, "step": 9970 }, { "epoch": 1.52130126953125e-05, "model_forward_time": 0.02503824234008789, "step": 9970 }, { "epoch": 1.52130126953125e-05, "step": 9970, "training_step_time": 0.17492461204528809 }, { "epoch": 1.521453857421875e-05, "model_forward_time": 0.024873733520507812, "step": 9971 }, { "epoch": 1.521453857421875e-05, "step": 9971, "training_step_time": 0.15408539772033691 }, { "epoch": 1.5216064453125e-05, "model_forward_time": 0.02453756332397461, "step": 9972 }, { "epoch": 1.5216064453125e-05, "step": 9972, "training_step_time": 0.11650347709655762 }, { "epoch": 1.521759033203125e-05, "model_forward_time": 0.025258541107177734, "step": 9973 }, { "epoch": 1.521759033203125e-05, "step": 9973, "training_step_time": 0.17031502723693848 }, { "epoch": 1.52191162109375e-05, "model_forward_time": 0.027492284774780273, "step": 9974 }, { "epoch": 1.52191162109375e-05, "step": 9974, "training_step_time": 0.17324447631835938 }, { "epoch": 1.522064208984375e-05, "model_forward_time": 0.025945425033569336, "step": 9975 }, { "epoch": 1.522064208984375e-05, "step": 9975, "training_step_time": 0.10860991477966309 }, { "epoch": 1.522216796875e-05, "model_forward_time": 0.024903297424316406, "step": 9976 }, { "epoch": 1.522216796875e-05, "step": 9976, "training_step_time": 0.10824179649353027 }, { "epoch": 1.522369384765625e-05, "model_forward_time": 0.025647640228271484, "step": 9977 }, { "epoch": 1.522369384765625e-05, "step": 9977, "training_step_time": 0.10925769805908203 }, { "epoch": 1.52252197265625e-05, "model_forward_time": 0.025425434112548828, "step": 9978 }, { "epoch": 1.52252197265625e-05, "step": 9978, "training_step_time": 0.1094205379486084 }, { "epoch": 1.522674560546875e-05, "model_forward_time": 0.025199413299560547, "step": 9979 }, { "epoch": 1.522674560546875e-05, "step": 9979, "training_step_time": 0.11535406112670898 }, { "epoch": 1.5228271484375e-05, "grad_norm": 0.20894256234169006, "learning_rate": 7.970051115130966e-05, "loss": 0.0259, "step": 9980 }, { "epoch": 1.5228271484375e-05, "model_forward_time": 0.026558637619018555, "step": 9980 }, { "epoch": 1.5228271484375e-05, "step": 9980, "training_step_time": 0.1382884979248047 }, { "epoch": 1.522979736328125e-05, "model_forward_time": 0.02524876594543457, "step": 9981 }, { "epoch": 1.522979736328125e-05, "step": 9981, "training_step_time": 0.10781216621398926 }, { "epoch": 1.52313232421875e-05, "model_forward_time": 0.025518417358398438, "step": 9982 }, { "epoch": 1.52313232421875e-05, "step": 9982, "training_step_time": 0.11473298072814941 }, { "epoch": 1.523284912109375e-05, "model_forward_time": 0.025401592254638672, "step": 9983 }, { "epoch": 1.523284912109375e-05, "step": 9983, "training_step_time": 0.10905647277832031 }, { "epoch": 1.5234375e-05, "model_forward_time": 0.025116682052612305, "step": 9984 }, { "epoch": 1.5234375e-05, "step": 9984, "training_step_time": 0.11001873016357422 }, { "epoch": 1.523590087890625e-05, "model_forward_time": 0.02494502067565918, "step": 9985 }, { "epoch": 1.523590087890625e-05, "step": 9985, "training_step_time": 0.2067408561706543 }, { "epoch": 1.52374267578125e-05, "model_forward_time": 0.023936748504638672, "step": 9986 }, { "epoch": 1.52374267578125e-05, "step": 9986, "training_step_time": 0.1898496150970459 }, { "epoch": 1.523895263671875e-05, "model_forward_time": 0.024956703186035156, "step": 9987 }, { "epoch": 1.523895263671875e-05, "step": 9987, "training_step_time": 0.16049599647521973 }, { "epoch": 1.5240478515625e-05, "model_forward_time": 0.024861812591552734, "step": 9988 }, { "epoch": 1.5240478515625e-05, "step": 9988, "training_step_time": 0.16083621978759766 }, { "epoch": 1.524200439453125e-05, "model_forward_time": 0.024207115173339844, "step": 9989 }, { "epoch": 1.524200439453125e-05, "step": 9989, "training_step_time": 0.1655561923980713 }, { "epoch": 1.52435302734375e-05, "grad_norm": 0.27305060625076294, "learning_rate": 7.965615490979163e-05, "loss": 0.0306, "step": 9990 }, { "epoch": 1.52435302734375e-05, "model_forward_time": 0.025997638702392578, "step": 9990 }, { "epoch": 1.52435302734375e-05, "step": 9990, "training_step_time": 0.10743832588195801 }, { "epoch": 1.524505615234375e-05, "model_forward_time": 0.02498912811279297, "step": 9991 }, { "epoch": 1.524505615234375e-05, "step": 9991, "training_step_time": 0.10569429397583008 }, { "epoch": 1.524658203125e-05, "model_forward_time": 0.025407075881958008, "step": 9992 }, { "epoch": 1.524658203125e-05, "step": 9992, "training_step_time": 0.10815191268920898 }, { "epoch": 1.524810791015625e-05, "model_forward_time": 0.0259549617767334, "step": 9993 }, { "epoch": 1.524810791015625e-05, "step": 9993, "training_step_time": 0.1083524227142334 }, { "epoch": 1.52496337890625e-05, "model_forward_time": 0.024721622467041016, "step": 9994 }, { "epoch": 1.52496337890625e-05, "step": 9994, "training_step_time": 0.11060500144958496 }, { "epoch": 1.525115966796875e-05, "model_forward_time": 0.024374008178710938, "step": 9995 }, { "epoch": 1.525115966796875e-05, "step": 9995, "training_step_time": 0.11023306846618652 }, { "epoch": 1.5252685546875e-05, "model_forward_time": 0.025157928466796875, "step": 9996 }, { "epoch": 1.5252685546875e-05, "step": 9996, "training_step_time": 0.10677242279052734 }, { "epoch": 1.525421142578125e-05, "model_forward_time": 0.02511739730834961, "step": 9997 }, { "epoch": 1.525421142578125e-05, "step": 9997, "training_step_time": 0.11240077018737793 }, { "epoch": 1.52557373046875e-05, "model_forward_time": 0.02527141571044922, "step": 9998 }, { "epoch": 1.52557373046875e-05, "step": 9998, "training_step_time": 0.13327813148498535 }, { "epoch": 1.525726318359375e-05, "model_forward_time": 0.025368690490722656, "step": 9999 }, { "epoch": 1.525726318359375e-05, "step": 9999, "training_step_time": 0.14312124252319336 }, { "epoch": 1.52587890625e-05, "grad_norm": 0.351493775844574, "learning_rate": 7.961176263324901e-05, "loss": 0.0377, "step": 10000 }, { "epoch": 1.52587890625e-05, "model_forward_time": 0.024866580963134766, "step": 10000 }, { "epoch": 1.52587890625e-05, "step": 10000, "training_step_time": 0.09874486923217773 }, { "epoch": 1.526031494140625e-05, "model_forward_time": 0.022858619689941406, "step": 10001 }, { "epoch": 1.526031494140625e-05, "step": 10001, "training_step_time": 0.10366487503051758 }, { "epoch": 1.52618408203125e-05, "model_forward_time": 0.02444171905517578, "step": 10002 }, { "epoch": 1.52618408203125e-05, "step": 10002, "training_step_time": 0.10867643356323242 }, { "epoch": 1.526336669921875e-05, "model_forward_time": 0.025006532669067383, "step": 10003 }, { "epoch": 1.526336669921875e-05, "step": 10003, "training_step_time": 0.1083674430847168 }, { "epoch": 1.5264892578125e-05, "model_forward_time": 0.025159597396850586, "step": 10004 }, { "epoch": 1.5264892578125e-05, "step": 10004, "training_step_time": 0.10979270935058594 }, { "epoch": 1.526641845703125e-05, "model_forward_time": 0.025738000869750977, "step": 10005 }, { "epoch": 1.526641845703125e-05, "step": 10005, "training_step_time": 0.10994672775268555 }, { "epoch": 1.52679443359375e-05, "model_forward_time": 0.025104045867919922, "step": 10006 }, { "epoch": 1.52679443359375e-05, "step": 10006, "training_step_time": 0.11083459854125977 }, { "epoch": 1.526947021484375e-05, "model_forward_time": 0.02450418472290039, "step": 10007 }, { "epoch": 1.526947021484375e-05, "step": 10007, "training_step_time": 0.10733461380004883 }, { "epoch": 1.527099609375e-05, "model_forward_time": 0.025336265563964844, "step": 10008 }, { "epoch": 1.527099609375e-05, "step": 10008, "training_step_time": 0.10805916786193848 }, { "epoch": 1.527252197265625e-05, "model_forward_time": 0.024996280670166016, "step": 10009 }, { "epoch": 1.527252197265625e-05, "step": 10009, "training_step_time": 0.10877299308776855 }, { "epoch": 1.52740478515625e-05, "grad_norm": 0.3605310618877411, "learning_rate": 7.956733437562259e-05, "loss": 0.0323, "step": 10010 }, { "epoch": 1.52740478515625e-05, "model_forward_time": 0.02529454231262207, "step": 10010 }, { "epoch": 1.52740478515625e-05, "step": 10010, "training_step_time": 0.1079092025756836 }, { "epoch": 1.527557373046875e-05, "model_forward_time": 0.025272369384765625, "step": 10011 }, { "epoch": 1.527557373046875e-05, "step": 10011, "training_step_time": 0.10971426963806152 }, { "epoch": 1.5277099609375e-05, "model_forward_time": 0.025789737701416016, "step": 10012 }, { "epoch": 1.5277099609375e-05, "step": 10012, "training_step_time": 0.10787677764892578 }, { "epoch": 1.527862548828125e-05, "model_forward_time": 0.02490711212158203, "step": 10013 }, { "epoch": 1.527862548828125e-05, "step": 10013, "training_step_time": 0.11001753807067871 }, { "epoch": 1.52801513671875e-05, "model_forward_time": 0.02554798126220703, "step": 10014 }, { "epoch": 1.52801513671875e-05, "step": 10014, "training_step_time": 0.11288619041442871 }, { "epoch": 1.528167724609375e-05, "model_forward_time": 0.0250546932220459, "step": 10015 }, { "epoch": 1.528167724609375e-05, "step": 10015, "training_step_time": 0.10996460914611816 }, { "epoch": 1.5283203125e-05, "model_forward_time": 0.02510380744934082, "step": 10016 }, { "epoch": 1.5283203125e-05, "step": 10016, "training_step_time": 0.11495614051818848 }, { "epoch": 1.528472900390625e-05, "model_forward_time": 0.02476334571838379, "step": 10017 }, { "epoch": 1.528472900390625e-05, "step": 10017, "training_step_time": 0.11229825019836426 }, { "epoch": 1.52862548828125e-05, "model_forward_time": 0.025278568267822266, "step": 10018 }, { "epoch": 1.52862548828125e-05, "step": 10018, "training_step_time": 0.11230254173278809 }, { "epoch": 1.528778076171875e-05, "model_forward_time": 0.024647951126098633, "step": 10019 }, { "epoch": 1.528778076171875e-05, "step": 10019, "training_step_time": 0.17102670669555664 }, { "epoch": 1.5289306640625e-05, "grad_norm": 0.4322364330291748, "learning_rate": 7.952287019089685e-05, "loss": 0.0302, "step": 10020 }, { "epoch": 1.5289306640625e-05, "model_forward_time": 0.024394750595092773, "step": 10020 }, { "epoch": 1.5289306640625e-05, "step": 10020, "training_step_time": 0.16081976890563965 }, { "epoch": 1.529083251953125e-05, "model_forward_time": 0.02476024627685547, "step": 10021 }, { "epoch": 1.529083251953125e-05, "step": 10021, "training_step_time": 0.11350131034851074 }, { "epoch": 1.52923583984375e-05, "model_forward_time": 0.02490711212158203, "step": 10022 }, { "epoch": 1.52923583984375e-05, "step": 10022, "training_step_time": 0.20881056785583496 }, { "epoch": 1.529388427734375e-05, "model_forward_time": 0.023776769638061523, "step": 10023 }, { "epoch": 1.529388427734375e-05, "step": 10023, "training_step_time": 0.12075257301330566 }, { "epoch": 1.529541015625e-05, "model_forward_time": 0.024541616439819336, "step": 10024 }, { "epoch": 1.529541015625e-05, "step": 10024, "training_step_time": 0.10636687278747559 }, { "epoch": 1.529693603515625e-05, "model_forward_time": 0.025691509246826172, "step": 10025 }, { "epoch": 1.529693603515625e-05, "step": 10025, "training_step_time": 0.10951018333435059 }, { "epoch": 1.52984619140625e-05, "model_forward_time": 0.024759531021118164, "step": 10026 }, { "epoch": 1.52984619140625e-05, "step": 10026, "training_step_time": 0.11280679702758789 }, { "epoch": 1.529998779296875e-05, "model_forward_time": 0.025245189666748047, "step": 10027 }, { "epoch": 1.529998779296875e-05, "step": 10027, "training_step_time": 0.1070256233215332 }, { "epoch": 1.5301513671875e-05, "model_forward_time": 0.026827096939086914, "step": 10028 }, { "epoch": 1.5301513671875e-05, "step": 10028, "training_step_time": 0.11000728607177734 }, { "epoch": 1.530303955078125e-05, "model_forward_time": 0.025188207626342773, "step": 10029 }, { "epoch": 1.530303955078125e-05, "step": 10029, "training_step_time": 0.1352078914642334 }, { "epoch": 1.53045654296875e-05, "grad_norm": 0.3623270094394684, "learning_rate": 7.947837013310005e-05, "loss": 0.0207, "step": 10030 }, { "epoch": 1.53045654296875e-05, "model_forward_time": 0.024950742721557617, "step": 10030 }, { "epoch": 1.53045654296875e-05, "step": 10030, "training_step_time": 0.11240601539611816 }, { "epoch": 1.530609130859375e-05, "model_forward_time": 0.02467632293701172, "step": 10031 }, { "epoch": 1.530609130859375e-05, "step": 10031, "training_step_time": 0.11512279510498047 }, { "epoch": 1.53076171875e-05, "model_forward_time": 0.025052785873413086, "step": 10032 }, { "epoch": 1.53076171875e-05, "step": 10032, "training_step_time": 0.10787200927734375 }, { "epoch": 1.530914306640625e-05, "model_forward_time": 0.024851083755493164, "step": 10033 }, { "epoch": 1.530914306640625e-05, "step": 10033, "training_step_time": 0.1102304458618164 }, { "epoch": 1.53106689453125e-05, "model_forward_time": 0.025536298751831055, "step": 10034 }, { "epoch": 1.53106689453125e-05, "step": 10034, "training_step_time": 0.20285677909851074 }, { "epoch": 1.531219482421875e-05, "model_forward_time": 0.024356603622436523, "step": 10035 }, { "epoch": 1.531219482421875e-05, "step": 10035, "training_step_time": 0.16776418685913086 }, { "epoch": 1.5313720703125e-05, "model_forward_time": 0.02516317367553711, "step": 10036 }, { "epoch": 1.5313720703125e-05, "step": 10036, "training_step_time": 0.1768326759338379 }, { "epoch": 1.531524658203125e-05, "model_forward_time": 0.024338483810424805, "step": 10037 }, { "epoch": 1.531524658203125e-05, "step": 10037, "training_step_time": 0.19268107414245605 }, { "epoch": 1.53167724609375e-05, "model_forward_time": 0.0242307186126709, "step": 10038 }, { "epoch": 1.53167724609375e-05, "step": 10038, "training_step_time": 0.11486053466796875 }, { "epoch": 1.531829833984375e-05, "model_forward_time": 0.02393794059753418, "step": 10039 }, { "epoch": 1.531829833984375e-05, "step": 10039, "training_step_time": 0.11479878425598145 }, { "epoch": 1.531982421875e-05, "grad_norm": 0.3786519467830658, "learning_rate": 7.943383425630387e-05, "loss": 0.0351, "step": 10040 }, { "epoch": 1.531982421875e-05, "model_forward_time": 0.025146961212158203, "step": 10040 }, { "epoch": 1.531982421875e-05, "step": 10040, "training_step_time": 0.10745835304260254 }, { "epoch": 1.532135009765625e-05, "model_forward_time": 0.02490544319152832, "step": 10041 }, { "epoch": 1.532135009765625e-05, "step": 10041, "training_step_time": 0.10728311538696289 }, { "epoch": 1.53228759765625e-05, "model_forward_time": 0.02537822723388672, "step": 10042 }, { "epoch": 1.53228759765625e-05, "step": 10042, "training_step_time": 0.10988092422485352 }, { "epoch": 1.532440185546875e-05, "model_forward_time": 0.024898052215576172, "step": 10043 }, { "epoch": 1.532440185546875e-05, "step": 10043, "training_step_time": 0.1100320816040039 }, { "epoch": 1.5325927734375e-05, "model_forward_time": 0.02429938316345215, "step": 10044 }, { "epoch": 1.5325927734375e-05, "step": 10044, "training_step_time": 0.1069495677947998 }, { "epoch": 1.532745361328125e-05, "model_forward_time": 0.024873733520507812, "step": 10045 }, { "epoch": 1.532745361328125e-05, "step": 10045, "training_step_time": 0.10822772979736328 }, { "epoch": 1.53289794921875e-05, "model_forward_time": 0.024940013885498047, "step": 10046 }, { "epoch": 1.53289794921875e-05, "step": 10046, "training_step_time": 0.1103048324584961 }, { "epoch": 1.533050537109375e-05, "model_forward_time": 0.02517247200012207, "step": 10047 }, { "epoch": 1.533050537109375e-05, "step": 10047, "training_step_time": 0.10545134544372559 }, { "epoch": 1.533203125e-05, "model_forward_time": 0.025310993194580078, "step": 10048 }, { "epoch": 1.533203125e-05, "step": 10048, "training_step_time": 0.10932755470275879 }, { "epoch": 1.533355712890625e-05, "model_forward_time": 0.02487039566040039, "step": 10049 }, { "epoch": 1.533355712890625e-05, "step": 10049, "training_step_time": 0.11080360412597656 }, { "epoch": 1.53350830078125e-05, "grad_norm": 0.2707097828388214, "learning_rate": 7.938926261462366e-05, "loss": 0.0289, "step": 10050 }, { "epoch": 1.53350830078125e-05, "model_forward_time": 0.024997234344482422, "step": 10050 }, { "epoch": 1.53350830078125e-05, "step": 10050, "training_step_time": 0.1090240478515625 }, { "epoch": 1.533660888671875e-05, "model_forward_time": 0.02492213249206543, "step": 10051 }, { "epoch": 1.533660888671875e-05, "step": 10051, "training_step_time": 0.11160731315612793 }, { "epoch": 1.5338134765625e-05, "model_forward_time": 0.024927377700805664, "step": 10052 }, { "epoch": 1.5338134765625e-05, "step": 10052, "training_step_time": 0.10678601264953613 }, { "epoch": 1.533966064453125e-05, "model_forward_time": 0.025282621383666992, "step": 10053 }, { "epoch": 1.533966064453125e-05, "step": 10053, "training_step_time": 0.10588407516479492 }, { "epoch": 1.53411865234375e-05, "model_forward_time": 0.025478124618530273, "step": 10054 }, { "epoch": 1.53411865234375e-05, "step": 10054, "training_step_time": 0.10859227180480957 }, { "epoch": 1.534271240234375e-05, "model_forward_time": 0.025096893310546875, "step": 10055 }, { "epoch": 1.534271240234375e-05, "step": 10055, "training_step_time": 0.10732102394104004 }, { "epoch": 1.534423828125e-05, "model_forward_time": 0.02495598793029785, "step": 10056 }, { "epoch": 1.534423828125e-05, "step": 10056, "training_step_time": 0.10802960395812988 }, { "epoch": 1.534576416015625e-05, "model_forward_time": 0.02487945556640625, "step": 10057 }, { "epoch": 1.534576416015625e-05, "step": 10057, "training_step_time": 0.10804152488708496 }, { "epoch": 1.53472900390625e-05, "model_forward_time": 0.025394678115844727, "step": 10058 }, { "epoch": 1.53472900390625e-05, "step": 10058, "training_step_time": 0.10782098770141602 }, { "epoch": 1.534881591796875e-05, "model_forward_time": 0.025424480438232422, "step": 10059 }, { "epoch": 1.534881591796875e-05, "step": 10059, "training_step_time": 0.11058688163757324 }, { "epoch": 1.5350341796875e-05, "grad_norm": 0.2767201364040375, "learning_rate": 7.934465526221815e-05, "loss": 0.029, "step": 10060 }, { "epoch": 1.5350341796875e-05, "model_forward_time": 0.025366783142089844, "step": 10060 }, { "epoch": 1.5350341796875e-05, "step": 10060, "training_step_time": 0.10860729217529297 }, { "epoch": 1.535186767578125e-05, "model_forward_time": 0.024857044219970703, "step": 10061 }, { "epoch": 1.535186767578125e-05, "step": 10061, "training_step_time": 0.10847115516662598 }, { "epoch": 1.53533935546875e-05, "model_forward_time": 0.02563166618347168, "step": 10062 }, { "epoch": 1.53533935546875e-05, "step": 10062, "training_step_time": 0.1102597713470459 }, { "epoch": 1.535491943359375e-05, "model_forward_time": 0.024884462356567383, "step": 10063 }, { "epoch": 1.535491943359375e-05, "step": 10063, "training_step_time": 0.11015748977661133 }, { "epoch": 1.53564453125e-05, "model_forward_time": 0.025075674057006836, "step": 10064 }, { "epoch": 1.53564453125e-05, "step": 10064, "training_step_time": 0.10977935791015625 }, { "epoch": 1.535797119140625e-05, "model_forward_time": 0.026086091995239258, "step": 10065 }, { "epoch": 1.535797119140625e-05, "step": 10065, "training_step_time": 0.11261773109436035 }, { "epoch": 1.53594970703125e-05, "model_forward_time": 0.025553226470947266, "step": 10066 }, { "epoch": 1.53594970703125e-05, "step": 10066, "training_step_time": 0.21470260620117188 }, { "epoch": 1.536102294921875e-05, "model_forward_time": 0.02509284019470215, "step": 10067 }, { "epoch": 1.536102294921875e-05, "step": 10067, "training_step_time": 0.11184287071228027 }, { "epoch": 1.5362548828125e-05, "model_forward_time": 0.02429676055908203, "step": 10068 }, { "epoch": 1.5362548828125e-05, "step": 10068, "training_step_time": 0.10765743255615234 }, { "epoch": 1.536407470703125e-05, "model_forward_time": 0.02538299560546875, "step": 10069 }, { "epoch": 1.536407470703125e-05, "step": 10069, "training_step_time": 0.10883116722106934 }, { "epoch": 1.53656005859375e-05, "grad_norm": 0.32040807604789734, "learning_rate": 7.930001225328946e-05, "loss": 0.0203, "step": 10070 }, { "epoch": 1.53656005859375e-05, "model_forward_time": 0.025388240814208984, "step": 10070 }, { "epoch": 1.53656005859375e-05, "step": 10070, "training_step_time": 0.11775374412536621 }, { "epoch": 1.536712646484375e-05, "model_forward_time": 0.026128053665161133, "step": 10071 }, { "epoch": 1.536712646484375e-05, "step": 10071, "training_step_time": 0.11798739433288574 }, { "epoch": 1.536865234375e-05, "model_forward_time": 0.02519845962524414, "step": 10072 }, { "epoch": 1.536865234375e-05, "step": 10072, "training_step_time": 0.1072995662689209 }, { "epoch": 1.537017822265625e-05, "model_forward_time": 0.02544379234313965, "step": 10073 }, { "epoch": 1.537017822265625e-05, "step": 10073, "training_step_time": 0.10733842849731445 }, { "epoch": 1.53717041015625e-05, "model_forward_time": 0.025278091430664062, "step": 10074 }, { "epoch": 1.53717041015625e-05, "step": 10074, "training_step_time": 0.11060619354248047 }, { "epoch": 1.537322998046875e-05, "model_forward_time": 0.025327205657958984, "step": 10075 }, { "epoch": 1.537322998046875e-05, "step": 10075, "training_step_time": 0.15362310409545898 }, { "epoch": 1.5374755859375e-05, "model_forward_time": 0.024886131286621094, "step": 10076 }, { "epoch": 1.5374755859375e-05, "step": 10076, "training_step_time": 0.14186549186706543 }, { "epoch": 1.537628173828125e-05, "model_forward_time": 0.02475595474243164, "step": 10077 }, { "epoch": 1.537628173828125e-05, "step": 10077, "training_step_time": 0.11292743682861328 }, { "epoch": 1.53778076171875e-05, "model_forward_time": 0.024794340133666992, "step": 10078 }, { "epoch": 1.53778076171875e-05, "step": 10078, "training_step_time": 0.17627787590026855 }, { "epoch": 1.537933349609375e-05, "model_forward_time": 0.023310184478759766, "step": 10079 }, { "epoch": 1.537933349609375e-05, "step": 10079, "training_step_time": 0.20332908630371094 }, { "epoch": 1.5380859375e-05, "grad_norm": 0.22738216817378998, "learning_rate": 7.925533364208309e-05, "loss": 0.0283, "step": 10080 }, { "epoch": 1.5380859375e-05, "model_forward_time": 0.0241391658782959, "step": 10080 }, { "epoch": 1.5380859375e-05, "step": 10080, "training_step_time": 0.8756346702575684 }, { "epoch": 1.538238525390625e-05, "model_forward_time": 0.022922277450561523, "step": 10081 }, { "epoch": 1.538238525390625e-05, "step": 10081, "training_step_time": 0.17417335510253906 }, { "epoch": 1.53839111328125e-05, "model_forward_time": 0.023819923400878906, "step": 10082 }, { "epoch": 1.53839111328125e-05, "step": 10082, "training_step_time": 0.10905218124389648 }, { "epoch": 1.538543701171875e-05, "model_forward_time": 0.02519083023071289, "step": 10083 }, { "epoch": 1.538543701171875e-05, "step": 10083, "training_step_time": 0.1093900203704834 }, { "epoch": 1.5386962890625e-05, "model_forward_time": 0.025296449661254883, "step": 10084 }, { "epoch": 1.5386962890625e-05, "step": 10084, "training_step_time": 0.10810303688049316 }, { "epoch": 1.538848876953125e-05, "model_forward_time": 0.02500319480895996, "step": 10085 }, { "epoch": 1.538848876953125e-05, "step": 10085, "training_step_time": 0.10812759399414062 }, { "epoch": 1.53900146484375e-05, "model_forward_time": 0.026134967803955078, "step": 10086 }, { "epoch": 1.53900146484375e-05, "step": 10086, "training_step_time": 0.10683560371398926 }, { "epoch": 1.539154052734375e-05, "model_forward_time": 0.025140762329101562, "step": 10087 }, { "epoch": 1.539154052734375e-05, "step": 10087, "training_step_time": 0.1069185733795166 }, { "epoch": 1.539306640625e-05, "model_forward_time": 0.025353193283081055, "step": 10088 }, { "epoch": 1.539306640625e-05, "step": 10088, "training_step_time": 0.10728216171264648 }, { "epoch": 1.539459228515625e-05, "model_forward_time": 0.025439977645874023, "step": 10089 }, { "epoch": 1.539459228515625e-05, "step": 10089, "training_step_time": 0.10678887367248535 }, { "epoch": 1.53961181640625e-05, "grad_norm": 0.504446268081665, "learning_rate": 7.921061948288773e-05, "loss": 0.0277, "step": 10090 }, { "epoch": 1.53961181640625e-05, "model_forward_time": 0.024975061416625977, "step": 10090 }, { "epoch": 1.53961181640625e-05, "step": 10090, "training_step_time": 0.11394882202148438 }, { "epoch": 1.539764404296875e-05, "model_forward_time": 0.02552032470703125, "step": 10091 }, { "epoch": 1.539764404296875e-05, "step": 10091, "training_step_time": 0.11300373077392578 }, { "epoch": 1.5399169921875e-05, "model_forward_time": 0.024634599685668945, "step": 10092 }, { "epoch": 1.5399169921875e-05, "step": 10092, "training_step_time": 0.10746908187866211 }, { "epoch": 1.540069580078125e-05, "model_forward_time": 0.025124788284301758, "step": 10093 }, { "epoch": 1.540069580078125e-05, "step": 10093, "training_step_time": 0.1068117618560791 }, { "epoch": 1.54022216796875e-05, "model_forward_time": 0.025466442108154297, "step": 10094 }, { "epoch": 1.54022216796875e-05, "step": 10094, "training_step_time": 0.10998392105102539 }, { "epoch": 1.540374755859375e-05, "model_forward_time": 0.02523493766784668, "step": 10095 }, { "epoch": 1.540374755859375e-05, "step": 10095, "training_step_time": 0.1072075366973877 }, { "epoch": 1.54052734375e-05, "model_forward_time": 0.025516271591186523, "step": 10096 }, { "epoch": 1.54052734375e-05, "step": 10096, "training_step_time": 0.1092982292175293 }, { "epoch": 1.540679931640625e-05, "model_forward_time": 0.02397894859313965, "step": 10097 }, { "epoch": 1.540679931640625e-05, "step": 10097, "training_step_time": 0.10802984237670898 }, { "epoch": 1.54083251953125e-05, "model_forward_time": 0.025351762771606445, "step": 10098 }, { "epoch": 1.54083251953125e-05, "step": 10098, "training_step_time": 0.1107323169708252 }, { "epoch": 1.540985107421875e-05, "model_forward_time": 0.024886608123779297, "step": 10099 }, { "epoch": 1.540985107421875e-05, "step": 10099, "training_step_time": 0.10786557197570801 }, { "epoch": 1.5411376953125e-05, "grad_norm": 0.501907467842102, "learning_rate": 7.916586983003533e-05, "loss": 0.0291, "step": 10100 }, { "epoch": 1.5411376953125e-05, "model_forward_time": 0.024957656860351562, "step": 10100 }, { "epoch": 1.5411376953125e-05, "step": 10100, "training_step_time": 0.11036181449890137 }, { "epoch": 1.541290283203125e-05, "model_forward_time": 0.024985313415527344, "step": 10101 }, { "epoch": 1.541290283203125e-05, "step": 10101, "training_step_time": 0.10629725456237793 }, { "epoch": 1.54144287109375e-05, "model_forward_time": 0.024733304977416992, "step": 10102 }, { "epoch": 1.54144287109375e-05, "step": 10102, "training_step_time": 0.1578047275543213 }, { "epoch": 1.541595458984375e-05, "model_forward_time": 0.025124311447143555, "step": 10103 }, { "epoch": 1.541595458984375e-05, "step": 10103, "training_step_time": 0.16382408142089844 }, { "epoch": 1.541748046875e-05, "model_forward_time": 0.023879289627075195, "step": 10104 }, { "epoch": 1.541748046875e-05, "step": 10104, "training_step_time": 0.14234328269958496 }, { "epoch": 1.541900634765625e-05, "model_forward_time": 0.02464437484741211, "step": 10105 }, { "epoch": 1.541900634765625e-05, "step": 10105, "training_step_time": 0.10982894897460938 }, { "epoch": 1.54205322265625e-05, "model_forward_time": 0.02779102325439453, "step": 10106 }, { "epoch": 1.54205322265625e-05, "step": 10106, "training_step_time": 0.1487720012664795 }, { "epoch": 1.542205810546875e-05, "model_forward_time": 0.024782657623291016, "step": 10107 }, { "epoch": 1.542205810546875e-05, "step": 10107, "training_step_time": 0.1222691535949707 }, { "epoch": 1.5423583984375e-05, "model_forward_time": 0.024914264678955078, "step": 10108 }, { "epoch": 1.5423583984375e-05, "step": 10108, "training_step_time": 0.18389654159545898 }, { "epoch": 1.542510986328125e-05, "model_forward_time": 0.024308204650878906, "step": 10109 }, { "epoch": 1.542510986328125e-05, "step": 10109, "training_step_time": 0.14718270301818848 }, { "epoch": 1.54266357421875e-05, "grad_norm": 0.3539285659790039, "learning_rate": 7.912108473790092e-05, "loss": 0.0199, "step": 10110 }, { "epoch": 1.54266357421875e-05, "model_forward_time": 0.0243222713470459, "step": 10110 }, { "epoch": 1.54266357421875e-05, "step": 10110, "training_step_time": 0.21373891830444336 }, { "epoch": 1.542816162109375e-05, "model_forward_time": 0.024480819702148438, "step": 10111 }, { "epoch": 1.542816162109375e-05, "step": 10111, "training_step_time": 0.10843658447265625 }, { "epoch": 1.54296875e-05, "model_forward_time": 0.024720430374145508, "step": 10112 }, { "epoch": 1.54296875e-05, "step": 10112, "training_step_time": 0.11596989631652832 }, { "epoch": 1.543121337890625e-05, "model_forward_time": 0.02765369415283203, "step": 10113 }, { "epoch": 1.543121337890625e-05, "step": 10113, "training_step_time": 0.1116480827331543 }, { "epoch": 1.54327392578125e-05, "model_forward_time": 0.024901628494262695, "step": 10114 }, { "epoch": 1.54327392578125e-05, "step": 10114, "training_step_time": 0.11186599731445312 }, { "epoch": 1.543426513671875e-05, "model_forward_time": 0.025295019149780273, "step": 10115 }, { "epoch": 1.543426513671875e-05, "step": 10115, "training_step_time": 0.13511276245117188 }, { "epoch": 1.5435791015625e-05, "model_forward_time": 0.02487659454345703, "step": 10116 }, { "epoch": 1.5435791015625e-05, "step": 10116, "training_step_time": 0.11017584800720215 }, { "epoch": 1.543731689453125e-05, "model_forward_time": 0.02500152587890625, "step": 10117 }, { "epoch": 1.543731689453125e-05, "step": 10117, "training_step_time": 0.12564420700073242 }, { "epoch": 1.54388427734375e-05, "model_forward_time": 0.02510523796081543, "step": 10118 }, { "epoch": 1.54388427734375e-05, "step": 10118, "training_step_time": 0.10876727104187012 }, { "epoch": 1.544036865234375e-05, "model_forward_time": 0.0252838134765625, "step": 10119 }, { "epoch": 1.544036865234375e-05, "step": 10119, "training_step_time": 0.1938619613647461 }, { "epoch": 1.544189453125e-05, "grad_norm": 0.3355169892311096, "learning_rate": 7.907626426090262e-05, "loss": 0.024, "step": 10120 }, { "epoch": 1.544189453125e-05, "model_forward_time": 0.024459362030029297, "step": 10120 }, { "epoch": 1.544189453125e-05, "step": 10120, "training_step_time": 0.12192940711975098 }, { "epoch": 1.544342041015625e-05, "model_forward_time": 0.02383112907409668, "step": 10121 }, { "epoch": 1.544342041015625e-05, "step": 10121, "training_step_time": 0.1436760425567627 }, { "epoch": 1.54449462890625e-05, "model_forward_time": 0.0248720645904541, "step": 10122 }, { "epoch": 1.54449462890625e-05, "step": 10122, "training_step_time": 0.1451125144958496 }, { "epoch": 1.544647216796875e-05, "model_forward_time": 0.02434086799621582, "step": 10123 }, { "epoch": 1.544647216796875e-05, "step": 10123, "training_step_time": 0.21503233909606934 }, { "epoch": 1.5447998046875e-05, "model_forward_time": 0.02450394630432129, "step": 10124 }, { "epoch": 1.5447998046875e-05, "step": 10124, "training_step_time": 0.11735272407531738 }, { "epoch": 1.544952392578125e-05, "model_forward_time": 0.023896217346191406, "step": 10125 }, { "epoch": 1.544952392578125e-05, "step": 10125, "training_step_time": 0.11086225509643555 }, { "epoch": 1.54510498046875e-05, "model_forward_time": 0.02550983428955078, "step": 10126 }, { "epoch": 1.54510498046875e-05, "step": 10126, "training_step_time": 0.1104276180267334 }, { "epoch": 1.545257568359375e-05, "model_forward_time": 0.025431156158447266, "step": 10127 }, { "epoch": 1.545257568359375e-05, "step": 10127, "training_step_time": 0.11063575744628906 }, { "epoch": 1.54541015625e-05, "model_forward_time": 0.02494668960571289, "step": 10128 }, { "epoch": 1.54541015625e-05, "step": 10128, "training_step_time": 0.11187005043029785 }, { "epoch": 1.545562744140625e-05, "model_forward_time": 0.024761676788330078, "step": 10129 }, { "epoch": 1.545562744140625e-05, "step": 10129, "training_step_time": 0.10711669921875 }, { "epoch": 1.54571533203125e-05, "grad_norm": 0.44187474250793457, "learning_rate": 7.903140845350153e-05, "loss": 0.0335, "step": 10130 }, { "epoch": 1.54571533203125e-05, "model_forward_time": 0.025022029876708984, "step": 10130 }, { "epoch": 1.54571533203125e-05, "step": 10130, "training_step_time": 0.10806918144226074 }, { "epoch": 1.545867919921875e-05, "model_forward_time": 0.025018692016601562, "step": 10131 }, { "epoch": 1.545867919921875e-05, "step": 10131, "training_step_time": 0.10681724548339844 }, { "epoch": 1.5460205078125e-05, "model_forward_time": 0.025160789489746094, "step": 10132 }, { "epoch": 1.5460205078125e-05, "step": 10132, "training_step_time": 0.10776472091674805 }, { "epoch": 1.546173095703125e-05, "model_forward_time": 0.025214672088623047, "step": 10133 }, { "epoch": 1.546173095703125e-05, "step": 10133, "training_step_time": 0.10763883590698242 }, { "epoch": 1.54632568359375e-05, "model_forward_time": 0.024786710739135742, "step": 10134 }, { "epoch": 1.54632568359375e-05, "step": 10134, "training_step_time": 0.10617494583129883 }, { "epoch": 1.546478271484375e-05, "model_forward_time": 0.025217056274414062, "step": 10135 }, { "epoch": 1.546478271484375e-05, "step": 10135, "training_step_time": 0.11153984069824219 }, { "epoch": 1.546630859375e-05, "model_forward_time": 0.02510857582092285, "step": 10136 }, { "epoch": 1.546630859375e-05, "step": 10136, "training_step_time": 0.11200785636901855 }, { "epoch": 1.546783447265625e-05, "model_forward_time": 0.024888277053833008, "step": 10137 }, { "epoch": 1.546783447265625e-05, "step": 10137, "training_step_time": 0.10623979568481445 }, { "epoch": 1.54693603515625e-05, "model_forward_time": 0.02521514892578125, "step": 10138 }, { "epoch": 1.54693603515625e-05, "step": 10138, "training_step_time": 0.10669827461242676 }, { "epoch": 1.547088623046875e-05, "model_forward_time": 0.02492666244506836, "step": 10139 }, { "epoch": 1.547088623046875e-05, "step": 10139, "training_step_time": 0.1093299388885498 }, { "epoch": 1.5472412109375e-05, "grad_norm": 0.4617132544517517, "learning_rate": 7.898651737020166e-05, "loss": 0.0264, "step": 10140 }, { "epoch": 1.5472412109375e-05, "model_forward_time": 0.024262428283691406, "step": 10140 }, { "epoch": 1.5472412109375e-05, "step": 10140, "training_step_time": 0.10864901542663574 }, { "epoch": 1.547393798828125e-05, "model_forward_time": 0.024130821228027344, "step": 10141 }, { "epoch": 1.547393798828125e-05, "step": 10141, "training_step_time": 0.12260818481445312 }, { "epoch": 1.54754638671875e-05, "model_forward_time": 0.024144411087036133, "step": 10142 }, { "epoch": 1.54754638671875e-05, "step": 10142, "training_step_time": 0.12442755699157715 }, { "epoch": 1.547698974609375e-05, "model_forward_time": 0.024448394775390625, "step": 10143 }, { "epoch": 1.547698974609375e-05, "step": 10143, "training_step_time": 0.12493252754211426 }, { "epoch": 1.5478515625e-05, "model_forward_time": 0.023976802825927734, "step": 10144 }, { "epoch": 1.5478515625e-05, "step": 10144, "training_step_time": 0.12100362777709961 }, { "epoch": 1.548004150390625e-05, "model_forward_time": 0.023984909057617188, "step": 10145 }, { "epoch": 1.548004150390625e-05, "step": 10145, "training_step_time": 0.11830854415893555 }, { "epoch": 1.54815673828125e-05, "model_forward_time": 0.02394866943359375, "step": 10146 }, { "epoch": 1.54815673828125e-05, "step": 10146, "training_step_time": 0.11339521408081055 }, { "epoch": 1.548309326171875e-05, "model_forward_time": 0.025025129318237305, "step": 10147 }, { "epoch": 1.548309326171875e-05, "step": 10147, "training_step_time": 0.1125185489654541 }, { "epoch": 1.5484619140625e-05, "model_forward_time": 0.025191783905029297, "step": 10148 }, { "epoch": 1.5484619140625e-05, "step": 10148, "training_step_time": 0.11363911628723145 }, { "epoch": 1.548614501953125e-05, "model_forward_time": 0.025202512741088867, "step": 10149 }, { "epoch": 1.548614501953125e-05, "step": 10149, "training_step_time": 0.11296248435974121 }, { "epoch": 1.54876708984375e-05, "grad_norm": 0.3276233673095703, "learning_rate": 7.894159106554997e-05, "loss": 0.0357, "step": 10150 }, { "epoch": 1.54876708984375e-05, "model_forward_time": 0.024685382843017578, "step": 10150 }, { "epoch": 1.54876708984375e-05, "step": 10150, "training_step_time": 0.10923314094543457 }, { "epoch": 1.548919677734375e-05, "model_forward_time": 0.025930404663085938, "step": 10151 }, { "epoch": 1.548919677734375e-05, "step": 10151, "training_step_time": 0.10867023468017578 }, { "epoch": 1.549072265625e-05, "model_forward_time": 0.025136947631835938, "step": 10152 }, { "epoch": 1.549072265625e-05, "step": 10152, "training_step_time": 0.14772748947143555 }, { "epoch": 1.549224853515625e-05, "model_forward_time": 0.02504134178161621, "step": 10153 }, { "epoch": 1.549224853515625e-05, "step": 10153, "training_step_time": 0.15914702415466309 }, { "epoch": 1.54937744140625e-05, "model_forward_time": 0.024249792098999023, "step": 10154 }, { "epoch": 1.54937744140625e-05, "step": 10154, "training_step_time": 0.11568784713745117 }, { "epoch": 1.549530029296875e-05, "model_forward_time": 0.02451610565185547, "step": 10155 }, { "epoch": 1.549530029296875e-05, "step": 10155, "training_step_time": 0.13522982597351074 }, { "epoch": 1.5496826171875e-05, "model_forward_time": 0.02516031265258789, "step": 10156 }, { "epoch": 1.5496826171875e-05, "step": 10156, "training_step_time": 0.20038580894470215 }, { "epoch": 1.549835205078125e-05, "model_forward_time": 0.024537086486816406, "step": 10157 }, { "epoch": 1.549835205078125e-05, "step": 10157, "training_step_time": 0.107574462890625 }, { "epoch": 1.54998779296875e-05, "model_forward_time": 0.02442193031311035, "step": 10158 }, { "epoch": 1.54998779296875e-05, "step": 10158, "training_step_time": 0.10739016532897949 }, { "epoch": 1.550140380859375e-05, "model_forward_time": 0.025618553161621094, "step": 10159 }, { "epoch": 1.550140380859375e-05, "step": 10159, "training_step_time": 0.10900402069091797 }, { "epoch": 1.55029296875e-05, "grad_norm": 0.3725495934486389, "learning_rate": 7.88966295941361e-05, "loss": 0.0291, "step": 10160 }, { "epoch": 1.55029296875e-05, "model_forward_time": 0.024832725524902344, "step": 10160 }, { "epoch": 1.55029296875e-05, "step": 10160, "training_step_time": 0.10961580276489258 }, { "epoch": 1.550445556640625e-05, "model_forward_time": 0.024940013885498047, "step": 10161 }, { "epoch": 1.550445556640625e-05, "step": 10161, "training_step_time": 0.12753939628601074 }, { "epoch": 1.55059814453125e-05, "model_forward_time": 0.024743318557739258, "step": 10162 }, { "epoch": 1.55059814453125e-05, "step": 10162, "training_step_time": 0.11195707321166992 }, { "epoch": 1.550750732421875e-05, "model_forward_time": 0.025281429290771484, "step": 10163 }, { "epoch": 1.550750732421875e-05, "step": 10163, "training_step_time": 0.11185574531555176 }, { "epoch": 1.5509033203125e-05, "model_forward_time": 0.024654626846313477, "step": 10164 }, { "epoch": 1.5509033203125e-05, "step": 10164, "training_step_time": 0.11767244338989258 }, { "epoch": 1.551055908203125e-05, "model_forward_time": 0.025166749954223633, "step": 10165 }, { "epoch": 1.551055908203125e-05, "step": 10165, "training_step_time": 0.21035242080688477 }, { "epoch": 1.55120849609375e-05, "model_forward_time": 0.024691343307495117, "step": 10166 }, { "epoch": 1.55120849609375e-05, "step": 10166, "training_step_time": 0.11823272705078125 }, { "epoch": 1.551361083984375e-05, "model_forward_time": 0.024360179901123047, "step": 10167 }, { "epoch": 1.551361083984375e-05, "step": 10167, "training_step_time": 0.16715764999389648 }, { "epoch": 1.551513671875e-05, "model_forward_time": 0.025134563446044922, "step": 10168 }, { "epoch": 1.551513671875e-05, "step": 10168, "training_step_time": 0.14338970184326172 }, { "epoch": 1.551666259765625e-05, "model_forward_time": 0.024564743041992188, "step": 10169 }, { "epoch": 1.551666259765625e-05, "step": 10169, "training_step_time": 0.22011566162109375 }, { "epoch": 1.55181884765625e-05, "grad_norm": 0.2825896739959717, "learning_rate": 7.88516330105925e-05, "loss": 0.0314, "step": 10170 }, { "epoch": 1.55181884765625e-05, "model_forward_time": 0.024059534072875977, "step": 10170 }, { "epoch": 1.55181884765625e-05, "step": 10170, "training_step_time": 0.10860753059387207 }, { "epoch": 1.551971435546875e-05, "model_forward_time": 0.02482771873474121, "step": 10171 }, { "epoch": 1.551971435546875e-05, "step": 10171, "training_step_time": 0.10794401168823242 }, { "epoch": 1.5521240234375e-05, "model_forward_time": 0.024988889694213867, "step": 10172 }, { "epoch": 1.5521240234375e-05, "step": 10172, "training_step_time": 0.12369894981384277 }, { "epoch": 1.552276611328125e-05, "model_forward_time": 0.025028705596923828, "step": 10173 }, { "epoch": 1.552276611328125e-05, "step": 10173, "training_step_time": 0.10767269134521484 }, { "epoch": 1.55242919921875e-05, "model_forward_time": 0.025082111358642578, "step": 10174 }, { "epoch": 1.55242919921875e-05, "step": 10174, "training_step_time": 0.10709834098815918 }, { "epoch": 1.552581787109375e-05, "model_forward_time": 0.025279998779296875, "step": 10175 }, { "epoch": 1.552581787109375e-05, "step": 10175, "training_step_time": 0.10889506340026855 }, { "epoch": 1.552734375e-05, "model_forward_time": 0.024627685546875, "step": 10176 }, { "epoch": 1.552734375e-05, "step": 10176, "training_step_time": 0.11142921447753906 }, { "epoch": 1.552886962890625e-05, "model_forward_time": 0.02795696258544922, "step": 10177 }, { "epoch": 1.552886962890625e-05, "step": 10177, "training_step_time": 0.11047983169555664 }, { "epoch": 1.55303955078125e-05, "model_forward_time": 0.02510523796081543, "step": 10178 }, { "epoch": 1.55303955078125e-05, "step": 10178, "training_step_time": 0.11141157150268555 }, { "epoch": 1.553192138671875e-05, "model_forward_time": 0.025104284286499023, "step": 10179 }, { "epoch": 1.553192138671875e-05, "step": 10179, "training_step_time": 0.11028814315795898 }, { "epoch": 1.5533447265625e-05, "grad_norm": 0.43389415740966797, "learning_rate": 7.880660136959428e-05, "loss": 0.0361, "step": 10180 }, { "epoch": 1.5533447265625e-05, "model_forward_time": 0.02452397346496582, "step": 10180 }, { "epoch": 1.5533447265625e-05, "step": 10180, "training_step_time": 0.10983538627624512 }, { "epoch": 1.553497314453125e-05, "model_forward_time": 0.025237321853637695, "step": 10181 }, { "epoch": 1.553497314453125e-05, "step": 10181, "training_step_time": 0.10766911506652832 }, { "epoch": 1.55364990234375e-05, "model_forward_time": 0.025011539459228516, "step": 10182 }, { "epoch": 1.55364990234375e-05, "step": 10182, "training_step_time": 0.1086127758026123 }, { "epoch": 1.553802490234375e-05, "model_forward_time": 0.025360107421875, "step": 10183 }, { "epoch": 1.553802490234375e-05, "step": 10183, "training_step_time": 0.11029529571533203 }, { "epoch": 1.553955078125e-05, "model_forward_time": 0.025107383728027344, "step": 10184 }, { "epoch": 1.553955078125e-05, "step": 10184, "training_step_time": 0.11351752281188965 }, { "epoch": 1.554107666015625e-05, "model_forward_time": 0.024702072143554688, "step": 10185 }, { "epoch": 1.554107666015625e-05, "step": 10185, "training_step_time": 0.10797858238220215 }, { "epoch": 1.55426025390625e-05, "model_forward_time": 0.02491450309753418, "step": 10186 }, { "epoch": 1.55426025390625e-05, "step": 10186, "training_step_time": 0.1082754135131836 }, { "epoch": 1.554412841796875e-05, "model_forward_time": 0.0250091552734375, "step": 10187 }, { "epoch": 1.554412841796875e-05, "step": 10187, "training_step_time": 0.10904169082641602 }, { "epoch": 1.5545654296875e-05, "model_forward_time": 0.025329113006591797, "step": 10188 }, { "epoch": 1.5545654296875e-05, "step": 10188, "training_step_time": 0.1121213436126709 }, { "epoch": 1.554718017578125e-05, "model_forward_time": 0.024982690811157227, "step": 10189 }, { "epoch": 1.554718017578125e-05, "step": 10189, "training_step_time": 0.10792684555053711 }, { "epoch": 1.55487060546875e-05, "grad_norm": 0.4379105865955353, "learning_rate": 7.87615347258591e-05, "loss": 0.0403, "step": 10190 }, { "epoch": 1.55487060546875e-05, "model_forward_time": 0.02482151985168457, "step": 10190 }, { "epoch": 1.55487060546875e-05, "step": 10190, "training_step_time": 0.10898780822753906 }, { "epoch": 1.555023193359375e-05, "model_forward_time": 0.025065183639526367, "step": 10191 }, { "epoch": 1.555023193359375e-05, "step": 10191, "training_step_time": 0.11155295372009277 }, { "epoch": 1.55517578125e-05, "model_forward_time": 0.024828672409057617, "step": 10192 }, { "epoch": 1.55517578125e-05, "step": 10192, "training_step_time": 0.11032843589782715 }, { "epoch": 1.555328369140625e-05, "model_forward_time": 0.024771451950073242, "step": 10193 }, { "epoch": 1.555328369140625e-05, "step": 10193, "training_step_time": 0.1096959114074707 }, { "epoch": 1.55548095703125e-05, "model_forward_time": 0.024700164794921875, "step": 10194 }, { "epoch": 1.55548095703125e-05, "step": 10194, "training_step_time": 0.10805416107177734 }, { "epoch": 1.555633544921875e-05, "model_forward_time": 0.025272846221923828, "step": 10195 }, { "epoch": 1.555633544921875e-05, "step": 10195, "training_step_time": 0.10793685913085938 }, { "epoch": 1.5557861328125e-05, "model_forward_time": 0.025032520294189453, "step": 10196 }, { "epoch": 1.5557861328125e-05, "step": 10196, "training_step_time": 0.11159372329711914 }, { "epoch": 1.555938720703125e-05, "model_forward_time": 0.024997472763061523, "step": 10197 }, { "epoch": 1.555938720703125e-05, "step": 10197, "training_step_time": 0.1065070629119873 }, { "epoch": 1.55609130859375e-05, "model_forward_time": 0.024725675582885742, "step": 10198 }, { "epoch": 1.55609130859375e-05, "step": 10198, "training_step_time": 0.14882612228393555 }, { "epoch": 1.556243896484375e-05, "model_forward_time": 0.02486896514892578, "step": 10199 }, { "epoch": 1.556243896484375e-05, "step": 10199, "training_step_time": 0.1683499813079834 }, { "epoch": 1.556396484375e-05, "grad_norm": 0.33704426884651184, "learning_rate": 7.871643313414718e-05, "loss": 0.0319, "step": 10200 }, { "epoch": 1.556396484375e-05, "model_forward_time": 0.024451494216918945, "step": 10200 }, { "epoch": 1.556396484375e-05, "step": 10200, "training_step_time": 0.10930633544921875 }, { "epoch": 1.556549072265625e-05, "model_forward_time": 0.024338722229003906, "step": 10201 }, { "epoch": 1.556549072265625e-05, "step": 10201, "training_step_time": 0.13043999671936035 }, { "epoch": 1.55670166015625e-05, "model_forward_time": 0.025401592254638672, "step": 10202 }, { "epoch": 1.55670166015625e-05, "step": 10202, "training_step_time": 0.20703792572021484 }, { "epoch": 1.556854248046875e-05, "model_forward_time": 0.024214982986450195, "step": 10203 }, { "epoch": 1.556854248046875e-05, "step": 10203, "training_step_time": 0.11046314239501953 }, { "epoch": 1.5570068359375e-05, "model_forward_time": 0.025408267974853516, "step": 10204 }, { "epoch": 1.5570068359375e-05, "step": 10204, "training_step_time": 0.1057283878326416 }, { "epoch": 1.557159423828125e-05, "model_forward_time": 0.025188922882080078, "step": 10205 }, { "epoch": 1.557159423828125e-05, "step": 10205, "training_step_time": 0.10866475105285645 }, { "epoch": 1.55731201171875e-05, "model_forward_time": 0.025022268295288086, "step": 10206 }, { "epoch": 1.55731201171875e-05, "step": 10206, "training_step_time": 0.1103060245513916 }, { "epoch": 1.557464599609375e-05, "model_forward_time": 0.024996280670166016, "step": 10207 }, { "epoch": 1.557464599609375e-05, "step": 10207, "training_step_time": 0.1293339729309082 }, { "epoch": 1.5576171875e-05, "model_forward_time": 0.0248410701751709, "step": 10208 }, { "epoch": 1.5576171875e-05, "step": 10208, "training_step_time": 0.1147916316986084 }, { "epoch": 1.557769775390625e-05, "model_forward_time": 0.0248110294342041, "step": 10209 }, { "epoch": 1.557769775390625e-05, "step": 10209, "training_step_time": 0.1178891658782959 }, { "epoch": 1.55792236328125e-05, "grad_norm": 0.41021430492401123, "learning_rate": 7.867129664926123e-05, "loss": 0.0298, "step": 10210 }, { "epoch": 1.55792236328125e-05, "model_forward_time": 0.025182008743286133, "step": 10210 }, { "epoch": 1.55792236328125e-05, "step": 10210, "training_step_time": 0.16698932647705078 }, { "epoch": 1.558074951171875e-05, "model_forward_time": 0.024476289749145508, "step": 10211 }, { "epoch": 1.558074951171875e-05, "step": 10211, "training_step_time": 0.16120672225952148 }, { "epoch": 1.5582275390625e-05, "model_forward_time": 0.02428150177001953, "step": 10212 }, { "epoch": 1.5582275390625e-05, "step": 10212, "training_step_time": 0.2140662670135498 }, { "epoch": 1.558380126953125e-05, "model_forward_time": 0.02470541000366211, "step": 10213 }, { "epoch": 1.558380126953125e-05, "step": 10213, "training_step_time": 0.17453718185424805 }, { "epoch": 1.55853271484375e-05, "model_forward_time": 0.02372288703918457, "step": 10214 }, { "epoch": 1.55853271484375e-05, "step": 10214, "training_step_time": 0.11080789566040039 }, { "epoch": 1.558685302734375e-05, "model_forward_time": 0.0245511531829834, "step": 10215 }, { "epoch": 1.558685302734375e-05, "step": 10215, "training_step_time": 0.12552475929260254 }, { "epoch": 1.558837890625e-05, "model_forward_time": 0.02731609344482422, "step": 10216 }, { "epoch": 1.558837890625e-05, "step": 10216, "training_step_time": 0.13292694091796875 }, { "epoch": 1.558990478515625e-05, "model_forward_time": 0.024525880813598633, "step": 10217 }, { "epoch": 1.558990478515625e-05, "step": 10217, "training_step_time": 0.11364865303039551 }, { "epoch": 1.55914306640625e-05, "model_forward_time": 0.025045156478881836, "step": 10218 }, { "epoch": 1.55914306640625e-05, "step": 10218, "training_step_time": 0.11681747436523438 }, { "epoch": 1.559295654296875e-05, "model_forward_time": 0.02478194236755371, "step": 10219 }, { "epoch": 1.559295654296875e-05, "step": 10219, "training_step_time": 0.10770082473754883 }, { "epoch": 1.5594482421875e-05, "grad_norm": 0.3275561034679413, "learning_rate": 7.862612532604632e-05, "loss": 0.0239, "step": 10220 }, { "epoch": 1.5594482421875e-05, "model_forward_time": 0.024823665618896484, "step": 10220 }, { "epoch": 1.5594482421875e-05, "step": 10220, "training_step_time": 0.10671210289001465 }, { "epoch": 1.559600830078125e-05, "model_forward_time": 0.02500629425048828, "step": 10221 }, { "epoch": 1.559600830078125e-05, "step": 10221, "training_step_time": 0.10480308532714844 }, { "epoch": 1.55975341796875e-05, "model_forward_time": 0.02762746810913086, "step": 10222 }, { "epoch": 1.55975341796875e-05, "step": 10222, "training_step_time": 0.10710310935974121 }, { "epoch": 1.559906005859375e-05, "model_forward_time": 0.024909019470214844, "step": 10223 }, { "epoch": 1.559906005859375e-05, "step": 10223, "training_step_time": 0.10817360877990723 }, { "epoch": 1.56005859375e-05, "model_forward_time": 0.024895906448364258, "step": 10224 }, { "epoch": 1.56005859375e-05, "step": 10224, "training_step_time": 0.11210775375366211 }, { "epoch": 1.560211181640625e-05, "model_forward_time": 0.025146484375, "step": 10225 }, { "epoch": 1.560211181640625e-05, "step": 10225, "training_step_time": 0.17537879943847656 }, { "epoch": 1.56036376953125e-05, "model_forward_time": 0.024442672729492188, "step": 10226 }, { "epoch": 1.56036376953125e-05, "step": 10226, "training_step_time": 0.18540072441101074 }, { "epoch": 1.560516357421875e-05, "model_forward_time": 0.024337053298950195, "step": 10227 }, { "epoch": 1.560516357421875e-05, "step": 10227, "training_step_time": 0.18052291870117188 }, { "epoch": 1.5606689453125e-05, "model_forward_time": 0.0257570743560791, "step": 10228 }, { "epoch": 1.5606689453125e-05, "step": 10228, "training_step_time": 0.16207289695739746 }, { "epoch": 1.560821533203125e-05, "model_forward_time": 0.024485111236572266, "step": 10229 }, { "epoch": 1.560821533203125e-05, "step": 10229, "training_step_time": 0.15619993209838867 }, { "epoch": 1.56097412109375e-05, "grad_norm": 0.5337497591972351, "learning_rate": 7.858091921938988e-05, "loss": 0.0314, "step": 10230 }, { "epoch": 1.56097412109375e-05, "model_forward_time": 0.02418208122253418, "step": 10230 }, { "epoch": 1.56097412109375e-05, "step": 10230, "training_step_time": 0.14315152168273926 }, { "epoch": 1.561126708984375e-05, "model_forward_time": 0.023787736892700195, "step": 10231 }, { "epoch": 1.561126708984375e-05, "step": 10231, "training_step_time": 0.12587380409240723 }, { "epoch": 1.561279296875e-05, "model_forward_time": 0.024656295776367188, "step": 10232 }, { "epoch": 1.561279296875e-05, "step": 10232, "training_step_time": 0.1253657341003418 }, { "epoch": 1.561431884765625e-05, "model_forward_time": 0.02521514892578125, "step": 10233 }, { "epoch": 1.561431884765625e-05, "step": 10233, "training_step_time": 0.12489080429077148 }, { "epoch": 1.56158447265625e-05, "model_forward_time": 0.0247342586517334, "step": 10234 }, { "epoch": 1.56158447265625e-05, "step": 10234, "training_step_time": 0.11517691612243652 }, { "epoch": 1.561737060546875e-05, "model_forward_time": 0.02461695671081543, "step": 10235 }, { "epoch": 1.561737060546875e-05, "step": 10235, "training_step_time": 0.11486172676086426 }, { "epoch": 1.5618896484375e-05, "model_forward_time": 0.0251922607421875, "step": 10236 }, { "epoch": 1.5618896484375e-05, "step": 10236, "training_step_time": 0.11312198638916016 }, { "epoch": 1.562042236328125e-05, "model_forward_time": 0.02558135986328125, "step": 10237 }, { "epoch": 1.562042236328125e-05, "step": 10237, "training_step_time": 0.11275792121887207 }, { "epoch": 1.56219482421875e-05, "model_forward_time": 0.025443077087402344, "step": 10238 }, { "epoch": 1.56219482421875e-05, "step": 10238, "training_step_time": 0.11144113540649414 }, { "epoch": 1.562347412109375e-05, "model_forward_time": 0.02516460418701172, "step": 10239 }, { "epoch": 1.562347412109375e-05, "step": 10239, "training_step_time": 0.108642578125 }, { "epoch": 1.5625e-05, "grad_norm": 0.21214427053928375, "learning_rate": 7.85356783842216e-05, "loss": 0.0287, "step": 10240 }, { "epoch": 1.5625e-05, "model_forward_time": 0.02578902244567871, "step": 10240 }, { "epoch": 1.5625e-05, "step": 10240, "training_step_time": 0.10879278182983398 }, { "epoch": 1.562652587890625e-05, "model_forward_time": 0.024303674697875977, "step": 10241 }, { "epoch": 1.562652587890625e-05, "step": 10241, "training_step_time": 0.1445763111114502 }, { "epoch": 1.56280517578125e-05, "model_forward_time": 0.02421712875366211, "step": 10242 }, { "epoch": 1.56280517578125e-05, "step": 10242, "training_step_time": 0.16765046119689941 }, { "epoch": 1.562957763671875e-05, "model_forward_time": 0.02426433563232422, "step": 10243 }, { "epoch": 1.562957763671875e-05, "step": 10243, "training_step_time": 0.11311125755310059 }, { "epoch": 1.5631103515625e-05, "model_forward_time": 0.024117469787597656, "step": 10244 }, { "epoch": 1.5631103515625e-05, "step": 10244, "training_step_time": 0.1351630687713623 }, { "epoch": 1.563262939453125e-05, "model_forward_time": 0.025215864181518555, "step": 10245 }, { "epoch": 1.563262939453125e-05, "step": 10245, "training_step_time": 0.20808887481689453 }, { "epoch": 1.56341552734375e-05, "model_forward_time": 0.02449488639831543, "step": 10246 }, { "epoch": 1.56341552734375e-05, "step": 10246, "training_step_time": 0.1065220832824707 }, { "epoch": 1.563568115234375e-05, "model_forward_time": 0.024907588958740234, "step": 10247 }, { "epoch": 1.563568115234375e-05, "step": 10247, "training_step_time": 0.10624432563781738 }, { "epoch": 1.563720703125e-05, "model_forward_time": 0.02440476417541504, "step": 10248 }, { "epoch": 1.563720703125e-05, "step": 10248, "training_step_time": 0.16401243209838867 }, { "epoch": 1.563873291015625e-05, "model_forward_time": 0.02478194236755371, "step": 10249 }, { "epoch": 1.563873291015625e-05, "step": 10249, "training_step_time": 0.12877440452575684 }, { "epoch": 1.56402587890625e-05, "grad_norm": 0.3400780260562897, "learning_rate": 7.849040287551331e-05, "loss": 0.0263, "step": 10250 }, { "epoch": 1.56402587890625e-05, "model_forward_time": 0.024584293365478516, "step": 10250 }, { "epoch": 1.56402587890625e-05, "step": 10250, "training_step_time": 0.20516180992126465 }, { "epoch": 1.564178466796875e-05, "model_forward_time": 0.024913311004638672, "step": 10251 }, { "epoch": 1.564178466796875e-05, "step": 10251, "training_step_time": 0.10778260231018066 }, { "epoch": 1.5643310546875e-05, "model_forward_time": 0.02475714683532715, "step": 10252 }, { "epoch": 1.5643310546875e-05, "step": 10252, "training_step_time": 0.11064982414245605 }, { "epoch": 1.564483642578125e-05, "model_forward_time": 0.025543212890625, "step": 10253 }, { "epoch": 1.564483642578125e-05, "step": 10253, "training_step_time": 0.12283635139465332 }, { "epoch": 1.56463623046875e-05, "model_forward_time": 0.02518606185913086, "step": 10254 }, { "epoch": 1.56463623046875e-05, "step": 10254, "training_step_time": 0.1218266487121582 }, { "epoch": 1.564788818359375e-05, "model_forward_time": 0.025127887725830078, "step": 10255 }, { "epoch": 1.564788818359375e-05, "step": 10255, "training_step_time": 0.12517857551574707 }, { "epoch": 1.56494140625e-05, "model_forward_time": 0.025538921356201172, "step": 10256 }, { "epoch": 1.56494140625e-05, "step": 10256, "training_step_time": 0.12697267532348633 }, { "epoch": 1.565093994140625e-05, "model_forward_time": 0.0253446102142334, "step": 10257 }, { "epoch": 1.565093994140625e-05, "step": 10257, "training_step_time": 0.15961241722106934 }, { "epoch": 1.56524658203125e-05, "model_forward_time": 0.02763509750366211, "step": 10258 }, { "epoch": 1.56524658203125e-05, "step": 10258, "training_step_time": 0.17592763900756836 }, { "epoch": 1.565399169921875e-05, "model_forward_time": 0.024128198623657227, "step": 10259 }, { "epoch": 1.565399169921875e-05, "step": 10259, "training_step_time": 0.12496781349182129 }, { "epoch": 1.5655517578125e-05, "grad_norm": 0.3840779960155487, "learning_rate": 7.844509274827907e-05, "loss": 0.0207, "step": 10260 }, { "epoch": 1.5655517578125e-05, "model_forward_time": 0.023936748504638672, "step": 10260 }, { "epoch": 1.5655517578125e-05, "step": 10260, "training_step_time": 0.10991787910461426 }, { "epoch": 1.565704345703125e-05, "model_forward_time": 0.024654865264892578, "step": 10261 }, { "epoch": 1.565704345703125e-05, "step": 10261, "training_step_time": 0.10660600662231445 }, { "epoch": 1.56585693359375e-05, "model_forward_time": 0.026385068893432617, "step": 10262 }, { "epoch": 1.56585693359375e-05, "step": 10262, "training_step_time": 0.11168503761291504 }, { "epoch": 1.566009521484375e-05, "model_forward_time": 0.02561783790588379, "step": 10263 }, { "epoch": 1.566009521484375e-05, "step": 10263, "training_step_time": 0.10788774490356445 }, { "epoch": 1.566162109375e-05, "model_forward_time": 0.025317907333374023, "step": 10264 }, { "epoch": 1.566162109375e-05, "step": 10264, "training_step_time": 0.10884642601013184 }, { "epoch": 1.566314697265625e-05, "model_forward_time": 0.027535438537597656, "step": 10265 }, { "epoch": 1.566314697265625e-05, "step": 10265, "training_step_time": 0.10944414138793945 }, { "epoch": 1.56646728515625e-05, "model_forward_time": 0.02522754669189453, "step": 10266 }, { "epoch": 1.56646728515625e-05, "step": 10266, "training_step_time": 0.11238646507263184 }, { "epoch": 1.566619873046875e-05, "model_forward_time": 0.025668621063232422, "step": 10267 }, { "epoch": 1.566619873046875e-05, "step": 10267, "training_step_time": 0.11189842224121094 }, { "epoch": 1.5667724609375e-05, "model_forward_time": 0.025130271911621094, "step": 10268 }, { "epoch": 1.5667724609375e-05, "step": 10268, "training_step_time": 0.10653567314147949 }, { "epoch": 1.566925048828125e-05, "model_forward_time": 0.025359153747558594, "step": 10269 }, { "epoch": 1.566925048828125e-05, "step": 10269, "training_step_time": 0.1083986759185791 }, { "epoch": 1.56707763671875e-05, "grad_norm": 0.5742058157920837, "learning_rate": 7.839974805757496e-05, "loss": 0.0311, "step": 10270 }, { "epoch": 1.56707763671875e-05, "model_forward_time": 0.02496194839477539, "step": 10270 }, { "epoch": 1.56707763671875e-05, "step": 10270, "training_step_time": 0.11120343208312988 }, { "epoch": 1.567230224609375e-05, "model_forward_time": 0.02530956268310547, "step": 10271 }, { "epoch": 1.567230224609375e-05, "step": 10271, "training_step_time": 0.11691498756408691 }, { "epoch": 1.5673828125e-05, "model_forward_time": 0.02495884895324707, "step": 10272 }, { "epoch": 1.5673828125e-05, "step": 10272, "training_step_time": 0.11290431022644043 }, { "epoch": 1.567535400390625e-05, "model_forward_time": 0.025396347045898438, "step": 10273 }, { "epoch": 1.567535400390625e-05, "step": 10273, "training_step_time": 0.11287784576416016 }, { "epoch": 1.56768798828125e-05, "model_forward_time": 0.02756333351135254, "step": 10274 }, { "epoch": 1.56768798828125e-05, "step": 10274, "training_step_time": 0.11761093139648438 }, { "epoch": 1.567840576171875e-05, "model_forward_time": 0.024433374404907227, "step": 10275 }, { "epoch": 1.567840576171875e-05, "step": 10275, "training_step_time": 0.1141507625579834 }, { "epoch": 1.5679931640625e-05, "model_forward_time": 0.025264263153076172, "step": 10276 }, { "epoch": 1.5679931640625e-05, "step": 10276, "training_step_time": 0.11176371574401855 }, { "epoch": 1.568145751953125e-05, "model_forward_time": 0.024656057357788086, "step": 10277 }, { "epoch": 1.568145751953125e-05, "step": 10277, "training_step_time": 0.11355209350585938 }, { "epoch": 1.56829833984375e-05, "model_forward_time": 0.025318384170532227, "step": 10278 }, { "epoch": 1.56829833984375e-05, "step": 10278, "training_step_time": 0.11013364791870117 }, { "epoch": 1.568450927734375e-05, "model_forward_time": 0.025177478790283203, "step": 10279 }, { "epoch": 1.568450927734375e-05, "step": 10279, "training_step_time": 0.1130211353302002 }, { "epoch": 1.568603515625e-05, "grad_norm": 0.41737622022628784, "learning_rate": 7.835436885849902e-05, "loss": 0.0323, "step": 10280 }, { "epoch": 1.568603515625e-05, "model_forward_time": 0.0251772403717041, "step": 10280 }, { "epoch": 1.568603515625e-05, "step": 10280, "training_step_time": 0.11031460762023926 }, { "epoch": 1.568756103515625e-05, "model_forward_time": 0.025476455688476562, "step": 10281 }, { "epoch": 1.568756103515625e-05, "step": 10281, "training_step_time": 0.10994696617126465 }, { "epoch": 1.56890869140625e-05, "model_forward_time": 0.02496170997619629, "step": 10282 }, { "epoch": 1.56890869140625e-05, "step": 10282, "training_step_time": 0.11352396011352539 }, { "epoch": 1.569061279296875e-05, "model_forward_time": 0.025426626205444336, "step": 10283 }, { "epoch": 1.569061279296875e-05, "step": 10283, "training_step_time": 0.11088180541992188 }, { "epoch": 1.5692138671875e-05, "model_forward_time": 0.025313138961791992, "step": 10284 }, { "epoch": 1.5692138671875e-05, "step": 10284, "training_step_time": 0.11724114418029785 }, { "epoch": 1.569366455078125e-05, "model_forward_time": 0.025242090225219727, "step": 10285 }, { "epoch": 1.569366455078125e-05, "step": 10285, "training_step_time": 0.10913515090942383 }, { "epoch": 1.56951904296875e-05, "model_forward_time": 0.02550983428955078, "step": 10286 }, { "epoch": 1.56951904296875e-05, "step": 10286, "training_step_time": 0.10813069343566895 }, { "epoch": 1.569671630859375e-05, "model_forward_time": 0.025197505950927734, "step": 10287 }, { "epoch": 1.569671630859375e-05, "step": 10287, "training_step_time": 0.201124906539917 }, { "epoch": 1.56982421875e-05, "model_forward_time": 0.024276018142700195, "step": 10288 }, { "epoch": 1.56982421875e-05, "step": 10288, "training_step_time": 0.18512201309204102 }, { "epoch": 1.569976806640625e-05, "model_forward_time": 0.028685569763183594, "step": 10289 }, { "epoch": 1.569976806640625e-05, "step": 10289, "training_step_time": 0.16069865226745605 }, { "epoch": 1.57012939453125e-05, "grad_norm": 0.26310300827026367, "learning_rate": 7.830895520619128e-05, "loss": 0.0373, "step": 10290 }, { "epoch": 1.57012939453125e-05, "model_forward_time": 0.024645566940307617, "step": 10290 }, { "epoch": 1.57012939453125e-05, "step": 10290, "training_step_time": 0.15724515914916992 }, { "epoch": 1.570281982421875e-05, "model_forward_time": 0.02434682846069336, "step": 10291 }, { "epoch": 1.570281982421875e-05, "step": 10291, "training_step_time": 0.10386896133422852 }, { "epoch": 1.5704345703125e-05, "model_forward_time": 0.02530670166015625, "step": 10292 }, { "epoch": 1.5704345703125e-05, "step": 10292, "training_step_time": 0.10911059379577637 }, { "epoch": 1.570587158203125e-05, "model_forward_time": 0.02528858184814453, "step": 10293 }, { "epoch": 1.570587158203125e-05, "step": 10293, "training_step_time": 0.1091923713684082 }, { "epoch": 1.57073974609375e-05, "model_forward_time": 0.025514602661132812, "step": 10294 }, { "epoch": 1.57073974609375e-05, "step": 10294, "training_step_time": 0.1449434757232666 }, { "epoch": 1.570892333984375e-05, "model_forward_time": 0.024932384490966797, "step": 10295 }, { "epoch": 1.570892333984375e-05, "step": 10295, "training_step_time": 0.1384599208831787 }, { "epoch": 1.571044921875e-05, "model_forward_time": 0.024788856506347656, "step": 10296 }, { "epoch": 1.571044921875e-05, "step": 10296, "training_step_time": 0.1149301528930664 }, { "epoch": 1.571197509765625e-05, "model_forward_time": 0.02512359619140625, "step": 10297 }, { "epoch": 1.571197509765625e-05, "step": 10297, "training_step_time": 0.11466646194458008 }, { "epoch": 1.57135009765625e-05, "model_forward_time": 0.024535179138183594, "step": 10298 }, { "epoch": 1.57135009765625e-05, "step": 10298, "training_step_time": 0.11847686767578125 }, { "epoch": 1.571502685546875e-05, "model_forward_time": 0.025227069854736328, "step": 10299 }, { "epoch": 1.571502685546875e-05, "step": 10299, "training_step_time": 0.12407088279724121 }, { "epoch": 1.5716552734375e-05, "grad_norm": 0.6257118582725525, "learning_rate": 7.82635071558336e-05, "loss": 0.028, "step": 10300 }, { "epoch": 1.5716552734375e-05, "model_forward_time": 0.025081872940063477, "step": 10300 }, { "epoch": 1.5716552734375e-05, "step": 10300, "training_step_time": 0.19926214218139648 }, { "epoch": 1.571807861328125e-05, "model_forward_time": 0.02441263198852539, "step": 10301 }, { "epoch": 1.571807861328125e-05, "step": 10301, "training_step_time": 0.12649202346801758 }, { "epoch": 1.57196044921875e-05, "model_forward_time": 0.0240018367767334, "step": 10302 }, { "epoch": 1.57196044921875e-05, "step": 10302, "training_step_time": 0.13016057014465332 }, { "epoch": 1.572113037109375e-05, "model_forward_time": 0.024486541748046875, "step": 10303 }, { "epoch": 1.572113037109375e-05, "step": 10303, "training_step_time": 0.17170429229736328 }, { "epoch": 1.572265625e-05, "model_forward_time": 0.024966955184936523, "step": 10304 }, { "epoch": 1.572265625e-05, "step": 10304, "training_step_time": 0.16825151443481445 }, { "epoch": 1.572418212890625e-05, "model_forward_time": 0.024628400802612305, "step": 10305 }, { "epoch": 1.572418212890625e-05, "step": 10305, "training_step_time": 0.13275146484375 }, { "epoch": 1.57257080078125e-05, "model_forward_time": 0.02416062355041504, "step": 10306 }, { "epoch": 1.57257080078125e-05, "step": 10306, "training_step_time": 0.10798788070678711 }, { "epoch": 1.572723388671875e-05, "model_forward_time": 0.025408267974853516, "step": 10307 }, { "epoch": 1.572723388671875e-05, "step": 10307, "training_step_time": 0.12199568748474121 }, { "epoch": 1.5728759765625e-05, "model_forward_time": 0.025058507919311523, "step": 10308 }, { "epoch": 1.5728759765625e-05, "step": 10308, "training_step_time": 0.10778594017028809 }, { "epoch": 1.573028564453125e-05, "model_forward_time": 0.025176525115966797, "step": 10309 }, { "epoch": 1.573028564453125e-05, "step": 10309, "training_step_time": 0.10720205307006836 }, { "epoch": 1.57318115234375e-05, "grad_norm": 0.36297407746315, "learning_rate": 7.821802476264966e-05, "loss": 0.0224, "step": 10310 }, { "epoch": 1.57318115234375e-05, "model_forward_time": 0.02517247200012207, "step": 10310 }, { "epoch": 1.57318115234375e-05, "step": 10310, "training_step_time": 0.11198067665100098 }, { "epoch": 1.573333740234375e-05, "model_forward_time": 0.025423288345336914, "step": 10311 }, { "epoch": 1.573333740234375e-05, "step": 10311, "training_step_time": 0.10831046104431152 }, { "epoch": 1.573486328125e-05, "model_forward_time": 0.025219202041625977, "step": 10312 }, { "epoch": 1.573486328125e-05, "step": 10312, "training_step_time": 0.10748600959777832 }, { "epoch": 1.573638916015625e-05, "model_forward_time": 0.024961233139038086, "step": 10313 }, { "epoch": 1.573638916015625e-05, "step": 10313, "training_step_time": 0.1071629524230957 }, { "epoch": 1.57379150390625e-05, "model_forward_time": 0.024869203567504883, "step": 10314 }, { "epoch": 1.57379150390625e-05, "step": 10314, "training_step_time": 0.10721874237060547 }, { "epoch": 1.573944091796875e-05, "model_forward_time": 0.025130748748779297, "step": 10315 }, { "epoch": 1.573944091796875e-05, "step": 10315, "training_step_time": 0.10694360733032227 }, { "epoch": 1.5740966796875e-05, "model_forward_time": 0.025161027908325195, "step": 10316 }, { "epoch": 1.5740966796875e-05, "step": 10316, "training_step_time": 0.10783004760742188 }, { "epoch": 1.574249267578125e-05, "model_forward_time": 0.025210142135620117, "step": 10317 }, { "epoch": 1.574249267578125e-05, "step": 10317, "training_step_time": 0.1116178035736084 }, { "epoch": 1.57440185546875e-05, "model_forward_time": 0.024329185485839844, "step": 10318 }, { "epoch": 1.57440185546875e-05, "step": 10318, "training_step_time": 0.10855627059936523 }, { "epoch": 1.574554443359375e-05, "model_forward_time": 0.026468753814697266, "step": 10319 }, { "epoch": 1.574554443359375e-05, "step": 10319, "training_step_time": 0.11608552932739258 }, { "epoch": 1.57470703125e-05, "grad_norm": 0.5447708964347839, "learning_rate": 7.817250808190483e-05, "loss": 0.0283, "step": 10320 }, { "epoch": 1.57470703125e-05, "model_forward_time": 0.024248838424682617, "step": 10320 }, { "epoch": 1.57470703125e-05, "step": 10320, "training_step_time": 0.12022972106933594 }, { "epoch": 1.574859619140625e-05, "model_forward_time": 0.02447676658630371, "step": 10321 }, { "epoch": 1.574859619140625e-05, "step": 10321, "training_step_time": 0.11070585250854492 }, { "epoch": 1.57501220703125e-05, "model_forward_time": 0.02437758445739746, "step": 10322 }, { "epoch": 1.57501220703125e-05, "step": 10322, "training_step_time": 0.1063680648803711 }, { "epoch": 1.575164794921875e-05, "model_forward_time": 0.02397918701171875, "step": 10323 }, { "epoch": 1.575164794921875e-05, "step": 10323, "training_step_time": 0.10804319381713867 }, { "epoch": 1.5753173828125e-05, "model_forward_time": 0.024433612823486328, "step": 10324 }, { "epoch": 1.5753173828125e-05, "step": 10324, "training_step_time": 0.1075127124786377 }, { "epoch": 1.575469970703125e-05, "model_forward_time": 0.024324417114257812, "step": 10325 }, { "epoch": 1.575469970703125e-05, "step": 10325, "training_step_time": 0.10645389556884766 }, { "epoch": 1.57562255859375e-05, "model_forward_time": 0.024509191513061523, "step": 10326 }, { "epoch": 1.57562255859375e-05, "step": 10326, "training_step_time": 0.10995078086853027 }, { "epoch": 1.575775146484375e-05, "model_forward_time": 0.024807214736938477, "step": 10327 }, { "epoch": 1.575775146484375e-05, "step": 10327, "training_step_time": 0.10920119285583496 }, { "epoch": 1.575927734375e-05, "model_forward_time": 0.02449512481689453, "step": 10328 }, { "epoch": 1.575927734375e-05, "step": 10328, "training_step_time": 0.1074056625366211 }, { "epoch": 1.576080322265625e-05, "model_forward_time": 0.02454543113708496, "step": 10329 }, { "epoch": 1.576080322265625e-05, "step": 10329, "training_step_time": 0.10706949234008789 }, { "epoch": 1.57623291015625e-05, "grad_norm": 0.4327964186668396, "learning_rate": 7.81269571689062e-05, "loss": 0.0286, "step": 10330 }, { "epoch": 1.57623291015625e-05, "model_forward_time": 0.02394247055053711, "step": 10330 }, { "epoch": 1.57623291015625e-05, "step": 10330, "training_step_time": 0.10729646682739258 }, { "epoch": 1.576385498046875e-05, "model_forward_time": 0.024763107299804688, "step": 10331 }, { "epoch": 1.576385498046875e-05, "step": 10331, "training_step_time": 0.10574126243591309 }, { "epoch": 1.5765380859375e-05, "model_forward_time": 0.023756027221679688, "step": 10332 }, { "epoch": 1.5765380859375e-05, "step": 10332, "training_step_time": 0.10875678062438965 }, { "epoch": 1.576690673828125e-05, "model_forward_time": 0.023831844329833984, "step": 10333 }, { "epoch": 1.576690673828125e-05, "step": 10333, "training_step_time": 0.11569762229919434 }, { "epoch": 1.57684326171875e-05, "model_forward_time": 0.024391651153564453, "step": 10334 }, { "epoch": 1.57684326171875e-05, "step": 10334, "training_step_time": 0.11629223823547363 }, { "epoch": 1.576995849609375e-05, "model_forward_time": 0.024626970291137695, "step": 10335 }, { "epoch": 1.576995849609375e-05, "step": 10335, "training_step_time": 0.11095881462097168 }, { "epoch": 1.5771484375e-05, "model_forward_time": 0.024695634841918945, "step": 10336 }, { "epoch": 1.5771484375e-05, "step": 10336, "training_step_time": 0.22139835357666016 }, { "epoch": 1.577301025390625e-05, "model_forward_time": 0.023496627807617188, "step": 10337 }, { "epoch": 1.577301025390625e-05, "step": 10337, "training_step_time": 0.12123847007751465 }, { "epoch": 1.57745361328125e-05, "model_forward_time": 0.02317643165588379, "step": 10338 }, { "epoch": 1.57745361328125e-05, "step": 10338, "training_step_time": 0.10971307754516602 }, { "epoch": 1.577606201171875e-05, "model_forward_time": 0.02431178092956543, "step": 10339 }, { "epoch": 1.577606201171875e-05, "step": 10339, "training_step_time": 0.11378884315490723 }, { "epoch": 1.5777587890625e-05, "grad_norm": 0.6996987462043762, "learning_rate": 7.808137207900241e-05, "loss": 0.0295, "step": 10340 }, { "epoch": 1.5777587890625e-05, "model_forward_time": 0.02449941635131836, "step": 10340 }, { "epoch": 1.5777587890625e-05, "step": 10340, "training_step_time": 0.10889911651611328 }, { "epoch": 1.577911376953125e-05, "model_forward_time": 0.024165630340576172, "step": 10341 }, { "epoch": 1.577911376953125e-05, "step": 10341, "training_step_time": 0.1344013214111328 }, { "epoch": 1.57806396484375e-05, "model_forward_time": 0.02767634391784668, "step": 10342 }, { "epoch": 1.57806396484375e-05, "step": 10342, "training_step_time": 0.1290912628173828 }, { "epoch": 1.578216552734375e-05, "model_forward_time": 0.023853302001953125, "step": 10343 }, { "epoch": 1.578216552734375e-05, "step": 10343, "training_step_time": 0.11023759841918945 }, { "epoch": 1.578369140625e-05, "model_forward_time": 0.02432560920715332, "step": 10344 }, { "epoch": 1.578369140625e-05, "step": 10344, "training_step_time": 0.11093735694885254 }, { "epoch": 1.578521728515625e-05, "model_forward_time": 0.024164676666259766, "step": 10345 }, { "epoch": 1.578521728515625e-05, "step": 10345, "training_step_time": 0.11302733421325684 }, { "epoch": 1.57867431640625e-05, "model_forward_time": 0.024091482162475586, "step": 10346 }, { "epoch": 1.57867431640625e-05, "step": 10346, "training_step_time": 0.15638375282287598 }, { "epoch": 1.578826904296875e-05, "model_forward_time": 0.023479461669921875, "step": 10347 }, { "epoch": 1.578826904296875e-05, "step": 10347, "training_step_time": 0.1535797119140625 }, { "epoch": 1.5789794921875e-05, "model_forward_time": 0.02494215965270996, "step": 10348 }, { "epoch": 1.5789794921875e-05, "step": 10348, "training_step_time": 0.1681196689605713 }, { "epoch": 1.579132080078125e-05, "model_forward_time": 0.027090787887573242, "step": 10349 }, { "epoch": 1.579132080078125e-05, "step": 10349, "training_step_time": 0.19069576263427734 }, { "epoch": 1.57928466796875e-05, "grad_norm": 0.40495797991752625, "learning_rate": 7.803575286758364e-05, "loss": 0.0371, "step": 10350 }, { "epoch": 1.57928466796875e-05, "model_forward_time": 0.023637771606445312, "step": 10350 }, { "epoch": 1.57928466796875e-05, "step": 10350, "training_step_time": 0.1645183563232422 }, { "epoch": 1.579437255859375e-05, "model_forward_time": 0.023058652877807617, "step": 10351 }, { "epoch": 1.579437255859375e-05, "step": 10351, "training_step_time": 0.1358168125152588 }, { "epoch": 1.57958984375e-05, "model_forward_time": 0.023500442504882812, "step": 10352 }, { "epoch": 1.57958984375e-05, "step": 10352, "training_step_time": 0.1294393539428711 }, { "epoch": 1.579742431640625e-05, "model_forward_time": 0.02541637420654297, "step": 10353 }, { "epoch": 1.579742431640625e-05, "step": 10353, "training_step_time": 0.10862898826599121 }, { "epoch": 1.57989501953125e-05, "model_forward_time": 0.02476644515991211, "step": 10354 }, { "epoch": 1.57989501953125e-05, "step": 10354, "training_step_time": 0.10879230499267578 }, { "epoch": 1.580047607421875e-05, "model_forward_time": 0.024498462677001953, "step": 10355 }, { "epoch": 1.580047607421875e-05, "step": 10355, "training_step_time": 0.1165318489074707 }, { "epoch": 1.5802001953125e-05, "model_forward_time": 0.02446770668029785, "step": 10356 }, { "epoch": 1.5802001953125e-05, "step": 10356, "training_step_time": 0.1173095703125 }, { "epoch": 1.580352783203125e-05, "model_forward_time": 0.02425408363342285, "step": 10357 }, { "epoch": 1.580352783203125e-05, "step": 10357, "training_step_time": 0.12216401100158691 }, { "epoch": 1.58050537109375e-05, "model_forward_time": 0.02538323402404785, "step": 10358 }, { "epoch": 1.58050537109375e-05, "step": 10358, "training_step_time": 0.12340855598449707 }, { "epoch": 1.580657958984375e-05, "model_forward_time": 0.024383544921875, "step": 10359 }, { "epoch": 1.580657958984375e-05, "step": 10359, "training_step_time": 0.12047386169433594 }, { "epoch": 1.580810546875e-05, "grad_norm": 0.25256213545799255, "learning_rate": 7.799009959008155e-05, "loss": 0.0204, "step": 10360 }, { "epoch": 1.580810546875e-05, "model_forward_time": 0.024165868759155273, "step": 10360 }, { "epoch": 1.580810546875e-05, "step": 10360, "training_step_time": 0.12019491195678711 }, { "epoch": 1.580963134765625e-05, "model_forward_time": 0.02428412437438965, "step": 10361 }, { "epoch": 1.580963134765625e-05, "step": 10361, "training_step_time": 0.11509919166564941 }, { "epoch": 1.58111572265625e-05, "model_forward_time": 0.02392101287841797, "step": 10362 }, { "epoch": 1.58111572265625e-05, "step": 10362, "training_step_time": 0.11330366134643555 }, { "epoch": 1.581268310546875e-05, "model_forward_time": 0.0244138240814209, "step": 10363 }, { "epoch": 1.581268310546875e-05, "step": 10363, "training_step_time": 0.11504054069519043 }, { "epoch": 1.5814208984375e-05, "model_forward_time": 0.024362802505493164, "step": 10364 }, { "epoch": 1.5814208984375e-05, "step": 10364, "training_step_time": 0.11533236503601074 }, { "epoch": 1.581573486328125e-05, "model_forward_time": 0.024304628372192383, "step": 10365 }, { "epoch": 1.581573486328125e-05, "step": 10365, "training_step_time": 0.10999441146850586 }, { "epoch": 1.58172607421875e-05, "model_forward_time": 0.024489402770996094, "step": 10366 }, { "epoch": 1.58172607421875e-05, "step": 10366, "training_step_time": 0.10736966133117676 }, { "epoch": 1.581878662109375e-05, "model_forward_time": 0.024550199508666992, "step": 10367 }, { "epoch": 1.581878662109375e-05, "step": 10367, "training_step_time": 0.11003804206848145 }, { "epoch": 1.58203125e-05, "model_forward_time": 0.02416515350341797, "step": 10368 }, { "epoch": 1.58203125e-05, "step": 10368, "training_step_time": 0.10773372650146484 }, { "epoch": 1.582183837890625e-05, "model_forward_time": 0.024495601654052734, "step": 10369 }, { "epoch": 1.582183837890625e-05, "step": 10369, "training_step_time": 0.11035394668579102 }, { "epoch": 1.58233642578125e-05, "grad_norm": 0.4363476037979126, "learning_rate": 7.794441230196913e-05, "loss": 0.0259, "step": 10370 }, { "epoch": 1.58233642578125e-05, "model_forward_time": 0.02430438995361328, "step": 10370 }, { "epoch": 1.58233642578125e-05, "step": 10370, "training_step_time": 0.10989904403686523 }, { "epoch": 1.582489013671875e-05, "model_forward_time": 0.024333953857421875, "step": 10371 }, { "epoch": 1.582489013671875e-05, "step": 10371, "training_step_time": 0.10590648651123047 }, { "epoch": 1.5826416015625e-05, "model_forward_time": 0.02736210823059082, "step": 10372 }, { "epoch": 1.5826416015625e-05, "step": 10372, "training_step_time": 0.1126255989074707 }, { "epoch": 1.582794189453125e-05, "model_forward_time": 0.025012493133544922, "step": 10373 }, { "epoch": 1.582794189453125e-05, "step": 10373, "training_step_time": 0.10909485816955566 }, { "epoch": 1.58294677734375e-05, "model_forward_time": 0.024019718170166016, "step": 10374 }, { "epoch": 1.58294677734375e-05, "step": 10374, "training_step_time": 0.10557317733764648 }, { "epoch": 1.583099365234375e-05, "model_forward_time": 0.023888826370239258, "step": 10375 }, { "epoch": 1.583099365234375e-05, "step": 10375, "training_step_time": 0.10823607444763184 }, { "epoch": 1.583251953125e-05, "model_forward_time": 0.025122642517089844, "step": 10376 }, { "epoch": 1.583251953125e-05, "step": 10376, "training_step_time": 0.10625076293945312 }, { "epoch": 1.583404541015625e-05, "model_forward_time": 0.024137020111083984, "step": 10377 }, { "epoch": 1.583404541015625e-05, "step": 10377, "training_step_time": 0.10349607467651367 }, { "epoch": 1.58355712890625e-05, "model_forward_time": 0.02344989776611328, "step": 10378 }, { "epoch": 1.58355712890625e-05, "step": 10378, "training_step_time": 0.10759115219116211 }, { "epoch": 1.583709716796875e-05, "model_forward_time": 0.02417278289794922, "step": 10379 }, { "epoch": 1.583709716796875e-05, "step": 10379, "training_step_time": 0.11425375938415527 }, { "epoch": 1.5838623046875e-05, "grad_norm": 0.5339275598526001, "learning_rate": 7.789869105876083e-05, "loss": 0.0274, "step": 10380 }, { "epoch": 1.5838623046875e-05, "model_forward_time": 0.024263858795166016, "step": 10380 }, { "epoch": 1.5838623046875e-05, "step": 10380, "training_step_time": 0.11874818801879883 }, { "epoch": 1.584014892578125e-05, "model_forward_time": 0.024433135986328125, "step": 10381 }, { "epoch": 1.584014892578125e-05, "step": 10381, "training_step_time": 0.10884571075439453 }, { "epoch": 1.58416748046875e-05, "model_forward_time": 0.0244140625, "step": 10382 }, { "epoch": 1.58416748046875e-05, "step": 10382, "training_step_time": 0.1699063777923584 }, { "epoch": 1.584320068359375e-05, "model_forward_time": 0.0238039493560791, "step": 10383 }, { "epoch": 1.584320068359375e-05, "step": 10383, "training_step_time": 0.17198586463928223 }, { "epoch": 1.58447265625e-05, "model_forward_time": 0.023403644561767578, "step": 10384 }, { "epoch": 1.58447265625e-05, "step": 10384, "training_step_time": 0.10480403900146484 }, { "epoch": 1.584625244140625e-05, "model_forward_time": 0.023782730102539062, "step": 10385 }, { "epoch": 1.584625244140625e-05, "step": 10385, "training_step_time": 0.10520076751708984 }, { "epoch": 1.58477783203125e-05, "model_forward_time": 0.024272680282592773, "step": 10386 }, { "epoch": 1.58477783203125e-05, "step": 10386, "training_step_time": 0.10866498947143555 }, { "epoch": 1.584930419921875e-05, "model_forward_time": 0.02450394630432129, "step": 10387 }, { "epoch": 1.584930419921875e-05, "step": 10387, "training_step_time": 0.124359130859375 }, { "epoch": 1.5850830078125e-05, "model_forward_time": 0.023942947387695312, "step": 10388 }, { "epoch": 1.5850830078125e-05, "step": 10388, "training_step_time": 0.12716412544250488 }, { "epoch": 1.585235595703125e-05, "model_forward_time": 0.02417159080505371, "step": 10389 }, { "epoch": 1.585235595703125e-05, "step": 10389, "training_step_time": 0.11841893196105957 }, { "epoch": 1.58538818359375e-05, "grad_norm": 0.3493765890598297, "learning_rate": 7.785293591601217e-05, "loss": 0.0258, "step": 10390 }, { "epoch": 1.58538818359375e-05, "model_forward_time": 0.023992061614990234, "step": 10390 }, { "epoch": 1.58538818359375e-05, "step": 10390, "training_step_time": 0.1124420166015625 }, { "epoch": 1.585540771484375e-05, "model_forward_time": 0.023928403854370117, "step": 10391 }, { "epoch": 1.585540771484375e-05, "step": 10391, "training_step_time": 0.10845112800598145 }, { "epoch": 1.585693359375e-05, "model_forward_time": 0.024823904037475586, "step": 10392 }, { "epoch": 1.585693359375e-05, "step": 10392, "training_step_time": 0.19379806518554688 }, { "epoch": 1.585845947265625e-05, "model_forward_time": 0.02316141128540039, "step": 10393 }, { "epoch": 1.585845947265625e-05, "step": 10393, "training_step_time": 0.1700119972229004 }, { "epoch": 1.58599853515625e-05, "model_forward_time": 0.026870250701904297, "step": 10394 }, { "epoch": 1.58599853515625e-05, "step": 10394, "training_step_time": 0.1321122646331787 }, { "epoch": 1.586151123046875e-05, "model_forward_time": 0.023236513137817383, "step": 10395 }, { "epoch": 1.586151123046875e-05, "step": 10395, "training_step_time": 0.1626434326171875 }, { "epoch": 1.5863037109375e-05, "model_forward_time": 0.023585796356201172, "step": 10396 }, { "epoch": 1.5863037109375e-05, "step": 10396, "training_step_time": 0.17064833641052246 }, { "epoch": 1.586456298828125e-05, "model_forward_time": 0.023578166961669922, "step": 10397 }, { "epoch": 1.586456298828125e-05, "step": 10397, "training_step_time": 0.17374849319458008 }, { "epoch": 1.58660888671875e-05, "model_forward_time": 0.02345132827758789, "step": 10398 }, { "epoch": 1.58660888671875e-05, "step": 10398, "training_step_time": 0.1232905387878418 }, { "epoch": 1.586761474609375e-05, "model_forward_time": 0.02559041976928711, "step": 10399 }, { "epoch": 1.586761474609375e-05, "step": 10399, "training_step_time": 0.11759591102600098 }, { "epoch": 1.5869140625e-05, "grad_norm": 0.35689061880111694, "learning_rate": 7.780714692932002e-05, "loss": 0.031, "step": 10400 }, { "epoch": 1.5869140625e-05, "model_forward_time": 0.024298906326293945, "step": 10400 }, { "epoch": 1.5869140625e-05, "step": 10400, "training_step_time": 0.1124122142791748 }, { "epoch": 1.587066650390625e-05, "model_forward_time": 0.024615049362182617, "step": 10401 }, { "epoch": 1.587066650390625e-05, "step": 10401, "training_step_time": 0.10725879669189453 }, { "epoch": 1.58721923828125e-05, "model_forward_time": 0.02423548698425293, "step": 10402 }, { "epoch": 1.58721923828125e-05, "step": 10402, "training_step_time": 0.10816550254821777 }, { "epoch": 1.587371826171875e-05, "model_forward_time": 0.024158954620361328, "step": 10403 }, { "epoch": 1.587371826171875e-05, "step": 10403, "training_step_time": 0.10678791999816895 }, { "epoch": 1.5875244140625e-05, "model_forward_time": 0.024281978607177734, "step": 10404 }, { "epoch": 1.5875244140625e-05, "step": 10404, "training_step_time": 0.10839986801147461 }, { "epoch": 1.587677001953125e-05, "model_forward_time": 0.024219036102294922, "step": 10405 }, { "epoch": 1.587677001953125e-05, "step": 10405, "training_step_time": 0.10494089126586914 }, { "epoch": 1.58782958984375e-05, "model_forward_time": 0.024111509323120117, "step": 10406 }, { "epoch": 1.58782958984375e-05, "step": 10406, "training_step_time": 0.10543084144592285 }, { "epoch": 1.587982177734375e-05, "model_forward_time": 0.024381637573242188, "step": 10407 }, { "epoch": 1.587982177734375e-05, "step": 10407, "training_step_time": 0.10554051399230957 }, { "epoch": 1.588134765625e-05, "model_forward_time": 0.02364516258239746, "step": 10408 }, { "epoch": 1.588134765625e-05, "step": 10408, "training_step_time": 0.10828971862792969 }, { "epoch": 1.588287353515625e-05, "model_forward_time": 0.024113178253173828, "step": 10409 }, { "epoch": 1.588287353515625e-05, "step": 10409, "training_step_time": 0.10925960540771484 }, { "epoch": 1.58843994140625e-05, "grad_norm": 0.3861542344093323, "learning_rate": 7.776132415432234e-05, "loss": 0.0279, "step": 10410 }, { "epoch": 1.58843994140625e-05, "model_forward_time": 0.024335384368896484, "step": 10410 }, { "epoch": 1.58843994140625e-05, "step": 10410, "training_step_time": 0.1092679500579834 }, { "epoch": 1.588592529296875e-05, "model_forward_time": 0.024267196655273438, "step": 10411 }, { "epoch": 1.588592529296875e-05, "step": 10411, "training_step_time": 0.11186528205871582 }, { "epoch": 1.5887451171875e-05, "model_forward_time": 0.024225473403930664, "step": 10412 }, { "epoch": 1.5887451171875e-05, "step": 10412, "training_step_time": 0.11743903160095215 }, { "epoch": 1.588897705078125e-05, "model_forward_time": 0.02416825294494629, "step": 10413 }, { "epoch": 1.588897705078125e-05, "step": 10413, "training_step_time": 0.11883044242858887 }, { "epoch": 1.58905029296875e-05, "model_forward_time": 0.024515628814697266, "step": 10414 }, { "epoch": 1.58905029296875e-05, "step": 10414, "training_step_time": 0.11916422843933105 }, { "epoch": 1.589202880859375e-05, "model_forward_time": 0.02464914321899414, "step": 10415 }, { "epoch": 1.589202880859375e-05, "step": 10415, "training_step_time": 0.12026143074035645 }, { "epoch": 1.58935546875e-05, "model_forward_time": 0.0243988037109375, "step": 10416 }, { "epoch": 1.58935546875e-05, "step": 10416, "training_step_time": 0.11646246910095215 }, { "epoch": 1.589508056640625e-05, "model_forward_time": 0.024510860443115234, "step": 10417 }, { "epoch": 1.589508056640625e-05, "step": 10417, "training_step_time": 0.11707425117492676 }, { "epoch": 1.58966064453125e-05, "model_forward_time": 0.024254322052001953, "step": 10418 }, { "epoch": 1.58966064453125e-05, "step": 10418, "training_step_time": 0.11407184600830078 }, { "epoch": 1.589813232421875e-05, "model_forward_time": 0.02410125732421875, "step": 10419 }, { "epoch": 1.589813232421875e-05, "step": 10419, "training_step_time": 0.11098575592041016 }, { "epoch": 1.5899658203125e-05, "grad_norm": 0.26254281401634216, "learning_rate": 7.771546764669807e-05, "loss": 0.0283, "step": 10420 }, { "epoch": 1.5899658203125e-05, "model_forward_time": 0.02383875846862793, "step": 10420 }, { "epoch": 1.5899658203125e-05, "step": 10420, "training_step_time": 0.11048102378845215 }, { "epoch": 1.590118408203125e-05, "model_forward_time": 0.024370193481445312, "step": 10421 }, { "epoch": 1.590118408203125e-05, "step": 10421, "training_step_time": 0.11007237434387207 }, { "epoch": 1.59027099609375e-05, "model_forward_time": 0.024503231048583984, "step": 10422 }, { "epoch": 1.59027099609375e-05, "step": 10422, "training_step_time": 0.10834693908691406 }, { "epoch": 1.590423583984375e-05, "model_forward_time": 0.024507522583007812, "step": 10423 }, { "epoch": 1.590423583984375e-05, "step": 10423, "training_step_time": 0.10723018646240234 }, { "epoch": 1.590576171875e-05, "model_forward_time": 0.024882793426513672, "step": 10424 }, { "epoch": 1.590576171875e-05, "step": 10424, "training_step_time": 0.10694742202758789 }, { "epoch": 1.590728759765625e-05, "model_forward_time": 0.024363040924072266, "step": 10425 }, { "epoch": 1.590728759765625e-05, "step": 10425, "training_step_time": 0.16831207275390625 }, { "epoch": 1.59088134765625e-05, "model_forward_time": 0.02494525909423828, "step": 10426 }, { "epoch": 1.59088134765625e-05, "step": 10426, "training_step_time": 0.16390180587768555 }, { "epoch": 1.591033935546875e-05, "model_forward_time": 0.02399158477783203, "step": 10427 }, { "epoch": 1.591033935546875e-05, "step": 10427, "training_step_time": 0.11094951629638672 }, { "epoch": 1.5911865234375e-05, "model_forward_time": 0.0238187313079834, "step": 10428 }, { "epoch": 1.5911865234375e-05, "step": 10428, "training_step_time": 0.10598421096801758 }, { "epoch": 1.591339111328125e-05, "model_forward_time": 0.024426937103271484, "step": 10429 }, { "epoch": 1.591339111328125e-05, "step": 10429, "training_step_time": 0.12009811401367188 }, { "epoch": 1.59149169921875e-05, "grad_norm": 0.4070347249507904, "learning_rate": 7.766957746216721e-05, "loss": 0.0222, "step": 10430 }, { "epoch": 1.59149169921875e-05, "model_forward_time": 0.024413585662841797, "step": 10430 }, { "epoch": 1.59149169921875e-05, "step": 10430, "training_step_time": 0.11384940147399902 }, { "epoch": 1.591644287109375e-05, "model_forward_time": 0.024778127670288086, "step": 10431 }, { "epoch": 1.591644287109375e-05, "step": 10431, "training_step_time": 0.10653281211853027 }, { "epoch": 1.591796875e-05, "model_forward_time": 0.024657487869262695, "step": 10432 }, { "epoch": 1.591796875e-05, "step": 10432, "training_step_time": 0.10950922966003418 }, { "epoch": 1.591949462890625e-05, "model_forward_time": 0.024526119232177734, "step": 10433 }, { "epoch": 1.591949462890625e-05, "step": 10433, "training_step_time": 0.14588642120361328 }, { "epoch": 1.59210205078125e-05, "model_forward_time": 0.02535700798034668, "step": 10434 }, { "epoch": 1.59210205078125e-05, "step": 10434, "training_step_time": 0.1367475986480713 }, { "epoch": 1.592254638671875e-05, "model_forward_time": 0.02455925941467285, "step": 10435 }, { "epoch": 1.592254638671875e-05, "step": 10435, "training_step_time": 0.11002755165100098 }, { "epoch": 1.5924072265625e-05, "model_forward_time": 0.025478124618530273, "step": 10436 }, { "epoch": 1.5924072265625e-05, "step": 10436, "training_step_time": 0.11432528495788574 }, { "epoch": 1.592559814453125e-05, "model_forward_time": 0.026172637939453125, "step": 10437 }, { "epoch": 1.592559814453125e-05, "step": 10437, "training_step_time": 0.11094403266906738 }, { "epoch": 1.59271240234375e-05, "model_forward_time": 0.025421619415283203, "step": 10438 }, { "epoch": 1.59271240234375e-05, "step": 10438, "training_step_time": 0.11231613159179688 }, { "epoch": 1.592864990234375e-05, "model_forward_time": 0.0251615047454834, "step": 10439 }, { "epoch": 1.592864990234375e-05, "step": 10439, "training_step_time": 0.13277983665466309 }, { "epoch": 1.593017578125e-05, "grad_norm": 0.6251704692840576, "learning_rate": 7.762365365649067e-05, "loss": 0.0223, "step": 10440 }, { "epoch": 1.593017578125e-05, "model_forward_time": 0.02540731430053711, "step": 10440 }, { "epoch": 1.593017578125e-05, "step": 10440, "training_step_time": 0.17345547676086426 }, { "epoch": 1.593170166015625e-05, "model_forward_time": 0.024407386779785156, "step": 10441 }, { "epoch": 1.593170166015625e-05, "step": 10441, "training_step_time": 0.1159369945526123 }, { "epoch": 1.59332275390625e-05, "model_forward_time": 0.024710416793823242, "step": 10442 }, { "epoch": 1.59332275390625e-05, "step": 10442, "training_step_time": 0.18603181838989258 }, { "epoch": 1.593475341796875e-05, "model_forward_time": 0.02434563636779785, "step": 10443 }, { "epoch": 1.593475341796875e-05, "step": 10443, "training_step_time": 0.16681909561157227 }, { "epoch": 1.5936279296875e-05, "model_forward_time": 0.024476051330566406, "step": 10444 }, { "epoch": 1.5936279296875e-05, "step": 10444, "training_step_time": 0.18172955513000488 }, { "epoch": 1.593780517578125e-05, "model_forward_time": 0.024565458297729492, "step": 10445 }, { "epoch": 1.593780517578125e-05, "step": 10445, "training_step_time": 0.1333320140838623 }, { "epoch": 1.59393310546875e-05, "model_forward_time": 0.024302005767822266, "step": 10446 }, { "epoch": 1.59393310546875e-05, "step": 10446, "training_step_time": 0.12752246856689453 }, { "epoch": 1.594085693359375e-05, "model_forward_time": 0.024477720260620117, "step": 10447 }, { "epoch": 1.594085693359375e-05, "step": 10447, "training_step_time": 0.1943376064300537 }, { "epoch": 1.59423828125e-05, "model_forward_time": 0.02451014518737793, "step": 10448 }, { "epoch": 1.59423828125e-05, "step": 10448, "training_step_time": 0.1097555160522461 }, { "epoch": 1.594390869140625e-05, "model_forward_time": 0.024710416793823242, "step": 10449 }, { "epoch": 1.594390869140625e-05, "step": 10449, "training_step_time": 0.11312270164489746 }, { "epoch": 1.59454345703125e-05, "grad_norm": 0.31595566868782043, "learning_rate": 7.757769628547018e-05, "loss": 0.0309, "step": 10450 }, { "epoch": 1.59454345703125e-05, "model_forward_time": 0.025224685668945312, "step": 10450 }, { "epoch": 1.59454345703125e-05, "step": 10450, "training_step_time": 0.10930180549621582 }, { "epoch": 1.594696044921875e-05, "model_forward_time": 0.025315523147583008, "step": 10451 }, { "epoch": 1.594696044921875e-05, "step": 10451, "training_step_time": 0.10760951042175293 }, { "epoch": 1.5948486328125e-05, "model_forward_time": 0.0253446102142334, "step": 10452 }, { "epoch": 1.5948486328125e-05, "step": 10452, "training_step_time": 0.10981440544128418 }, { "epoch": 1.595001220703125e-05, "model_forward_time": 0.02613091468811035, "step": 10453 }, { "epoch": 1.595001220703125e-05, "step": 10453, "training_step_time": 0.11004495620727539 }, { "epoch": 1.59515380859375e-05, "model_forward_time": 0.026526927947998047, "step": 10454 }, { "epoch": 1.59515380859375e-05, "step": 10454, "training_step_time": 0.11154627799987793 }, { "epoch": 1.595306396484375e-05, "model_forward_time": 0.02550506591796875, "step": 10455 }, { "epoch": 1.595306396484375e-05, "step": 10455, "training_step_time": 0.10796117782592773 }, { "epoch": 1.595458984375e-05, "model_forward_time": 0.025444984436035156, "step": 10456 }, { "epoch": 1.595458984375e-05, "step": 10456, "training_step_time": 0.10827040672302246 }, { "epoch": 1.595611572265625e-05, "model_forward_time": 0.025686979293823242, "step": 10457 }, { "epoch": 1.595611572265625e-05, "step": 10457, "training_step_time": 0.1076347827911377 }, { "epoch": 1.59576416015625e-05, "model_forward_time": 0.025017976760864258, "step": 10458 }, { "epoch": 1.59576416015625e-05, "step": 10458, "training_step_time": 0.10752582550048828 }, { "epoch": 1.595916748046875e-05, "model_forward_time": 0.025257587432861328, "step": 10459 }, { "epoch": 1.595916748046875e-05, "step": 10459, "training_step_time": 0.10657358169555664 }, { "epoch": 1.5960693359375e-05, "grad_norm": 0.42718175053596497, "learning_rate": 7.753170540494832e-05, "loss": 0.0252, "step": 10460 }, { "epoch": 1.5960693359375e-05, "model_forward_time": 0.025238752365112305, "step": 10460 }, { "epoch": 1.5960693359375e-05, "step": 10460, "training_step_time": 0.1077113151550293 }, { "epoch": 1.596221923828125e-05, "model_forward_time": 0.025685787200927734, "step": 10461 }, { "epoch": 1.596221923828125e-05, "step": 10461, "training_step_time": 0.10884928703308105 }, { "epoch": 1.59637451171875e-05, "model_forward_time": 0.024918556213378906, "step": 10462 }, { "epoch": 1.59637451171875e-05, "step": 10462, "training_step_time": 0.1101377010345459 }, { "epoch": 1.596527099609375e-05, "model_forward_time": 0.024950027465820312, "step": 10463 }, { "epoch": 1.596527099609375e-05, "step": 10463, "training_step_time": 0.10728859901428223 }, { "epoch": 1.5966796875e-05, "model_forward_time": 0.026139259338378906, "step": 10464 }, { "epoch": 1.5966796875e-05, "step": 10464, "training_step_time": 0.10781478881835938 }, { "epoch": 1.596832275390625e-05, "model_forward_time": 0.024890661239624023, "step": 10465 }, { "epoch": 1.596832275390625e-05, "step": 10465, "training_step_time": 0.10637140274047852 }, { "epoch": 1.59698486328125e-05, "model_forward_time": 0.02523517608642578, "step": 10466 }, { "epoch": 1.59698486328125e-05, "step": 10466, "training_step_time": 0.11573934555053711 }, { "epoch": 1.597137451171875e-05, "model_forward_time": 0.025285720825195312, "step": 10467 }, { "epoch": 1.597137451171875e-05, "step": 10467, "training_step_time": 0.10697102546691895 }, { "epoch": 1.5972900390625e-05, "model_forward_time": 0.025109291076660156, "step": 10468 }, { "epoch": 1.5972900390625e-05, "step": 10468, "training_step_time": 0.11490702629089355 }, { "epoch": 1.597442626953125e-05, "model_forward_time": 0.025126218795776367, "step": 10469 }, { "epoch": 1.597442626953125e-05, "step": 10469, "training_step_time": 0.11527299880981445 }, { "epoch": 1.59759521484375e-05, "grad_norm": 0.4019438922405243, "learning_rate": 7.748568107080832e-05, "loss": 0.026, "step": 10470 }, { "epoch": 1.59759521484375e-05, "model_forward_time": 0.025460481643676758, "step": 10470 }, { "epoch": 1.59759521484375e-05, "step": 10470, "training_step_time": 0.10825705528259277 }, { "epoch": 1.597747802734375e-05, "model_forward_time": 0.02575516700744629, "step": 10471 }, { "epoch": 1.597747802734375e-05, "step": 10471, "training_step_time": 0.16867518424987793 }, { "epoch": 1.597900390625e-05, "model_forward_time": 0.02455282211303711, "step": 10472 }, { "epoch": 1.597900390625e-05, "step": 10472, "training_step_time": 0.15870380401611328 }, { "epoch": 1.598052978515625e-05, "model_forward_time": 0.026572227478027344, "step": 10473 }, { "epoch": 1.598052978515625e-05, "step": 10473, "training_step_time": 0.11470770835876465 }, { "epoch": 1.59820556640625e-05, "model_forward_time": 0.024698257446289062, "step": 10474 }, { "epoch": 1.59820556640625e-05, "step": 10474, "training_step_time": 0.10711908340454102 }, { "epoch": 1.598358154296875e-05, "model_forward_time": 0.025254249572753906, "step": 10475 }, { "epoch": 1.598358154296875e-05, "step": 10475, "training_step_time": 0.17104721069335938 }, { "epoch": 1.5985107421875e-05, "model_forward_time": 0.025230884552001953, "step": 10476 }, { "epoch": 1.5985107421875e-05, "step": 10476, "training_step_time": 0.10722947120666504 }, { "epoch": 1.598663330078125e-05, "model_forward_time": 0.024804353713989258, "step": 10477 }, { "epoch": 1.598663330078125e-05, "step": 10477, "training_step_time": 0.10540151596069336 }, { "epoch": 1.59881591796875e-05, "model_forward_time": 0.0254819393157959, "step": 10478 }, { "epoch": 1.59881591796875e-05, "step": 10478, "training_step_time": 0.11165833473205566 }, { "epoch": 1.598968505859375e-05, "model_forward_time": 0.02542710304260254, "step": 10479 }, { "epoch": 1.598968505859375e-05, "step": 10479, "training_step_time": 0.14678025245666504 }, { "epoch": 1.59912109375e-05, "grad_norm": 0.19216464459896088, "learning_rate": 7.743962333897405e-05, "loss": 0.0218, "step": 10480 }, { "epoch": 1.59912109375e-05, "model_forward_time": 0.02490544319152832, "step": 10480 }, { "epoch": 1.59912109375e-05, "step": 10480, "training_step_time": 0.10986471176147461 }, { "epoch": 1.599273681640625e-05, "model_forward_time": 0.024964570999145508, "step": 10481 }, { "epoch": 1.599273681640625e-05, "step": 10481, "training_step_time": 0.11040115356445312 }, { "epoch": 1.59942626953125e-05, "model_forward_time": 0.025069713592529297, "step": 10482 }, { "epoch": 1.59942626953125e-05, "step": 10482, "training_step_time": 0.11390829086303711 }, { "epoch": 1.599578857421875e-05, "model_forward_time": 0.025154829025268555, "step": 10483 }, { "epoch": 1.599578857421875e-05, "step": 10483, "training_step_time": 0.11696314811706543 }, { "epoch": 1.5997314453125e-05, "model_forward_time": 0.024606943130493164, "step": 10484 }, { "epoch": 1.5997314453125e-05, "step": 10484, "training_step_time": 0.14050507545471191 }, { "epoch": 1.599884033203125e-05, "model_forward_time": 0.02508378028869629, "step": 10485 }, { "epoch": 1.599884033203125e-05, "step": 10485, "training_step_time": 0.19504046440124512 }, { "epoch": 1.60003662109375e-05, "model_forward_time": 0.02514791488647461, "step": 10486 }, { "epoch": 1.60003662109375e-05, "step": 10486, "training_step_time": 0.13492178916931152 }, { "epoch": 1.600189208984375e-05, "model_forward_time": 0.0247800350189209, "step": 10487 }, { "epoch": 1.600189208984375e-05, "step": 10487, "training_step_time": 0.10737776756286621 }, { "epoch": 1.600341796875e-05, "model_forward_time": 0.025420188903808594, "step": 10488 }, { "epoch": 1.600341796875e-05, "step": 10488, "training_step_time": 0.16438078880310059 }, { "epoch": 1.600494384765625e-05, "model_forward_time": 0.02596139907836914, "step": 10489 }, { "epoch": 1.600494384765625e-05, "step": 10489, "training_step_time": 0.15338516235351562 }, { "epoch": 1.60064697265625e-05, "grad_norm": 0.24947883188724518, "learning_rate": 7.739353226541009e-05, "loss": 0.0213, "step": 10490 }, { "epoch": 1.60064697265625e-05, "model_forward_time": 0.024484634399414062, "step": 10490 }, { "epoch": 1.60064697265625e-05, "step": 10490, "training_step_time": 0.18201184272766113 }, { "epoch": 1.600799560546875e-05, "model_forward_time": 0.024672508239746094, "step": 10491 }, { "epoch": 1.600799560546875e-05, "step": 10491, "training_step_time": 0.13194608688354492 }, { "epoch": 1.6009521484375e-05, "model_forward_time": 0.024747371673583984, "step": 10492 }, { "epoch": 1.6009521484375e-05, "step": 10492, "training_step_time": 0.11019277572631836 }, { "epoch": 1.601104736328125e-05, "model_forward_time": 0.0252835750579834, "step": 10493 }, { "epoch": 1.601104736328125e-05, "step": 10493, "training_step_time": 0.11962246894836426 }, { "epoch": 1.60125732421875e-05, "model_forward_time": 0.025742053985595703, "step": 10494 }, { "epoch": 1.60125732421875e-05, "step": 10494, "training_step_time": 0.10905981063842773 }, { "epoch": 1.601409912109375e-05, "model_forward_time": 0.025655746459960938, "step": 10495 }, { "epoch": 1.601409912109375e-05, "step": 10495, "training_step_time": 0.10987520217895508 }, { "epoch": 1.6015625e-05, "model_forward_time": 0.025223970413208008, "step": 10496 }, { "epoch": 1.6015625e-05, "step": 10496, "training_step_time": 0.1106257438659668 }, { "epoch": 1.601715087890625e-05, "model_forward_time": 0.025254487991333008, "step": 10497 }, { "epoch": 1.601715087890625e-05, "step": 10497, "training_step_time": 0.10752558708190918 }, { "epoch": 1.60186767578125e-05, "model_forward_time": 0.02494978904724121, "step": 10498 }, { "epoch": 1.60186767578125e-05, "step": 10498, "training_step_time": 0.11049318313598633 }, { "epoch": 1.602020263671875e-05, "model_forward_time": 0.02514028549194336, "step": 10499 }, { "epoch": 1.602020263671875e-05, "step": 10499, "training_step_time": 0.11073827743530273 }, { "epoch": 1.6021728515625e-05, "grad_norm": 0.30641114711761475, "learning_rate": 7.734740790612136e-05, "loss": 0.0176, "step": 10500 }, { "epoch": 1.6021728515625e-05, "model_forward_time": 0.025107145309448242, "step": 10500 }, { "epoch": 1.6021728515625e-05, "step": 10500, "training_step_time": 0.11032819747924805 }, { "epoch": 1.602325439453125e-05, "model_forward_time": 0.025366544723510742, "step": 10501 }, { "epoch": 1.602325439453125e-05, "step": 10501, "training_step_time": 0.10793328285217285 }, { "epoch": 1.60247802734375e-05, "model_forward_time": 0.02537679672241211, "step": 10502 }, { "epoch": 1.60247802734375e-05, "step": 10502, "training_step_time": 0.10836482048034668 }, { "epoch": 1.602630615234375e-05, "model_forward_time": 0.025829076766967773, "step": 10503 }, { "epoch": 1.602630615234375e-05, "step": 10503, "training_step_time": 0.10854649543762207 }, { "epoch": 1.602783203125e-05, "model_forward_time": 0.025397062301635742, "step": 10504 }, { "epoch": 1.602783203125e-05, "step": 10504, "training_step_time": 0.10765290260314941 }, { "epoch": 1.602935791015625e-05, "model_forward_time": 0.02538275718688965, "step": 10505 }, { "epoch": 1.602935791015625e-05, "step": 10505, "training_step_time": 0.11289167404174805 }, { "epoch": 1.60308837890625e-05, "model_forward_time": 0.025197267532348633, "step": 10506 }, { "epoch": 1.60308837890625e-05, "step": 10506, "training_step_time": 0.10851097106933594 }, { "epoch": 1.603240966796875e-05, "model_forward_time": 0.025312185287475586, "step": 10507 }, { "epoch": 1.603240966796875e-05, "step": 10507, "training_step_time": 0.10791516304016113 }, { "epoch": 1.6033935546875e-05, "model_forward_time": 0.02554035186767578, "step": 10508 }, { "epoch": 1.6033935546875e-05, "step": 10508, "training_step_time": 0.10912132263183594 }, { "epoch": 1.603546142578125e-05, "model_forward_time": 0.025310039520263672, "step": 10509 }, { "epoch": 1.603546142578125e-05, "step": 10509, "training_step_time": 0.11201691627502441 }, { "epoch": 1.60369873046875e-05, "grad_norm": 0.4064136743545532, "learning_rate": 7.730125031715331e-05, "loss": 0.0358, "step": 10510 }, { "epoch": 1.60369873046875e-05, "model_forward_time": 0.02521061897277832, "step": 10510 }, { "epoch": 1.60369873046875e-05, "step": 10510, "training_step_time": 0.10727596282958984 }, { "epoch": 1.603851318359375e-05, "model_forward_time": 0.025233983993530273, "step": 10511 }, { "epoch": 1.603851318359375e-05, "step": 10511, "training_step_time": 0.11281418800354004 }, { "epoch": 1.60400390625e-05, "model_forward_time": 0.025403738021850586, "step": 10512 }, { "epoch": 1.60400390625e-05, "step": 10512, "training_step_time": 0.11193084716796875 }, { "epoch": 1.604156494140625e-05, "model_forward_time": 0.02489924430847168, "step": 10513 }, { "epoch": 1.604156494140625e-05, "step": 10513, "training_step_time": 0.11498546600341797 }, { "epoch": 1.60430908203125e-05, "model_forward_time": 0.02527165412902832, "step": 10514 }, { "epoch": 1.60430908203125e-05, "step": 10514, "training_step_time": 0.17951035499572754 }, { "epoch": 1.604461669921875e-05, "model_forward_time": 0.025029659271240234, "step": 10515 }, { "epoch": 1.604461669921875e-05, "step": 10515, "training_step_time": 0.15661096572875977 }, { "epoch": 1.6046142578125e-05, "model_forward_time": 0.024759292602539062, "step": 10516 }, { "epoch": 1.6046142578125e-05, "step": 10516, "training_step_time": 0.13867449760437012 }, { "epoch": 1.604766845703125e-05, "model_forward_time": 0.02432394027709961, "step": 10517 }, { "epoch": 1.604766845703125e-05, "step": 10517, "training_step_time": 0.13477873802185059 }, { "epoch": 1.60491943359375e-05, "model_forward_time": 0.024957656860351562, "step": 10518 }, { "epoch": 1.60491943359375e-05, "step": 10518, "training_step_time": 0.129655122756958 }, { "epoch": 1.605072021484375e-05, "model_forward_time": 0.0246121883392334, "step": 10519 }, { "epoch": 1.605072021484375e-05, "step": 10519, "training_step_time": 0.20784544944763184 }, { "epoch": 1.605224609375e-05, "grad_norm": 0.24456650018692017, "learning_rate": 7.725505955459183e-05, "loss": 0.0165, "step": 10520 }, { "epoch": 1.605224609375e-05, "model_forward_time": 0.023955106735229492, "step": 10520 }, { "epoch": 1.605224609375e-05, "step": 10520, "training_step_time": 0.14185571670532227 }, { "epoch": 1.605377197265625e-05, "model_forward_time": 0.02412557601928711, "step": 10521 }, { "epoch": 1.605377197265625e-05, "step": 10521, "training_step_time": 0.20664763450622559 }, { "epoch": 1.60552978515625e-05, "model_forward_time": 0.024388551712036133, "step": 10522 }, { "epoch": 1.60552978515625e-05, "step": 10522, "training_step_time": 0.11159443855285645 }, { "epoch": 1.605682373046875e-05, "model_forward_time": 0.024804115295410156, "step": 10523 }, { "epoch": 1.605682373046875e-05, "step": 10523, "training_step_time": 0.18281078338623047 }, { "epoch": 1.6058349609375e-05, "model_forward_time": 0.024466276168823242, "step": 10524 }, { "epoch": 1.6058349609375e-05, "step": 10524, "training_step_time": 0.16666436195373535 }, { "epoch": 1.605987548828125e-05, "model_forward_time": 0.02457880973815918, "step": 10525 }, { "epoch": 1.605987548828125e-05, "step": 10525, "training_step_time": 0.19982552528381348 }, { "epoch": 1.60614013671875e-05, "model_forward_time": 0.023839712142944336, "step": 10526 }, { "epoch": 1.60614013671875e-05, "step": 10526, "training_step_time": 0.12900233268737793 }, { "epoch": 1.606292724609375e-05, "model_forward_time": 0.025035858154296875, "step": 10527 }, { "epoch": 1.606292724609375e-05, "step": 10527, "training_step_time": 0.18851661682128906 }, { "epoch": 1.6064453125e-05, "model_forward_time": 0.02438044548034668, "step": 10528 }, { "epoch": 1.6064453125e-05, "step": 10528, "training_step_time": 0.1660594940185547 }, { "epoch": 1.606597900390625e-05, "model_forward_time": 0.0245358943939209, "step": 10529 }, { "epoch": 1.606597900390625e-05, "step": 10529, "training_step_time": 0.14285039901733398 }, { "epoch": 1.60675048828125e-05, "grad_norm": 0.33786284923553467, "learning_rate": 7.720883567456298e-05, "loss": 0.0252, "step": 10530 }, { "epoch": 1.60675048828125e-05, "model_forward_time": 0.02480792999267578, "step": 10530 }, { "epoch": 1.60675048828125e-05, "step": 10530, "training_step_time": 0.12838077545166016 }, { "epoch": 1.606903076171875e-05, "model_forward_time": 0.02434825897216797, "step": 10531 }, { "epoch": 1.606903076171875e-05, "step": 10531, "training_step_time": 0.1361076831817627 }, { "epoch": 1.6070556640625e-05, "model_forward_time": 0.02456355094909668, "step": 10532 }, { "epoch": 1.6070556640625e-05, "step": 10532, "training_step_time": 0.11316776275634766 }, { "epoch": 1.607208251953125e-05, "model_forward_time": 0.025348424911499023, "step": 10533 }, { "epoch": 1.607208251953125e-05, "step": 10533, "training_step_time": 0.15860390663146973 }, { "epoch": 1.60736083984375e-05, "model_forward_time": 0.024854421615600586, "step": 10534 }, { "epoch": 1.60736083984375e-05, "step": 10534, "training_step_time": 0.19716119766235352 }, { "epoch": 1.607513427734375e-05, "model_forward_time": 0.023891448974609375, "step": 10535 }, { "epoch": 1.607513427734375e-05, "step": 10535, "training_step_time": 0.12010931968688965 }, { "epoch": 1.607666015625e-05, "model_forward_time": 0.023906230926513672, "step": 10536 }, { "epoch": 1.607666015625e-05, "step": 10536, "training_step_time": 0.1123809814453125 }, { "epoch": 1.607818603515625e-05, "model_forward_time": 0.024956226348876953, "step": 10537 }, { "epoch": 1.607818603515625e-05, "step": 10537, "training_step_time": 0.11367988586425781 }, { "epoch": 1.60797119140625e-05, "model_forward_time": 0.025176525115966797, "step": 10538 }, { "epoch": 1.60797119140625e-05, "step": 10538, "training_step_time": 0.1213839054107666 }, { "epoch": 1.608123779296875e-05, "model_forward_time": 0.025254487991333008, "step": 10539 }, { "epoch": 1.608123779296875e-05, "step": 10539, "training_step_time": 0.10712575912475586 }, { "epoch": 1.6082763671875e-05, "grad_norm": 0.2922857999801636, "learning_rate": 7.716257873323316e-05, "loss": 0.0218, "step": 10540 }, { "epoch": 1.6082763671875e-05, "model_forward_time": 0.0251467227935791, "step": 10540 }, { "epoch": 1.6082763671875e-05, "step": 10540, "training_step_time": 0.10972046852111816 }, { "epoch": 1.608428955078125e-05, "model_forward_time": 0.028188228607177734, "step": 10541 }, { "epoch": 1.608428955078125e-05, "step": 10541, "training_step_time": 0.11009359359741211 }, { "epoch": 1.60858154296875e-05, "model_forward_time": 0.025024890899658203, "step": 10542 }, { "epoch": 1.60858154296875e-05, "step": 10542, "training_step_time": 0.11162519454956055 }, { "epoch": 1.608734130859375e-05, "model_forward_time": 0.025208473205566406, "step": 10543 }, { "epoch": 1.608734130859375e-05, "step": 10543, "training_step_time": 0.10760021209716797 }, { "epoch": 1.60888671875e-05, "model_forward_time": 0.02494668960571289, "step": 10544 }, { "epoch": 1.60888671875e-05, "step": 10544, "training_step_time": 0.11358499526977539 }, { "epoch": 1.609039306640625e-05, "model_forward_time": 0.025033950805664062, "step": 10545 }, { "epoch": 1.609039306640625e-05, "step": 10545, "training_step_time": 0.10782980918884277 }, { "epoch": 1.60919189453125e-05, "model_forward_time": 0.025456666946411133, "step": 10546 }, { "epoch": 1.60919189453125e-05, "step": 10546, "training_step_time": 0.10735702514648438 }, { "epoch": 1.609344482421875e-05, "model_forward_time": 0.026746511459350586, "step": 10547 }, { "epoch": 1.609344482421875e-05, "step": 10547, "training_step_time": 0.10863375663757324 }, { "epoch": 1.6094970703125e-05, "model_forward_time": 0.025525808334350586, "step": 10548 }, { "epoch": 1.6094970703125e-05, "step": 10548, "training_step_time": 0.10991716384887695 }, { "epoch": 1.609649658203125e-05, "model_forward_time": 0.025144577026367188, "step": 10549 }, { "epoch": 1.609649658203125e-05, "step": 10549, "training_step_time": 0.10965776443481445 }, { "epoch": 1.60980224609375e-05, "grad_norm": 0.2572328746318817, "learning_rate": 7.711628878680892e-05, "loss": 0.0169, "step": 10550 }, { "epoch": 1.60980224609375e-05, "model_forward_time": 0.025022029876708984, "step": 10550 }, { "epoch": 1.60980224609375e-05, "step": 10550, "training_step_time": 0.10916709899902344 }, { "epoch": 1.609954833984375e-05, "model_forward_time": 0.025048255920410156, "step": 10551 }, { "epoch": 1.609954833984375e-05, "step": 10551, "training_step_time": 0.10606551170349121 }, { "epoch": 1.610107421875e-05, "model_forward_time": 0.02530956268310547, "step": 10552 }, { "epoch": 1.610107421875e-05, "step": 10552, "training_step_time": 0.11098814010620117 }, { "epoch": 1.610260009765625e-05, "model_forward_time": 0.024949312210083008, "step": 10553 }, { "epoch": 1.610260009765625e-05, "step": 10553, "training_step_time": 0.10766291618347168 }, { "epoch": 1.61041259765625e-05, "model_forward_time": 0.024953126907348633, "step": 10554 }, { "epoch": 1.61041259765625e-05, "step": 10554, "training_step_time": 0.1059420108795166 }, { "epoch": 1.610565185546875e-05, "model_forward_time": 0.025127649307250977, "step": 10555 }, { "epoch": 1.610565185546875e-05, "step": 10555, "training_step_time": 0.1130363941192627 }, { "epoch": 1.6107177734375e-05, "model_forward_time": 0.025095462799072266, "step": 10556 }, { "epoch": 1.6107177734375e-05, "step": 10556, "training_step_time": 0.10740447044372559 }, { "epoch": 1.610870361328125e-05, "model_forward_time": 0.025441646575927734, "step": 10557 }, { "epoch": 1.610870361328125e-05, "step": 10557, "training_step_time": 0.10951924324035645 }, { "epoch": 1.61102294921875e-05, "model_forward_time": 0.024512529373168945, "step": 10558 }, { "epoch": 1.61102294921875e-05, "step": 10558, "training_step_time": 0.11040949821472168 }, { "epoch": 1.611175537109375e-05, "model_forward_time": 0.02487325668334961, "step": 10559 }, { "epoch": 1.611175537109375e-05, "step": 10559, "training_step_time": 0.10732626914978027 }, { "epoch": 1.611328125e-05, "grad_norm": 0.31317847967147827, "learning_rate": 7.70699658915369e-05, "loss": 0.0187, "step": 10560 }, { "epoch": 1.611328125e-05, "model_forward_time": 0.02568340301513672, "step": 10560 }, { "epoch": 1.611328125e-05, "step": 10560, "training_step_time": 0.10809040069580078 }, { "epoch": 1.611480712890625e-05, "model_forward_time": 0.025400638580322266, "step": 10561 }, { "epoch": 1.611480712890625e-05, "step": 10561, "training_step_time": 0.1192770004272461 }, { "epoch": 1.61163330078125e-05, "model_forward_time": 0.024823904037475586, "step": 10562 }, { "epoch": 1.61163330078125e-05, "step": 10562, "training_step_time": 0.1626591682434082 }, { "epoch": 1.611785888671875e-05, "model_forward_time": 0.024608850479125977, "step": 10563 }, { "epoch": 1.611785888671875e-05, "step": 10563, "training_step_time": 0.2054145336151123 }, { "epoch": 1.6119384765625e-05, "model_forward_time": 0.024033546447753906, "step": 10564 }, { "epoch": 1.6119384765625e-05, "step": 10564, "training_step_time": 0.1954360008239746 }, { "epoch": 1.612091064453125e-05, "model_forward_time": 0.024494171142578125, "step": 10565 }, { "epoch": 1.612091064453125e-05, "step": 10565, "training_step_time": 0.10426878929138184 }, { "epoch": 1.61224365234375e-05, "model_forward_time": 0.02487325668334961, "step": 10566 }, { "epoch": 1.61224365234375e-05, "step": 10566, "training_step_time": 0.10887527465820312 }, { "epoch": 1.612396240234375e-05, "model_forward_time": 0.02513265609741211, "step": 10567 }, { "epoch": 1.612396240234375e-05, "step": 10567, "training_step_time": 0.10985541343688965 }, { "epoch": 1.612548828125e-05, "model_forward_time": 0.025142192840576172, "step": 10568 }, { "epoch": 1.612548828125e-05, "step": 10568, "training_step_time": 0.1395702362060547 }, { "epoch": 1.612701416015625e-05, "model_forward_time": 0.025128841400146484, "step": 10569 }, { "epoch": 1.612701416015625e-05, "step": 10569, "training_step_time": 0.10928678512573242 }, { "epoch": 1.61285400390625e-05, "grad_norm": 0.337839812040329, "learning_rate": 7.70236101037038e-05, "loss": 0.0204, "step": 10570 }, { "epoch": 1.61285400390625e-05, "model_forward_time": 0.0249483585357666, "step": 10570 }, { "epoch": 1.61285400390625e-05, "step": 10570, "training_step_time": 0.11575818061828613 }, { "epoch": 1.613006591796875e-05, "model_forward_time": 0.025151491165161133, "step": 10571 }, { "epoch": 1.613006591796875e-05, "step": 10571, "training_step_time": 0.11050915718078613 }, { "epoch": 1.6131591796875e-05, "model_forward_time": 0.025507450103759766, "step": 10572 }, { "epoch": 1.6131591796875e-05, "step": 10572, "training_step_time": 0.13017654418945312 }, { "epoch": 1.613311767578125e-05, "model_forward_time": 0.025402545928955078, "step": 10573 }, { "epoch": 1.613311767578125e-05, "step": 10573, "training_step_time": 0.1840343475341797 }, { "epoch": 1.61346435546875e-05, "model_forward_time": 0.024535179138183594, "step": 10574 }, { "epoch": 1.61346435546875e-05, "step": 10574, "training_step_time": 0.1960587501525879 }, { "epoch": 1.613616943359375e-05, "model_forward_time": 0.024472713470458984, "step": 10575 }, { "epoch": 1.613616943359375e-05, "step": 10575, "training_step_time": 0.18572068214416504 }, { "epoch": 1.61376953125e-05, "model_forward_time": 0.024379968643188477, "step": 10576 }, { "epoch": 1.61376953125e-05, "step": 10576, "training_step_time": 0.15896248817443848 }, { "epoch": 1.613922119140625e-05, "model_forward_time": 0.024429798126220703, "step": 10577 }, { "epoch": 1.613922119140625e-05, "step": 10577, "training_step_time": 0.18648028373718262 }, { "epoch": 1.61407470703125e-05, "model_forward_time": 0.024054288864135742, "step": 10578 }, { "epoch": 1.61407470703125e-05, "step": 10578, "training_step_time": 0.15274739265441895 }, { "epoch": 1.614227294921875e-05, "model_forward_time": 0.024066925048828125, "step": 10579 }, { "epoch": 1.614227294921875e-05, "step": 10579, "training_step_time": 0.13208365440368652 }, { "epoch": 1.6143798828125e-05, "grad_norm": 0.31975287199020386, "learning_rate": 7.697722147963626e-05, "loss": 0.0282, "step": 10580 }, { "epoch": 1.6143798828125e-05, "model_forward_time": 0.024564504623413086, "step": 10580 }, { "epoch": 1.6143798828125e-05, "step": 10580, "training_step_time": 0.12956881523132324 }, { "epoch": 1.614532470703125e-05, "model_forward_time": 0.024527788162231445, "step": 10581 }, { "epoch": 1.614532470703125e-05, "step": 10581, "training_step_time": 0.23170185089111328 }, { "epoch": 1.61468505859375e-05, "model_forward_time": 0.024355173110961914, "step": 10582 }, { "epoch": 1.61468505859375e-05, "step": 10582, "training_step_time": 0.1052548885345459 }, { "epoch": 1.614837646484375e-05, "model_forward_time": 0.024548053741455078, "step": 10583 }, { "epoch": 1.614837646484375e-05, "step": 10583, "training_step_time": 0.10744786262512207 }, { "epoch": 1.614990234375e-05, "model_forward_time": 0.025532007217407227, "step": 10584 }, { "epoch": 1.614990234375e-05, "step": 10584, "training_step_time": 0.11202406883239746 }, { "epoch": 1.615142822265625e-05, "model_forward_time": 0.025130748748779297, "step": 10585 }, { "epoch": 1.615142822265625e-05, "step": 10585, "training_step_time": 0.11208915710449219 }, { "epoch": 1.61529541015625e-05, "model_forward_time": 0.025387048721313477, "step": 10586 }, { "epoch": 1.61529541015625e-05, "step": 10586, "training_step_time": 0.10964155197143555 }, { "epoch": 1.615447998046875e-05, "model_forward_time": 0.025470256805419922, "step": 10587 }, { "epoch": 1.615447998046875e-05, "step": 10587, "training_step_time": 0.11338639259338379 }, { "epoch": 1.6156005859375e-05, "model_forward_time": 0.02461528778076172, "step": 10588 }, { "epoch": 1.6156005859375e-05, "step": 10588, "training_step_time": 0.10921812057495117 }, { "epoch": 1.615753173828125e-05, "model_forward_time": 0.02517414093017578, "step": 10589 }, { "epoch": 1.615753173828125e-05, "step": 10589, "training_step_time": 0.11305379867553711 }, { "epoch": 1.61590576171875e-05, "grad_norm": 0.4433531165122986, "learning_rate": 7.693080007570084e-05, "loss": 0.0281, "step": 10590 }, { "epoch": 1.61590576171875e-05, "model_forward_time": 0.025302410125732422, "step": 10590 }, { "epoch": 1.61590576171875e-05, "step": 10590, "training_step_time": 0.11017560958862305 }, { "epoch": 1.616058349609375e-05, "model_forward_time": 0.02539825439453125, "step": 10591 }, { "epoch": 1.616058349609375e-05, "step": 10591, "training_step_time": 0.10996890068054199 }, { "epoch": 1.6162109375e-05, "model_forward_time": 0.026195526123046875, "step": 10592 }, { "epoch": 1.6162109375e-05, "step": 10592, "training_step_time": 0.11437010765075684 }, { "epoch": 1.616363525390625e-05, "model_forward_time": 0.02526998519897461, "step": 10593 }, { "epoch": 1.616363525390625e-05, "step": 10593, "training_step_time": 0.1098635196685791 }, { "epoch": 1.61651611328125e-05, "model_forward_time": 0.02582263946533203, "step": 10594 }, { "epoch": 1.61651611328125e-05, "step": 10594, "training_step_time": 0.10863113403320312 }, { "epoch": 1.616668701171875e-05, "model_forward_time": 0.02534770965576172, "step": 10595 }, { "epoch": 1.616668701171875e-05, "step": 10595, "training_step_time": 0.10840177536010742 }, { "epoch": 1.6168212890625e-05, "model_forward_time": 0.02443671226501465, "step": 10596 }, { "epoch": 1.6168212890625e-05, "step": 10596, "training_step_time": 0.10681509971618652 }, { "epoch": 1.616973876953125e-05, "model_forward_time": 0.02490830421447754, "step": 10597 }, { "epoch": 1.616973876953125e-05, "step": 10597, "training_step_time": 0.10789942741394043 }, { "epoch": 1.61712646484375e-05, "model_forward_time": 0.025417089462280273, "step": 10598 }, { "epoch": 1.61712646484375e-05, "step": 10598, "training_step_time": 0.1086282730102539 }, { "epoch": 1.617279052734375e-05, "model_forward_time": 0.025480031967163086, "step": 10599 }, { "epoch": 1.617279052734375e-05, "step": 10599, "training_step_time": 0.10722112655639648 }, { "epoch": 1.617431640625e-05, "grad_norm": 0.3286585807800293, "learning_rate": 7.688434594830392e-05, "loss": 0.0389, "step": 10600 }, { "epoch": 1.617431640625e-05, "model_forward_time": 0.025630712509155273, "step": 10600 }, { "epoch": 1.617431640625e-05, "step": 10600, "training_step_time": 0.11150717735290527 }, { "epoch": 1.617584228515625e-05, "model_forward_time": 0.025196552276611328, "step": 10601 }, { "epoch": 1.617584228515625e-05, "step": 10601, "training_step_time": 0.10967135429382324 }, { "epoch": 1.61773681640625e-05, "model_forward_time": 0.0252687931060791, "step": 10602 }, { "epoch": 1.61773681640625e-05, "step": 10602, "training_step_time": 0.10914802551269531 }, { "epoch": 1.617889404296875e-05, "model_forward_time": 0.024845361709594727, "step": 10603 }, { "epoch": 1.617889404296875e-05, "step": 10603, "training_step_time": 0.10592317581176758 }, { "epoch": 1.6180419921875e-05, "model_forward_time": 0.024120807647705078, "step": 10604 }, { "epoch": 1.6180419921875e-05, "step": 10604, "training_step_time": 0.15098023414611816 }, { "epoch": 1.618194580078125e-05, "model_forward_time": 0.024387836456298828, "step": 10605 }, { "epoch": 1.618194580078125e-05, "step": 10605, "training_step_time": 0.10559844970703125 }, { "epoch": 1.61834716796875e-05, "model_forward_time": 0.024695873260498047, "step": 10606 }, { "epoch": 1.61834716796875e-05, "step": 10606, "training_step_time": 0.11150693893432617 }, { "epoch": 1.618499755859375e-05, "model_forward_time": 0.025368690490722656, "step": 10607 }, { "epoch": 1.618499755859375e-05, "step": 10607, "training_step_time": 0.10921120643615723 }, { "epoch": 1.61865234375e-05, "model_forward_time": 0.02649998664855957, "step": 10608 }, { "epoch": 1.61865234375e-05, "step": 10608, "training_step_time": 0.11756086349487305 }, { "epoch": 1.618804931640625e-05, "model_forward_time": 0.025644540786743164, "step": 10609 }, { "epoch": 1.618804931640625e-05, "step": 10609, "training_step_time": 0.1240396499633789 }, { "epoch": 1.61895751953125e-05, "grad_norm": 0.39339444041252136, "learning_rate": 7.683785915389164e-05, "loss": 0.0516, "step": 10610 }, { "epoch": 1.61895751953125e-05, "model_forward_time": 0.02530384063720703, "step": 10610 }, { "epoch": 1.61895751953125e-05, "step": 10610, "training_step_time": 0.11513924598693848 }, { "epoch": 1.619110107421875e-05, "model_forward_time": 0.025251388549804688, "step": 10611 }, { "epoch": 1.619110107421875e-05, "step": 10611, "training_step_time": 0.10688281059265137 }, { "epoch": 1.6192626953125e-05, "model_forward_time": 0.0250394344329834, "step": 10612 }, { "epoch": 1.6192626953125e-05, "step": 10612, "training_step_time": 0.12037467956542969 }, { "epoch": 1.619415283203125e-05, "model_forward_time": 0.02498483657836914, "step": 10613 }, { "epoch": 1.619415283203125e-05, "step": 10613, "training_step_time": 0.11236143112182617 }, { "epoch": 1.61956787109375e-05, "model_forward_time": 0.024936914443969727, "step": 10614 }, { "epoch": 1.61956787109375e-05, "step": 10614, "training_step_time": 0.18805456161499023 }, { "epoch": 1.619720458984375e-05, "model_forward_time": 0.024165630340576172, "step": 10615 }, { "epoch": 1.619720458984375e-05, "step": 10615, "training_step_time": 0.17286896705627441 }, { "epoch": 1.619873046875e-05, "model_forward_time": 0.02425551414489746, "step": 10616 }, { "epoch": 1.619873046875e-05, "step": 10616, "training_step_time": 0.17725586891174316 }, { "epoch": 1.620025634765625e-05, "model_forward_time": 0.02496051788330078, "step": 10617 }, { "epoch": 1.620025634765625e-05, "step": 10617, "training_step_time": 0.11138200759887695 }, { "epoch": 1.62017822265625e-05, "model_forward_time": 0.02391338348388672, "step": 10618 }, { "epoch": 1.62017822265625e-05, "step": 10618, "training_step_time": 0.13617658615112305 }, { "epoch": 1.620330810546875e-05, "model_forward_time": 0.024419307708740234, "step": 10619 }, { "epoch": 1.620330810546875e-05, "step": 10619, "training_step_time": 0.14745259284973145 }, { "epoch": 1.6204833984375e-05, "grad_norm": 0.48044633865356445, "learning_rate": 7.679133974894983e-05, "loss": 0.037, "step": 10620 }, { "epoch": 1.6204833984375e-05, "model_forward_time": 0.02390575408935547, "step": 10620 }, { "epoch": 1.6204833984375e-05, "step": 10620, "training_step_time": 0.1304788589477539 }, { "epoch": 1.620635986328125e-05, "model_forward_time": 0.024514436721801758, "step": 10621 }, { "epoch": 1.620635986328125e-05, "step": 10621, "training_step_time": 0.13311409950256348 }, { "epoch": 1.62078857421875e-05, "model_forward_time": 0.024771928787231445, "step": 10622 }, { "epoch": 1.62078857421875e-05, "step": 10622, "training_step_time": 0.12976574897766113 }, { "epoch": 1.620941162109375e-05, "model_forward_time": 0.02439713478088379, "step": 10623 }, { "epoch": 1.620941162109375e-05, "step": 10623, "training_step_time": 0.12507915496826172 }, { "epoch": 1.62109375e-05, "model_forward_time": 0.024590730667114258, "step": 10624 }, { "epoch": 1.62109375e-05, "step": 10624, "training_step_time": 0.21639227867126465 }, { "epoch": 1.621246337890625e-05, "model_forward_time": 0.02448439598083496, "step": 10625 }, { "epoch": 1.621246337890625e-05, "step": 10625, "training_step_time": 0.13332295417785645 }, { "epoch": 1.62139892578125e-05, "model_forward_time": 0.02486562728881836, "step": 10626 }, { "epoch": 1.62139892578125e-05, "step": 10626, "training_step_time": 0.12364006042480469 }, { "epoch": 1.621551513671875e-05, "model_forward_time": 0.02440357208251953, "step": 10627 }, { "epoch": 1.621551513671875e-05, "step": 10627, "training_step_time": 0.12582111358642578 }, { "epoch": 1.6217041015625e-05, "model_forward_time": 0.0254213809967041, "step": 10628 }, { "epoch": 1.6217041015625e-05, "step": 10628, "training_step_time": 0.11520004272460938 }, { "epoch": 1.621856689453125e-05, "model_forward_time": 0.025415897369384766, "step": 10629 }, { "epoch": 1.621856689453125e-05, "step": 10629, "training_step_time": 0.11497116088867188 }, { "epoch": 1.62200927734375e-05, "grad_norm": 0.328964501619339, "learning_rate": 7.674478779000398e-05, "loss": 0.0205, "step": 10630 }, { "epoch": 1.62200927734375e-05, "model_forward_time": 0.0247952938079834, "step": 10630 }, { "epoch": 1.62200927734375e-05, "step": 10630, "training_step_time": 0.11280703544616699 }, { "epoch": 1.622161865234375e-05, "model_forward_time": 0.02570486068725586, "step": 10631 }, { "epoch": 1.622161865234375e-05, "step": 10631, "training_step_time": 0.11025571823120117 }, { "epoch": 1.622314453125e-05, "model_forward_time": 0.025430679321289062, "step": 10632 }, { "epoch": 1.622314453125e-05, "step": 10632, "training_step_time": 0.10847854614257812 }, { "epoch": 1.622467041015625e-05, "model_forward_time": 0.024906635284423828, "step": 10633 }, { "epoch": 1.622467041015625e-05, "step": 10633, "training_step_time": 0.1090085506439209 }, { "epoch": 1.62261962890625e-05, "model_forward_time": 0.024785280227661133, "step": 10634 }, { "epoch": 1.62261962890625e-05, "step": 10634, "training_step_time": 0.10831594467163086 }, { "epoch": 1.622772216796875e-05, "model_forward_time": 0.026300668716430664, "step": 10635 }, { "epoch": 1.622772216796875e-05, "step": 10635, "training_step_time": 0.10818266868591309 }, { "epoch": 1.6229248046875e-05, "model_forward_time": 0.025136232376098633, "step": 10636 }, { "epoch": 1.6229248046875e-05, "step": 10636, "training_step_time": 0.10817170143127441 }, { "epoch": 1.623077392578125e-05, "model_forward_time": 0.02570033073425293, "step": 10637 }, { "epoch": 1.623077392578125e-05, "step": 10637, "training_step_time": 0.11177325248718262 }, { "epoch": 1.62322998046875e-05, "model_forward_time": 0.025026798248291016, "step": 10638 }, { "epoch": 1.62322998046875e-05, "step": 10638, "training_step_time": 0.1074068546295166 }, { "epoch": 1.623382568359375e-05, "model_forward_time": 0.025051593780517578, "step": 10639 }, { "epoch": 1.623382568359375e-05, "step": 10639, "training_step_time": 0.10706186294555664 }, { "epoch": 1.62353515625e-05, "grad_norm": 0.3106250762939453, "learning_rate": 7.66982033336191e-05, "loss": 0.0306, "step": 10640 }, { "epoch": 1.62353515625e-05, "model_forward_time": 0.02465534210205078, "step": 10640 }, { "epoch": 1.62353515625e-05, "step": 10640, "training_step_time": 0.10710573196411133 }, { "epoch": 1.623687744140625e-05, "model_forward_time": 0.02523350715637207, "step": 10641 }, { "epoch": 1.623687744140625e-05, "step": 10641, "training_step_time": 0.10778594017028809 }, { "epoch": 1.62384033203125e-05, "model_forward_time": 0.02721261978149414, "step": 10642 }, { "epoch": 1.62384033203125e-05, "step": 10642, "training_step_time": 0.11066079139709473 }, { "epoch": 1.623992919921875e-05, "model_forward_time": 0.02598118782043457, "step": 10643 }, { "epoch": 1.623992919921875e-05, "step": 10643, "training_step_time": 0.10713410377502441 }, { "epoch": 1.6241455078125e-05, "model_forward_time": 0.02494072914123535, "step": 10644 }, { "epoch": 1.6241455078125e-05, "step": 10644, "training_step_time": 0.10544276237487793 }, { "epoch": 1.624298095703125e-05, "model_forward_time": 0.025106191635131836, "step": 10645 }, { "epoch": 1.624298095703125e-05, "step": 10645, "training_step_time": 0.10795307159423828 }, { "epoch": 1.62445068359375e-05, "model_forward_time": 0.02528834342956543, "step": 10646 }, { "epoch": 1.62445068359375e-05, "step": 10646, "training_step_time": 0.1079854965209961 }, { "epoch": 1.624603271484375e-05, "model_forward_time": 0.02504444122314453, "step": 10647 }, { "epoch": 1.624603271484375e-05, "step": 10647, "training_step_time": 0.11431479454040527 }, { "epoch": 1.624755859375e-05, "model_forward_time": 0.024985551834106445, "step": 10648 }, { "epoch": 1.624755859375e-05, "step": 10648, "training_step_time": 0.10938596725463867 }, { "epoch": 1.624908447265625e-05, "model_forward_time": 0.024998903274536133, "step": 10649 }, { "epoch": 1.624908447265625e-05, "step": 10649, "training_step_time": 0.11159086227416992 }, { "epoch": 1.62506103515625e-05, "grad_norm": 0.47151684761047363, "learning_rate": 7.66515864363997e-05, "loss": 0.0332, "step": 10650 }, { "epoch": 1.62506103515625e-05, "model_forward_time": 0.026178359985351562, "step": 10650 }, { "epoch": 1.62506103515625e-05, "step": 10650, "training_step_time": 0.10872888565063477 }, { "epoch": 1.625213623046875e-05, "model_forward_time": 0.025221586227416992, "step": 10651 }, { "epoch": 1.625213623046875e-05, "step": 10651, "training_step_time": 0.10885477066040039 }, { "epoch": 1.6253662109375e-05, "model_forward_time": 0.025015592575073242, "step": 10652 }, { "epoch": 1.6253662109375e-05, "step": 10652, "training_step_time": 0.1336359977722168 }, { "epoch": 1.625518798828125e-05, "model_forward_time": 0.026723146438598633, "step": 10653 }, { "epoch": 1.625518798828125e-05, "step": 10653, "training_step_time": 0.10872745513916016 }, { "epoch": 1.62567138671875e-05, "model_forward_time": 0.025566577911376953, "step": 10654 }, { "epoch": 1.62567138671875e-05, "step": 10654, "training_step_time": 0.10971260070800781 }, { "epoch": 1.625823974609375e-05, "model_forward_time": 0.024880409240722656, "step": 10655 }, { "epoch": 1.625823974609375e-05, "step": 10655, "training_step_time": 0.16914129257202148 }, { "epoch": 1.6259765625e-05, "model_forward_time": 0.024487733840942383, "step": 10656 }, { "epoch": 1.6259765625e-05, "step": 10656, "training_step_time": 0.16909527778625488 }, { "epoch": 1.626129150390625e-05, "model_forward_time": 0.024396181106567383, "step": 10657 }, { "epoch": 1.626129150390625e-05, "step": 10657, "training_step_time": 0.2048330307006836 }, { "epoch": 1.62628173828125e-05, "model_forward_time": 0.02409982681274414, "step": 10658 }, { "epoch": 1.62628173828125e-05, "step": 10658, "training_step_time": 0.13302850723266602 }, { "epoch": 1.626434326171875e-05, "model_forward_time": 0.024069547653198242, "step": 10659 }, { "epoch": 1.626434326171875e-05, "step": 10659, "training_step_time": 0.10647320747375488 }, { "epoch": 1.6265869140625e-05, "grad_norm": 0.30007100105285645, "learning_rate": 7.660493715498969e-05, "loss": 0.0314, "step": 10660 }, { "epoch": 1.6265869140625e-05, "model_forward_time": 0.025798797607421875, "step": 10660 }, { "epoch": 1.6265869140625e-05, "step": 10660, "training_step_time": 0.11492276191711426 }, { "epoch": 1.626739501953125e-05, "model_forward_time": 0.02526259422302246, "step": 10661 }, { "epoch": 1.626739501953125e-05, "step": 10661, "training_step_time": 0.1086728572845459 }, { "epoch": 1.62689208984375e-05, "model_forward_time": 0.025377511978149414, "step": 10662 }, { "epoch": 1.62689208984375e-05, "step": 10662, "training_step_time": 0.15277576446533203 }, { "epoch": 1.627044677734375e-05, "model_forward_time": 0.0243985652923584, "step": 10663 }, { "epoch": 1.627044677734375e-05, "step": 10663, "training_step_time": 0.1591494083404541 }, { "epoch": 1.627197265625e-05, "model_forward_time": 0.024025440216064453, "step": 10664 }, { "epoch": 1.627197265625e-05, "step": 10664, "training_step_time": 0.11126995086669922 }, { "epoch": 1.627349853515625e-05, "model_forward_time": 0.02484726905822754, "step": 10665 }, { "epoch": 1.627349853515625e-05, "step": 10665, "training_step_time": 0.1302940845489502 }, { "epoch": 1.62750244140625e-05, "model_forward_time": 0.02515864372253418, "step": 10666 }, { "epoch": 1.62750244140625e-05, "step": 10666, "training_step_time": 0.17456936836242676 }, { "epoch": 1.627655029296875e-05, "model_forward_time": 0.02462172508239746, "step": 10667 }, { "epoch": 1.627655029296875e-05, "step": 10667, "training_step_time": 0.1685624122619629 }, { "epoch": 1.6278076171875e-05, "model_forward_time": 0.023932695388793945, "step": 10668 }, { "epoch": 1.6278076171875e-05, "step": 10668, "training_step_time": 0.1955556869506836 }, { "epoch": 1.627960205078125e-05, "model_forward_time": 0.025030851364135742, "step": 10669 }, { "epoch": 1.627960205078125e-05, "step": 10669, "training_step_time": 0.19170522689819336 }, { "epoch": 1.62811279296875e-05, "grad_norm": 0.7429282665252686, "learning_rate": 7.655825554607235e-05, "loss": 0.0322, "step": 10670 }, { "epoch": 1.62811279296875e-05, "model_forward_time": 0.024699687957763672, "step": 10670 }, { "epoch": 1.62811279296875e-05, "step": 10670, "training_step_time": 0.16203022003173828 }, { "epoch": 1.628265380859375e-05, "model_forward_time": 0.024254560470581055, "step": 10671 }, { "epoch": 1.628265380859375e-05, "step": 10671, "training_step_time": 0.20164752006530762 }, { "epoch": 1.62841796875e-05, "model_forward_time": 0.024671554565429688, "step": 10672 }, { "epoch": 1.62841796875e-05, "step": 10672, "training_step_time": 0.11528158187866211 }, { "epoch": 1.628570556640625e-05, "model_forward_time": 0.02441120147705078, "step": 10673 }, { "epoch": 1.628570556640625e-05, "step": 10673, "training_step_time": 0.1053001880645752 }, { "epoch": 1.62872314453125e-05, "model_forward_time": 0.024768590927124023, "step": 10674 }, { "epoch": 1.62872314453125e-05, "step": 10674, "training_step_time": 0.10788679122924805 }, { "epoch": 1.628875732421875e-05, "model_forward_time": 0.025109529495239258, "step": 10675 }, { "epoch": 1.628875732421875e-05, "step": 10675, "training_step_time": 0.10856246948242188 }, { "epoch": 1.6290283203125e-05, "model_forward_time": 0.025765419006347656, "step": 10676 }, { "epoch": 1.6290283203125e-05, "step": 10676, "training_step_time": 0.10743021965026855 }, { "epoch": 1.629180908203125e-05, "model_forward_time": 0.02504420280456543, "step": 10677 }, { "epoch": 1.629180908203125e-05, "step": 10677, "training_step_time": 0.11191773414611816 }, { "epoch": 1.62933349609375e-05, "model_forward_time": 0.02527332305908203, "step": 10678 }, { "epoch": 1.62933349609375e-05, "step": 10678, "training_step_time": 0.10640740394592285 }, { "epoch": 1.629486083984375e-05, "model_forward_time": 0.024402379989624023, "step": 10679 }, { "epoch": 1.629486083984375e-05, "step": 10679, "training_step_time": 0.106353759765625 }, { "epoch": 1.629638671875e-05, "grad_norm": 0.36857274174690247, "learning_rate": 7.651154166637025e-05, "loss": 0.016, "step": 10680 }, { "epoch": 1.629638671875e-05, "model_forward_time": 0.02486562728881836, "step": 10680 }, { "epoch": 1.629638671875e-05, "step": 10680, "training_step_time": 0.11055374145507812 }, { "epoch": 1.629791259765625e-05, "model_forward_time": 0.02511906623840332, "step": 10681 }, { "epoch": 1.629791259765625e-05, "step": 10681, "training_step_time": 0.1139230728149414 }, { "epoch": 1.62994384765625e-05, "model_forward_time": 0.025153398513793945, "step": 10682 }, { "epoch": 1.62994384765625e-05, "step": 10682, "training_step_time": 0.11853647232055664 }, { "epoch": 1.630096435546875e-05, "model_forward_time": 0.024991512298583984, "step": 10683 }, { "epoch": 1.630096435546875e-05, "step": 10683, "training_step_time": 0.11930465698242188 }, { "epoch": 1.6302490234375e-05, "model_forward_time": 0.025042295455932617, "step": 10684 }, { "epoch": 1.6302490234375e-05, "step": 10684, "training_step_time": 0.11550092697143555 }, { "epoch": 1.630401611328125e-05, "model_forward_time": 0.025124073028564453, "step": 10685 }, { "epoch": 1.630401611328125e-05, "step": 10685, "training_step_time": 0.11941242218017578 }, { "epoch": 1.63055419921875e-05, "model_forward_time": 0.025148630142211914, "step": 10686 }, { "epoch": 1.63055419921875e-05, "step": 10686, "training_step_time": 0.11960244178771973 }, { "epoch": 1.630706787109375e-05, "model_forward_time": 0.0247650146484375, "step": 10687 }, { "epoch": 1.630706787109375e-05, "step": 10687, "training_step_time": 0.11365675926208496 }, { "epoch": 1.630859375e-05, "model_forward_time": 0.025362730026245117, "step": 10688 }, { "epoch": 1.630859375e-05, "step": 10688, "training_step_time": 0.11823296546936035 }, { "epoch": 1.631011962890625e-05, "model_forward_time": 0.025182247161865234, "step": 10689 }, { "epoch": 1.631011962890625e-05, "step": 10689, "training_step_time": 0.10963582992553711 }, { "epoch": 1.63116455078125e-05, "grad_norm": 0.28537416458129883, "learning_rate": 7.646479557264513e-05, "loss": 0.0208, "step": 10690 }, { "epoch": 1.63116455078125e-05, "model_forward_time": 0.025135278701782227, "step": 10690 }, { "epoch": 1.63116455078125e-05, "step": 10690, "training_step_time": 0.10965394973754883 }, { "epoch": 1.631317138671875e-05, "model_forward_time": 0.0250093936920166, "step": 10691 }, { "epoch": 1.631317138671875e-05, "step": 10691, "training_step_time": 0.10721588134765625 }, { "epoch": 1.6314697265625e-05, "model_forward_time": 0.02524566650390625, "step": 10692 }, { "epoch": 1.6314697265625e-05, "step": 10692, "training_step_time": 0.1097269058227539 }, { "epoch": 1.631622314453125e-05, "model_forward_time": 0.025547266006469727, "step": 10693 }, { "epoch": 1.631622314453125e-05, "step": 10693, "training_step_time": 0.1092386245727539 }, { "epoch": 1.63177490234375e-05, "model_forward_time": 0.02541208267211914, "step": 10694 }, { "epoch": 1.63177490234375e-05, "step": 10694, "training_step_time": 0.10916924476623535 }, { "epoch": 1.631927490234375e-05, "model_forward_time": 0.02486562728881836, "step": 10695 }, { "epoch": 1.631927490234375e-05, "step": 10695, "training_step_time": 0.17072010040283203 }, { "epoch": 1.632080078125e-05, "model_forward_time": 0.024959802627563477, "step": 10696 }, { "epoch": 1.632080078125e-05, "step": 10696, "training_step_time": 0.16221308708190918 }, { "epoch": 1.632232666015625e-05, "model_forward_time": 0.02468395233154297, "step": 10697 }, { "epoch": 1.632232666015625e-05, "step": 10697, "training_step_time": 0.11621689796447754 }, { "epoch": 1.63238525390625e-05, "model_forward_time": 0.025160789489746094, "step": 10698 }, { "epoch": 1.63238525390625e-05, "step": 10698, "training_step_time": 0.21582889556884766 }, { "epoch": 1.632537841796875e-05, "model_forward_time": 0.025089263916015625, "step": 10699 }, { "epoch": 1.632537841796875e-05, "step": 10699, "training_step_time": 0.1169281005859375 }, { "epoch": 1.6326904296875e-05, "grad_norm": 0.44070756435394287, "learning_rate": 7.641801732169795e-05, "loss": 0.0454, "step": 10700 }, { "epoch": 1.6326904296875e-05, "model_forward_time": 0.02423858642578125, "step": 10700 }, { "epoch": 1.6326904296875e-05, "step": 10700, "training_step_time": 0.10693693161010742 }, { "epoch": 1.632843017578125e-05, "model_forward_time": 0.02545166015625, "step": 10701 }, { "epoch": 1.632843017578125e-05, "step": 10701, "training_step_time": 0.11317038536071777 }, { "epoch": 1.63299560546875e-05, "model_forward_time": 0.02544856071472168, "step": 10702 }, { "epoch": 1.63299560546875e-05, "step": 10702, "training_step_time": 0.13507747650146484 }, { "epoch": 1.633148193359375e-05, "model_forward_time": 0.024816274642944336, "step": 10703 }, { "epoch": 1.633148193359375e-05, "step": 10703, "training_step_time": 0.11315536499023438 }, { "epoch": 1.63330078125e-05, "model_forward_time": 0.025484800338745117, "step": 10704 }, { "epoch": 1.63330078125e-05, "step": 10704, "training_step_time": 0.11356902122497559 }, { "epoch": 1.633453369140625e-05, "model_forward_time": 0.025608539581298828, "step": 10705 }, { "epoch": 1.633453369140625e-05, "step": 10705, "training_step_time": 0.11051535606384277 }, { "epoch": 1.63360595703125e-05, "model_forward_time": 0.02506732940673828, "step": 10706 }, { "epoch": 1.63360595703125e-05, "step": 10706, "training_step_time": 0.16840004920959473 }, { "epoch": 1.633758544921875e-05, "model_forward_time": 0.02463531494140625, "step": 10707 }, { "epoch": 1.633758544921875e-05, "step": 10707, "training_step_time": 0.1437091827392578 }, { "epoch": 1.6339111328125e-05, "model_forward_time": 0.024309158325195312, "step": 10708 }, { "epoch": 1.6339111328125e-05, "step": 10708, "training_step_time": 0.10707712173461914 }, { "epoch": 1.634063720703125e-05, "model_forward_time": 0.026467323303222656, "step": 10709 }, { "epoch": 1.634063720703125e-05, "step": 10709, "training_step_time": 0.1662890911102295 }, { "epoch": 1.63421630859375e-05, "grad_norm": 0.3846602737903595, "learning_rate": 7.637120697036866e-05, "loss": 0.0293, "step": 10710 }, { "epoch": 1.63421630859375e-05, "model_forward_time": 0.02404642105102539, "step": 10710 }, { "epoch": 1.63421630859375e-05, "step": 10710, "training_step_time": 0.21455121040344238 }, { "epoch": 1.634368896484375e-05, "model_forward_time": 0.02450418472290039, "step": 10711 }, { "epoch": 1.634368896484375e-05, "step": 10711, "training_step_time": 0.11643743515014648 }, { "epoch": 1.634521484375e-05, "model_forward_time": 0.024669408798217773, "step": 10712 }, { "epoch": 1.634521484375e-05, "step": 10712, "training_step_time": 0.20532822608947754 }, { "epoch": 1.634674072265625e-05, "model_forward_time": 0.024477720260620117, "step": 10713 }, { "epoch": 1.634674072265625e-05, "step": 10713, "training_step_time": 0.11440753936767578 }, { "epoch": 1.63482666015625e-05, "model_forward_time": 0.024648189544677734, "step": 10714 }, { "epoch": 1.63482666015625e-05, "step": 10714, "training_step_time": 0.1765429973602295 }, { "epoch": 1.634979248046875e-05, "model_forward_time": 0.024798154830932617, "step": 10715 }, { "epoch": 1.634979248046875e-05, "step": 10715, "training_step_time": 0.1804804801940918 }, { "epoch": 1.6351318359375e-05, "model_forward_time": 0.024644851684570312, "step": 10716 }, { "epoch": 1.6351318359375e-05, "step": 10716, "training_step_time": 0.10927486419677734 }, { "epoch": 1.635284423828125e-05, "model_forward_time": 0.024281740188598633, "step": 10717 }, { "epoch": 1.635284423828125e-05, "step": 10717, "training_step_time": 0.11864185333251953 }, { "epoch": 1.63543701171875e-05, "model_forward_time": 0.02569866180419922, "step": 10718 }, { "epoch": 1.63543701171875e-05, "step": 10718, "training_step_time": 0.11031961441040039 }, { "epoch": 1.635589599609375e-05, "model_forward_time": 0.024944305419921875, "step": 10719 }, { "epoch": 1.635589599609375e-05, "step": 10719, "training_step_time": 0.10712981224060059 }, { "epoch": 1.6357421875e-05, "grad_norm": 0.3834470510482788, "learning_rate": 7.632436457553625e-05, "loss": 0.0274, "step": 10720 }, { "epoch": 1.6357421875e-05, "model_forward_time": 0.025395870208740234, "step": 10720 }, { "epoch": 1.6357421875e-05, "step": 10720, "training_step_time": 0.11194062232971191 }, { "epoch": 1.635894775390625e-05, "model_forward_time": 0.025382518768310547, "step": 10721 }, { "epoch": 1.635894775390625e-05, "step": 10721, "training_step_time": 0.10833311080932617 }, { "epoch": 1.63604736328125e-05, "model_forward_time": 0.02544879913330078, "step": 10722 }, { "epoch": 1.63604736328125e-05, "step": 10722, "training_step_time": 0.10913491249084473 }, { "epoch": 1.636199951171875e-05, "model_forward_time": 0.024900436401367188, "step": 10723 }, { "epoch": 1.636199951171875e-05, "step": 10723, "training_step_time": 0.11480069160461426 }, { "epoch": 1.6363525390625e-05, "model_forward_time": 0.02527451515197754, "step": 10724 }, { "epoch": 1.6363525390625e-05, "step": 10724, "training_step_time": 0.10774803161621094 }, { "epoch": 1.636505126953125e-05, "model_forward_time": 0.025036096572875977, "step": 10725 }, { "epoch": 1.636505126953125e-05, "step": 10725, "training_step_time": 0.10722041130065918 }, { "epoch": 1.63665771484375e-05, "model_forward_time": 0.024894237518310547, "step": 10726 }, { "epoch": 1.63665771484375e-05, "step": 10726, "training_step_time": 0.10756230354309082 }, { "epoch": 1.636810302734375e-05, "model_forward_time": 0.025210142135620117, "step": 10727 }, { "epoch": 1.636810302734375e-05, "step": 10727, "training_step_time": 0.10796713829040527 }, { "epoch": 1.636962890625e-05, "model_forward_time": 0.025009870529174805, "step": 10728 }, { "epoch": 1.636962890625e-05, "step": 10728, "training_step_time": 0.1083059310913086 }, { "epoch": 1.637115478515625e-05, "model_forward_time": 0.025752544403076172, "step": 10729 }, { "epoch": 1.637115478515625e-05, "step": 10729, "training_step_time": 0.10871720314025879 }, { "epoch": 1.63726806640625e-05, "grad_norm": 0.2945505678653717, "learning_rate": 7.627749019411866e-05, "loss": 0.0208, "step": 10730 }, { "epoch": 1.63726806640625e-05, "model_forward_time": 0.02495408058166504, "step": 10730 }, { "epoch": 1.63726806640625e-05, "step": 10730, "training_step_time": 0.10800051689147949 }, { "epoch": 1.637420654296875e-05, "model_forward_time": 0.024964094161987305, "step": 10731 }, { "epoch": 1.637420654296875e-05, "step": 10731, "training_step_time": 0.11511516571044922 }, { "epoch": 1.6375732421875e-05, "model_forward_time": 0.025507211685180664, "step": 10732 }, { "epoch": 1.6375732421875e-05, "step": 10732, "training_step_time": 0.1115114688873291 }, { "epoch": 1.637725830078125e-05, "model_forward_time": 0.025075912475585938, "step": 10733 }, { "epoch": 1.637725830078125e-05, "step": 10733, "training_step_time": 0.10691142082214355 }, { "epoch": 1.63787841796875e-05, "model_forward_time": 0.025284290313720703, "step": 10734 }, { "epoch": 1.63787841796875e-05, "step": 10734, "training_step_time": 0.1107625961303711 }, { "epoch": 1.638031005859375e-05, "model_forward_time": 0.02559661865234375, "step": 10735 }, { "epoch": 1.638031005859375e-05, "step": 10735, "training_step_time": 0.10758066177368164 }, { "epoch": 1.63818359375e-05, "model_forward_time": 0.025068283081054688, "step": 10736 }, { "epoch": 1.63818359375e-05, "step": 10736, "training_step_time": 0.10826849937438965 }, { "epoch": 1.638336181640625e-05, "model_forward_time": 0.02512335777282715, "step": 10737 }, { "epoch": 1.638336181640625e-05, "step": 10737, "training_step_time": 0.11080789566040039 }, { "epoch": 1.63848876953125e-05, "model_forward_time": 0.023865222930908203, "step": 10738 }, { "epoch": 1.63848876953125e-05, "step": 10738, "training_step_time": 0.10817384719848633 }, { "epoch": 1.638641357421875e-05, "model_forward_time": 0.02419757843017578, "step": 10739 }, { "epoch": 1.638641357421875e-05, "step": 10739, "training_step_time": 0.10743832588195801 }, { "epoch": 1.6387939453125e-05, "grad_norm": 0.3899030089378357, "learning_rate": 7.623058388307269e-05, "loss": 0.0208, "step": 10740 }, { "epoch": 1.6387939453125e-05, "model_forward_time": 0.02422332763671875, "step": 10740 }, { "epoch": 1.6387939453125e-05, "step": 10740, "training_step_time": 0.11203265190124512 }, { "epoch": 1.638946533203125e-05, "model_forward_time": 0.02547001838684082, "step": 10741 }, { "epoch": 1.638946533203125e-05, "step": 10741, "training_step_time": 0.11022305488586426 }, { "epoch": 1.63909912109375e-05, "model_forward_time": 0.025500774383544922, "step": 10742 }, { "epoch": 1.63909912109375e-05, "step": 10742, "training_step_time": 0.11261343955993652 }, { "epoch": 1.639251708984375e-05, "model_forward_time": 0.025368213653564453, "step": 10743 }, { "epoch": 1.639251708984375e-05, "step": 10743, "training_step_time": 0.11301708221435547 }, { "epoch": 1.639404296875e-05, "model_forward_time": 0.025048255920410156, "step": 10744 }, { "epoch": 1.639404296875e-05, "step": 10744, "training_step_time": 0.11323952674865723 }, { "epoch": 1.639556884765625e-05, "model_forward_time": 0.02573370933532715, "step": 10745 }, { "epoch": 1.639556884765625e-05, "step": 10745, "training_step_time": 0.20105409622192383 }, { "epoch": 1.63970947265625e-05, "model_forward_time": 0.024323463439941406, "step": 10746 }, { "epoch": 1.63970947265625e-05, "step": 10746, "training_step_time": 0.2028648853302002 }, { "epoch": 1.639862060546875e-05, "model_forward_time": 0.025032758712768555, "step": 10747 }, { "epoch": 1.639862060546875e-05, "step": 10747, "training_step_time": 0.13688373565673828 }, { "epoch": 1.6400146484375e-05, "model_forward_time": 0.02435779571533203, "step": 10748 }, { "epoch": 1.6400146484375e-05, "step": 10748, "training_step_time": 0.10837864875793457 }, { "epoch": 1.640167236328125e-05, "model_forward_time": 0.02544093132019043, "step": 10749 }, { "epoch": 1.640167236328125e-05, "step": 10749, "training_step_time": 0.11611580848693848 }, { "epoch": 1.64031982421875e-05, "grad_norm": 0.2776572108268738, "learning_rate": 7.618364569939391e-05, "loss": 0.0298, "step": 10750 }, { "epoch": 1.64031982421875e-05, "model_forward_time": 0.025522470474243164, "step": 10750 }, { "epoch": 1.64031982421875e-05, "step": 10750, "training_step_time": 0.10948634147644043 }, { "epoch": 1.640472412109375e-05, "model_forward_time": 0.02557969093322754, "step": 10751 }, { "epoch": 1.640472412109375e-05, "step": 10751, "training_step_time": 0.1764969825744629 }, { "epoch": 1.640625e-05, "model_forward_time": 0.02479696273803711, "step": 10752 }, { "epoch": 1.640625e-05, "step": 10752, "training_step_time": 0.14751124382019043 }, { "epoch": 1.640777587890625e-05, "model_forward_time": 0.02422475814819336, "step": 10753 }, { "epoch": 1.640777587890625e-05, "step": 10753, "training_step_time": 0.10578036308288574 }, { "epoch": 1.64093017578125e-05, "model_forward_time": 0.024740934371948242, "step": 10754 }, { "epoch": 1.64093017578125e-05, "step": 10754, "training_step_time": 0.15947341918945312 }, { "epoch": 1.641082763671875e-05, "model_forward_time": 0.024953126907348633, "step": 10755 }, { "epoch": 1.641082763671875e-05, "step": 10755, "training_step_time": 0.21271038055419922 }, { "epoch": 1.6412353515625e-05, "model_forward_time": 0.024477005004882812, "step": 10756 }, { "epoch": 1.6412353515625e-05, "step": 10756, "training_step_time": 0.12402510643005371 }, { "epoch": 1.641387939453125e-05, "model_forward_time": 0.02431488037109375, "step": 10757 }, { "epoch": 1.641387939453125e-05, "step": 10757, "training_step_time": 0.10337495803833008 }, { "epoch": 1.64154052734375e-05, "model_forward_time": 0.025301456451416016, "step": 10758 }, { "epoch": 1.64154052734375e-05, "step": 10758, "training_step_time": 0.13145756721496582 }, { "epoch": 1.641693115234375e-05, "model_forward_time": 0.02530956268310547, "step": 10759 }, { "epoch": 1.641693115234375e-05, "step": 10759, "training_step_time": 0.11580753326416016 }, { "epoch": 1.641845703125e-05, "grad_norm": 0.3233664333820343, "learning_rate": 7.613667570011663e-05, "loss": 0.0295, "step": 10760 }, { "epoch": 1.641845703125e-05, "model_forward_time": 0.025384902954101562, "step": 10760 }, { "epoch": 1.641845703125e-05, "step": 10760, "training_step_time": 0.21688437461853027 }, { "epoch": 1.641998291015625e-05, "model_forward_time": 0.024252653121948242, "step": 10761 }, { "epoch": 1.641998291015625e-05, "step": 10761, "training_step_time": 0.14340710639953613 }, { "epoch": 1.64215087890625e-05, "model_forward_time": 0.02436542510986328, "step": 10762 }, { "epoch": 1.64215087890625e-05, "step": 10762, "training_step_time": 0.11798572540283203 }, { "epoch": 1.642303466796875e-05, "model_forward_time": 0.024516820907592773, "step": 10763 }, { "epoch": 1.642303466796875e-05, "step": 10763, "training_step_time": 0.13036394119262695 }, { "epoch": 1.6424560546875e-05, "model_forward_time": 0.025243520736694336, "step": 10764 }, { "epoch": 1.6424560546875e-05, "step": 10764, "training_step_time": 0.12154674530029297 }, { "epoch": 1.642608642578125e-05, "model_forward_time": 0.02477884292602539, "step": 10765 }, { "epoch": 1.642608642578125e-05, "step": 10765, "training_step_time": 0.1132802963256836 }, { "epoch": 1.64276123046875e-05, "model_forward_time": 0.0254819393157959, "step": 10766 }, { "epoch": 1.64276123046875e-05, "step": 10766, "training_step_time": 0.11395001411437988 }, { "epoch": 1.642913818359375e-05, "model_forward_time": 0.025023221969604492, "step": 10767 }, { "epoch": 1.642913818359375e-05, "step": 10767, "training_step_time": 0.11502909660339355 }, { "epoch": 1.64306640625e-05, "model_forward_time": 0.025210142135620117, "step": 10768 }, { "epoch": 1.64306640625e-05, "step": 10768, "training_step_time": 0.12074041366577148 }, { "epoch": 1.643218994140625e-05, "model_forward_time": 0.02584528923034668, "step": 10769 }, { "epoch": 1.643218994140625e-05, "step": 10769, "training_step_time": 0.11638975143432617 }, { "epoch": 1.64337158203125e-05, "grad_norm": 0.3310029208660126, "learning_rate": 7.608967394231387e-05, "loss": 0.0302, "step": 10770 }, { "epoch": 1.64337158203125e-05, "model_forward_time": 0.025342464447021484, "step": 10770 }, { "epoch": 1.64337158203125e-05, "step": 10770, "training_step_time": 0.11458420753479004 }, { "epoch": 1.643524169921875e-05, "model_forward_time": 0.025568723678588867, "step": 10771 }, { "epoch": 1.643524169921875e-05, "step": 10771, "training_step_time": 0.11399269104003906 }, { "epoch": 1.6436767578125e-05, "model_forward_time": 0.02423858642578125, "step": 10772 }, { "epoch": 1.6436767578125e-05, "step": 10772, "training_step_time": 0.11171507835388184 }, { "epoch": 1.643829345703125e-05, "model_forward_time": 0.024680614471435547, "step": 10773 }, { "epoch": 1.643829345703125e-05, "step": 10773, "training_step_time": 0.11388969421386719 }, { "epoch": 1.64398193359375e-05, "model_forward_time": 0.025072574615478516, "step": 10774 }, { "epoch": 1.64398193359375e-05, "step": 10774, "training_step_time": 0.11184072494506836 }, { "epoch": 1.644134521484375e-05, "model_forward_time": 0.02514791488647461, "step": 10775 }, { "epoch": 1.644134521484375e-05, "step": 10775, "training_step_time": 0.1119232177734375 }, { "epoch": 1.644287109375e-05, "model_forward_time": 0.02554917335510254, "step": 10776 }, { "epoch": 1.644287109375e-05, "step": 10776, "training_step_time": 0.1116645336151123 }, { "epoch": 1.644439697265625e-05, "model_forward_time": 0.025325298309326172, "step": 10777 }, { "epoch": 1.644439697265625e-05, "step": 10777, "training_step_time": 0.10777425765991211 }, { "epoch": 1.64459228515625e-05, "model_forward_time": 0.025165319442749023, "step": 10778 }, { "epoch": 1.64459228515625e-05, "step": 10778, "training_step_time": 0.10851454734802246 }, { "epoch": 1.644744873046875e-05, "model_forward_time": 0.024199962615966797, "step": 10779 }, { "epoch": 1.644744873046875e-05, "step": 10779, "training_step_time": 0.10663700103759766 }, { "epoch": 1.6448974609375e-05, "grad_norm": 0.2997332513332367, "learning_rate": 7.604264048309717e-05, "loss": 0.0313, "step": 10780 }, { "epoch": 1.6448974609375e-05, "model_forward_time": 0.024306058883666992, "step": 10780 }, { "epoch": 1.6448974609375e-05, "step": 10780, "training_step_time": 0.10718274116516113 }, { "epoch": 1.645050048828125e-05, "model_forward_time": 0.025063514709472656, "step": 10781 }, { "epoch": 1.645050048828125e-05, "step": 10781, "training_step_time": 0.11020898818969727 }, { "epoch": 1.64520263671875e-05, "model_forward_time": 0.0253903865814209, "step": 10782 }, { "epoch": 1.64520263671875e-05, "step": 10782, "training_step_time": 0.10812878608703613 }, { "epoch": 1.645355224609375e-05, "model_forward_time": 0.024985551834106445, "step": 10783 }, { "epoch": 1.645355224609375e-05, "step": 10783, "training_step_time": 0.10933136940002441 }, { "epoch": 1.6455078125e-05, "model_forward_time": 0.025437593460083008, "step": 10784 }, { "epoch": 1.6455078125e-05, "step": 10784, "training_step_time": 0.11381244659423828 }, { "epoch": 1.645660400390625e-05, "model_forward_time": 0.025662899017333984, "step": 10785 }, { "epoch": 1.645660400390625e-05, "step": 10785, "training_step_time": 0.19509506225585938 }, { "epoch": 1.64581298828125e-05, "model_forward_time": 0.02461409568786621, "step": 10786 }, { "epoch": 1.64581298828125e-05, "step": 10786, "training_step_time": 0.10313916206359863 }, { "epoch": 1.645965576171875e-05, "model_forward_time": 0.024393081665039062, "step": 10787 }, { "epoch": 1.645965576171875e-05, "step": 10787, "training_step_time": 0.10772061347961426 }, { "epoch": 1.6461181640625e-05, "model_forward_time": 0.025321245193481445, "step": 10788 }, { "epoch": 1.6461181640625e-05, "step": 10788, "training_step_time": 0.13216710090637207 }, { "epoch": 1.646270751953125e-05, "model_forward_time": 0.025519371032714844, "step": 10789 }, { "epoch": 1.646270751953125e-05, "step": 10789, "training_step_time": 0.16803264617919922 }, { "epoch": 1.64642333984375e-05, "grad_norm": 0.4027278423309326, "learning_rate": 7.599557537961663e-05, "loss": 0.0252, "step": 10790 }, { "epoch": 1.64642333984375e-05, "model_forward_time": 0.024799823760986328, "step": 10790 }, { "epoch": 1.64642333984375e-05, "step": 10790, "training_step_time": 0.10465049743652344 }, { "epoch": 1.646575927734375e-05, "model_forward_time": 0.024894237518310547, "step": 10791 }, { "epoch": 1.646575927734375e-05, "step": 10791, "training_step_time": 0.2023172378540039 }, { "epoch": 1.646728515625e-05, "model_forward_time": 0.024867534637451172, "step": 10792 }, { "epoch": 1.646728515625e-05, "step": 10792, "training_step_time": 0.13690948486328125 }, { "epoch": 1.646881103515625e-05, "model_forward_time": 0.024422883987426758, "step": 10793 }, { "epoch": 1.646881103515625e-05, "step": 10793, "training_step_time": 0.11196303367614746 }, { "epoch": 1.64703369140625e-05, "model_forward_time": 0.02568674087524414, "step": 10794 }, { "epoch": 1.64703369140625e-05, "step": 10794, "training_step_time": 0.11054515838623047 }, { "epoch": 1.647186279296875e-05, "model_forward_time": 0.025094032287597656, "step": 10795 }, { "epoch": 1.647186279296875e-05, "step": 10795, "training_step_time": 0.1194157600402832 }, { "epoch": 1.6473388671875e-05, "model_forward_time": 0.02504563331604004, "step": 10796 }, { "epoch": 1.6473388671875e-05, "step": 10796, "training_step_time": 0.16044259071350098 }, { "epoch": 1.647491455078125e-05, "model_forward_time": 0.025817394256591797, "step": 10797 }, { "epoch": 1.647491455078125e-05, "step": 10797, "training_step_time": 0.14841985702514648 }, { "epoch": 1.64764404296875e-05, "model_forward_time": 0.02500176429748535, "step": 10798 }, { "epoch": 1.64764404296875e-05, "step": 10798, "training_step_time": 0.11089730262756348 }, { "epoch": 1.647796630859375e-05, "model_forward_time": 0.024941682815551758, "step": 10799 }, { "epoch": 1.647796630859375e-05, "step": 10799, "training_step_time": 0.14629697799682617 }, { "epoch": 1.64794921875e-05, "grad_norm": 0.45459648966789246, "learning_rate": 7.594847868906076e-05, "loss": 0.0331, "step": 10800 }, { "epoch": 1.64794921875e-05, "model_forward_time": 0.024891138076782227, "step": 10800 }, { "epoch": 1.64794921875e-05, "step": 10800, "training_step_time": 0.21927833557128906 }, { "epoch": 1.648101806640625e-05, "model_forward_time": 0.0246732234954834, "step": 10801 }, { "epoch": 1.648101806640625e-05, "step": 10801, "training_step_time": 0.10877418518066406 }, { "epoch": 1.64825439453125e-05, "model_forward_time": 0.024944067001342773, "step": 10802 }, { "epoch": 1.64825439453125e-05, "step": 10802, "training_step_time": 0.10377264022827148 }, { "epoch": 1.648406982421875e-05, "model_forward_time": 0.025401592254638672, "step": 10803 }, { "epoch": 1.648406982421875e-05, "step": 10803, "training_step_time": 0.15845608711242676 }, { "epoch": 1.6485595703125e-05, "model_forward_time": 0.02449345588684082, "step": 10804 }, { "epoch": 1.6485595703125e-05, "step": 10804, "training_step_time": 0.11086606979370117 }, { "epoch": 1.648712158203125e-05, "model_forward_time": 0.025146484375, "step": 10805 }, { "epoch": 1.648712158203125e-05, "step": 10805, "training_step_time": 0.11435437202453613 }, { "epoch": 1.64886474609375e-05, "model_forward_time": 0.0251920223236084, "step": 10806 }, { "epoch": 1.64886474609375e-05, "step": 10806, "training_step_time": 0.12303376197814941 }, { "epoch": 1.649017333984375e-05, "model_forward_time": 0.025351285934448242, "step": 10807 }, { "epoch": 1.649017333984375e-05, "step": 10807, "training_step_time": 0.12134909629821777 }, { "epoch": 1.649169921875e-05, "model_forward_time": 0.025442123413085938, "step": 10808 }, { "epoch": 1.649169921875e-05, "step": 10808, "training_step_time": 0.11273550987243652 }, { "epoch": 1.649322509765625e-05, "model_forward_time": 0.02541947364807129, "step": 10809 }, { "epoch": 1.649322509765625e-05, "step": 10809, "training_step_time": 0.11706209182739258 }, { "epoch": 1.64947509765625e-05, "grad_norm": 0.363253653049469, "learning_rate": 7.590135046865651e-05, "loss": 0.0226, "step": 10810 }, { "epoch": 1.64947509765625e-05, "model_forward_time": 0.025206804275512695, "step": 10810 }, { "epoch": 1.64947509765625e-05, "step": 10810, "training_step_time": 0.1110086441040039 }, { "epoch": 1.649627685546875e-05, "model_forward_time": 0.02468729019165039, "step": 10811 }, { "epoch": 1.649627685546875e-05, "step": 10811, "training_step_time": 0.10796141624450684 }, { "epoch": 1.6497802734375e-05, "model_forward_time": 0.02520155906677246, "step": 10812 }, { "epoch": 1.6497802734375e-05, "step": 10812, "training_step_time": 0.11082959175109863 }, { "epoch": 1.649932861328125e-05, "model_forward_time": 0.026639223098754883, "step": 10813 }, { "epoch": 1.649932861328125e-05, "step": 10813, "training_step_time": 0.1131441593170166 }, { "epoch": 1.65008544921875e-05, "model_forward_time": 0.025516033172607422, "step": 10814 }, { "epoch": 1.65008544921875e-05, "step": 10814, "training_step_time": 0.10660457611083984 }, { "epoch": 1.650238037109375e-05, "model_forward_time": 0.02548050880432129, "step": 10815 }, { "epoch": 1.650238037109375e-05, "step": 10815, "training_step_time": 0.14336204528808594 }, { "epoch": 1.650390625e-05, "model_forward_time": 0.024364709854125977, "step": 10816 }, { "epoch": 1.650390625e-05, "step": 10816, "training_step_time": 0.15810155868530273 }, { "epoch": 1.650543212890625e-05, "model_forward_time": 0.0235898494720459, "step": 10817 }, { "epoch": 1.650543212890625e-05, "step": 10817, "training_step_time": 0.14775395393371582 }, { "epoch": 1.65069580078125e-05, "model_forward_time": 0.023796796798706055, "step": 10818 }, { "epoch": 1.65069580078125e-05, "step": 10818, "training_step_time": 0.15061497688293457 }, { "epoch": 1.650848388671875e-05, "model_forward_time": 0.025929689407348633, "step": 10819 }, { "epoch": 1.650848388671875e-05, "step": 10819, "training_step_time": 0.13357257843017578 }, { "epoch": 1.6510009765625e-05, "grad_norm": 0.4513895511627197, "learning_rate": 7.585419077566912e-05, "loss": 0.0287, "step": 10820 }, { "epoch": 1.6510009765625e-05, "model_forward_time": 0.02393484115600586, "step": 10820 }, { "epoch": 1.6510009765625e-05, "step": 10820, "training_step_time": 0.12855839729309082 }, { "epoch": 1.651153564453125e-05, "model_forward_time": 0.02543330192565918, "step": 10821 }, { "epoch": 1.651153564453125e-05, "step": 10821, "training_step_time": 0.1230313777923584 }, { "epoch": 1.65130615234375e-05, "model_forward_time": 0.0242764949798584, "step": 10822 }, { "epoch": 1.65130615234375e-05, "step": 10822, "training_step_time": 0.1207890510559082 }, { "epoch": 1.651458740234375e-05, "model_forward_time": 0.024845600128173828, "step": 10823 }, { "epoch": 1.651458740234375e-05, "step": 10823, "training_step_time": 0.1168060302734375 }, { "epoch": 1.651611328125e-05, "model_forward_time": 0.024271249771118164, "step": 10824 }, { "epoch": 1.651611328125e-05, "step": 10824, "training_step_time": 0.11795210838317871 }, { "epoch": 1.651763916015625e-05, "model_forward_time": 0.024193525314331055, "step": 10825 }, { "epoch": 1.651763916015625e-05, "step": 10825, "training_step_time": 0.11490988731384277 }, { "epoch": 1.65191650390625e-05, "model_forward_time": 0.025233745574951172, "step": 10826 }, { "epoch": 1.65191650390625e-05, "step": 10826, "training_step_time": 0.10995221138000488 }, { "epoch": 1.652069091796875e-05, "model_forward_time": 0.025481462478637695, "step": 10827 }, { "epoch": 1.652069091796875e-05, "step": 10827, "training_step_time": 0.11040210723876953 }, { "epoch": 1.6522216796875e-05, "model_forward_time": 0.02597355842590332, "step": 10828 }, { "epoch": 1.6522216796875e-05, "step": 10828, "training_step_time": 0.10800671577453613 }, { "epoch": 1.652374267578125e-05, "model_forward_time": 0.02449488639831543, "step": 10829 }, { "epoch": 1.652374267578125e-05, "step": 10829, "training_step_time": 0.14438891410827637 }, { "epoch": 1.65252685546875e-05, "grad_norm": 0.2281569391489029, "learning_rate": 7.580699966740201e-05, "loss": 0.027, "step": 10830 }, { "epoch": 1.65252685546875e-05, "model_forward_time": 0.025112628936767578, "step": 10830 }, { "epoch": 1.65252685546875e-05, "step": 10830, "training_step_time": 0.16704463958740234 }, { "epoch": 1.652679443359375e-05, "model_forward_time": 0.02467489242553711, "step": 10831 }, { "epoch": 1.652679443359375e-05, "step": 10831, "training_step_time": 0.11936545372009277 }, { "epoch": 1.65283203125e-05, "model_forward_time": 0.024743080139160156, "step": 10832 }, { "epoch": 1.65283203125e-05, "step": 10832, "training_step_time": 0.13126468658447266 }, { "epoch": 1.652984619140625e-05, "model_forward_time": 0.025455236434936523, "step": 10833 }, { "epoch": 1.652984619140625e-05, "step": 10833, "training_step_time": 0.20081734657287598 }, { "epoch": 1.65313720703125e-05, "model_forward_time": 0.02518320083618164, "step": 10834 }, { "epoch": 1.65313720703125e-05, "step": 10834, "training_step_time": 0.18924999237060547 }, { "epoch": 1.653289794921875e-05, "model_forward_time": 0.02413153648376465, "step": 10835 }, { "epoch": 1.653289794921875e-05, "step": 10835, "training_step_time": 0.1416623592376709 }, { "epoch": 1.6534423828125e-05, "model_forward_time": 0.02464127540588379, "step": 10836 }, { "epoch": 1.6534423828125e-05, "step": 10836, "training_step_time": 0.10721182823181152 }, { "epoch": 1.653594970703125e-05, "model_forward_time": 0.025005817413330078, "step": 10837 }, { "epoch": 1.653594970703125e-05, "step": 10837, "training_step_time": 0.11602282524108887 }, { "epoch": 1.65374755859375e-05, "model_forward_time": 0.024360179901123047, "step": 10838 }, { "epoch": 1.65374755859375e-05, "step": 10838, "training_step_time": 0.11002659797668457 }, { "epoch": 1.653900146484375e-05, "model_forward_time": 0.025038480758666992, "step": 10839 }, { "epoch": 1.653900146484375e-05, "step": 10839, "training_step_time": 0.16441631317138672 }, { "epoch": 1.654052734375e-05, "grad_norm": 0.31513896584510803, "learning_rate": 7.57597772011969e-05, "loss": 0.0347, "step": 10840 }, { "epoch": 1.654052734375e-05, "model_forward_time": 0.02461838722229004, "step": 10840 }, { "epoch": 1.654052734375e-05, "step": 10840, "training_step_time": 0.1494278907775879 }, { "epoch": 1.654205322265625e-05, "model_forward_time": 0.024490833282470703, "step": 10841 }, { "epoch": 1.654205322265625e-05, "step": 10841, "training_step_time": 0.10837817192077637 }, { "epoch": 1.65435791015625e-05, "model_forward_time": 0.024872779846191406, "step": 10842 }, { "epoch": 1.65435791015625e-05, "step": 10842, "training_step_time": 0.1570901870727539 }, { "epoch": 1.654510498046875e-05, "model_forward_time": 0.024687528610229492, "step": 10843 }, { "epoch": 1.654510498046875e-05, "step": 10843, "training_step_time": 0.22075152397155762 }, { "epoch": 1.6546630859375e-05, "model_forward_time": 0.02440047264099121, "step": 10844 }, { "epoch": 1.6546630859375e-05, "step": 10844, "training_step_time": 0.10788726806640625 }, { "epoch": 1.654815673828125e-05, "model_forward_time": 0.024966955184936523, "step": 10845 }, { "epoch": 1.654815673828125e-05, "step": 10845, "training_step_time": 0.10461187362670898 }, { "epoch": 1.65496826171875e-05, "model_forward_time": 0.025684595108032227, "step": 10846 }, { "epoch": 1.65496826171875e-05, "step": 10846, "training_step_time": 0.20926189422607422 }, { "epoch": 1.655120849609375e-05, "model_forward_time": 0.024487733840942383, "step": 10847 }, { "epoch": 1.655120849609375e-05, "step": 10847, "training_step_time": 0.10999631881713867 }, { "epoch": 1.6552734375e-05, "model_forward_time": 0.02434682846069336, "step": 10848 }, { "epoch": 1.6552734375e-05, "step": 10848, "training_step_time": 0.11000919342041016 }, { "epoch": 1.655426025390625e-05, "model_forward_time": 0.025496482849121094, "step": 10849 }, { "epoch": 1.655426025390625e-05, "step": 10849, "training_step_time": 0.12700653076171875 }, { "epoch": 1.65557861328125e-05, "grad_norm": 0.3954463303089142, "learning_rate": 7.571252343443349e-05, "loss": 0.0257, "step": 10850 }, { "epoch": 1.65557861328125e-05, "model_forward_time": 0.02556324005126953, "step": 10850 }, { "epoch": 1.65557861328125e-05, "step": 10850, "training_step_time": 0.12648272514343262 }, { "epoch": 1.655731201171875e-05, "model_forward_time": 0.025029897689819336, "step": 10851 }, { "epoch": 1.655731201171875e-05, "step": 10851, "training_step_time": 0.11715555191040039 }, { "epoch": 1.6558837890625e-05, "model_forward_time": 0.025183439254760742, "step": 10852 }, { "epoch": 1.6558837890625e-05, "step": 10852, "training_step_time": 0.11367082595825195 }, { "epoch": 1.656036376953125e-05, "model_forward_time": 0.02546381950378418, "step": 10853 }, { "epoch": 1.656036376953125e-05, "step": 10853, "training_step_time": 0.10840892791748047 }, { "epoch": 1.65618896484375e-05, "model_forward_time": 0.025184154510498047, "step": 10854 }, { "epoch": 1.65618896484375e-05, "step": 10854, "training_step_time": 0.10849332809448242 }, { "epoch": 1.656341552734375e-05, "model_forward_time": 0.025542497634887695, "step": 10855 }, { "epoch": 1.656341552734375e-05, "step": 10855, "training_step_time": 0.10872411727905273 }, { "epoch": 1.656494140625e-05, "model_forward_time": 0.025752782821655273, "step": 10856 }, { "epoch": 1.656494140625e-05, "step": 10856, "training_step_time": 0.10850095748901367 }, { "epoch": 1.656646728515625e-05, "model_forward_time": 0.02546858787536621, "step": 10857 }, { "epoch": 1.656646728515625e-05, "step": 10857, "training_step_time": 0.1070394515991211 }, { "epoch": 1.65679931640625e-05, "model_forward_time": 0.025093555450439453, "step": 10858 }, { "epoch": 1.65679931640625e-05, "step": 10858, "training_step_time": 0.10788989067077637 }, { "epoch": 1.656951904296875e-05, "model_forward_time": 0.025656461715698242, "step": 10859 }, { "epoch": 1.656951904296875e-05, "step": 10859, "training_step_time": 0.10597538948059082 }, { "epoch": 1.6571044921875e-05, "grad_norm": 0.3234105110168457, "learning_rate": 7.566523842452958e-05, "loss": 0.02, "step": 10860 }, { "epoch": 1.6571044921875e-05, "model_forward_time": 0.025614500045776367, "step": 10860 }, { "epoch": 1.6571044921875e-05, "step": 10860, "training_step_time": 0.10815238952636719 }, { "epoch": 1.657257080078125e-05, "model_forward_time": 0.02545762062072754, "step": 10861 }, { "epoch": 1.657257080078125e-05, "step": 10861, "training_step_time": 0.10886907577514648 }, { "epoch": 1.65740966796875e-05, "model_forward_time": 0.025498628616333008, "step": 10862 }, { "epoch": 1.65740966796875e-05, "step": 10862, "training_step_time": 0.11038327217102051 }, { "epoch": 1.657562255859375e-05, "model_forward_time": 0.026415586471557617, "step": 10863 }, { "epoch": 1.657562255859375e-05, "step": 10863, "training_step_time": 0.11261582374572754 }, { "epoch": 1.65771484375e-05, "model_forward_time": 0.025345802307128906, "step": 10864 }, { "epoch": 1.65771484375e-05, "step": 10864, "training_step_time": 0.11078405380249023 }, { "epoch": 1.657867431640625e-05, "model_forward_time": 0.025328636169433594, "step": 10865 }, { "epoch": 1.657867431640625e-05, "step": 10865, "training_step_time": 0.10863113403320312 }, { "epoch": 1.65802001953125e-05, "model_forward_time": 0.02506232261657715, "step": 10866 }, { "epoch": 1.65802001953125e-05, "step": 10866, "training_step_time": 0.10624146461486816 }, { "epoch": 1.658172607421875e-05, "model_forward_time": 0.02526068687438965, "step": 10867 }, { "epoch": 1.658172607421875e-05, "step": 10867, "training_step_time": 0.10612916946411133 }, { "epoch": 1.6583251953125e-05, "model_forward_time": 0.025470972061157227, "step": 10868 }, { "epoch": 1.6583251953125e-05, "step": 10868, "training_step_time": 0.10814714431762695 }, { "epoch": 1.658477783203125e-05, "model_forward_time": 0.025341033935546875, "step": 10869 }, { "epoch": 1.658477783203125e-05, "step": 10869, "training_step_time": 0.10621404647827148 }, { "epoch": 1.65863037109375e-05, "grad_norm": 0.34885501861572266, "learning_rate": 7.561792222894091e-05, "loss": 0.0254, "step": 10870 }, { "epoch": 1.65863037109375e-05, "model_forward_time": 0.02532505989074707, "step": 10870 }, { "epoch": 1.65863037109375e-05, "step": 10870, "training_step_time": 0.10751581192016602 }, { "epoch": 1.658782958984375e-05, "model_forward_time": 0.02515435218811035, "step": 10871 }, { "epoch": 1.658782958984375e-05, "step": 10871, "training_step_time": 0.1110072135925293 }, { "epoch": 1.658935546875e-05, "model_forward_time": 0.02512812614440918, "step": 10872 }, { "epoch": 1.658935546875e-05, "step": 10872, "training_step_time": 0.10905790328979492 }, { "epoch": 1.659088134765625e-05, "model_forward_time": 0.025911331176757812, "step": 10873 }, { "epoch": 1.659088134765625e-05, "step": 10873, "training_step_time": 0.10709404945373535 }, { "epoch": 1.65924072265625e-05, "model_forward_time": 0.027601957321166992, "step": 10874 }, { "epoch": 1.65924072265625e-05, "step": 10874, "training_step_time": 0.14226984977722168 }, { "epoch": 1.659393310546875e-05, "model_forward_time": 0.024666309356689453, "step": 10875 }, { "epoch": 1.659393310546875e-05, "step": 10875, "training_step_time": 0.14530253410339355 }, { "epoch": 1.6595458984375e-05, "model_forward_time": 0.0247344970703125, "step": 10876 }, { "epoch": 1.6595458984375e-05, "step": 10876, "training_step_time": 0.1425631046295166 }, { "epoch": 1.659698486328125e-05, "model_forward_time": 0.02555108070373535, "step": 10877 }, { "epoch": 1.659698486328125e-05, "step": 10877, "training_step_time": 0.1271219253540039 }, { "epoch": 1.65985107421875e-05, "model_forward_time": 0.02698659896850586, "step": 10878 }, { "epoch": 1.65985107421875e-05, "step": 10878, "training_step_time": 0.19634532928466797 }, { "epoch": 1.660003662109375e-05, "model_forward_time": 0.024924039840698242, "step": 10879 }, { "epoch": 1.660003662109375e-05, "step": 10879, "training_step_time": 0.19521760940551758 }, { "epoch": 1.66015625e-05, "grad_norm": 0.2563628852367401, "learning_rate": 7.557057490516111e-05, "loss": 0.0265, "step": 10880 }, { "epoch": 1.66015625e-05, "model_forward_time": 0.02491474151611328, "step": 10880 }, { "epoch": 1.66015625e-05, "step": 10880, "training_step_time": 0.13405990600585938 }, { "epoch": 1.660308837890625e-05, "model_forward_time": 0.02359795570373535, "step": 10881 }, { "epoch": 1.660308837890625e-05, "step": 10881, "training_step_time": 0.1072690486907959 }, { "epoch": 1.66046142578125e-05, "model_forward_time": 0.025513887405395508, "step": 10882 }, { "epoch": 1.66046142578125e-05, "step": 10882, "training_step_time": 0.12897253036499023 }, { "epoch": 1.660614013671875e-05, "model_forward_time": 0.025318384170532227, "step": 10883 }, { "epoch": 1.660614013671875e-05, "step": 10883, "training_step_time": 0.10889315605163574 }, { "epoch": 1.6607666015625e-05, "model_forward_time": 0.025252103805541992, "step": 10884 }, { "epoch": 1.6607666015625e-05, "step": 10884, "training_step_time": 0.1417233943939209 }, { "epoch": 1.660919189453125e-05, "model_forward_time": 0.025228500366210938, "step": 10885 }, { "epoch": 1.660919189453125e-05, "step": 10885, "training_step_time": 0.19792795181274414 }, { "epoch": 1.66107177734375e-05, "model_forward_time": 0.024687528610229492, "step": 10886 }, { "epoch": 1.66107177734375e-05, "step": 10886, "training_step_time": 0.14832305908203125 }, { "epoch": 1.661224365234375e-05, "model_forward_time": 0.024615049362182617, "step": 10887 }, { "epoch": 1.661224365234375e-05, "step": 10887, "training_step_time": 0.18960833549499512 }, { "epoch": 1.661376953125e-05, "model_forward_time": 0.024505138397216797, "step": 10888 }, { "epoch": 1.661376953125e-05, "step": 10888, "training_step_time": 0.10966348648071289 }, { "epoch": 1.661529541015625e-05, "model_forward_time": 0.02445054054260254, "step": 10889 }, { "epoch": 1.661529541015625e-05, "step": 10889, "training_step_time": 0.10359764099121094 }, { "epoch": 1.66168212890625e-05, "grad_norm": 0.44846391677856445, "learning_rate": 7.552319651072164e-05, "loss": 0.0276, "step": 10890 }, { "epoch": 1.66168212890625e-05, "model_forward_time": 0.02598285675048828, "step": 10890 }, { "epoch": 1.66168212890625e-05, "step": 10890, "training_step_time": 0.10748100280761719 }, { "epoch": 1.661834716796875e-05, "model_forward_time": 0.02523064613342285, "step": 10891 }, { "epoch": 1.661834716796875e-05, "step": 10891, "training_step_time": 0.1123502254486084 }, { "epoch": 1.6619873046875e-05, "model_forward_time": 0.02543020248413086, "step": 10892 }, { "epoch": 1.6619873046875e-05, "step": 10892, "training_step_time": 0.1179811954498291 }, { "epoch": 1.662139892578125e-05, "model_forward_time": 0.025351285934448242, "step": 10893 }, { "epoch": 1.662139892578125e-05, "step": 10893, "training_step_time": 0.11029338836669922 }, { "epoch": 1.66229248046875e-05, "model_forward_time": 0.025508880615234375, "step": 10894 }, { "epoch": 1.66229248046875e-05, "step": 10894, "training_step_time": 0.11455106735229492 }, { "epoch": 1.662445068359375e-05, "model_forward_time": 0.025147676467895508, "step": 10895 }, { "epoch": 1.662445068359375e-05, "step": 10895, "training_step_time": 0.11470556259155273 }, { "epoch": 1.66259765625e-05, "model_forward_time": 0.025081872940063477, "step": 10896 }, { "epoch": 1.66259765625e-05, "step": 10896, "training_step_time": 0.13151311874389648 }, { "epoch": 1.662750244140625e-05, "model_forward_time": 0.024922847747802734, "step": 10897 }, { "epoch": 1.662750244140625e-05, "step": 10897, "training_step_time": 0.12404012680053711 }, { "epoch": 1.66290283203125e-05, "model_forward_time": 0.025073528289794922, "step": 10898 }, { "epoch": 1.66290283203125e-05, "step": 10898, "training_step_time": 0.11920404434204102 }, { "epoch": 1.663055419921875e-05, "model_forward_time": 0.027373790740966797, "step": 10899 }, { "epoch": 1.663055419921875e-05, "step": 10899, "training_step_time": 0.14691615104675293 }, { "epoch": 1.6632080078125e-05, "grad_norm": 0.30816227197647095, "learning_rate": 7.547578710319174e-05, "loss": 0.0272, "step": 10900 }, { "epoch": 1.6632080078125e-05, "model_forward_time": 0.02491593360900879, "step": 10900 }, { "epoch": 1.6632080078125e-05, "step": 10900, "training_step_time": 0.10656142234802246 }, { "epoch": 1.663360595703125e-05, "model_forward_time": 0.0250089168548584, "step": 10901 }, { "epoch": 1.663360595703125e-05, "step": 10901, "training_step_time": 0.10392212867736816 }, { "epoch": 1.66351318359375e-05, "model_forward_time": 0.024938344955444336, "step": 10902 }, { "epoch": 1.66351318359375e-05, "step": 10902, "training_step_time": 0.10459542274475098 }, { "epoch": 1.663665771484375e-05, "model_forward_time": 0.02556610107421875, "step": 10903 }, { "epoch": 1.663665771484375e-05, "step": 10903, "training_step_time": 0.10699081420898438 }, { "epoch": 1.663818359375e-05, "model_forward_time": 0.02520155906677246, "step": 10904 }, { "epoch": 1.663818359375e-05, "step": 10904, "training_step_time": 0.10598516464233398 }, { "epoch": 1.663970947265625e-05, "model_forward_time": 0.025541305541992188, "step": 10905 }, { "epoch": 1.663970947265625e-05, "step": 10905, "training_step_time": 0.10612368583679199 }, { "epoch": 1.66412353515625e-05, "model_forward_time": 0.025435447692871094, "step": 10906 }, { "epoch": 1.66412353515625e-05, "step": 10906, "training_step_time": 0.10801315307617188 }, { "epoch": 1.664276123046875e-05, "model_forward_time": 0.025251388549804688, "step": 10907 }, { "epoch": 1.664276123046875e-05, "step": 10907, "training_step_time": 0.11206316947937012 }, { "epoch": 1.6644287109375e-05, "model_forward_time": 0.025639057159423828, "step": 10908 }, { "epoch": 1.6644287109375e-05, "step": 10908, "training_step_time": 0.1545724868774414 }, { "epoch": 1.664581298828125e-05, "model_forward_time": 0.024622440338134766, "step": 10909 }, { "epoch": 1.664581298828125e-05, "step": 10909, "training_step_time": 0.1860671043395996 }, { "epoch": 1.66473388671875e-05, "grad_norm": 0.2433345466852188, "learning_rate": 7.542834674017831e-05, "loss": 0.0216, "step": 10910 }, { "epoch": 1.66473388671875e-05, "model_forward_time": 0.024281978607177734, "step": 10910 }, { "epoch": 1.66473388671875e-05, "step": 10910, "training_step_time": 0.18406915664672852 }, { "epoch": 1.664886474609375e-05, "model_forward_time": 0.024146080017089844, "step": 10911 }, { "epoch": 1.664886474609375e-05, "step": 10911, "training_step_time": 0.17139720916748047 }, { "epoch": 1.6650390625e-05, "model_forward_time": 0.024176597595214844, "step": 10912 }, { "epoch": 1.6650390625e-05, "step": 10912, "training_step_time": 0.17169570922851562 }, { "epoch": 1.665191650390625e-05, "model_forward_time": 0.027772188186645508, "step": 10913 }, { "epoch": 1.665191650390625e-05, "step": 10913, "training_step_time": 0.15646743774414062 }, { "epoch": 1.66534423828125e-05, "model_forward_time": 0.024075746536254883, "step": 10914 }, { "epoch": 1.66534423828125e-05, "step": 10914, "training_step_time": 0.13920235633850098 }, { "epoch": 1.665496826171875e-05, "model_forward_time": 0.025835752487182617, "step": 10915 }, { "epoch": 1.665496826171875e-05, "step": 10915, "training_step_time": 0.11023092269897461 }, { "epoch": 1.6656494140625e-05, "model_forward_time": 0.024771928787231445, "step": 10916 }, { "epoch": 1.6656494140625e-05, "step": 10916, "training_step_time": 0.1279587745666504 }, { "epoch": 1.665802001953125e-05, "model_forward_time": 0.024875164031982422, "step": 10917 }, { "epoch": 1.665802001953125e-05, "step": 10917, "training_step_time": 0.12012791633605957 }, { "epoch": 1.66595458984375e-05, "model_forward_time": 0.02517223358154297, "step": 10918 }, { "epoch": 1.66595458984375e-05, "step": 10918, "training_step_time": 0.11974024772644043 }, { "epoch": 1.666107177734375e-05, "model_forward_time": 0.02620220184326172, "step": 10919 }, { "epoch": 1.666107177734375e-05, "step": 10919, "training_step_time": 0.11363601684570312 }, { "epoch": 1.666259765625e-05, "grad_norm": 0.3204241693019867, "learning_rate": 7.538087547932585e-05, "loss": 0.0212, "step": 10920 }, { "epoch": 1.666259765625e-05, "model_forward_time": 0.025401592254638672, "step": 10920 }, { "epoch": 1.666259765625e-05, "step": 10920, "training_step_time": 0.21927952766418457 }, { "epoch": 1.666412353515625e-05, "model_forward_time": 0.024571657180786133, "step": 10921 }, { "epoch": 1.666412353515625e-05, "step": 10921, "training_step_time": 0.12032485008239746 }, { "epoch": 1.66656494140625e-05, "model_forward_time": 0.02520442008972168, "step": 10922 }, { "epoch": 1.66656494140625e-05, "step": 10922, "training_step_time": 0.12459945678710938 }, { "epoch": 1.666717529296875e-05, "model_forward_time": 0.025325536727905273, "step": 10923 }, { "epoch": 1.666717529296875e-05, "step": 10923, "training_step_time": 0.14423108100891113 }, { "epoch": 1.6668701171875e-05, "model_forward_time": 0.024826765060424805, "step": 10924 }, { "epoch": 1.6668701171875e-05, "step": 10924, "training_step_time": 0.1121985912322998 }, { "epoch": 1.667022705078125e-05, "model_forward_time": 0.024720191955566406, "step": 10925 }, { "epoch": 1.667022705078125e-05, "step": 10925, "training_step_time": 0.10940265655517578 }, { "epoch": 1.66717529296875e-05, "model_forward_time": 0.02522587776184082, "step": 10926 }, { "epoch": 1.66717529296875e-05, "step": 10926, "training_step_time": 0.13471460342407227 }, { "epoch": 1.667327880859375e-05, "model_forward_time": 0.025567293167114258, "step": 10927 }, { "epoch": 1.667327880859375e-05, "step": 10927, "training_step_time": 0.17352747917175293 }, { "epoch": 1.66748046875e-05, "model_forward_time": 0.02487659454345703, "step": 10928 }, { "epoch": 1.66748046875e-05, "step": 10928, "training_step_time": 0.16414666175842285 }, { "epoch": 1.667633056640625e-05, "model_forward_time": 0.025097131729125977, "step": 10929 }, { "epoch": 1.667633056640625e-05, "step": 10929, "training_step_time": 0.1678166389465332 }, { "epoch": 1.66778564453125e-05, "grad_norm": 0.34175485372543335, "learning_rate": 7.533337337831642e-05, "loss": 0.0334, "step": 10930 }, { "epoch": 1.66778564453125e-05, "model_forward_time": 0.02432417869567871, "step": 10930 }, { "epoch": 1.66778564453125e-05, "step": 10930, "training_step_time": 0.1229853630065918 }, { "epoch": 1.667938232421875e-05, "model_forward_time": 0.02440500259399414, "step": 10931 }, { "epoch": 1.667938232421875e-05, "step": 10931, "training_step_time": 0.11261940002441406 }, { "epoch": 1.6680908203125e-05, "model_forward_time": 0.025114059448242188, "step": 10932 }, { "epoch": 1.6680908203125e-05, "step": 10932, "training_step_time": 0.1064760684967041 }, { "epoch": 1.668243408203125e-05, "model_forward_time": 0.02503371238708496, "step": 10933 }, { "epoch": 1.668243408203125e-05, "step": 10933, "training_step_time": 0.11040306091308594 }, { "epoch": 1.66839599609375e-05, "model_forward_time": 0.02521824836730957, "step": 10934 }, { "epoch": 1.66839599609375e-05, "step": 10934, "training_step_time": 0.10927605628967285 }, { "epoch": 1.668548583984375e-05, "model_forward_time": 0.025493860244750977, "step": 10935 }, { "epoch": 1.668548583984375e-05, "step": 10935, "training_step_time": 0.10708022117614746 }, { "epoch": 1.668701171875e-05, "model_forward_time": 0.025241374969482422, "step": 10936 }, { "epoch": 1.668701171875e-05, "step": 10936, "training_step_time": 0.14281105995178223 }, { "epoch": 1.668853759765625e-05, "model_forward_time": 0.025728225708007812, "step": 10937 }, { "epoch": 1.668853759765625e-05, "step": 10937, "training_step_time": 0.1094818115234375 }, { "epoch": 1.66900634765625e-05, "model_forward_time": 0.025628089904785156, "step": 10938 }, { "epoch": 1.66900634765625e-05, "step": 10938, "training_step_time": 0.11281967163085938 }, { "epoch": 1.669158935546875e-05, "model_forward_time": 0.025096654891967773, "step": 10939 }, { "epoch": 1.669158935546875e-05, "step": 10939, "training_step_time": 0.12493252754211426 }, { "epoch": 1.6693115234375e-05, "grad_norm": 0.34396734833717346, "learning_rate": 7.528584049486955e-05, "loss": 0.0283, "step": 10940 }, { "epoch": 1.6693115234375e-05, "model_forward_time": 0.025281906127929688, "step": 10940 }, { "epoch": 1.6693115234375e-05, "step": 10940, "training_step_time": 0.12320661544799805 }, { "epoch": 1.669464111328125e-05, "model_forward_time": 0.02537226676940918, "step": 10941 }, { "epoch": 1.669464111328125e-05, "step": 10941, "training_step_time": 0.1170663833618164 }, { "epoch": 1.66961669921875e-05, "model_forward_time": 0.025002717971801758, "step": 10942 }, { "epoch": 1.66961669921875e-05, "step": 10942, "training_step_time": 0.12281608581542969 }, { "epoch": 1.669769287109375e-05, "model_forward_time": 0.024997234344482422, "step": 10943 }, { "epoch": 1.669769287109375e-05, "step": 10943, "training_step_time": 0.17627549171447754 }, { "epoch": 1.669921875e-05, "model_forward_time": 0.024360179901123047, "step": 10944 }, { "epoch": 1.669921875e-05, "step": 10944, "training_step_time": 0.10507011413574219 }, { "epoch": 1.670074462890625e-05, "model_forward_time": 0.024075031280517578, "step": 10945 }, { "epoch": 1.670074462890625e-05, "step": 10945, "training_step_time": 0.10422396659851074 }, { "epoch": 1.67022705078125e-05, "model_forward_time": 0.02556753158569336, "step": 10946 }, { "epoch": 1.67022705078125e-05, "step": 10946, "training_step_time": 0.10721778869628906 }, { "epoch": 1.670379638671875e-05, "model_forward_time": 0.025260448455810547, "step": 10947 }, { "epoch": 1.670379638671875e-05, "step": 10947, "training_step_time": 0.10821127891540527 }, { "epoch": 1.6705322265625e-05, "model_forward_time": 0.025302648544311523, "step": 10948 }, { "epoch": 1.6705322265625e-05, "step": 10948, "training_step_time": 0.10439538955688477 }, { "epoch": 1.670684814453125e-05, "model_forward_time": 0.02876114845275879, "step": 10949 }, { "epoch": 1.670684814453125e-05, "step": 10949, "training_step_time": 0.10774660110473633 }, { "epoch": 1.67083740234375e-05, "grad_norm": 0.38681089878082275, "learning_rate": 7.52382768867422e-05, "loss": 0.0211, "step": 10950 }, { "epoch": 1.67083740234375e-05, "model_forward_time": 0.02508234977722168, "step": 10950 }, { "epoch": 1.67083740234375e-05, "step": 10950, "training_step_time": 0.11007428169250488 }, { "epoch": 1.670989990234375e-05, "model_forward_time": 0.024943113327026367, "step": 10951 }, { "epoch": 1.670989990234375e-05, "step": 10951, "training_step_time": 0.10929155349731445 }, { "epoch": 1.671142578125e-05, "model_forward_time": 0.024158716201782227, "step": 10952 }, { "epoch": 1.671142578125e-05, "step": 10952, "training_step_time": 0.11240029335021973 }, { "epoch": 1.671295166015625e-05, "model_forward_time": 0.025289535522460938, "step": 10953 }, { "epoch": 1.671295166015625e-05, "step": 10953, "training_step_time": 0.15502047538757324 }, { "epoch": 1.67144775390625e-05, "model_forward_time": 0.024763107299804688, "step": 10954 }, { "epoch": 1.67144775390625e-05, "step": 10954, "training_step_time": 0.1723630428314209 }, { "epoch": 1.671600341796875e-05, "model_forward_time": 0.02430272102355957, "step": 10955 }, { "epoch": 1.671600341796875e-05, "step": 10955, "training_step_time": 0.1819014549255371 }, { "epoch": 1.6717529296875e-05, "model_forward_time": 0.027202129364013672, "step": 10956 }, { "epoch": 1.6717529296875e-05, "step": 10956, "training_step_time": 0.17137551307678223 }, { "epoch": 1.671905517578125e-05, "model_forward_time": 0.024152755737304688, "step": 10957 }, { "epoch": 1.671905517578125e-05, "step": 10957, "training_step_time": 0.15536761283874512 }, { "epoch": 1.67205810546875e-05, "model_forward_time": 0.024048566818237305, "step": 10958 }, { "epoch": 1.67205810546875e-05, "step": 10958, "training_step_time": 0.13645076751708984 }, { "epoch": 1.672210693359375e-05, "model_forward_time": 0.024808406829833984, "step": 10959 }, { "epoch": 1.672210693359375e-05, "step": 10959, "training_step_time": 0.11322951316833496 }, { "epoch": 1.67236328125e-05, "grad_norm": 0.2570970952510834, "learning_rate": 7.519068261172859e-05, "loss": 0.0166, "step": 10960 }, { "epoch": 1.67236328125e-05, "model_forward_time": 0.025191545486450195, "step": 10960 }, { "epoch": 1.67236328125e-05, "step": 10960, "training_step_time": 0.21571731567382812 }, { "epoch": 1.672515869140625e-05, "model_forward_time": 0.02423834800720215, "step": 10961 }, { "epoch": 1.672515869140625e-05, "step": 10961, "training_step_time": 0.1261765956878662 }, { "epoch": 1.67266845703125e-05, "model_forward_time": 0.024764537811279297, "step": 10962 }, { "epoch": 1.67266845703125e-05, "step": 10962, "training_step_time": 0.11742568016052246 }, { "epoch": 1.672821044921875e-05, "model_forward_time": 0.025266647338867188, "step": 10963 }, { "epoch": 1.672821044921875e-05, "step": 10963, "training_step_time": 0.210890531539917 }, { "epoch": 1.6729736328125e-05, "model_forward_time": 0.02455902099609375, "step": 10964 }, { "epoch": 1.6729736328125e-05, "step": 10964, "training_step_time": 0.12028670310974121 }, { "epoch": 1.673126220703125e-05, "model_forward_time": 0.024129867553710938, "step": 10965 }, { "epoch": 1.673126220703125e-05, "step": 10965, "training_step_time": 0.10618734359741211 }, { "epoch": 1.67327880859375e-05, "model_forward_time": 0.025358915328979492, "step": 10966 }, { "epoch": 1.67327880859375e-05, "step": 10966, "training_step_time": 0.13991093635559082 }, { "epoch": 1.673431396484375e-05, "model_forward_time": 0.02529311180114746, "step": 10967 }, { "epoch": 1.673431396484375e-05, "step": 10967, "training_step_time": 0.10933279991149902 }, { "epoch": 1.673583984375e-05, "model_forward_time": 0.02552342414855957, "step": 10968 }, { "epoch": 1.673583984375e-05, "step": 10968, "training_step_time": 0.10730528831481934 }, { "epoch": 1.673736572265625e-05, "model_forward_time": 0.02536463737487793, "step": 10969 }, { "epoch": 1.673736572265625e-05, "step": 10969, "training_step_time": 0.11017680168151855 }, { "epoch": 1.67388916015625e-05, "grad_norm": 0.24090756475925446, "learning_rate": 7.514305772766031e-05, "loss": 0.0182, "step": 10970 }, { "epoch": 1.67388916015625e-05, "model_forward_time": 0.025098323822021484, "step": 10970 }, { "epoch": 1.67388916015625e-05, "step": 10970, "training_step_time": 0.120697021484375 }, { "epoch": 1.674041748046875e-05, "model_forward_time": 0.02523517608642578, "step": 10971 }, { "epoch": 1.674041748046875e-05, "step": 10971, "training_step_time": 0.18422794342041016 }, { "epoch": 1.6741943359375e-05, "model_forward_time": 0.02452993392944336, "step": 10972 }, { "epoch": 1.6741943359375e-05, "step": 10972, "training_step_time": 0.15419483184814453 }, { "epoch": 1.674346923828125e-05, "model_forward_time": 0.024731159210205078, "step": 10973 }, { "epoch": 1.674346923828125e-05, "step": 10973, "training_step_time": 0.1690690517425537 }, { "epoch": 1.67449951171875e-05, "model_forward_time": 0.024987459182739258, "step": 10974 }, { "epoch": 1.67449951171875e-05, "step": 10974, "training_step_time": 0.12488436698913574 }, { "epoch": 1.674652099609375e-05, "model_forward_time": 0.02503037452697754, "step": 10975 }, { "epoch": 1.674652099609375e-05, "step": 10975, "training_step_time": 0.10767769813537598 }, { "epoch": 1.6748046875e-05, "model_forward_time": 0.025534391403198242, "step": 10976 }, { "epoch": 1.6748046875e-05, "step": 10976, "training_step_time": 0.1154639720916748 }, { "epoch": 1.674957275390625e-05, "model_forward_time": 0.02441692352294922, "step": 10977 }, { "epoch": 1.674957275390625e-05, "step": 10977, "training_step_time": 0.1138458251953125 }, { "epoch": 1.67510986328125e-05, "model_forward_time": 0.025197982788085938, "step": 10978 }, { "epoch": 1.67510986328125e-05, "step": 10978, "training_step_time": 0.17601680755615234 }, { "epoch": 1.675262451171875e-05, "model_forward_time": 0.024521350860595703, "step": 10979 }, { "epoch": 1.675262451171875e-05, "step": 10979, "training_step_time": 0.19137048721313477 }, { "epoch": 1.6754150390625e-05, "grad_norm": 0.2127423733472824, "learning_rate": 7.509540229240601e-05, "loss": 0.0227, "step": 10980 }, { "epoch": 1.6754150390625e-05, "model_forward_time": 0.024681806564331055, "step": 10980 }, { "epoch": 1.6754150390625e-05, "step": 10980, "training_step_time": 0.17478275299072266 }, { "epoch": 1.675567626953125e-05, "model_forward_time": 0.0247194766998291, "step": 10981 }, { "epoch": 1.675567626953125e-05, "step": 10981, "training_step_time": 0.17474007606506348 }, { "epoch": 1.67572021484375e-05, "model_forward_time": 0.023975372314453125, "step": 10982 }, { "epoch": 1.67572021484375e-05, "step": 10982, "training_step_time": 0.16277790069580078 }, { "epoch": 1.675872802734375e-05, "model_forward_time": 0.023458480834960938, "step": 10983 }, { "epoch": 1.675872802734375e-05, "step": 10983, "training_step_time": 0.1749570369720459 }, { "epoch": 1.676025390625e-05, "model_forward_time": 0.024254798889160156, "step": 10984 }, { "epoch": 1.676025390625e-05, "step": 10984, "training_step_time": 0.11450552940368652 }, { "epoch": 1.676177978515625e-05, "model_forward_time": 0.023582935333251953, "step": 10985 }, { "epoch": 1.676177978515625e-05, "step": 10985, "training_step_time": 0.10877871513366699 }, { "epoch": 1.67633056640625e-05, "model_forward_time": 0.025499820709228516, "step": 10986 }, { "epoch": 1.67633056640625e-05, "step": 10986, "training_step_time": 0.10741758346557617 }, { "epoch": 1.676483154296875e-05, "model_forward_time": 0.02541637420654297, "step": 10987 }, { "epoch": 1.676483154296875e-05, "step": 10987, "training_step_time": 0.10689830780029297 }, { "epoch": 1.6766357421875e-05, "model_forward_time": 0.025330543518066406, "step": 10988 }, { "epoch": 1.6766357421875e-05, "step": 10988, "training_step_time": 0.10633111000061035 }, { "epoch": 1.676788330078125e-05, "model_forward_time": 0.025246381759643555, "step": 10989 }, { "epoch": 1.676788330078125e-05, "step": 10989, "training_step_time": 0.10977458953857422 }, { "epoch": 1.67694091796875e-05, "grad_norm": 0.29465189576148987, "learning_rate": 7.504771636387163e-05, "loss": 0.0194, "step": 10990 }, { "epoch": 1.67694091796875e-05, "model_forward_time": 0.025088071823120117, "step": 10990 }, { "epoch": 1.67694091796875e-05, "step": 10990, "training_step_time": 0.11047506332397461 }, { "epoch": 1.677093505859375e-05, "model_forward_time": 0.025476932525634766, "step": 10991 }, { "epoch": 1.677093505859375e-05, "step": 10991, "training_step_time": 0.10808444023132324 }, { "epoch": 1.67724609375e-05, "model_forward_time": 0.025394916534423828, "step": 10992 }, { "epoch": 1.67724609375e-05, "step": 10992, "training_step_time": 0.1133263111114502 }, { "epoch": 1.677398681640625e-05, "model_forward_time": 0.02521800994873047, "step": 10993 }, { "epoch": 1.677398681640625e-05, "step": 10993, "training_step_time": 0.11493611335754395 }, { "epoch": 1.67755126953125e-05, "model_forward_time": 0.02523660659790039, "step": 10994 }, { "epoch": 1.67755126953125e-05, "step": 10994, "training_step_time": 0.11822724342346191 }, { "epoch": 1.677703857421875e-05, "model_forward_time": 0.025361299514770508, "step": 10995 }, { "epoch": 1.677703857421875e-05, "step": 10995, "training_step_time": 0.11452269554138184 }, { "epoch": 1.6778564453125e-05, "model_forward_time": 0.025393962860107422, "step": 10996 }, { "epoch": 1.6778564453125e-05, "step": 10996, "training_step_time": 0.11825871467590332 }, { "epoch": 1.678009033203125e-05, "model_forward_time": 0.0251767635345459, "step": 10997 }, { "epoch": 1.678009033203125e-05, "step": 10997, "training_step_time": 0.11380624771118164 }, { "epoch": 1.67816162109375e-05, "model_forward_time": 0.02502155303955078, "step": 10998 }, { "epoch": 1.67816162109375e-05, "step": 10998, "training_step_time": 0.11469602584838867 }, { "epoch": 1.678314208984375e-05, "model_forward_time": 0.02527451515197754, "step": 10999 }, { "epoch": 1.678314208984375e-05, "step": 10999, "training_step_time": 0.1076805591583252 }, { "epoch": 1.678466796875e-05, "grad_norm": 0.2610098421573639, "learning_rate": 7.500000000000001e-05, "loss": 0.0186, "step": 11000 }, { "epoch": 1.678466796875e-05, "model_forward_time": 0.02453327178955078, "step": 11000 }, { "epoch": 1.678466796875e-05, "step": 11000, "training_step_time": 0.10767960548400879 }, { "epoch": 1.678619384765625e-05, "model_forward_time": 0.023268938064575195, "step": 11001 }, { "epoch": 1.678619384765625e-05, "step": 11001, "training_step_time": 0.0983436107635498 }, { "epoch": 1.67877197265625e-05, "model_forward_time": 0.025027751922607422, "step": 11002 }, { "epoch": 1.67877197265625e-05, "step": 11002, "training_step_time": 0.10344243049621582 }, { "epoch": 1.678924560546875e-05, "model_forward_time": 0.025501728057861328, "step": 11003 }, { "epoch": 1.678924560546875e-05, "step": 11003, "training_step_time": 0.10609126091003418 }, { "epoch": 1.6790771484375e-05, "model_forward_time": 0.025641441345214844, "step": 11004 }, { "epoch": 1.6790771484375e-05, "step": 11004, "training_step_time": 0.10907506942749023 }, { "epoch": 1.679229736328125e-05, "model_forward_time": 0.025785207748413086, "step": 11005 }, { "epoch": 1.679229736328125e-05, "step": 11005, "training_step_time": 0.11139726638793945 }, { "epoch": 1.67938232421875e-05, "model_forward_time": 0.02514958381652832, "step": 11006 }, { "epoch": 1.67938232421875e-05, "step": 11006, "training_step_time": 0.10842037200927734 }, { "epoch": 1.679534912109375e-05, "model_forward_time": 0.02629256248474121, "step": 11007 }, { "epoch": 1.679534912109375e-05, "step": 11007, "training_step_time": 0.11193656921386719 }, { "epoch": 1.6796875e-05, "model_forward_time": 0.025657176971435547, "step": 11008 }, { "epoch": 1.6796875e-05, "step": 11008, "training_step_time": 0.10732531547546387 }, { "epoch": 1.679840087890625e-05, "model_forward_time": 0.02537059783935547, "step": 11009 }, { "epoch": 1.679840087890625e-05, "step": 11009, "training_step_time": 0.11384963989257812 }, { "epoch": 1.67999267578125e-05, "grad_norm": 0.25376853346824646, "learning_rate": 7.495225325877103e-05, "loss": 0.024, "step": 11010 }, { "epoch": 1.67999267578125e-05, "model_forward_time": 0.025641679763793945, "step": 11010 }, { "epoch": 1.67999267578125e-05, "step": 11010, "training_step_time": 0.10683774948120117 }, { "epoch": 1.680145263671875e-05, "model_forward_time": 0.02544713020324707, "step": 11011 }, { "epoch": 1.680145263671875e-05, "step": 11011, "training_step_time": 0.1070106029510498 }, { "epoch": 1.6802978515625e-05, "model_forward_time": 0.025514841079711914, "step": 11012 }, { "epoch": 1.6802978515625e-05, "step": 11012, "training_step_time": 0.10878849029541016 }, { "epoch": 1.680450439453125e-05, "model_forward_time": 0.02551126480102539, "step": 11013 }, { "epoch": 1.680450439453125e-05, "step": 11013, "training_step_time": 0.10933685302734375 }, { "epoch": 1.68060302734375e-05, "model_forward_time": 0.025161266326904297, "step": 11014 }, { "epoch": 1.68060302734375e-05, "step": 11014, "training_step_time": 0.11080050468444824 }, { "epoch": 1.680755615234375e-05, "model_forward_time": 0.025237560272216797, "step": 11015 }, { "epoch": 1.680755615234375e-05, "step": 11015, "training_step_time": 0.17717862129211426 }, { "epoch": 1.680908203125e-05, "model_forward_time": 0.024961233139038086, "step": 11016 }, { "epoch": 1.680908203125e-05, "step": 11016, "training_step_time": 0.1287388801574707 }, { "epoch": 1.681060791015625e-05, "model_forward_time": 0.024422883987426758, "step": 11017 }, { "epoch": 1.681060791015625e-05, "step": 11017, "training_step_time": 0.1295459270477295 }, { "epoch": 1.68121337890625e-05, "model_forward_time": 0.02513885498046875, "step": 11018 }, { "epoch": 1.68121337890625e-05, "step": 11018, "training_step_time": 0.11012840270996094 }, { "epoch": 1.681365966796875e-05, "model_forward_time": 0.02582693099975586, "step": 11019 }, { "epoch": 1.681365966796875e-05, "step": 11019, "training_step_time": 0.17493295669555664 }, { "epoch": 1.6815185546875e-05, "grad_norm": 0.29249677062034607, "learning_rate": 7.490447619820152e-05, "loss": 0.0227, "step": 11020 }, { "epoch": 1.6815185546875e-05, "model_forward_time": 0.024664640426635742, "step": 11020 }, { "epoch": 1.6815185546875e-05, "step": 11020, "training_step_time": 0.15517139434814453 }, { "epoch": 1.681671142578125e-05, "model_forward_time": 0.024332761764526367, "step": 11021 }, { "epoch": 1.681671142578125e-05, "step": 11021, "training_step_time": 0.20817780494689941 }, { "epoch": 1.68182373046875e-05, "model_forward_time": 0.025025606155395508, "step": 11022 }, { "epoch": 1.68182373046875e-05, "step": 11022, "training_step_time": 0.13396811485290527 }, { "epoch": 1.681976318359375e-05, "model_forward_time": 0.02447056770324707, "step": 11023 }, { "epoch": 1.681976318359375e-05, "step": 11023, "training_step_time": 0.18937277793884277 }, { "epoch": 1.68212890625e-05, "model_forward_time": 0.02430891990661621, "step": 11024 }, { "epoch": 1.68212890625e-05, "step": 11024, "training_step_time": 0.17600154876708984 }, { "epoch": 1.682281494140625e-05, "model_forward_time": 0.02432560920715332, "step": 11025 }, { "epoch": 1.682281494140625e-05, "step": 11025, "training_step_time": 0.1586132049560547 }, { "epoch": 1.68243408203125e-05, "model_forward_time": 0.024531126022338867, "step": 11026 }, { "epoch": 1.68243408203125e-05, "step": 11026, "training_step_time": 0.21649765968322754 }, { "epoch": 1.682586669921875e-05, "model_forward_time": 0.02470254898071289, "step": 11027 }, { "epoch": 1.682586669921875e-05, "step": 11027, "training_step_time": 0.12111091613769531 }, { "epoch": 1.6827392578125e-05, "model_forward_time": 0.024679899215698242, "step": 11028 }, { "epoch": 1.6827392578125e-05, "step": 11028, "training_step_time": 0.11159276962280273 }, { "epoch": 1.682891845703125e-05, "model_forward_time": 0.025467395782470703, "step": 11029 }, { "epoch": 1.682891845703125e-05, "step": 11029, "training_step_time": 0.10515832901000977 }, { "epoch": 1.68304443359375e-05, "grad_norm": 0.7210782766342163, "learning_rate": 7.485666887634506e-05, "loss": 0.0266, "step": 11030 }, { "epoch": 1.68304443359375e-05, "model_forward_time": 0.025027990341186523, "step": 11030 }, { "epoch": 1.68304443359375e-05, "step": 11030, "training_step_time": 0.10636568069458008 }, { "epoch": 1.683197021484375e-05, "model_forward_time": 0.02503228187561035, "step": 11031 }, { "epoch": 1.683197021484375e-05, "step": 11031, "training_step_time": 0.10616874694824219 }, { "epoch": 1.683349609375e-05, "model_forward_time": 0.025362491607666016, "step": 11032 }, { "epoch": 1.683349609375e-05, "step": 11032, "training_step_time": 0.11004972457885742 }, { "epoch": 1.683502197265625e-05, "model_forward_time": 0.025506973266601562, "step": 11033 }, { "epoch": 1.683502197265625e-05, "step": 11033, "training_step_time": 0.10942745208740234 }, { "epoch": 1.68365478515625e-05, "model_forward_time": 0.025604724884033203, "step": 11034 }, { "epoch": 1.68365478515625e-05, "step": 11034, "training_step_time": 0.2038862705230713 }, { "epoch": 1.683807373046875e-05, "model_forward_time": 0.024894237518310547, "step": 11035 }, { "epoch": 1.683807373046875e-05, "step": 11035, "training_step_time": 0.11488127708435059 }, { "epoch": 1.6839599609375e-05, "model_forward_time": 0.02425074577331543, "step": 11036 }, { "epoch": 1.6839599609375e-05, "step": 11036, "training_step_time": 0.11260843276977539 }, { "epoch": 1.684112548828125e-05, "model_forward_time": 0.02528858184814453, "step": 11037 }, { "epoch": 1.684112548828125e-05, "step": 11037, "training_step_time": 0.11344647407531738 }, { "epoch": 1.68426513671875e-05, "model_forward_time": 0.025884628295898438, "step": 11038 }, { "epoch": 1.68426513671875e-05, "step": 11038, "training_step_time": 0.13131332397460938 }, { "epoch": 1.684417724609375e-05, "model_forward_time": 0.025574445724487305, "step": 11039 }, { "epoch": 1.684417724609375e-05, "step": 11039, "training_step_time": 0.1150054931640625 }, { "epoch": 1.6845703125e-05, "grad_norm": 0.4200476408004761, "learning_rate": 7.480883135129211e-05, "loss": 0.0215, "step": 11040 }, { "epoch": 1.6845703125e-05, "model_forward_time": 0.024545669555664062, "step": 11040 }, { "epoch": 1.6845703125e-05, "step": 11040, "training_step_time": 0.11476659774780273 }, { "epoch": 1.684722900390625e-05, "model_forward_time": 0.025471925735473633, "step": 11041 }, { "epoch": 1.684722900390625e-05, "step": 11041, "training_step_time": 0.11226963996887207 }, { "epoch": 1.68487548828125e-05, "model_forward_time": 0.02468132972717285, "step": 11042 }, { "epoch": 1.68487548828125e-05, "step": 11042, "training_step_time": 0.1121985912322998 }, { "epoch": 1.685028076171875e-05, "model_forward_time": 0.024639129638671875, "step": 11043 }, { "epoch": 1.685028076171875e-05, "step": 11043, "training_step_time": 0.11081218719482422 }, { "epoch": 1.6851806640625e-05, "model_forward_time": 0.024403810501098633, "step": 11044 }, { "epoch": 1.6851806640625e-05, "step": 11044, "training_step_time": 0.10872173309326172 }, { "epoch": 1.685333251953125e-05, "model_forward_time": 0.02540898323059082, "step": 11045 }, { "epoch": 1.685333251953125e-05, "step": 11045, "training_step_time": 0.10756850242614746 }, { "epoch": 1.68548583984375e-05, "model_forward_time": 0.025561809539794922, "step": 11046 }, { "epoch": 1.68548583984375e-05, "step": 11046, "training_step_time": 0.10976433753967285 }, { "epoch": 1.685638427734375e-05, "model_forward_time": 0.02570319175720215, "step": 11047 }, { "epoch": 1.685638427734375e-05, "step": 11047, "training_step_time": 0.1087808609008789 }, { "epoch": 1.685791015625e-05, "model_forward_time": 0.025281906127929688, "step": 11048 }, { "epoch": 1.685791015625e-05, "step": 11048, "training_step_time": 0.10898041725158691 }, { "epoch": 1.685943603515625e-05, "model_forward_time": 0.02580857276916504, "step": 11049 }, { "epoch": 1.685943603515625e-05, "step": 11049, "training_step_time": 0.10880279541015625 }, { "epoch": 1.68609619140625e-05, "grad_norm": 0.3526730239391327, "learning_rate": 7.476096368116974e-05, "loss": 0.0186, "step": 11050 }, { "epoch": 1.68609619140625e-05, "model_forward_time": 0.025604724884033203, "step": 11050 }, { "epoch": 1.68609619140625e-05, "step": 11050, "training_step_time": 0.10728597640991211 }, { "epoch": 1.686248779296875e-05, "model_forward_time": 0.027692794799804688, "step": 11051 }, { "epoch": 1.686248779296875e-05, "step": 11051, "training_step_time": 0.11530160903930664 }, { "epoch": 1.6864013671875e-05, "model_forward_time": 0.025698184967041016, "step": 11052 }, { "epoch": 1.6864013671875e-05, "step": 11052, "training_step_time": 0.10894060134887695 }, { "epoch": 1.686553955078125e-05, "model_forward_time": 0.02559185028076172, "step": 11053 }, { "epoch": 1.686553955078125e-05, "step": 11053, "training_step_time": 0.10940861701965332 }, { "epoch": 1.68670654296875e-05, "model_forward_time": 0.025597333908081055, "step": 11054 }, { "epoch": 1.68670654296875e-05, "step": 11054, "training_step_time": 0.10698223114013672 }, { "epoch": 1.686859130859375e-05, "model_forward_time": 0.02545475959777832, "step": 11055 }, { "epoch": 1.686859130859375e-05, "step": 11055, "training_step_time": 0.10831570625305176 }, { "epoch": 1.68701171875e-05, "model_forward_time": 0.025638818740844727, "step": 11056 }, { "epoch": 1.68701171875e-05, "step": 11056, "training_step_time": 0.10932183265686035 }, { "epoch": 1.687164306640625e-05, "model_forward_time": 0.0257720947265625, "step": 11057 }, { "epoch": 1.687164306640625e-05, "step": 11057, "training_step_time": 0.11046123504638672 }, { "epoch": 1.68731689453125e-05, "model_forward_time": 0.02495121955871582, "step": 11058 }, { "epoch": 1.68731689453125e-05, "step": 11058, "training_step_time": 0.10667800903320312 }, { "epoch": 1.687469482421875e-05, "model_forward_time": 0.02555251121520996, "step": 11059 }, { "epoch": 1.687469482421875e-05, "step": 11059, "training_step_time": 0.11281013488769531 }, { "epoch": 1.6876220703125e-05, "grad_norm": 0.5046151280403137, "learning_rate": 7.471306592414168e-05, "loss": 0.0312, "step": 11060 }, { "epoch": 1.6876220703125e-05, "model_forward_time": 0.025336742401123047, "step": 11060 }, { "epoch": 1.6876220703125e-05, "step": 11060, "training_step_time": 0.11218976974487305 }, { "epoch": 1.687774658203125e-05, "model_forward_time": 0.02591681480407715, "step": 11061 }, { "epoch": 1.687774658203125e-05, "step": 11061, "training_step_time": 0.10997295379638672 }, { "epoch": 1.68792724609375e-05, "model_forward_time": 0.025686025619506836, "step": 11062 }, { "epoch": 1.68792724609375e-05, "step": 11062, "training_step_time": 0.1153879165649414 }, { "epoch": 1.688079833984375e-05, "model_forward_time": 0.025724411010742188, "step": 11063 }, { "epoch": 1.688079833984375e-05, "step": 11063, "training_step_time": 0.13169550895690918 }, { "epoch": 1.688232421875e-05, "model_forward_time": 0.025657176971435547, "step": 11064 }, { "epoch": 1.688232421875e-05, "step": 11064, "training_step_time": 0.14220118522644043 }, { "epoch": 1.688385009765625e-05, "model_forward_time": 0.024648427963256836, "step": 11065 }, { "epoch": 1.688385009765625e-05, "step": 11065, "training_step_time": 0.128448486328125 }, { "epoch": 1.68853759765625e-05, "model_forward_time": 0.025058507919311523, "step": 11066 }, { "epoch": 1.68853759765625e-05, "step": 11066, "training_step_time": 0.12008404731750488 }, { "epoch": 1.688690185546875e-05, "model_forward_time": 0.025208711624145508, "step": 11067 }, { "epoch": 1.688690185546875e-05, "step": 11067, "training_step_time": 0.11882996559143066 }, { "epoch": 1.6888427734375e-05, "model_forward_time": 0.024825334548950195, "step": 11068 }, { "epoch": 1.6888427734375e-05, "step": 11068, "training_step_time": 0.195112943649292 }, { "epoch": 1.688995361328125e-05, "model_forward_time": 0.025026798248291016, "step": 11069 }, { "epoch": 1.688995361328125e-05, "step": 11069, "training_step_time": 0.1636645793914795 }, { "epoch": 1.68914794921875e-05, "grad_norm": 0.4272726774215698, "learning_rate": 7.466513813840825e-05, "loss": 0.0176, "step": 11070 }, { "epoch": 1.68914794921875e-05, "model_forward_time": 0.024471759796142578, "step": 11070 }, { "epoch": 1.68914794921875e-05, "step": 11070, "training_step_time": 0.13658976554870605 }, { "epoch": 1.689300537109375e-05, "model_forward_time": 0.024288177490234375, "step": 11071 }, { "epoch": 1.689300537109375e-05, "step": 11071, "training_step_time": 0.15856575965881348 }, { "epoch": 1.689453125e-05, "model_forward_time": 0.024667978286743164, "step": 11072 }, { "epoch": 1.689453125e-05, "step": 11072, "training_step_time": 0.17634892463684082 }, { "epoch": 1.689605712890625e-05, "model_forward_time": 0.024953365325927734, "step": 11073 }, { "epoch": 1.689605712890625e-05, "step": 11073, "training_step_time": 0.16644763946533203 }, { "epoch": 1.68975830078125e-05, "model_forward_time": 0.02419114112854004, "step": 11074 }, { "epoch": 1.68975830078125e-05, "step": 11074, "training_step_time": 0.11136651039123535 }, { "epoch": 1.689910888671875e-05, "model_forward_time": 0.024768352508544922, "step": 11075 }, { "epoch": 1.689910888671875e-05, "step": 11075, "training_step_time": 0.10836935043334961 }, { "epoch": 1.6900634765625e-05, "model_forward_time": 0.02548384666442871, "step": 11076 }, { "epoch": 1.6900634765625e-05, "step": 11076, "training_step_time": 0.10998392105102539 }, { "epoch": 1.690216064453125e-05, "model_forward_time": 0.025430679321289062, "step": 11077 }, { "epoch": 1.690216064453125e-05, "step": 11077, "training_step_time": 0.10568499565124512 }, { "epoch": 1.69036865234375e-05, "model_forward_time": 0.025182247161865234, "step": 11078 }, { "epoch": 1.69036865234375e-05, "step": 11078, "training_step_time": 0.10708928108215332 }, { "epoch": 1.690521240234375e-05, "model_forward_time": 0.025151729583740234, "step": 11079 }, { "epoch": 1.690521240234375e-05, "step": 11079, "training_step_time": 0.10581612586975098 }, { "epoch": 1.690673828125e-05, "grad_norm": 0.2934919595718384, "learning_rate": 7.461718038220621e-05, "loss": 0.0255, "step": 11080 }, { "epoch": 1.690673828125e-05, "model_forward_time": 0.025043725967407227, "step": 11080 }, { "epoch": 1.690673828125e-05, "step": 11080, "training_step_time": 0.10827851295471191 }, { "epoch": 1.690826416015625e-05, "model_forward_time": 0.0254669189453125, "step": 11081 }, { "epoch": 1.690826416015625e-05, "step": 11081, "training_step_time": 0.13683199882507324 }, { "epoch": 1.69097900390625e-05, "model_forward_time": 0.025970935821533203, "step": 11082 }, { "epoch": 1.69097900390625e-05, "step": 11082, "training_step_time": 0.1141054630279541 }, { "epoch": 1.691131591796875e-05, "model_forward_time": 0.02505660057067871, "step": 11083 }, { "epoch": 1.691131591796875e-05, "step": 11083, "training_step_time": 0.11047744750976562 }, { "epoch": 1.6912841796875e-05, "model_forward_time": 0.024837017059326172, "step": 11084 }, { "epoch": 1.6912841796875e-05, "step": 11084, "training_step_time": 0.1345205307006836 }, { "epoch": 1.691436767578125e-05, "model_forward_time": 0.025327682495117188, "step": 11085 }, { "epoch": 1.691436767578125e-05, "step": 11085, "training_step_time": 0.13312244415283203 }, { "epoch": 1.69158935546875e-05, "model_forward_time": 0.023540735244750977, "step": 11086 }, { "epoch": 1.69158935546875e-05, "step": 11086, "training_step_time": 0.12646961212158203 }, { "epoch": 1.691741943359375e-05, "model_forward_time": 0.023765087127685547, "step": 11087 }, { "epoch": 1.691741943359375e-05, "step": 11087, "training_step_time": 0.19351792335510254 }, { "epoch": 1.69189453125e-05, "model_forward_time": 0.02450847625732422, "step": 11088 }, { "epoch": 1.69189453125e-05, "step": 11088, "training_step_time": 0.11889886856079102 }, { "epoch": 1.692047119140625e-05, "model_forward_time": 0.02297234535217285, "step": 11089 }, { "epoch": 1.692047119140625e-05, "step": 11089, "training_step_time": 0.11371111869812012 }, { "epoch": 1.69219970703125e-05, "grad_norm": 0.403972327709198, "learning_rate": 7.456919271380875e-05, "loss": 0.0204, "step": 11090 }, { "epoch": 1.69219970703125e-05, "model_forward_time": 0.024688720703125, "step": 11090 }, { "epoch": 1.69219970703125e-05, "step": 11090, "training_step_time": 0.11239242553710938 }, { "epoch": 1.692352294921875e-05, "model_forward_time": 0.024477243423461914, "step": 11091 }, { "epoch": 1.692352294921875e-05, "step": 11091, "training_step_time": 0.1142737865447998 }, { "epoch": 1.6925048828125e-05, "model_forward_time": 0.0252685546875, "step": 11092 }, { "epoch": 1.6925048828125e-05, "step": 11092, "training_step_time": 0.11030888557434082 }, { "epoch": 1.692657470703125e-05, "model_forward_time": 0.02535104751586914, "step": 11093 }, { "epoch": 1.692657470703125e-05, "step": 11093, "training_step_time": 0.10907292366027832 }, { "epoch": 1.69281005859375e-05, "model_forward_time": 0.025489091873168945, "step": 11094 }, { "epoch": 1.69281005859375e-05, "step": 11094, "training_step_time": 0.11093974113464355 }, { "epoch": 1.692962646484375e-05, "model_forward_time": 0.025453567504882812, "step": 11095 }, { "epoch": 1.692962646484375e-05, "step": 11095, "training_step_time": 0.10885071754455566 }, { "epoch": 1.693115234375e-05, "model_forward_time": 0.025442838668823242, "step": 11096 }, { "epoch": 1.693115234375e-05, "step": 11096, "training_step_time": 0.1108086109161377 }, { "epoch": 1.693267822265625e-05, "model_forward_time": 0.025211572647094727, "step": 11097 }, { "epoch": 1.693267822265625e-05, "step": 11097, "training_step_time": 0.11159706115722656 }, { "epoch": 1.69342041015625e-05, "model_forward_time": 0.025383710861206055, "step": 11098 }, { "epoch": 1.69342041015625e-05, "step": 11098, "training_step_time": 0.11027765274047852 }, { "epoch": 1.693572998046875e-05, "model_forward_time": 0.025734424591064453, "step": 11099 }, { "epoch": 1.693572998046875e-05, "step": 11099, "training_step_time": 0.10881423950195312 }, { "epoch": 1.6937255859375e-05, "grad_norm": 0.5232256650924683, "learning_rate": 7.452117519152542e-05, "loss": 0.025, "step": 11100 }, { "epoch": 1.6937255859375e-05, "model_forward_time": 0.025683879852294922, "step": 11100 }, { "epoch": 1.6937255859375e-05, "step": 11100, "training_step_time": 0.1129448413848877 }, { "epoch": 1.693878173828125e-05, "model_forward_time": 0.025221586227416992, "step": 11101 }, { "epoch": 1.693878173828125e-05, "step": 11101, "training_step_time": 0.11133694648742676 }, { "epoch": 1.69403076171875e-05, "model_forward_time": 0.025323152542114258, "step": 11102 }, { "epoch": 1.69403076171875e-05, "step": 11102, "training_step_time": 0.10870361328125 }, { "epoch": 1.694183349609375e-05, "model_forward_time": 0.026496171951293945, "step": 11103 }, { "epoch": 1.694183349609375e-05, "step": 11103, "training_step_time": 0.1148216724395752 }, { "epoch": 1.6943359375e-05, "model_forward_time": 0.025636672973632812, "step": 11104 }, { "epoch": 1.6943359375e-05, "step": 11104, "training_step_time": 0.21802735328674316 }, { "epoch": 1.694488525390625e-05, "model_forward_time": 0.024829387664794922, "step": 11105 }, { "epoch": 1.694488525390625e-05, "step": 11105, "training_step_time": 0.11014008522033691 }, { "epoch": 1.69464111328125e-05, "model_forward_time": 0.02523350715637207, "step": 11106 }, { "epoch": 1.69464111328125e-05, "step": 11106, "training_step_time": 0.11446523666381836 }, { "epoch": 1.694793701171875e-05, "model_forward_time": 0.025832653045654297, "step": 11107 }, { "epoch": 1.694793701171875e-05, "step": 11107, "training_step_time": 0.2147998809814453 }, { "epoch": 1.6949462890625e-05, "model_forward_time": 0.024612903594970703, "step": 11108 }, { "epoch": 1.6949462890625e-05, "step": 11108, "training_step_time": 0.1840822696685791 }, { "epoch": 1.695098876953125e-05, "model_forward_time": 0.02490687370300293, "step": 11109 }, { "epoch": 1.695098876953125e-05, "step": 11109, "training_step_time": 0.14657330513000488 }, { "epoch": 1.69525146484375e-05, "grad_norm": 0.3509308397769928, "learning_rate": 7.447312787370203e-05, "loss": 0.0303, "step": 11110 }, { "epoch": 1.69525146484375e-05, "model_forward_time": 0.024929285049438477, "step": 11110 }, { "epoch": 1.69525146484375e-05, "step": 11110, "training_step_time": 0.10591721534729004 }, { "epoch": 1.695404052734375e-05, "model_forward_time": 0.025742292404174805, "step": 11111 }, { "epoch": 1.695404052734375e-05, "step": 11111, "training_step_time": 0.11249232292175293 }, { "epoch": 1.695556640625e-05, "model_forward_time": 0.025204181671142578, "step": 11112 }, { "epoch": 1.695556640625e-05, "step": 11112, "training_step_time": 0.1134941577911377 }, { "epoch": 1.695709228515625e-05, "model_forward_time": 0.025155067443847656, "step": 11113 }, { "epoch": 1.695709228515625e-05, "step": 11113, "training_step_time": 0.12346720695495605 }, { "epoch": 1.69586181640625e-05, "model_forward_time": 0.025364160537719727, "step": 11114 }, { "epoch": 1.69586181640625e-05, "step": 11114, "training_step_time": 0.17878293991088867 }, { "epoch": 1.696014404296875e-05, "model_forward_time": 0.02509307861328125, "step": 11115 }, { "epoch": 1.696014404296875e-05, "step": 11115, "training_step_time": 0.16571378707885742 }, { "epoch": 1.6961669921875e-05, "model_forward_time": 0.02479863166809082, "step": 11116 }, { "epoch": 1.6961669921875e-05, "step": 11116, "training_step_time": 0.16714882850646973 }, { "epoch": 1.696319580078125e-05, "model_forward_time": 0.024680137634277344, "step": 11117 }, { "epoch": 1.696319580078125e-05, "step": 11117, "training_step_time": 0.1324310302734375 }, { "epoch": 1.69647216796875e-05, "model_forward_time": 0.024886369705200195, "step": 11118 }, { "epoch": 1.69647216796875e-05, "step": 11118, "training_step_time": 0.11041498184204102 }, { "epoch": 1.696624755859375e-05, "model_forward_time": 0.025521516799926758, "step": 11119 }, { "epoch": 1.696624755859375e-05, "step": 11119, "training_step_time": 0.10813593864440918 }, { "epoch": 1.69677734375e-05, "grad_norm": 0.36768996715545654, "learning_rate": 7.44250508187206e-05, "loss": 0.0221, "step": 11120 }, { "epoch": 1.69677734375e-05, "model_forward_time": 0.025114774703979492, "step": 11120 }, { "epoch": 1.69677734375e-05, "step": 11120, "training_step_time": 0.10756945610046387 }, { "epoch": 1.696929931640625e-05, "model_forward_time": 0.025240421295166016, "step": 11121 }, { "epoch": 1.696929931640625e-05, "step": 11121, "training_step_time": 0.10737466812133789 }, { "epoch": 1.69708251953125e-05, "model_forward_time": 0.025578737258911133, "step": 11122 }, { "epoch": 1.69708251953125e-05, "step": 11122, "training_step_time": 0.10948061943054199 }, { "epoch": 1.697235107421875e-05, "model_forward_time": 0.02613973617553711, "step": 11123 }, { "epoch": 1.697235107421875e-05, "step": 11123, "training_step_time": 0.10831975936889648 }, { "epoch": 1.6973876953125e-05, "model_forward_time": 0.025525569915771484, "step": 11124 }, { "epoch": 1.6973876953125e-05, "step": 11124, "training_step_time": 0.10606050491333008 }, { "epoch": 1.697540283203125e-05, "model_forward_time": 0.025348424911499023, "step": 11125 }, { "epoch": 1.697540283203125e-05, "step": 11125, "training_step_time": 0.1058495044708252 }, { "epoch": 1.69769287109375e-05, "model_forward_time": 0.02520442008972168, "step": 11126 }, { "epoch": 1.69769287109375e-05, "step": 11126, "training_step_time": 0.10817551612854004 }, { "epoch": 1.697845458984375e-05, "model_forward_time": 0.025578022003173828, "step": 11127 }, { "epoch": 1.697845458984375e-05, "step": 11127, "training_step_time": 0.12018966674804688 }, { "epoch": 1.697998046875e-05, "model_forward_time": 0.02556777000427246, "step": 11128 }, { "epoch": 1.697998046875e-05, "step": 11128, "training_step_time": 0.11305952072143555 }, { "epoch": 1.698150634765625e-05, "model_forward_time": 0.0253446102142334, "step": 11129 }, { "epoch": 1.698150634765625e-05, "step": 11129, "training_step_time": 0.1105642318725586 }, { "epoch": 1.69830322265625e-05, "grad_norm": 0.18100249767303467, "learning_rate": 7.437694408499933e-05, "loss": 0.0305, "step": 11130 }, { "epoch": 1.69830322265625e-05, "model_forward_time": 0.024835586547851562, "step": 11130 }, { "epoch": 1.69830322265625e-05, "step": 11130, "training_step_time": 0.10748457908630371 }, { "epoch": 1.698455810546875e-05, "model_forward_time": 0.025644540786743164, "step": 11131 }, { "epoch": 1.698455810546875e-05, "step": 11131, "training_step_time": 0.1407938003540039 }, { "epoch": 1.6986083984375e-05, "model_forward_time": 0.02554917335510254, "step": 11132 }, { "epoch": 1.6986083984375e-05, "step": 11132, "training_step_time": 0.10951733589172363 }, { "epoch": 1.698760986328125e-05, "model_forward_time": 0.025508403778076172, "step": 11133 }, { "epoch": 1.698760986328125e-05, "step": 11133, "training_step_time": 0.1130530834197998 }, { "epoch": 1.69891357421875e-05, "model_forward_time": 0.025455474853515625, "step": 11134 }, { "epoch": 1.69891357421875e-05, "step": 11134, "training_step_time": 0.1064448356628418 }, { "epoch": 1.699066162109375e-05, "model_forward_time": 0.02570033073425293, "step": 11135 }, { "epoch": 1.699066162109375e-05, "step": 11135, "training_step_time": 0.11916136741638184 }, { "epoch": 1.69921875e-05, "model_forward_time": 0.02585315704345703, "step": 11136 }, { "epoch": 1.69921875e-05, "step": 11136, "training_step_time": 0.12999725341796875 }, { "epoch": 1.699371337890625e-05, "model_forward_time": 0.02430105209350586, "step": 11137 }, { "epoch": 1.699371337890625e-05, "step": 11137, "training_step_time": 0.12331938743591309 }, { "epoch": 1.69952392578125e-05, "model_forward_time": 0.024066686630249023, "step": 11138 }, { "epoch": 1.69952392578125e-05, "step": 11138, "training_step_time": 0.1278059482574463 }, { "epoch": 1.699676513671875e-05, "model_forward_time": 0.024010658264160156, "step": 11139 }, { "epoch": 1.699676513671875e-05, "step": 11139, "training_step_time": 0.1270914077758789 }, { "epoch": 1.6998291015625e-05, "grad_norm": 0.28290072083473206, "learning_rate": 7.432880773099237e-05, "loss": 0.0227, "step": 11140 }, { "epoch": 1.6998291015625e-05, "model_forward_time": 0.023932695388793945, "step": 11140 }, { "epoch": 1.6998291015625e-05, "step": 11140, "training_step_time": 0.12007665634155273 }, { "epoch": 1.699981689453125e-05, "model_forward_time": 0.025844335556030273, "step": 11141 }, { "epoch": 1.699981689453125e-05, "step": 11141, "training_step_time": 0.11661458015441895 }, { "epoch": 1.70013427734375e-05, "model_forward_time": 0.025217294692993164, "step": 11142 }, { "epoch": 1.70013427734375e-05, "step": 11142, "training_step_time": 0.11357355117797852 }, { "epoch": 1.700286865234375e-05, "model_forward_time": 0.0254364013671875, "step": 11143 }, { "epoch": 1.700286865234375e-05, "step": 11143, "training_step_time": 0.11260271072387695 }, { "epoch": 1.700439453125e-05, "model_forward_time": 0.025177717208862305, "step": 11144 }, { "epoch": 1.700439453125e-05, "step": 11144, "training_step_time": 0.11158370971679688 }, { "epoch": 1.700592041015625e-05, "model_forward_time": 0.026096105575561523, "step": 11145 }, { "epoch": 1.700592041015625e-05, "step": 11145, "training_step_time": 0.11098909378051758 }, { "epoch": 1.70074462890625e-05, "model_forward_time": 0.02504706382751465, "step": 11146 }, { "epoch": 1.70074462890625e-05, "step": 11146, "training_step_time": 0.10995030403137207 }, { "epoch": 1.700897216796875e-05, "model_forward_time": 0.025400638580322266, "step": 11147 }, { "epoch": 1.700897216796875e-05, "step": 11147, "training_step_time": 0.11023283004760742 }, { "epoch": 1.7010498046875e-05, "model_forward_time": 0.026092052459716797, "step": 11148 }, { "epoch": 1.7010498046875e-05, "step": 11148, "training_step_time": 0.11136269569396973 }, { "epoch": 1.701202392578125e-05, "model_forward_time": 0.025768756866455078, "step": 11149 }, { "epoch": 1.701202392578125e-05, "step": 11149, "training_step_time": 0.17134761810302734 }, { "epoch": 1.70135498046875e-05, "grad_norm": 0.25398629903793335, "learning_rate": 7.428064181518997e-05, "loss": 0.0139, "step": 11150 }, { "epoch": 1.70135498046875e-05, "model_forward_time": 0.024932146072387695, "step": 11150 }, { "epoch": 1.70135498046875e-05, "step": 11150, "training_step_time": 0.1583249568939209 }, { "epoch": 1.701507568359375e-05, "model_forward_time": 0.025380373001098633, "step": 11151 }, { "epoch": 1.701507568359375e-05, "step": 11151, "training_step_time": 0.11038780212402344 }, { "epoch": 1.70166015625e-05, "model_forward_time": 0.0251007080078125, "step": 11152 }, { "epoch": 1.70166015625e-05, "step": 11152, "training_step_time": 0.10470890998840332 }, { "epoch": 1.701812744140625e-05, "model_forward_time": 0.0255734920501709, "step": 11153 }, { "epoch": 1.701812744140625e-05, "step": 11153, "training_step_time": 0.11091828346252441 }, { "epoch": 1.70196533203125e-05, "model_forward_time": 0.025669336318969727, "step": 11154 }, { "epoch": 1.70196533203125e-05, "step": 11154, "training_step_time": 0.11453056335449219 }, { "epoch": 1.702117919921875e-05, "model_forward_time": 0.02567124366760254, "step": 11155 }, { "epoch": 1.702117919921875e-05, "step": 11155, "training_step_time": 0.1067659854888916 }, { "epoch": 1.7022705078125e-05, "model_forward_time": 0.02595663070678711, "step": 11156 }, { "epoch": 1.7022705078125e-05, "step": 11156, "training_step_time": 0.17258095741271973 }, { "epoch": 1.702423095703125e-05, "model_forward_time": 0.02476191520690918, "step": 11157 }, { "epoch": 1.702423095703125e-05, "step": 11157, "training_step_time": 0.11350822448730469 }, { "epoch": 1.70257568359375e-05, "model_forward_time": 0.024661779403686523, "step": 11158 }, { "epoch": 1.70257568359375e-05, "step": 11158, "training_step_time": 0.19016647338867188 }, { "epoch": 1.702728271484375e-05, "model_forward_time": 0.024661779403686523, "step": 11159 }, { "epoch": 1.702728271484375e-05, "step": 11159, "training_step_time": 0.17380928993225098 }, { "epoch": 1.702880859375e-05, "grad_norm": 0.3613259792327881, "learning_rate": 7.423244639611826e-05, "loss": 0.0232, "step": 11160 }, { "epoch": 1.702880859375e-05, "model_forward_time": 0.024825096130371094, "step": 11160 }, { "epoch": 1.702880859375e-05, "step": 11160, "training_step_time": 0.2088146209716797 }, { "epoch": 1.703033447265625e-05, "model_forward_time": 0.02473282814025879, "step": 11161 }, { "epoch": 1.703033447265625e-05, "step": 11161, "training_step_time": 0.13893818855285645 }, { "epoch": 1.70318603515625e-05, "model_forward_time": 0.025026321411132812, "step": 11162 }, { "epoch": 1.70318603515625e-05, "step": 11162, "training_step_time": 0.11822867393493652 }, { "epoch": 1.703338623046875e-05, "model_forward_time": 0.025375843048095703, "step": 11163 }, { "epoch": 1.703338623046875e-05, "step": 11163, "training_step_time": 0.12199616432189941 }, { "epoch": 1.7034912109375e-05, "model_forward_time": 0.025544166564941406, "step": 11164 }, { "epoch": 1.7034912109375e-05, "step": 11164, "training_step_time": 0.11992430686950684 }, { "epoch": 1.703643798828125e-05, "model_forward_time": 0.025645732879638672, "step": 11165 }, { "epoch": 1.703643798828125e-05, "step": 11165, "training_step_time": 0.10805416107177734 }, { "epoch": 1.70379638671875e-05, "model_forward_time": 0.025648832321166992, "step": 11166 }, { "epoch": 1.70379638671875e-05, "step": 11166, "training_step_time": 0.1073751449584961 }, { "epoch": 1.703948974609375e-05, "model_forward_time": 0.026532411575317383, "step": 11167 }, { "epoch": 1.703948974609375e-05, "step": 11167, "training_step_time": 0.12313008308410645 }, { "epoch": 1.7041015625e-05, "model_forward_time": 0.025435209274291992, "step": 11168 }, { "epoch": 1.7041015625e-05, "step": 11168, "training_step_time": 0.1097421646118164 }, { "epoch": 1.704254150390625e-05, "model_forward_time": 0.025670289993286133, "step": 11169 }, { "epoch": 1.704254150390625e-05, "step": 11169, "training_step_time": 0.1113283634185791 }, { "epoch": 1.70440673828125e-05, "grad_norm": 0.26340359449386597, "learning_rate": 7.418422153233919e-05, "loss": 0.0187, "step": 11170 }, { "epoch": 1.70440673828125e-05, "model_forward_time": 0.025643348693847656, "step": 11170 }, { "epoch": 1.70440673828125e-05, "step": 11170, "training_step_time": 0.1141502857208252 }, { "epoch": 1.704559326171875e-05, "model_forward_time": 0.02537226676940918, "step": 11171 }, { "epoch": 1.704559326171875e-05, "step": 11171, "training_step_time": 0.10982465744018555 }, { "epoch": 1.7047119140625e-05, "model_forward_time": 0.025803804397583008, "step": 11172 }, { "epoch": 1.7047119140625e-05, "step": 11172, "training_step_time": 0.11042284965515137 }, { "epoch": 1.704864501953125e-05, "model_forward_time": 0.025126934051513672, "step": 11173 }, { "epoch": 1.704864501953125e-05, "step": 11173, "training_step_time": 0.15484094619750977 }, { "epoch": 1.70501708984375e-05, "model_forward_time": 0.025116682052612305, "step": 11174 }, { "epoch": 1.70501708984375e-05, "step": 11174, "training_step_time": 0.1080167293548584 }, { "epoch": 1.705169677734375e-05, "model_forward_time": 0.024929285049438477, "step": 11175 }, { "epoch": 1.705169677734375e-05, "step": 11175, "training_step_time": 0.11369872093200684 }, { "epoch": 1.705322265625e-05, "model_forward_time": 0.026050567626953125, "step": 11176 }, { "epoch": 1.705322265625e-05, "step": 11176, "training_step_time": 0.12411093711853027 }, { "epoch": 1.705474853515625e-05, "model_forward_time": 0.025557279586791992, "step": 11177 }, { "epoch": 1.705474853515625e-05, "step": 11177, "training_step_time": 0.12524175643920898 }, { "epoch": 1.70562744140625e-05, "model_forward_time": 0.025540828704833984, "step": 11178 }, { "epoch": 1.70562744140625e-05, "step": 11178, "training_step_time": 0.11878705024719238 }, { "epoch": 1.705780029296875e-05, "model_forward_time": 0.025159358978271484, "step": 11179 }, { "epoch": 1.705780029296875e-05, "step": 11179, "training_step_time": 0.11896824836730957 }, { "epoch": 1.7059326171875e-05, "grad_norm": 0.25540876388549805, "learning_rate": 7.413596728245054e-05, "loss": 0.022, "step": 11180 }, { "epoch": 1.7059326171875e-05, "model_forward_time": 0.025393962860107422, "step": 11180 }, { "epoch": 1.7059326171875e-05, "step": 11180, "training_step_time": 0.1093132495880127 }, { "epoch": 1.706085205078125e-05, "model_forward_time": 0.025466203689575195, "step": 11181 }, { "epoch": 1.706085205078125e-05, "step": 11181, "training_step_time": 0.10653090476989746 }, { "epoch": 1.70623779296875e-05, "model_forward_time": 0.02513289451599121, "step": 11182 }, { "epoch": 1.70623779296875e-05, "step": 11182, "training_step_time": 0.1080162525177002 }, { "epoch": 1.706390380859375e-05, "model_forward_time": 0.0254671573638916, "step": 11183 }, { "epoch": 1.706390380859375e-05, "step": 11183, "training_step_time": 0.10893774032592773 }, { "epoch": 1.70654296875e-05, "model_forward_time": 0.025591373443603516, "step": 11184 }, { "epoch": 1.70654296875e-05, "step": 11184, "training_step_time": 0.10963010787963867 }, { "epoch": 1.706695556640625e-05, "model_forward_time": 0.024818897247314453, "step": 11185 }, { "epoch": 1.706695556640625e-05, "step": 11185, "training_step_time": 0.11135458946228027 }, { "epoch": 1.70684814453125e-05, "model_forward_time": 0.025203943252563477, "step": 11186 }, { "epoch": 1.70684814453125e-05, "step": 11186, "training_step_time": 0.11042666435241699 }, { "epoch": 1.707000732421875e-05, "model_forward_time": 0.025552034378051758, "step": 11187 }, { "epoch": 1.707000732421875e-05, "step": 11187, "training_step_time": 0.10788965225219727 }, { "epoch": 1.7071533203125e-05, "model_forward_time": 0.02545905113220215, "step": 11188 }, { "epoch": 1.7071533203125e-05, "step": 11188, "training_step_time": 0.10719847679138184 }, { "epoch": 1.707305908203125e-05, "model_forward_time": 0.024295568466186523, "step": 11189 }, { "epoch": 1.707305908203125e-05, "step": 11189, "training_step_time": 0.10810565948486328 }, { "epoch": 1.70745849609375e-05, "grad_norm": 0.3643791973590851, "learning_rate": 7.408768370508576e-05, "loss": 0.0291, "step": 11190 }, { "epoch": 1.70745849609375e-05, "model_forward_time": 0.024782896041870117, "step": 11190 }, { "epoch": 1.70745849609375e-05, "step": 11190, "training_step_time": 0.11055302619934082 }, { "epoch": 1.707611083984375e-05, "model_forward_time": 0.02525925636291504, "step": 11191 }, { "epoch": 1.707611083984375e-05, "step": 11191, "training_step_time": 0.10683584213256836 }, { "epoch": 1.707763671875e-05, "model_forward_time": 0.02523493766784668, "step": 11192 }, { "epoch": 1.707763671875e-05, "step": 11192, "training_step_time": 0.11076211929321289 }, { "epoch": 1.707916259765625e-05, "model_forward_time": 0.025388240814208984, "step": 11193 }, { "epoch": 1.707916259765625e-05, "step": 11193, "training_step_time": 0.10844039916992188 }, { "epoch": 1.70806884765625e-05, "model_forward_time": 0.02474498748779297, "step": 11194 }, { "epoch": 1.70806884765625e-05, "step": 11194, "training_step_time": 0.11204838752746582 }, { "epoch": 1.708221435546875e-05, "model_forward_time": 0.025757312774658203, "step": 11195 }, { "epoch": 1.708221435546875e-05, "step": 11195, "training_step_time": 0.11095595359802246 }, { "epoch": 1.7083740234375e-05, "model_forward_time": 0.024172067642211914, "step": 11196 }, { "epoch": 1.7083740234375e-05, "step": 11196, "training_step_time": 0.10817503929138184 }, { "epoch": 1.708526611328125e-05, "model_forward_time": 0.024150848388671875, "step": 11197 }, { "epoch": 1.708526611328125e-05, "step": 11197, "training_step_time": 0.11745238304138184 }, { "epoch": 1.70867919921875e-05, "model_forward_time": 0.02460336685180664, "step": 11198 }, { "epoch": 1.70867919921875e-05, "step": 11198, "training_step_time": 0.10994124412536621 }, { "epoch": 1.708831787109375e-05, "model_forward_time": 0.025330066680908203, "step": 11199 }, { "epoch": 1.708831787109375e-05, "step": 11199, "training_step_time": 0.1705784797668457 }, { "epoch": 1.708984375e-05, "grad_norm": 0.272895872592926, "learning_rate": 7.403937085891397e-05, "loss": 0.0305, "step": 11200 }, { "epoch": 1.708984375e-05, "model_forward_time": 0.023299217224121094, "step": 11200 }, { "epoch": 1.708984375e-05, "step": 11200, "training_step_time": 0.1596074104309082 }, { "epoch": 1.709136962890625e-05, "model_forward_time": 0.02404618263244629, "step": 11201 }, { "epoch": 1.709136962890625e-05, "step": 11201, "training_step_time": 0.21463704109191895 }, { "epoch": 1.70928955078125e-05, "model_forward_time": 0.02338433265686035, "step": 11202 }, { "epoch": 1.70928955078125e-05, "step": 11202, "training_step_time": 0.10472607612609863 }, { "epoch": 1.709442138671875e-05, "model_forward_time": 0.022625446319580078, "step": 11203 }, { "epoch": 1.709442138671875e-05, "step": 11203, "training_step_time": 0.13037323951721191 }, { "epoch": 1.7095947265625e-05, "model_forward_time": 0.02410435676574707, "step": 11204 }, { "epoch": 1.7095947265625e-05, "step": 11204, "training_step_time": 0.11285948753356934 }, { "epoch": 1.709747314453125e-05, "model_forward_time": 0.02443218231201172, "step": 11205 }, { "epoch": 1.709747314453125e-05, "step": 11205, "training_step_time": 0.17781734466552734 }, { "epoch": 1.70989990234375e-05, "model_forward_time": 0.023778200149536133, "step": 11206 }, { "epoch": 1.70989990234375e-05, "step": 11206, "training_step_time": 0.15695667266845703 }, { "epoch": 1.710052490234375e-05, "model_forward_time": 0.027554035186767578, "step": 11207 }, { "epoch": 1.710052490234375e-05, "step": 11207, "training_step_time": 0.14752459526062012 }, { "epoch": 1.710205078125e-05, "model_forward_time": 0.025366544723510742, "step": 11208 }, { "epoch": 1.710205078125e-05, "step": 11208, "training_step_time": 0.15101313591003418 }, { "epoch": 1.710357666015625e-05, "model_forward_time": 0.02483820915222168, "step": 11209 }, { "epoch": 1.710357666015625e-05, "step": 11209, "training_step_time": 0.21700596809387207 }, { "epoch": 1.71051025390625e-05, "grad_norm": 0.47021782398223877, "learning_rate": 7.399102880263983e-05, "loss": 0.0266, "step": 11210 }, { "epoch": 1.71051025390625e-05, "model_forward_time": 0.02443408966064453, "step": 11210 }, { "epoch": 1.71051025390625e-05, "step": 11210, "training_step_time": 0.1195991039276123 }, { "epoch": 1.710662841796875e-05, "model_forward_time": 0.024375200271606445, "step": 11211 }, { "epoch": 1.710662841796875e-05, "step": 11211, "training_step_time": 0.10855698585510254 }, { "epoch": 1.7108154296875e-05, "model_forward_time": 0.02531886100769043, "step": 11212 }, { "epoch": 1.7108154296875e-05, "step": 11212, "training_step_time": 0.10663461685180664 }, { "epoch": 1.710968017578125e-05, "model_forward_time": 0.025848865509033203, "step": 11213 }, { "epoch": 1.710968017578125e-05, "step": 11213, "training_step_time": 0.10857605934143066 }, { "epoch": 1.71112060546875e-05, "model_forward_time": 0.024697303771972656, "step": 11214 }, { "epoch": 1.71112060546875e-05, "step": 11214, "training_step_time": 0.1137082576751709 }, { "epoch": 1.711273193359375e-05, "model_forward_time": 0.024188518524169922, "step": 11215 }, { "epoch": 1.711273193359375e-05, "step": 11215, "training_step_time": 0.12606310844421387 }, { "epoch": 1.71142578125e-05, "model_forward_time": 0.023963451385498047, "step": 11216 }, { "epoch": 1.71142578125e-05, "step": 11216, "training_step_time": 0.12078261375427246 }, { "epoch": 1.711578369140625e-05, "model_forward_time": 0.025397062301635742, "step": 11217 }, { "epoch": 1.711578369140625e-05, "step": 11217, "training_step_time": 0.12273383140563965 }, { "epoch": 1.71173095703125e-05, "model_forward_time": 0.02507638931274414, "step": 11218 }, { "epoch": 1.71173095703125e-05, "step": 11218, "training_step_time": 0.1221461296081543 }, { "epoch": 1.711883544921875e-05, "model_forward_time": 0.02522730827331543, "step": 11219 }, { "epoch": 1.711883544921875e-05, "step": 11219, "training_step_time": 0.1271045207977295 }, { "epoch": 1.7120361328125e-05, "grad_norm": 0.3241020739078522, "learning_rate": 7.394265759500348e-05, "loss": 0.0207, "step": 11220 }, { "epoch": 1.7120361328125e-05, "model_forward_time": 0.025205373764038086, "step": 11220 }, { "epoch": 1.7120361328125e-05, "step": 11220, "training_step_time": 0.11617779731750488 }, { "epoch": 1.712188720703125e-05, "model_forward_time": 0.025170087814331055, "step": 11221 }, { "epoch": 1.712188720703125e-05, "step": 11221, "training_step_time": 0.21965932846069336 }, { "epoch": 1.71234130859375e-05, "model_forward_time": 0.024204254150390625, "step": 11222 }, { "epoch": 1.71234130859375e-05, "step": 11222, "training_step_time": 0.13701152801513672 }, { "epoch": 1.712493896484375e-05, "model_forward_time": 0.024699926376342773, "step": 11223 }, { "epoch": 1.712493896484375e-05, "step": 11223, "training_step_time": 0.10868310928344727 }, { "epoch": 1.712646484375e-05, "model_forward_time": 0.024086713790893555, "step": 11224 }, { "epoch": 1.712646484375e-05, "step": 11224, "training_step_time": 0.11427521705627441 }, { "epoch": 1.712799072265625e-05, "model_forward_time": 0.026080608367919922, "step": 11225 }, { "epoch": 1.712799072265625e-05, "step": 11225, "training_step_time": 0.11011981964111328 }, { "epoch": 1.71295166015625e-05, "model_forward_time": 0.023947954177856445, "step": 11226 }, { "epoch": 1.71295166015625e-05, "step": 11226, "training_step_time": 0.11208295822143555 }, { "epoch": 1.713104248046875e-05, "model_forward_time": 0.024003267288208008, "step": 11227 }, { "epoch": 1.713104248046875e-05, "step": 11227, "training_step_time": 0.1077268123626709 }, { "epoch": 1.7132568359375e-05, "model_forward_time": 0.024229049682617188, "step": 11228 }, { "epoch": 1.7132568359375e-05, "step": 11228, "training_step_time": 0.10883426666259766 }, { "epoch": 1.713409423828125e-05, "model_forward_time": 0.02457118034362793, "step": 11229 }, { "epoch": 1.713409423828125e-05, "step": 11229, "training_step_time": 0.11219382286071777 }, { "epoch": 1.71356201171875e-05, "grad_norm": 0.36847159266471863, "learning_rate": 7.389425729478051e-05, "loss": 0.033, "step": 11230 }, { "epoch": 1.71356201171875e-05, "model_forward_time": 0.024662256240844727, "step": 11230 }, { "epoch": 1.71356201171875e-05, "step": 11230, "training_step_time": 0.11227750778198242 }, { "epoch": 1.713714599609375e-05, "model_forward_time": 0.02401566505432129, "step": 11231 }, { "epoch": 1.713714599609375e-05, "step": 11231, "training_step_time": 0.10807204246520996 }, { "epoch": 1.7138671875e-05, "model_forward_time": 0.026584148406982422, "step": 11232 }, { "epoch": 1.7138671875e-05, "step": 11232, "training_step_time": 0.10909628868103027 }, { "epoch": 1.714019775390625e-05, "model_forward_time": 0.02404475212097168, "step": 11233 }, { "epoch": 1.714019775390625e-05, "step": 11233, "training_step_time": 0.10792970657348633 }, { "epoch": 1.71417236328125e-05, "model_forward_time": 0.02445387840270996, "step": 11234 }, { "epoch": 1.71417236328125e-05, "step": 11234, "training_step_time": 0.10784673690795898 }, { "epoch": 1.714324951171875e-05, "model_forward_time": 0.02426433563232422, "step": 11235 }, { "epoch": 1.714324951171875e-05, "step": 11235, "training_step_time": 0.10753321647644043 }, { "epoch": 1.7144775390625e-05, "model_forward_time": 0.02454662322998047, "step": 11236 }, { "epoch": 1.7144775390625e-05, "step": 11236, "training_step_time": 0.10737967491149902 }, { "epoch": 1.714630126953125e-05, "model_forward_time": 0.02426934242248535, "step": 11237 }, { "epoch": 1.714630126953125e-05, "step": 11237, "training_step_time": 0.11509585380554199 }, { "epoch": 1.71478271484375e-05, "model_forward_time": 0.024796009063720703, "step": 11238 }, { "epoch": 1.71478271484375e-05, "step": 11238, "training_step_time": 0.11751389503479004 }, { "epoch": 1.714935302734375e-05, "model_forward_time": 0.026983976364135742, "step": 11239 }, { "epoch": 1.714935302734375e-05, "step": 11239, "training_step_time": 0.11664533615112305 }, { "epoch": 1.715087890625e-05, "grad_norm": 0.24231822788715363, "learning_rate": 7.384582796078184e-05, "loss": 0.0213, "step": 11240 }, { "epoch": 1.715087890625e-05, "model_forward_time": 0.030149459838867188, "step": 11240 }, { "epoch": 1.715087890625e-05, "step": 11240, "training_step_time": 0.15903639793395996 }, { "epoch": 1.715240478515625e-05, "model_forward_time": 0.026667356491088867, "step": 11241 }, { "epoch": 1.715240478515625e-05, "step": 11241, "training_step_time": 0.24108576774597168 }, { "epoch": 1.71539306640625e-05, "model_forward_time": 0.028002262115478516, "step": 11242 }, { "epoch": 1.71539306640625e-05, "step": 11242, "training_step_time": 0.2801830768585205 }, { "epoch": 1.715545654296875e-05, "model_forward_time": 0.04683423042297363, "step": 11243 }, { "epoch": 1.715545654296875e-05, "step": 11243, "training_step_time": 0.3141360282897949 }, { "epoch": 1.7156982421875e-05, "model_forward_time": 0.03125810623168945, "step": 11244 }, { "epoch": 1.7156982421875e-05, "step": 11244, "training_step_time": 0.3416590690612793 }, { "epoch": 1.715850830078125e-05, "model_forward_time": 0.0289766788482666, "step": 11245 }, { "epoch": 1.715850830078125e-05, "step": 11245, "training_step_time": 0.40026307106018066 }, { "epoch": 1.71600341796875e-05, "model_forward_time": 0.030750513076782227, "step": 11246 }, { "epoch": 1.71600341796875e-05, "step": 11246, "training_step_time": 0.3159065246582031 }, { "epoch": 1.716156005859375e-05, "model_forward_time": 0.030999422073364258, "step": 11247 }, { "epoch": 1.716156005859375e-05, "step": 11247, "training_step_time": 0.3515892028808594 }, { "epoch": 1.71630859375e-05, "model_forward_time": 0.0354619026184082, "step": 11248 }, { "epoch": 1.71630859375e-05, "step": 11248, "training_step_time": 0.2969551086425781 }, { "epoch": 1.716461181640625e-05, "model_forward_time": 0.03065633773803711, "step": 11249 }, { "epoch": 1.716461181640625e-05, "step": 11249, "training_step_time": 0.2698521614074707 }, { "epoch": 1.71661376953125e-05, "grad_norm": 0.4318647086620331, "learning_rate": 7.379736965185368e-05, "loss": 0.0178, "step": 11250 }, { "epoch": 1.71661376953125e-05, "model_forward_time": 0.032936811447143555, "step": 11250 }, { "epoch": 1.71661376953125e-05, "step": 11250, "training_step_time": 0.23099589347839355 }, { "epoch": 1.716766357421875e-05, "model_forward_time": 0.02979445457458496, "step": 11251 }, { "epoch": 1.716766357421875e-05, "step": 11251, "training_step_time": 0.22020721435546875 }, { "epoch": 1.7169189453125e-05, "model_forward_time": 0.0302276611328125, "step": 11252 }, { "epoch": 1.7169189453125e-05, "step": 11252, "training_step_time": 0.32690000534057617 }, { "epoch": 1.717071533203125e-05, "model_forward_time": 0.030982255935668945, "step": 11253 }, { "epoch": 1.717071533203125e-05, "step": 11253, "training_step_time": 0.2746727466583252 }, { "epoch": 1.71722412109375e-05, "model_forward_time": 0.029308319091796875, "step": 11254 }, { "epoch": 1.71722412109375e-05, "step": 11254, "training_step_time": 0.19051218032836914 }, { "epoch": 1.717376708984375e-05, "model_forward_time": 0.03016066551208496, "step": 11255 }, { "epoch": 1.717376708984375e-05, "step": 11255, "training_step_time": 0.15238165855407715 }, { "epoch": 1.717529296875e-05, "model_forward_time": 0.029612064361572266, "step": 11256 }, { "epoch": 1.717529296875e-05, "step": 11256, "training_step_time": 0.13900303840637207 }, { "epoch": 1.717681884765625e-05, "model_forward_time": 0.030167579650878906, "step": 11257 }, { "epoch": 1.717681884765625e-05, "step": 11257, "training_step_time": 0.1388835906982422 }, { "epoch": 1.71783447265625e-05, "model_forward_time": 0.02804422378540039, "step": 11258 }, { "epoch": 1.71783447265625e-05, "step": 11258, "training_step_time": 0.12262725830078125 }, { "epoch": 1.717987060546875e-05, "model_forward_time": 0.02742290496826172, "step": 11259 }, { "epoch": 1.717987060546875e-05, "step": 11259, "training_step_time": 0.12679076194763184 }, { "epoch": 1.7181396484375e-05, "grad_norm": 0.4221991300582886, "learning_rate": 7.374888242687746e-05, "loss": 0.0152, "step": 11260 }, { "epoch": 1.7181396484375e-05, "model_forward_time": 0.026491165161132812, "step": 11260 }, { "epoch": 1.7181396484375e-05, "step": 11260, "training_step_time": 0.11997270584106445 }, { "epoch": 1.718292236328125e-05, "model_forward_time": 0.027240991592407227, "step": 11261 }, { "epoch": 1.718292236328125e-05, "step": 11261, "training_step_time": 0.11971426010131836 }, { "epoch": 1.71844482421875e-05, "model_forward_time": 0.02800726890563965, "step": 11262 }, { "epoch": 1.71844482421875e-05, "step": 11262, "training_step_time": 0.11366128921508789 }, { "epoch": 1.718597412109375e-05, "model_forward_time": 0.02586054801940918, "step": 11263 }, { "epoch": 1.718597412109375e-05, "step": 11263, "training_step_time": 0.10940194129943848 }, { "epoch": 1.71875e-05, "model_forward_time": 0.029146671295166016, "step": 11264 }, { "epoch": 1.71875e-05, "step": 11264, "training_step_time": 0.10982251167297363 }, { "epoch": 1.718902587890625e-05, "model_forward_time": 0.024967193603515625, "step": 11265 }, { "epoch": 1.718902587890625e-05, "step": 11265, "training_step_time": 0.109954833984375 }, { "epoch": 1.71905517578125e-05, "model_forward_time": 0.02532672882080078, "step": 11266 }, { "epoch": 1.71905517578125e-05, "step": 11266, "training_step_time": 0.10913920402526855 }, { "epoch": 1.719207763671875e-05, "model_forward_time": 0.0249788761138916, "step": 11267 }, { "epoch": 1.719207763671875e-05, "step": 11267, "training_step_time": 0.10576081275939941 }, { "epoch": 1.7193603515625e-05, "model_forward_time": 0.02524280548095703, "step": 11268 }, { "epoch": 1.7193603515625e-05, "step": 11268, "training_step_time": 0.1308290958404541 }, { "epoch": 1.719512939453125e-05, "model_forward_time": 0.024198532104492188, "step": 11269 }, { "epoch": 1.719512939453125e-05, "step": 11269, "training_step_time": 0.19747185707092285 }, { "epoch": 1.71966552734375e-05, "grad_norm": 0.28328460454940796, "learning_rate": 7.37003663447697e-05, "loss": 0.0203, "step": 11270 }, { "epoch": 1.71966552734375e-05, "model_forward_time": 0.025131702423095703, "step": 11270 }, { "epoch": 1.71966552734375e-05, "step": 11270, "training_step_time": 0.19914460182189941 }, { "epoch": 1.719818115234375e-05, "model_forward_time": 0.02462601661682129, "step": 11271 }, { "epoch": 1.719818115234375e-05, "step": 11271, "training_step_time": 0.14717435836791992 }, { "epoch": 1.719970703125e-05, "model_forward_time": 0.024669647216796875, "step": 11272 }, { "epoch": 1.719970703125e-05, "step": 11272, "training_step_time": 0.19244861602783203 }, { "epoch": 1.720123291015625e-05, "model_forward_time": 0.024356603622436523, "step": 11273 }, { "epoch": 1.720123291015625e-05, "step": 11273, "training_step_time": 0.17815256118774414 }, { "epoch": 1.72027587890625e-05, "model_forward_time": 0.024710655212402344, "step": 11274 }, { "epoch": 1.72027587890625e-05, "step": 11274, "training_step_time": 0.1367356777191162 }, { "epoch": 1.720428466796875e-05, "model_forward_time": 0.025089502334594727, "step": 11275 }, { "epoch": 1.720428466796875e-05, "step": 11275, "training_step_time": 0.10929131507873535 }, { "epoch": 1.7205810546875e-05, "model_forward_time": 0.025282859802246094, "step": 11276 }, { "epoch": 1.7205810546875e-05, "step": 11276, "training_step_time": 0.1066431999206543 }, { "epoch": 1.720733642578125e-05, "model_forward_time": 0.025750398635864258, "step": 11277 }, { "epoch": 1.720733642578125e-05, "step": 11277, "training_step_time": 0.11887383460998535 }, { "epoch": 1.72088623046875e-05, "model_forward_time": 0.02523946762084961, "step": 11278 }, { "epoch": 1.72088623046875e-05, "step": 11278, "training_step_time": 0.20529890060424805 }, { "epoch": 1.721038818359375e-05, "model_forward_time": 0.024829387664794922, "step": 11279 }, { "epoch": 1.721038818359375e-05, "step": 11279, "training_step_time": 0.12160778045654297 }, { "epoch": 1.72119140625e-05, "grad_norm": 0.5873472690582275, "learning_rate": 7.365182146448205e-05, "loss": 0.0365, "step": 11280 }, { "epoch": 1.72119140625e-05, "model_forward_time": 0.024168968200683594, "step": 11280 }, { "epoch": 1.72119140625e-05, "step": 11280, "training_step_time": 0.11086606979370117 }, { "epoch": 1.721343994140625e-05, "model_forward_time": 0.02543330192565918, "step": 11281 }, { "epoch": 1.721343994140625e-05, "step": 11281, "training_step_time": 0.16066336631774902 }, { "epoch": 1.72149658203125e-05, "model_forward_time": 0.024605989456176758, "step": 11282 }, { "epoch": 1.72149658203125e-05, "step": 11282, "training_step_time": 0.17148900032043457 }, { "epoch": 1.721649169921875e-05, "model_forward_time": 0.024548768997192383, "step": 11283 }, { "epoch": 1.721649169921875e-05, "step": 11283, "training_step_time": 0.15686869621276855 }, { "epoch": 1.7218017578125e-05, "model_forward_time": 0.024563074111938477, "step": 11284 }, { "epoch": 1.7218017578125e-05, "step": 11284, "training_step_time": 0.11179065704345703 }, { "epoch": 1.721954345703125e-05, "model_forward_time": 0.024017333984375, "step": 11285 }, { "epoch": 1.721954345703125e-05, "step": 11285, "training_step_time": 0.1075735092163086 }, { "epoch": 1.72210693359375e-05, "model_forward_time": 0.02544546127319336, "step": 11286 }, { "epoch": 1.72210693359375e-05, "step": 11286, "training_step_time": 0.11088204383850098 }, { "epoch": 1.722259521484375e-05, "model_forward_time": 0.02534198760986328, "step": 11287 }, { "epoch": 1.722259521484375e-05, "step": 11287, "training_step_time": 0.14317846298217773 }, { "epoch": 1.722412109375e-05, "model_forward_time": 0.025422096252441406, "step": 11288 }, { "epoch": 1.722412109375e-05, "step": 11288, "training_step_time": 0.1554572582244873 }, { "epoch": 1.722564697265625e-05, "model_forward_time": 0.025255441665649414, "step": 11289 }, { "epoch": 1.722564697265625e-05, "step": 11289, "training_step_time": 0.1456451416015625 }, { "epoch": 1.72271728515625e-05, "grad_norm": 0.5029672384262085, "learning_rate": 7.36032478450011e-05, "loss": 0.0234, "step": 11290 }, { "epoch": 1.72271728515625e-05, "model_forward_time": 0.024483919143676758, "step": 11290 }, { "epoch": 1.72271728515625e-05, "step": 11290, "training_step_time": 0.14616131782531738 }, { "epoch": 1.722869873046875e-05, "model_forward_time": 0.025075674057006836, "step": 11291 }, { "epoch": 1.722869873046875e-05, "step": 11291, "training_step_time": 0.1374378204345703 }, { "epoch": 1.7230224609375e-05, "model_forward_time": 0.02429986000061035, "step": 11292 }, { "epoch": 1.7230224609375e-05, "step": 11292, "training_step_time": 0.127288818359375 }, { "epoch": 1.723175048828125e-05, "model_forward_time": 0.025243520736694336, "step": 11293 }, { "epoch": 1.723175048828125e-05, "step": 11293, "training_step_time": 0.14596343040466309 }, { "epoch": 1.72332763671875e-05, "model_forward_time": 0.025833606719970703, "step": 11294 }, { "epoch": 1.72332763671875e-05, "step": 11294, "training_step_time": 0.12303948402404785 }, { "epoch": 1.723480224609375e-05, "model_forward_time": 0.026549339294433594, "step": 11295 }, { "epoch": 1.723480224609375e-05, "step": 11295, "training_step_time": 0.2084650993347168 }, { "epoch": 1.7236328125e-05, "model_forward_time": 0.024619102478027344, "step": 11296 }, { "epoch": 1.7236328125e-05, "step": 11296, "training_step_time": 0.1325082778930664 }, { "epoch": 1.723785400390625e-05, "model_forward_time": 0.02473139762878418, "step": 11297 }, { "epoch": 1.723785400390625e-05, "step": 11297, "training_step_time": 0.11129879951477051 }, { "epoch": 1.72393798828125e-05, "model_forward_time": 0.025101184844970703, "step": 11298 }, { "epoch": 1.72393798828125e-05, "step": 11298, "training_step_time": 0.11524081230163574 }, { "epoch": 1.724090576171875e-05, "model_forward_time": 0.02512335777282715, "step": 11299 }, { "epoch": 1.724090576171875e-05, "step": 11299, "training_step_time": 0.10977411270141602 }, { "epoch": 1.7242431640625e-05, "grad_norm": 0.423922061920166, "learning_rate": 7.355464554534837e-05, "loss": 0.0212, "step": 11300 }, { "epoch": 1.7242431640625e-05, "model_forward_time": 0.024432897567749023, "step": 11300 }, { "epoch": 1.7242431640625e-05, "step": 11300, "training_step_time": 0.1138761043548584 }, { "epoch": 1.724395751953125e-05, "model_forward_time": 0.02531123161315918, "step": 11301 }, { "epoch": 1.724395751953125e-05, "step": 11301, "training_step_time": 0.1101381778717041 }, { "epoch": 1.72454833984375e-05, "model_forward_time": 0.02599191665649414, "step": 11302 }, { "epoch": 1.72454833984375e-05, "step": 11302, "training_step_time": 0.1109776496887207 }, { "epoch": 1.724700927734375e-05, "model_forward_time": 0.025195837020874023, "step": 11303 }, { "epoch": 1.724700927734375e-05, "step": 11303, "training_step_time": 0.11104774475097656 }, { "epoch": 1.724853515625e-05, "model_forward_time": 0.02540278434753418, "step": 11304 }, { "epoch": 1.724853515625e-05, "step": 11304, "training_step_time": 0.1133122444152832 }, { "epoch": 1.725006103515625e-05, "model_forward_time": 0.025713682174682617, "step": 11305 }, { "epoch": 1.725006103515625e-05, "step": 11305, "training_step_time": 0.10701727867126465 }, { "epoch": 1.72515869140625e-05, "model_forward_time": 0.02561044692993164, "step": 11306 }, { "epoch": 1.72515869140625e-05, "step": 11306, "training_step_time": 0.11105775833129883 }, { "epoch": 1.725311279296875e-05, "model_forward_time": 0.02550792694091797, "step": 11307 }, { "epoch": 1.725311279296875e-05, "step": 11307, "training_step_time": 0.10890984535217285 }, { "epoch": 1.7254638671875e-05, "model_forward_time": 0.025569915771484375, "step": 11308 }, { "epoch": 1.7254638671875e-05, "step": 11308, "training_step_time": 0.1134490966796875 }, { "epoch": 1.725616455078125e-05, "model_forward_time": 0.024857759475708008, "step": 11309 }, { "epoch": 1.725616455078125e-05, "step": 11309, "training_step_time": 0.10868215560913086 }, { "epoch": 1.72576904296875e-05, "grad_norm": 0.802464485168457, "learning_rate": 7.350601462458024e-05, "loss": 0.0242, "step": 11310 }, { "epoch": 1.72576904296875e-05, "model_forward_time": 0.026113510131835938, "step": 11310 }, { "epoch": 1.72576904296875e-05, "step": 11310, "training_step_time": 0.10826349258422852 }, { "epoch": 1.725921630859375e-05, "model_forward_time": 0.025789499282836914, "step": 11311 }, { "epoch": 1.725921630859375e-05, "step": 11311, "training_step_time": 0.1684434413909912 }, { "epoch": 1.72607421875e-05, "model_forward_time": 0.024709224700927734, "step": 11312 }, { "epoch": 1.72607421875e-05, "step": 11312, "training_step_time": 0.15661311149597168 }, { "epoch": 1.726226806640625e-05, "model_forward_time": 0.025159597396850586, "step": 11313 }, { "epoch": 1.726226806640625e-05, "step": 11313, "training_step_time": 0.11499834060668945 }, { "epoch": 1.72637939453125e-05, "model_forward_time": 0.025951862335205078, "step": 11314 }, { "epoch": 1.72637939453125e-05, "step": 11314, "training_step_time": 0.1122126579284668 }, { "epoch": 1.726531982421875e-05, "model_forward_time": 0.025228023529052734, "step": 11315 }, { "epoch": 1.726531982421875e-05, "step": 11315, "training_step_time": 0.16967177391052246 }, { "epoch": 1.7266845703125e-05, "model_forward_time": 0.025134801864624023, "step": 11316 }, { "epoch": 1.7266845703125e-05, "step": 11316, "training_step_time": 0.20160579681396484 }, { "epoch": 1.726837158203125e-05, "model_forward_time": 0.02446770668029785, "step": 11317 }, { "epoch": 1.726837158203125e-05, "step": 11317, "training_step_time": 0.14013338088989258 }, { "epoch": 1.72698974609375e-05, "model_forward_time": 0.026013851165771484, "step": 11318 }, { "epoch": 1.72698974609375e-05, "step": 11318, "training_step_time": 0.10466122627258301 }, { "epoch": 1.727142333984375e-05, "model_forward_time": 0.02592945098876953, "step": 11319 }, { "epoch": 1.727142333984375e-05, "step": 11319, "training_step_time": 0.11877894401550293 }, { "epoch": 1.727294921875e-05, "grad_norm": 0.7488951683044434, "learning_rate": 7.345735514178787e-05, "loss": 0.0267, "step": 11320 }, { "epoch": 1.727294921875e-05, "model_forward_time": 0.027347803115844727, "step": 11320 }, { "epoch": 1.727294921875e-05, "step": 11320, "training_step_time": 0.11244988441467285 }, { "epoch": 1.727447509765625e-05, "model_forward_time": 0.026203393936157227, "step": 11321 }, { "epoch": 1.727447509765625e-05, "step": 11321, "training_step_time": 0.1283702850341797 }, { "epoch": 1.72760009765625e-05, "model_forward_time": 0.026607990264892578, "step": 11322 }, { "epoch": 1.72760009765625e-05, "step": 11322, "training_step_time": 0.1818840503692627 }, { "epoch": 1.727752685546875e-05, "model_forward_time": 0.025214433670043945, "step": 11323 }, { "epoch": 1.727752685546875e-05, "step": 11323, "training_step_time": 0.1645801067352295 }, { "epoch": 1.7279052734375e-05, "model_forward_time": 0.02479386329650879, "step": 11324 }, { "epoch": 1.7279052734375e-05, "step": 11324, "training_step_time": 0.17442893981933594 }, { "epoch": 1.728057861328125e-05, "model_forward_time": 0.02448749542236328, "step": 11325 }, { "epoch": 1.728057861328125e-05, "step": 11325, "training_step_time": 0.12282729148864746 }, { "epoch": 1.72821044921875e-05, "model_forward_time": 0.025202274322509766, "step": 11326 }, { "epoch": 1.72821044921875e-05, "step": 11326, "training_step_time": 0.12098073959350586 }, { "epoch": 1.728363037109375e-05, "model_forward_time": 0.025063514709472656, "step": 11327 }, { "epoch": 1.728363037109375e-05, "step": 11327, "training_step_time": 0.11560511589050293 }, { "epoch": 1.728515625e-05, "model_forward_time": 0.02518439292907715, "step": 11328 }, { "epoch": 1.728515625e-05, "step": 11328, "training_step_time": 0.11234426498413086 }, { "epoch": 1.728668212890625e-05, "model_forward_time": 0.02424335479736328, "step": 11329 }, { "epoch": 1.728668212890625e-05, "step": 11329, "training_step_time": 0.11182308197021484 }, { "epoch": 1.72882080078125e-05, "grad_norm": 0.5162204504013062, "learning_rate": 7.340866715609712e-05, "loss": 0.0237, "step": 11330 }, { "epoch": 1.72882080078125e-05, "model_forward_time": 0.02581024169921875, "step": 11330 }, { "epoch": 1.72882080078125e-05, "step": 11330, "training_step_time": 0.11057281494140625 }, { "epoch": 1.728973388671875e-05, "model_forward_time": 0.025432586669921875, "step": 11331 }, { "epoch": 1.728973388671875e-05, "step": 11331, "training_step_time": 0.11118388175964355 }, { "epoch": 1.7291259765625e-05, "model_forward_time": 0.025179147720336914, "step": 11332 }, { "epoch": 1.7291259765625e-05, "step": 11332, "training_step_time": 0.10761761665344238 }, { "epoch": 1.729278564453125e-05, "model_forward_time": 0.025475740432739258, "step": 11333 }, { "epoch": 1.729278564453125e-05, "step": 11333, "training_step_time": 0.1110997200012207 }, { "epoch": 1.72943115234375e-05, "model_forward_time": 0.025519132614135742, "step": 11334 }, { "epoch": 1.72943115234375e-05, "step": 11334, "training_step_time": 0.10777139663696289 }, { "epoch": 1.729583740234375e-05, "model_forward_time": 0.025228500366210938, "step": 11335 }, { "epoch": 1.729583740234375e-05, "step": 11335, "training_step_time": 0.11074209213256836 }, { "epoch": 1.729736328125e-05, "model_forward_time": 0.0243685245513916, "step": 11336 }, { "epoch": 1.729736328125e-05, "step": 11336, "training_step_time": 0.10677742958068848 }, { "epoch": 1.729888916015625e-05, "model_forward_time": 0.02434539794921875, "step": 11337 }, { "epoch": 1.729888916015625e-05, "step": 11337, "training_step_time": 0.10769414901733398 }, { "epoch": 1.73004150390625e-05, "model_forward_time": 0.02644038200378418, "step": 11338 }, { "epoch": 1.73004150390625e-05, "step": 11338, "training_step_time": 0.14895009994506836 }, { "epoch": 1.730194091796875e-05, "model_forward_time": 0.027883291244506836, "step": 11339 }, { "epoch": 1.730194091796875e-05, "step": 11339, "training_step_time": 0.11562299728393555 }, { "epoch": 1.7303466796875e-05, "grad_norm": 0.6040328145027161, "learning_rate": 7.335995072666848e-05, "loss": 0.0211, "step": 11340 }, { "epoch": 1.7303466796875e-05, "model_forward_time": 0.026317119598388672, "step": 11340 }, { "epoch": 1.7303466796875e-05, "step": 11340, "training_step_time": 0.22396183013916016 }, { "epoch": 1.730499267578125e-05, "model_forward_time": 0.024992942810058594, "step": 11341 }, { "epoch": 1.730499267578125e-05, "step": 11341, "training_step_time": 0.12385010719299316 }, { "epoch": 1.73065185546875e-05, "model_forward_time": 0.02417612075805664, "step": 11342 }, { "epoch": 1.73065185546875e-05, "step": 11342, "training_step_time": 0.11368632316589355 }, { "epoch": 1.730804443359375e-05, "model_forward_time": 0.025532960891723633, "step": 11343 }, { "epoch": 1.730804443359375e-05, "step": 11343, "training_step_time": 0.12131762504577637 }, { "epoch": 1.73095703125e-05, "model_forward_time": 0.025728464126586914, "step": 11344 }, { "epoch": 1.73095703125e-05, "step": 11344, "training_step_time": 0.11219191551208496 }, { "epoch": 1.731109619140625e-05, "model_forward_time": 0.025579452514648438, "step": 11345 }, { "epoch": 1.731109619140625e-05, "step": 11345, "training_step_time": 0.10891866683959961 }, { "epoch": 1.73126220703125e-05, "model_forward_time": 0.025442123413085938, "step": 11346 }, { "epoch": 1.73126220703125e-05, "step": 11346, "training_step_time": 0.1104280948638916 }, { "epoch": 1.731414794921875e-05, "model_forward_time": 0.02526712417602539, "step": 11347 }, { "epoch": 1.731414794921875e-05, "step": 11347, "training_step_time": 0.10797810554504395 }, { "epoch": 1.7315673828125e-05, "model_forward_time": 0.025713443756103516, "step": 11348 }, { "epoch": 1.7315673828125e-05, "step": 11348, "training_step_time": 0.11075592041015625 }, { "epoch": 1.731719970703125e-05, "model_forward_time": 0.02565741539001465, "step": 11349 }, { "epoch": 1.731719970703125e-05, "step": 11349, "training_step_time": 0.10775184631347656 }, { "epoch": 1.73187255859375e-05, "grad_norm": 0.3311750292778015, "learning_rate": 7.331120591269701e-05, "loss": 0.0452, "step": 11350 }, { "epoch": 1.73187255859375e-05, "model_forward_time": 0.025605440139770508, "step": 11350 }, { "epoch": 1.73187255859375e-05, "step": 11350, "training_step_time": 0.11237812042236328 }, { "epoch": 1.732025146484375e-05, "model_forward_time": 0.025423049926757812, "step": 11351 }, { "epoch": 1.732025146484375e-05, "step": 11351, "training_step_time": 0.10855555534362793 }, { "epoch": 1.732177734375e-05, "model_forward_time": 0.025609493255615234, "step": 11352 }, { "epoch": 1.732177734375e-05, "step": 11352, "training_step_time": 0.1083838939666748 }, { "epoch": 1.732330322265625e-05, "model_forward_time": 0.025478601455688477, "step": 11353 }, { "epoch": 1.732330322265625e-05, "step": 11353, "training_step_time": 0.10992193222045898 }, { "epoch": 1.73248291015625e-05, "model_forward_time": 0.025615453720092773, "step": 11354 }, { "epoch": 1.73248291015625e-05, "step": 11354, "training_step_time": 0.112457275390625 }, { "epoch": 1.732635498046875e-05, "model_forward_time": 0.02492380142211914, "step": 11355 }, { "epoch": 1.732635498046875e-05, "step": 11355, "training_step_time": 0.11375808715820312 }, { "epoch": 1.7327880859375e-05, "model_forward_time": 0.024895668029785156, "step": 11356 }, { "epoch": 1.7327880859375e-05, "step": 11356, "training_step_time": 0.12656879425048828 }, { "epoch": 1.732940673828125e-05, "model_forward_time": 0.026276350021362305, "step": 11357 }, { "epoch": 1.732940673828125e-05, "step": 11357, "training_step_time": 0.1300828456878662 }, { "epoch": 1.73309326171875e-05, "model_forward_time": 0.02550983428955078, "step": 11358 }, { "epoch": 1.73309326171875e-05, "step": 11358, "training_step_time": 0.18550372123718262 }, { "epoch": 1.733245849609375e-05, "model_forward_time": 0.025446653366088867, "step": 11359 }, { "epoch": 1.733245849609375e-05, "step": 11359, "training_step_time": 0.11735057830810547 }, { "epoch": 1.7333984375e-05, "grad_norm": 0.3064781725406647, "learning_rate": 7.326243277341227e-05, "loss": 0.0209, "step": 11360 }, { "epoch": 1.7333984375e-05, "model_forward_time": 0.024016857147216797, "step": 11360 }, { "epoch": 1.7333984375e-05, "step": 11360, "training_step_time": 0.1140143871307373 }, { "epoch": 1.733551025390625e-05, "model_forward_time": 0.02623891830444336, "step": 11361 }, { "epoch": 1.733551025390625e-05, "step": 11361, "training_step_time": 0.16826081275939941 }, { "epoch": 1.73370361328125e-05, "model_forward_time": 0.025244951248168945, "step": 11362 }, { "epoch": 1.73370361328125e-05, "step": 11362, "training_step_time": 0.11737847328186035 }, { "epoch": 1.733856201171875e-05, "model_forward_time": 0.02947831153869629, "step": 11363 }, { "epoch": 1.733856201171875e-05, "step": 11363, "training_step_time": 0.11568212509155273 }, { "epoch": 1.7340087890625e-05, "model_forward_time": 0.02548670768737793, "step": 11364 }, { "epoch": 1.7340087890625e-05, "step": 11364, "training_step_time": 0.11246085166931152 }, { "epoch": 1.734161376953125e-05, "model_forward_time": 0.024087905883789062, "step": 11365 }, { "epoch": 1.734161376953125e-05, "step": 11365, "training_step_time": 0.11239814758300781 }, { "epoch": 1.73431396484375e-05, "model_forward_time": 0.0251009464263916, "step": 11366 }, { "epoch": 1.73431396484375e-05, "step": 11366, "training_step_time": 0.18930912017822266 }, { "epoch": 1.734466552734375e-05, "model_forward_time": 0.02454066276550293, "step": 11367 }, { "epoch": 1.734466552734375e-05, "step": 11367, "training_step_time": 0.1839885711669922 }, { "epoch": 1.734619140625e-05, "model_forward_time": 0.02461862564086914, "step": 11368 }, { "epoch": 1.734619140625e-05, "step": 11368, "training_step_time": 0.11968660354614258 }, { "epoch": 1.734771728515625e-05, "model_forward_time": 0.02444624900817871, "step": 11369 }, { "epoch": 1.734771728515625e-05, "step": 11369, "training_step_time": 0.13325166702270508 }, { "epoch": 1.73492431640625e-05, "grad_norm": 0.3132316470146179, "learning_rate": 7.32136313680782e-05, "loss": 0.0148, "step": 11370 }, { "epoch": 1.73492431640625e-05, "model_forward_time": 0.025119543075561523, "step": 11370 }, { "epoch": 1.73492431640625e-05, "step": 11370, "training_step_time": 0.15434646606445312 }, { "epoch": 1.735076904296875e-05, "model_forward_time": 0.024571657180786133, "step": 11371 }, { "epoch": 1.735076904296875e-05, "step": 11371, "training_step_time": 0.2157139778137207 }, { "epoch": 1.7352294921875e-05, "model_forward_time": 0.024752140045166016, "step": 11372 }, { "epoch": 1.7352294921875e-05, "step": 11372, "training_step_time": 0.11632943153381348 }, { "epoch": 1.735382080078125e-05, "model_forward_time": 0.024642467498779297, "step": 11373 }, { "epoch": 1.735382080078125e-05, "step": 11373, "training_step_time": 0.10507845878601074 }, { "epoch": 1.73553466796875e-05, "model_forward_time": 0.02550339698791504, "step": 11374 }, { "epoch": 1.73553466796875e-05, "step": 11374, "training_step_time": 0.10614943504333496 }, { "epoch": 1.735687255859375e-05, "model_forward_time": 0.025216102600097656, "step": 11375 }, { "epoch": 1.735687255859375e-05, "step": 11375, "training_step_time": 0.10657405853271484 }, { "epoch": 1.73583984375e-05, "model_forward_time": 0.02566051483154297, "step": 11376 }, { "epoch": 1.73583984375e-05, "step": 11376, "training_step_time": 0.10704159736633301 }, { "epoch": 1.735992431640625e-05, "model_forward_time": 0.02764749526977539, "step": 11377 }, { "epoch": 1.735992431640625e-05, "step": 11377, "training_step_time": 0.10915279388427734 }, { "epoch": 1.73614501953125e-05, "model_forward_time": 0.025922536849975586, "step": 11378 }, { "epoch": 1.73614501953125e-05, "step": 11378, "training_step_time": 0.11047077178955078 }, { "epoch": 1.736297607421875e-05, "model_forward_time": 0.025161266326904297, "step": 11379 }, { "epoch": 1.736297607421875e-05, "step": 11379, "training_step_time": 0.10535717010498047 }, { "epoch": 1.7364501953125e-05, "grad_norm": 0.2911783754825592, "learning_rate": 7.316480175599309e-05, "loss": 0.0229, "step": 11380 }, { "epoch": 1.7364501953125e-05, "model_forward_time": 0.024929523468017578, "step": 11380 }, { "epoch": 1.7364501953125e-05, "step": 11380, "training_step_time": 0.11052322387695312 }, { "epoch": 1.736602783203125e-05, "model_forward_time": 0.024608135223388672, "step": 11381 }, { "epoch": 1.736602783203125e-05, "step": 11381, "training_step_time": 0.10532093048095703 }, { "epoch": 1.73675537109375e-05, "model_forward_time": 0.02503228187561035, "step": 11382 }, { "epoch": 1.73675537109375e-05, "step": 11382, "training_step_time": 0.10649442672729492 }, { "epoch": 1.736907958984375e-05, "model_forward_time": 0.024957895278930664, "step": 11383 }, { "epoch": 1.736907958984375e-05, "step": 11383, "training_step_time": 0.14383554458618164 }, { "epoch": 1.737060546875e-05, "model_forward_time": 0.025818347930908203, "step": 11384 }, { "epoch": 1.737060546875e-05, "step": 11384, "training_step_time": 0.10758614540100098 }, { "epoch": 1.737213134765625e-05, "model_forward_time": 0.02512049674987793, "step": 11385 }, { "epoch": 1.737213134765625e-05, "step": 11385, "training_step_time": 0.11025166511535645 }, { "epoch": 1.73736572265625e-05, "model_forward_time": 0.025229930877685547, "step": 11386 }, { "epoch": 1.73736572265625e-05, "step": 11386, "training_step_time": 0.13555216789245605 }, { "epoch": 1.737518310546875e-05, "model_forward_time": 0.025229930877685547, "step": 11387 }, { "epoch": 1.737518310546875e-05, "step": 11387, "training_step_time": 0.1233208179473877 }, { "epoch": 1.7376708984375e-05, "model_forward_time": 0.024739503860473633, "step": 11388 }, { "epoch": 1.7376708984375e-05, "step": 11388, "training_step_time": 0.1121053695678711 }, { "epoch": 1.737823486328125e-05, "model_forward_time": 0.025371074676513672, "step": 11389 }, { "epoch": 1.737823486328125e-05, "step": 11389, "training_step_time": 0.12697649002075195 }, { "epoch": 1.73797607421875e-05, "grad_norm": 0.2643647789955139, "learning_rate": 7.311594399648957e-05, "loss": 0.0271, "step": 11390 }, { "epoch": 1.73797607421875e-05, "model_forward_time": 0.025416851043701172, "step": 11390 }, { "epoch": 1.73797607421875e-05, "step": 11390, "training_step_time": 0.13001132011413574 }, { "epoch": 1.738128662109375e-05, "model_forward_time": 0.024435997009277344, "step": 11391 }, { "epoch": 1.738128662109375e-05, "step": 11391, "training_step_time": 0.13266682624816895 }, { "epoch": 1.73828125e-05, "model_forward_time": 0.024445295333862305, "step": 11392 }, { "epoch": 1.73828125e-05, "step": 11392, "training_step_time": 0.14111018180847168 }, { "epoch": 1.738433837890625e-05, "model_forward_time": 0.025170564651489258, "step": 11393 }, { "epoch": 1.738433837890625e-05, "step": 11393, "training_step_time": 0.1332406997680664 }, { "epoch": 1.73858642578125e-05, "model_forward_time": 0.02453136444091797, "step": 11394 }, { "epoch": 1.73858642578125e-05, "step": 11394, "training_step_time": 0.13200998306274414 }, { "epoch": 1.738739013671875e-05, "model_forward_time": 0.024686574935913086, "step": 11395 }, { "epoch": 1.738739013671875e-05, "step": 11395, "training_step_time": 0.13438081741333008 }, { "epoch": 1.7388916015625e-05, "model_forward_time": 0.02545952796936035, "step": 11396 }, { "epoch": 1.7388916015625e-05, "step": 11396, "training_step_time": 0.12328314781188965 }, { "epoch": 1.739044189453125e-05, "model_forward_time": 0.02444171905517578, "step": 11397 }, { "epoch": 1.739044189453125e-05, "step": 11397, "training_step_time": 0.1197969913482666 }, { "epoch": 1.73919677734375e-05, "model_forward_time": 0.024901866912841797, "step": 11398 }, { "epoch": 1.73919677734375e-05, "step": 11398, "training_step_time": 0.12039685249328613 }, { "epoch": 1.739349365234375e-05, "model_forward_time": 0.02532029151916504, "step": 11399 }, { "epoch": 1.739349365234375e-05, "step": 11399, "training_step_time": 0.11009931564331055 }, { "epoch": 1.739501953125e-05, "grad_norm": 0.2940637767314911, "learning_rate": 7.30670581489344e-05, "loss": 0.0312, "step": 11400 }, { "epoch": 1.739501953125e-05, "model_forward_time": 0.025704145431518555, "step": 11400 }, { "epoch": 1.739501953125e-05, "step": 11400, "training_step_time": 0.17423462867736816 }, { "epoch": 1.739654541015625e-05, "model_forward_time": 0.024489164352416992, "step": 11401 }, { "epoch": 1.739654541015625e-05, "step": 11401, "training_step_time": 0.14111638069152832 }, { "epoch": 1.73980712890625e-05, "model_forward_time": 0.02502298355102539, "step": 11402 }, { "epoch": 1.73980712890625e-05, "step": 11402, "training_step_time": 0.10875272750854492 }, { "epoch": 1.739959716796875e-05, "model_forward_time": 0.02414989471435547, "step": 11403 }, { "epoch": 1.739959716796875e-05, "step": 11403, "training_step_time": 0.1631312370300293 }, { "epoch": 1.7401123046875e-05, "model_forward_time": 0.02464008331298828, "step": 11404 }, { "epoch": 1.7401123046875e-05, "step": 11404, "training_step_time": 0.15886354446411133 }, { "epoch": 1.740264892578125e-05, "model_forward_time": 0.025199413299560547, "step": 11405 }, { "epoch": 1.740264892578125e-05, "step": 11405, "training_step_time": 0.2125260829925537 }, { "epoch": 1.74041748046875e-05, "model_forward_time": 0.024363279342651367, "step": 11406 }, { "epoch": 1.74041748046875e-05, "step": 11406, "training_step_time": 0.1079411506652832 }, { "epoch": 1.740570068359375e-05, "model_forward_time": 0.024436473846435547, "step": 11407 }, { "epoch": 1.740570068359375e-05, "step": 11407, "training_step_time": 0.11234784126281738 }, { "epoch": 1.74072265625e-05, "model_forward_time": 0.02502608299255371, "step": 11408 }, { "epoch": 1.74072265625e-05, "step": 11408, "training_step_time": 0.11837577819824219 }, { "epoch": 1.740875244140625e-05, "model_forward_time": 0.025286197662353516, "step": 11409 }, { "epoch": 1.740875244140625e-05, "step": 11409, "training_step_time": 0.11015486717224121 }, { "epoch": 1.74102783203125e-05, "grad_norm": 1.000503659248352, "learning_rate": 7.301814427272849e-05, "loss": 0.0217, "step": 11410 }, { "epoch": 1.74102783203125e-05, "model_forward_time": 0.025148630142211914, "step": 11410 }, { "epoch": 1.74102783203125e-05, "step": 11410, "training_step_time": 0.18817424774169922 }, { "epoch": 1.741180419921875e-05, "model_forward_time": 0.024494647979736328, "step": 11411 }, { "epoch": 1.741180419921875e-05, "step": 11411, "training_step_time": 0.10843729972839355 }, { "epoch": 1.7413330078125e-05, "model_forward_time": 0.024289369583129883, "step": 11412 }, { "epoch": 1.7413330078125e-05, "step": 11412, "training_step_time": 0.11371278762817383 }, { "epoch": 1.741485595703125e-05, "model_forward_time": 0.026688575744628906, "step": 11413 }, { "epoch": 1.741485595703125e-05, "step": 11413, "training_step_time": 0.13641595840454102 }, { "epoch": 1.74163818359375e-05, "model_forward_time": 0.024964094161987305, "step": 11414 }, { "epoch": 1.74163818359375e-05, "step": 11414, "training_step_time": 0.15340495109558105 }, { "epoch": 1.741790771484375e-05, "model_forward_time": 0.026422739028930664, "step": 11415 }, { "epoch": 1.741790771484375e-05, "step": 11415, "training_step_time": 0.10624217987060547 }, { "epoch": 1.741943359375e-05, "model_forward_time": 0.024811506271362305, "step": 11416 }, { "epoch": 1.741943359375e-05, "step": 11416, "training_step_time": 0.12379908561706543 }, { "epoch": 1.742095947265625e-05, "model_forward_time": 0.025359153747558594, "step": 11417 }, { "epoch": 1.742095947265625e-05, "step": 11417, "training_step_time": 0.1122884750366211 }, { "epoch": 1.74224853515625e-05, "model_forward_time": 0.025385618209838867, "step": 11418 }, { "epoch": 1.74224853515625e-05, "step": 11418, "training_step_time": 0.1075444221496582 }, { "epoch": 1.742401123046875e-05, "model_forward_time": 0.025077104568481445, "step": 11419 }, { "epoch": 1.742401123046875e-05, "step": 11419, "training_step_time": 0.1084439754486084 }, { "epoch": 1.7425537109375e-05, "grad_norm": 0.2606498599052429, "learning_rate": 7.296920242730682e-05, "loss": 0.0315, "step": 11420 }, { "epoch": 1.7425537109375e-05, "model_forward_time": 0.02472066879272461, "step": 11420 }, { "epoch": 1.7425537109375e-05, "step": 11420, "training_step_time": 0.10718297958374023 }, { "epoch": 1.742706298828125e-05, "model_forward_time": 0.025311946868896484, "step": 11421 }, { "epoch": 1.742706298828125e-05, "step": 11421, "training_step_time": 0.10581040382385254 }, { "epoch": 1.74285888671875e-05, "model_forward_time": 0.025194168090820312, "step": 11422 }, { "epoch": 1.74285888671875e-05, "step": 11422, "training_step_time": 0.10973286628723145 }, { "epoch": 1.743011474609375e-05, "model_forward_time": 0.025191545486450195, "step": 11423 }, { "epoch": 1.743011474609375e-05, "step": 11423, "training_step_time": 0.11003780364990234 }, { "epoch": 1.7431640625e-05, "model_forward_time": 0.02537083625793457, "step": 11424 }, { "epoch": 1.7431640625e-05, "step": 11424, "training_step_time": 0.10600018501281738 }, { "epoch": 1.743316650390625e-05, "model_forward_time": 0.025473594665527344, "step": 11425 }, { "epoch": 1.743316650390625e-05, "step": 11425, "training_step_time": 0.10616827011108398 }, { "epoch": 1.74346923828125e-05, "model_forward_time": 0.024954557418823242, "step": 11426 }, { "epoch": 1.74346923828125e-05, "step": 11426, "training_step_time": 0.11044883728027344 }, { "epoch": 1.743621826171875e-05, "model_forward_time": 0.025084257125854492, "step": 11427 }, { "epoch": 1.743621826171875e-05, "step": 11427, "training_step_time": 0.11030840873718262 }, { "epoch": 1.7437744140625e-05, "model_forward_time": 0.02801227569580078, "step": 11428 }, { "epoch": 1.7437744140625e-05, "step": 11428, "training_step_time": 0.15077877044677734 }, { "epoch": 1.743927001953125e-05, "model_forward_time": 0.025511980056762695, "step": 11429 }, { "epoch": 1.743927001953125e-05, "step": 11429, "training_step_time": 0.10869455337524414 }, { "epoch": 1.74407958984375e-05, "grad_norm": 0.2633598744869232, "learning_rate": 7.292023267213835e-05, "loss": 0.0202, "step": 11430 }, { "epoch": 1.74407958984375e-05, "model_forward_time": 0.024935245513916016, "step": 11430 }, { "epoch": 1.74407958984375e-05, "step": 11430, "training_step_time": 0.11026573181152344 }, { "epoch": 1.744232177734375e-05, "model_forward_time": 0.025504112243652344, "step": 11431 }, { "epoch": 1.744232177734375e-05, "step": 11431, "training_step_time": 0.12179231643676758 }, { "epoch": 1.744384765625e-05, "model_forward_time": 0.025516271591186523, "step": 11432 }, { "epoch": 1.744384765625e-05, "step": 11432, "training_step_time": 0.1312730312347412 }, { "epoch": 1.744537353515625e-05, "model_forward_time": 0.024819135665893555, "step": 11433 }, { "epoch": 1.744537353515625e-05, "step": 11433, "training_step_time": 0.11442136764526367 }, { "epoch": 1.74468994140625e-05, "model_forward_time": 0.02542257308959961, "step": 11434 }, { "epoch": 1.74468994140625e-05, "step": 11434, "training_step_time": 0.10956192016601562 }, { "epoch": 1.744842529296875e-05, "model_forward_time": 0.025786876678466797, "step": 11435 }, { "epoch": 1.744842529296875e-05, "step": 11435, "training_step_time": 0.10843706130981445 }, { "epoch": 1.7449951171875e-05, "model_forward_time": 0.025321483612060547, "step": 11436 }, { "epoch": 1.7449951171875e-05, "step": 11436, "training_step_time": 0.1074676513671875 }, { "epoch": 1.745147705078125e-05, "model_forward_time": 0.02520895004272461, "step": 11437 }, { "epoch": 1.745147705078125e-05, "step": 11437, "training_step_time": 0.10763692855834961 }, { "epoch": 1.74530029296875e-05, "model_forward_time": 0.02523660659790039, "step": 11438 }, { "epoch": 1.74530029296875e-05, "step": 11438, "training_step_time": 0.11479043960571289 }, { "epoch": 1.745452880859375e-05, "model_forward_time": 0.02530074119567871, "step": 11439 }, { "epoch": 1.745452880859375e-05, "step": 11439, "training_step_time": 0.10747933387756348 }, { "epoch": 1.74560546875e-05, "grad_norm": 0.32708433270454407, "learning_rate": 7.287123506672595e-05, "loss": 0.0186, "step": 11440 }, { "epoch": 1.74560546875e-05, "model_forward_time": 0.024248838424682617, "step": 11440 }, { "epoch": 1.74560546875e-05, "step": 11440, "training_step_time": 0.10738229751586914 }, { "epoch": 1.745758056640625e-05, "model_forward_time": 0.024587154388427734, "step": 11441 }, { "epoch": 1.745758056640625e-05, "step": 11441, "training_step_time": 0.10714006423950195 }, { "epoch": 1.74591064453125e-05, "model_forward_time": 0.025261878967285156, "step": 11442 }, { "epoch": 1.74591064453125e-05, "step": 11442, "training_step_time": 0.10979032516479492 }, { "epoch": 1.746063232421875e-05, "model_forward_time": 0.025285005569458008, "step": 11443 }, { "epoch": 1.746063232421875e-05, "step": 11443, "training_step_time": 0.10970783233642578 }, { "epoch": 1.7462158203125e-05, "model_forward_time": 0.025391340255737305, "step": 11444 }, { "epoch": 1.7462158203125e-05, "step": 11444, "training_step_time": 0.10991191864013672 }, { "epoch": 1.746368408203125e-05, "model_forward_time": 0.025222301483154297, "step": 11445 }, { "epoch": 1.746368408203125e-05, "step": 11445, "training_step_time": 0.10866189002990723 }, { "epoch": 1.74652099609375e-05, "model_forward_time": 0.024389028549194336, "step": 11446 }, { "epoch": 1.74652099609375e-05, "step": 11446, "training_step_time": 0.15033507347106934 }, { "epoch": 1.746673583984375e-05, "model_forward_time": 0.024841785430908203, "step": 11447 }, { "epoch": 1.746673583984375e-05, "step": 11447, "training_step_time": 0.17988109588623047 }, { "epoch": 1.746826171875e-05, "model_forward_time": 0.02453923225402832, "step": 11448 }, { "epoch": 1.746826171875e-05, "step": 11448, "training_step_time": 0.10831880569458008 }, { "epoch": 1.746978759765625e-05, "model_forward_time": 0.024659395217895508, "step": 11449 }, { "epoch": 1.746978759765625e-05, "step": 11449, "training_step_time": 0.13698863983154297 }, { "epoch": 1.74713134765625e-05, "grad_norm": 0.42815864086151123, "learning_rate": 7.282220967060633e-05, "loss": 0.0197, "step": 11450 }, { "epoch": 1.74713134765625e-05, "model_forward_time": 0.025053739547729492, "step": 11450 }, { "epoch": 1.74713134765625e-05, "step": 11450, "training_step_time": 0.10758781433105469 }, { "epoch": 1.747283935546875e-05, "model_forward_time": 0.02604508399963379, "step": 11451 }, { "epoch": 1.747283935546875e-05, "step": 11451, "training_step_time": 0.144456148147583 }, { "epoch": 1.7474365234375e-05, "model_forward_time": 0.02534937858581543, "step": 11452 }, { "epoch": 1.7474365234375e-05, "step": 11452, "training_step_time": 0.1423196792602539 }, { "epoch": 1.747589111328125e-05, "model_forward_time": 0.02475881576538086, "step": 11453 }, { "epoch": 1.747589111328125e-05, "step": 11453, "training_step_time": 0.1102907657623291 }, { "epoch": 1.74774169921875e-05, "model_forward_time": 0.024916648864746094, "step": 11454 }, { "epoch": 1.74774169921875e-05, "step": 11454, "training_step_time": 0.11167359352111816 }, { "epoch": 1.747894287109375e-05, "model_forward_time": 0.024913311004638672, "step": 11455 }, { "epoch": 1.747894287109375e-05, "step": 11455, "training_step_time": 0.11174917221069336 }, { "epoch": 1.748046875e-05, "model_forward_time": 0.025392532348632812, "step": 11456 }, { "epoch": 1.748046875e-05, "step": 11456, "training_step_time": 0.1109919548034668 }, { "epoch": 1.748199462890625e-05, "model_forward_time": 0.025796175003051758, "step": 11457 }, { "epoch": 1.748199462890625e-05, "step": 11457, "training_step_time": 0.19932317733764648 }, { "epoch": 1.74835205078125e-05, "model_forward_time": 0.024463891983032227, "step": 11458 }, { "epoch": 1.74835205078125e-05, "step": 11458, "training_step_time": 0.10976171493530273 }, { "epoch": 1.748504638671875e-05, "model_forward_time": 0.024796485900878906, "step": 11459 }, { "epoch": 1.748504638671875e-05, "step": 11459, "training_step_time": 0.10998058319091797 }, { "epoch": 1.7486572265625e-05, "grad_norm": 0.35386383533477783, "learning_rate": 7.277315654334997e-05, "loss": 0.0264, "step": 11460 }, { "epoch": 1.7486572265625e-05, "model_forward_time": 0.025449037551879883, "step": 11460 }, { "epoch": 1.7486572265625e-05, "step": 11460, "training_step_time": 0.11427974700927734 }, { "epoch": 1.748809814453125e-05, "model_forward_time": 0.025151729583740234, "step": 11461 }, { "epoch": 1.748809814453125e-05, "step": 11461, "training_step_time": 0.15288972854614258 }, { "epoch": 1.74896240234375e-05, "model_forward_time": 0.0245361328125, "step": 11462 }, { "epoch": 1.74896240234375e-05, "step": 11462, "training_step_time": 0.22175884246826172 }, { "epoch": 1.749114990234375e-05, "model_forward_time": 0.02476048469543457, "step": 11463 }, { "epoch": 1.749114990234375e-05, "step": 11463, "training_step_time": 0.12286376953125 }, { "epoch": 1.749267578125e-05, "model_forward_time": 0.02408146858215332, "step": 11464 }, { "epoch": 1.749267578125e-05, "step": 11464, "training_step_time": 0.10543489456176758 }, { "epoch": 1.749420166015625e-05, "model_forward_time": 0.025523900985717773, "step": 11465 }, { "epoch": 1.749420166015625e-05, "step": 11465, "training_step_time": 0.10750365257263184 }, { "epoch": 1.74957275390625e-05, "model_forward_time": 0.02597784996032715, "step": 11466 }, { "epoch": 1.74957275390625e-05, "step": 11466, "training_step_time": 0.11063313484191895 }, { "epoch": 1.749725341796875e-05, "model_forward_time": 0.026036739349365234, "step": 11467 }, { "epoch": 1.749725341796875e-05, "step": 11467, "training_step_time": 0.1090087890625 }, { "epoch": 1.7498779296875e-05, "model_forward_time": 0.0258944034576416, "step": 11468 }, { "epoch": 1.7498779296875e-05, "step": 11468, "training_step_time": 0.10801196098327637 }, { "epoch": 1.750030517578125e-05, "model_forward_time": 0.025266170501708984, "step": 11469 }, { "epoch": 1.750030517578125e-05, "step": 11469, "training_step_time": 0.10560250282287598 }, { "epoch": 1.75018310546875e-05, "grad_norm": 0.4047854244709015, "learning_rate": 7.272407574456103e-05, "loss": 0.0161, "step": 11470 }, { "epoch": 1.75018310546875e-05, "model_forward_time": 0.02540898323059082, "step": 11470 }, { "epoch": 1.75018310546875e-05, "step": 11470, "training_step_time": 0.11034297943115234 }, { "epoch": 1.750335693359375e-05, "model_forward_time": 0.025775671005249023, "step": 11471 }, { "epoch": 1.750335693359375e-05, "step": 11471, "training_step_time": 0.11063671112060547 }, { "epoch": 1.75048828125e-05, "model_forward_time": 0.025475025177001953, "step": 11472 }, { "epoch": 1.75048828125e-05, "step": 11472, "training_step_time": 0.1062626838684082 }, { "epoch": 1.750640869140625e-05, "model_forward_time": 0.025372982025146484, "step": 11473 }, { "epoch": 1.750640869140625e-05, "step": 11473, "training_step_time": 0.10675668716430664 }, { "epoch": 1.75079345703125e-05, "model_forward_time": 0.02780771255493164, "step": 11474 }, { "epoch": 1.75079345703125e-05, "step": 11474, "training_step_time": 0.1979062557220459 }, { "epoch": 1.750946044921875e-05, "model_forward_time": 0.024963855743408203, "step": 11475 }, { "epoch": 1.750946044921875e-05, "step": 11475, "training_step_time": 0.10824942588806152 }, { "epoch": 1.7510986328125e-05, "model_forward_time": 0.02449655532836914, "step": 11476 }, { "epoch": 1.7510986328125e-05, "step": 11476, "training_step_time": 0.11250662803649902 }, { "epoch": 1.751251220703125e-05, "model_forward_time": 0.024783849716186523, "step": 11477 }, { "epoch": 1.751251220703125e-05, "step": 11477, "training_step_time": 0.12256050109863281 }, { "epoch": 1.75140380859375e-05, "model_forward_time": 0.025546550750732422, "step": 11478 }, { "epoch": 1.75140380859375e-05, "step": 11478, "training_step_time": 0.12778162956237793 }, { "epoch": 1.751556396484375e-05, "model_forward_time": 0.024765968322753906, "step": 11479 }, { "epoch": 1.751556396484375e-05, "step": 11479, "training_step_time": 0.1285717487335205 }, { "epoch": 1.751708984375e-05, "grad_norm": 0.2882809340953827, "learning_rate": 7.267496733387731e-05, "loss": 0.0173, "step": 11480 }, { "epoch": 1.751708984375e-05, "model_forward_time": 0.02497696876525879, "step": 11480 }, { "epoch": 1.751708984375e-05, "step": 11480, "training_step_time": 0.11112093925476074 }, { "epoch": 1.751861572265625e-05, "model_forward_time": 0.025364160537719727, "step": 11481 }, { "epoch": 1.751861572265625e-05, "step": 11481, "training_step_time": 0.1420884132385254 }, { "epoch": 1.75201416015625e-05, "model_forward_time": 0.025162696838378906, "step": 11482 }, { "epoch": 1.75201416015625e-05, "step": 11482, "training_step_time": 0.11285138130187988 }, { "epoch": 1.752166748046875e-05, "model_forward_time": 0.024341106414794922, "step": 11483 }, { "epoch": 1.752166748046875e-05, "step": 11483, "training_step_time": 0.10926198959350586 }, { "epoch": 1.7523193359375e-05, "model_forward_time": 0.024616241455078125, "step": 11484 }, { "epoch": 1.7523193359375e-05, "step": 11484, "training_step_time": 0.10896539688110352 }, { "epoch": 1.752471923828125e-05, "model_forward_time": 0.02519083023071289, "step": 11485 }, { "epoch": 1.752471923828125e-05, "step": 11485, "training_step_time": 0.10934829711914062 }, { "epoch": 1.75262451171875e-05, "model_forward_time": 0.025469064712524414, "step": 11486 }, { "epoch": 1.75262451171875e-05, "step": 11486, "training_step_time": 0.10724997520446777 }, { "epoch": 1.752777099609375e-05, "model_forward_time": 0.025366544723510742, "step": 11487 }, { "epoch": 1.752777099609375e-05, "step": 11487, "training_step_time": 0.10768651962280273 }, { "epoch": 1.7529296875e-05, "model_forward_time": 0.025552749633789062, "step": 11488 }, { "epoch": 1.7529296875e-05, "step": 11488, "training_step_time": 0.10792922973632812 }, { "epoch": 1.753082275390625e-05, "model_forward_time": 0.025469541549682617, "step": 11489 }, { "epoch": 1.753082275390625e-05, "step": 11489, "training_step_time": 0.11253786087036133 }, { "epoch": 1.75323486328125e-05, "grad_norm": 0.42318224906921387, "learning_rate": 7.262583137097018e-05, "loss": 0.0236, "step": 11490 }, { "epoch": 1.75323486328125e-05, "model_forward_time": 0.027422666549682617, "step": 11490 }, { "epoch": 1.75323486328125e-05, "step": 11490, "training_step_time": 0.1113739013671875 }, { "epoch": 1.753387451171875e-05, "model_forward_time": 0.02533245086669922, "step": 11491 }, { "epoch": 1.753387451171875e-05, "step": 11491, "training_step_time": 0.10569000244140625 }, { "epoch": 1.7535400390625e-05, "model_forward_time": 0.02447223663330078, "step": 11492 }, { "epoch": 1.7535400390625e-05, "step": 11492, "training_step_time": 0.1419835090637207 }, { "epoch": 1.753692626953125e-05, "model_forward_time": 0.02485060691833496, "step": 11493 }, { "epoch": 1.753692626953125e-05, "step": 11493, "training_step_time": 0.12059259414672852 }, { "epoch": 1.75384521484375e-05, "model_forward_time": 0.02569127082824707, "step": 11494 }, { "epoch": 1.75384521484375e-05, "step": 11494, "training_step_time": 0.11272215843200684 }, { "epoch": 1.753997802734375e-05, "model_forward_time": 0.02535700798034668, "step": 11495 }, { "epoch": 1.753997802734375e-05, "step": 11495, "training_step_time": 0.1081850528717041 }, { "epoch": 1.754150390625e-05, "model_forward_time": 0.025861263275146484, "step": 11496 }, { "epoch": 1.754150390625e-05, "step": 11496, "training_step_time": 0.11785888671875 }, { "epoch": 1.754302978515625e-05, "model_forward_time": 0.026509523391723633, "step": 11497 }, { "epoch": 1.754302978515625e-05, "step": 11497, "training_step_time": 0.1850283145904541 }, { "epoch": 1.75445556640625e-05, "model_forward_time": 0.024777889251708984, "step": 11498 }, { "epoch": 1.75445556640625e-05, "step": 11498, "training_step_time": 0.13240861892700195 }, { "epoch": 1.754608154296875e-05, "model_forward_time": 0.02462911605834961, "step": 11499 }, { "epoch": 1.754608154296875e-05, "step": 11499, "training_step_time": 0.10773706436157227 }, { "epoch": 1.7547607421875e-05, "grad_norm": 0.4000401496887207, "learning_rate": 7.257666791554448e-05, "loss": 0.0203, "step": 11500 }, { "epoch": 1.7547607421875e-05, "model_forward_time": 0.02429986000061035, "step": 11500 }, { "epoch": 1.7547607421875e-05, "step": 11500, "training_step_time": 0.11380577087402344 }, { "epoch": 1.754913330078125e-05, "model_forward_time": 0.02381587028503418, "step": 11501 }, { "epoch": 1.754913330078125e-05, "step": 11501, "training_step_time": 0.11920881271362305 }, { "epoch": 1.75506591796875e-05, "model_forward_time": 0.02417445182800293, "step": 11502 }, { "epoch": 1.75506591796875e-05, "step": 11502, "training_step_time": 0.10823202133178711 }, { "epoch": 1.755218505859375e-05, "model_forward_time": 0.025033235549926758, "step": 11503 }, { "epoch": 1.755218505859375e-05, "step": 11503, "training_step_time": 0.19985365867614746 }, { "epoch": 1.75537109375e-05, "model_forward_time": 0.02471780776977539, "step": 11504 }, { "epoch": 1.75537109375e-05, "step": 11504, "training_step_time": 0.1525270938873291 }, { "epoch": 1.755523681640625e-05, "model_forward_time": 0.02434706687927246, "step": 11505 }, { "epoch": 1.755523681640625e-05, "step": 11505, "training_step_time": 0.18303298950195312 }, { "epoch": 1.75567626953125e-05, "model_forward_time": 0.02452993392944336, "step": 11506 }, { "epoch": 1.75567626953125e-05, "step": 11506, "training_step_time": 0.19329404830932617 }, { "epoch": 1.755828857421875e-05, "model_forward_time": 0.0242459774017334, "step": 11507 }, { "epoch": 1.755828857421875e-05, "step": 11507, "training_step_time": 0.17239832878112793 }, { "epoch": 1.7559814453125e-05, "model_forward_time": 0.024364471435546875, "step": 11508 }, { "epoch": 1.7559814453125e-05, "step": 11508, "training_step_time": 0.17524003982543945 }, { "epoch": 1.756134033203125e-05, "model_forward_time": 0.024054288864135742, "step": 11509 }, { "epoch": 1.756134033203125e-05, "step": 11509, "training_step_time": 0.10601544380187988 }, { "epoch": 1.75628662109375e-05, "grad_norm": 0.4016773998737335, "learning_rate": 7.25274770273384e-05, "loss": 0.016, "step": 11510 }, { "epoch": 1.75628662109375e-05, "model_forward_time": 0.02465224266052246, "step": 11510 }, { "epoch": 1.75628662109375e-05, "step": 11510, "training_step_time": 0.10484647750854492 }, { "epoch": 1.756439208984375e-05, "model_forward_time": 0.025226354598999023, "step": 11511 }, { "epoch": 1.756439208984375e-05, "step": 11511, "training_step_time": 0.10895276069641113 }, { "epoch": 1.756591796875e-05, "model_forward_time": 0.025446653366088867, "step": 11512 }, { "epoch": 1.756591796875e-05, "step": 11512, "training_step_time": 0.10906291007995605 }, { "epoch": 1.756744384765625e-05, "model_forward_time": 0.025188207626342773, "step": 11513 }, { "epoch": 1.756744384765625e-05, "step": 11513, "training_step_time": 0.11518192291259766 }, { "epoch": 1.75689697265625e-05, "model_forward_time": 0.02516651153564453, "step": 11514 }, { "epoch": 1.75689697265625e-05, "step": 11514, "training_step_time": 0.11129117012023926 }, { "epoch": 1.757049560546875e-05, "model_forward_time": 0.025735855102539062, "step": 11515 }, { "epoch": 1.757049560546875e-05, "step": 11515, "training_step_time": 0.17116594314575195 }, { "epoch": 1.7572021484375e-05, "model_forward_time": 0.02439427375793457, "step": 11516 }, { "epoch": 1.7572021484375e-05, "step": 11516, "training_step_time": 0.18446779251098633 }, { "epoch": 1.757354736328125e-05, "model_forward_time": 0.025471210479736328, "step": 11517 }, { "epoch": 1.757354736328125e-05, "step": 11517, "training_step_time": 0.16740012168884277 }, { "epoch": 1.75750732421875e-05, "model_forward_time": 0.024227380752563477, "step": 11518 }, { "epoch": 1.75750732421875e-05, "step": 11518, "training_step_time": 0.20984816551208496 }, { "epoch": 1.757659912109375e-05, "model_forward_time": 0.02423405647277832, "step": 11519 }, { "epoch": 1.757659912109375e-05, "step": 11519, "training_step_time": 0.1461503505706787 }, { "epoch": 1.7578125e-05, "grad_norm": 0.3682970702648163, "learning_rate": 7.247825876612353e-05, "loss": 0.0173, "step": 11520 }, { "epoch": 1.7578125e-05, "model_forward_time": 0.024031877517700195, "step": 11520 }, { "epoch": 1.7578125e-05, "step": 11520, "training_step_time": 0.20183467864990234 }, { "epoch": 1.757965087890625e-05, "model_forward_time": 0.024365901947021484, "step": 11521 }, { "epoch": 1.757965087890625e-05, "step": 11521, "training_step_time": 0.14229655265808105 }, { "epoch": 1.75811767578125e-05, "model_forward_time": 0.024404525756835938, "step": 11522 }, { "epoch": 1.75811767578125e-05, "step": 11522, "training_step_time": 0.13160324096679688 }, { "epoch": 1.758270263671875e-05, "model_forward_time": 0.024760007858276367, "step": 11523 }, { "epoch": 1.758270263671875e-05, "step": 11523, "training_step_time": 0.11772799491882324 }, { "epoch": 1.7584228515625e-05, "model_forward_time": 0.025128841400146484, "step": 11524 }, { "epoch": 1.7584228515625e-05, "step": 11524, "training_step_time": 0.11426830291748047 }, { "epoch": 1.758575439453125e-05, "model_forward_time": 0.025171518325805664, "step": 11525 }, { "epoch": 1.758575439453125e-05, "step": 11525, "training_step_time": 0.11496949195861816 }, { "epoch": 1.75872802734375e-05, "model_forward_time": 0.0255281925201416, "step": 11526 }, { "epoch": 1.75872802734375e-05, "step": 11526, "training_step_time": 0.11439156532287598 }, { "epoch": 1.758880615234375e-05, "model_forward_time": 0.025102853775024414, "step": 11527 }, { "epoch": 1.758880615234375e-05, "step": 11527, "training_step_time": 0.10881400108337402 }, { "epoch": 1.759033203125e-05, "model_forward_time": 0.02544236183166504, "step": 11528 }, { "epoch": 1.759033203125e-05, "step": 11528, "training_step_time": 0.10804486274719238 }, { "epoch": 1.759185791015625e-05, "model_forward_time": 0.02587151527404785, "step": 11529 }, { "epoch": 1.759185791015625e-05, "step": 11529, "training_step_time": 0.10777497291564941 }, { "epoch": 1.75933837890625e-05, "grad_norm": 0.2134154587984085, "learning_rate": 7.242901319170471e-05, "loss": 0.0148, "step": 11530 }, { "epoch": 1.75933837890625e-05, "model_forward_time": 0.027271270751953125, "step": 11530 }, { "epoch": 1.75933837890625e-05, "step": 11530, "training_step_time": 0.11302709579467773 }, { "epoch": 1.759490966796875e-05, "model_forward_time": 0.024477481842041016, "step": 11531 }, { "epoch": 1.759490966796875e-05, "step": 11531, "training_step_time": 0.1063687801361084 }, { "epoch": 1.7596435546875e-05, "model_forward_time": 0.025391101837158203, "step": 11532 }, { "epoch": 1.7596435546875e-05, "step": 11532, "training_step_time": 0.10557341575622559 }, { "epoch": 1.759796142578125e-05, "model_forward_time": 0.02544116973876953, "step": 11533 }, { "epoch": 1.759796142578125e-05, "step": 11533, "training_step_time": 0.10719895362854004 }, { "epoch": 1.75994873046875e-05, "model_forward_time": 0.025246858596801758, "step": 11534 }, { "epoch": 1.75994873046875e-05, "step": 11534, "training_step_time": 0.10907173156738281 }, { "epoch": 1.760101318359375e-05, "model_forward_time": 0.025249958038330078, "step": 11535 }, { "epoch": 1.760101318359375e-05, "step": 11535, "training_step_time": 0.1111907958984375 }, { "epoch": 1.76025390625e-05, "model_forward_time": 0.02581787109375, "step": 11536 }, { "epoch": 1.76025390625e-05, "step": 11536, "training_step_time": 0.1111152172088623 }, { "epoch": 1.760406494140625e-05, "model_forward_time": 0.02557063102722168, "step": 11537 }, { "epoch": 1.760406494140625e-05, "step": 11537, "training_step_time": 0.20666003227233887 }, { "epoch": 1.76055908203125e-05, "model_forward_time": 0.025022268295288086, "step": 11538 }, { "epoch": 1.76055908203125e-05, "step": 11538, "training_step_time": 0.13742303848266602 }, { "epoch": 1.760711669921875e-05, "model_forward_time": 0.024531841278076172, "step": 11539 }, { "epoch": 1.760711669921875e-05, "step": 11539, "training_step_time": 0.17015647888183594 }, { "epoch": 1.7608642578125e-05, "grad_norm": 0.21647049486637115, "learning_rate": 7.237974036391992e-05, "loss": 0.0211, "step": 11540 }, { "epoch": 1.7608642578125e-05, "model_forward_time": 0.024743318557739258, "step": 11540 }, { "epoch": 1.7608642578125e-05, "step": 11540, "training_step_time": 0.1389153003692627 }, { "epoch": 1.761016845703125e-05, "model_forward_time": 0.02474522590637207, "step": 11541 }, { "epoch": 1.761016845703125e-05, "step": 11541, "training_step_time": 0.10873770713806152 }, { "epoch": 1.76116943359375e-05, "model_forward_time": 0.025516510009765625, "step": 11542 }, { "epoch": 1.76116943359375e-05, "step": 11542, "training_step_time": 0.12326478958129883 }, { "epoch": 1.761322021484375e-05, "model_forward_time": 0.025625228881835938, "step": 11543 }, { "epoch": 1.761322021484375e-05, "step": 11543, "training_step_time": 0.1107032299041748 }, { "epoch": 1.761474609375e-05, "model_forward_time": 0.0255892276763916, "step": 11544 }, { "epoch": 1.761474609375e-05, "step": 11544, "training_step_time": 0.10715460777282715 }, { "epoch": 1.761627197265625e-05, "model_forward_time": 0.028168439865112305, "step": 11545 }, { "epoch": 1.761627197265625e-05, "step": 11545, "training_step_time": 0.20194244384765625 }, { "epoch": 1.76177978515625e-05, "model_forward_time": 0.02452707290649414, "step": 11546 }, { "epoch": 1.76177978515625e-05, "step": 11546, "training_step_time": 0.10869026184082031 }, { "epoch": 1.761932373046875e-05, "model_forward_time": 0.024935483932495117, "step": 11547 }, { "epoch": 1.761932373046875e-05, "step": 11547, "training_step_time": 0.10661029815673828 }, { "epoch": 1.7620849609375e-05, "model_forward_time": 0.025375843048095703, "step": 11548 }, { "epoch": 1.7620849609375e-05, "step": 11548, "training_step_time": 0.13761067390441895 }, { "epoch": 1.762237548828125e-05, "model_forward_time": 0.02534198760986328, "step": 11549 }, { "epoch": 1.762237548828125e-05, "step": 11549, "training_step_time": 0.16053152084350586 }, { "epoch": 1.76239013671875e-05, "grad_norm": 0.6431434750556946, "learning_rate": 7.233044034264034e-05, "loss": 0.0234, "step": 11550 }, { "epoch": 1.76239013671875e-05, "model_forward_time": 0.02430868148803711, "step": 11550 }, { "epoch": 1.76239013671875e-05, "step": 11550, "training_step_time": 0.22101736068725586 }, { "epoch": 1.762542724609375e-05, "model_forward_time": 0.025033235549926758, "step": 11551 }, { "epoch": 1.762542724609375e-05, "step": 11551, "training_step_time": 0.11492657661437988 }, { "epoch": 1.7626953125e-05, "model_forward_time": 0.02476954460144043, "step": 11552 }, { "epoch": 1.7626953125e-05, "step": 11552, "training_step_time": 0.10618138313293457 }, { "epoch": 1.762847900390625e-05, "model_forward_time": 0.02547764778137207, "step": 11553 }, { "epoch": 1.762847900390625e-05, "step": 11553, "training_step_time": 0.10851502418518066 }, { "epoch": 1.76300048828125e-05, "model_forward_time": 0.025367259979248047, "step": 11554 }, { "epoch": 1.76300048828125e-05, "step": 11554, "training_step_time": 0.10888099670410156 }, { "epoch": 1.763153076171875e-05, "model_forward_time": 0.025510549545288086, "step": 11555 }, { "epoch": 1.763153076171875e-05, "step": 11555, "training_step_time": 0.11215877532958984 }, { "epoch": 1.7633056640625e-05, "model_forward_time": 0.02527785301208496, "step": 11556 }, { "epoch": 1.7633056640625e-05, "step": 11556, "training_step_time": 0.10798525810241699 }, { "epoch": 1.763458251953125e-05, "model_forward_time": 0.02516770362854004, "step": 11557 }, { "epoch": 1.763458251953125e-05, "step": 11557, "training_step_time": 0.11548089981079102 }, { "epoch": 1.76361083984375e-05, "model_forward_time": 0.024836301803588867, "step": 11558 }, { "epoch": 1.76361083984375e-05, "step": 11558, "training_step_time": 0.10821533203125 }, { "epoch": 1.763763427734375e-05, "model_forward_time": 0.025320053100585938, "step": 11559 }, { "epoch": 1.763763427734375e-05, "step": 11559, "training_step_time": 0.10892486572265625 }, { "epoch": 1.763916015625e-05, "grad_norm": 0.35514453053474426, "learning_rate": 7.22811131877701e-05, "loss": 0.0196, "step": 11560 }, { "epoch": 1.763916015625e-05, "model_forward_time": 0.024505138397216797, "step": 11560 }, { "epoch": 1.763916015625e-05, "step": 11560, "training_step_time": 0.10990452766418457 }, { "epoch": 1.764068603515625e-05, "model_forward_time": 0.025338172912597656, "step": 11561 }, { "epoch": 1.764068603515625e-05, "step": 11561, "training_step_time": 0.10818624496459961 }, { "epoch": 1.76422119140625e-05, "model_forward_time": 0.025676727294921875, "step": 11562 }, { "epoch": 1.76422119140625e-05, "step": 11562, "training_step_time": 0.10762429237365723 }, { "epoch": 1.764373779296875e-05, "model_forward_time": 0.024802684783935547, "step": 11563 }, { "epoch": 1.764373779296875e-05, "step": 11563, "training_step_time": 0.1070241928100586 }, { "epoch": 1.7645263671875e-05, "model_forward_time": 0.025272607803344727, "step": 11564 }, { "epoch": 1.7645263671875e-05, "step": 11564, "training_step_time": 0.10660171508789062 }, { "epoch": 1.764678955078125e-05, "model_forward_time": 0.025356531143188477, "step": 11565 }, { "epoch": 1.764678955078125e-05, "step": 11565, "training_step_time": 0.17947673797607422 }, { "epoch": 1.76483154296875e-05, "model_forward_time": 0.02485060691833496, "step": 11566 }, { "epoch": 1.76483154296875e-05, "step": 11566, "training_step_time": 0.11304736137390137 }, { "epoch": 1.764984130859375e-05, "model_forward_time": 0.024890899658203125, "step": 11567 }, { "epoch": 1.764984130859375e-05, "step": 11567, "training_step_time": 0.11322975158691406 }, { "epoch": 1.76513671875e-05, "model_forward_time": 0.02559828758239746, "step": 11568 }, { "epoch": 1.76513671875e-05, "step": 11568, "training_step_time": 0.11980247497558594 }, { "epoch": 1.765289306640625e-05, "model_forward_time": 0.02533102035522461, "step": 11569 }, { "epoch": 1.765289306640625e-05, "step": 11569, "training_step_time": 0.12816262245178223 }, { "epoch": 1.76544189453125e-05, "grad_norm": 0.23926854133605957, "learning_rate": 7.223175895924638e-05, "loss": 0.0162, "step": 11570 }, { "epoch": 1.76544189453125e-05, "model_forward_time": 0.024827003479003906, "step": 11570 }, { "epoch": 1.76544189453125e-05, "step": 11570, "training_step_time": 0.11135435104370117 }, { "epoch": 1.765594482421875e-05, "model_forward_time": 0.0253448486328125, "step": 11571 }, { "epoch": 1.765594482421875e-05, "step": 11571, "training_step_time": 0.11720108985900879 }, { "epoch": 1.7657470703125e-05, "model_forward_time": 0.025081157684326172, "step": 11572 }, { "epoch": 1.7657470703125e-05, "step": 11572, "training_step_time": 0.11251521110534668 }, { "epoch": 1.765899658203125e-05, "model_forward_time": 0.025596141815185547, "step": 11573 }, { "epoch": 1.765899658203125e-05, "step": 11573, "training_step_time": 0.1108241081237793 }, { "epoch": 1.76605224609375e-05, "model_forward_time": 0.025294780731201172, "step": 11574 }, { "epoch": 1.76605224609375e-05, "step": 11574, "training_step_time": 0.10776066780090332 }, { "epoch": 1.766204833984375e-05, "model_forward_time": 0.02524852752685547, "step": 11575 }, { "epoch": 1.766204833984375e-05, "step": 11575, "training_step_time": 0.11140084266662598 }, { "epoch": 1.766357421875e-05, "model_forward_time": 0.025841951370239258, "step": 11576 }, { "epoch": 1.766357421875e-05, "step": 11576, "training_step_time": 0.109710693359375 }, { "epoch": 1.766510009765625e-05, "model_forward_time": 0.025137662887573242, "step": 11577 }, { "epoch": 1.766510009765625e-05, "step": 11577, "training_step_time": 0.11013913154602051 }, { "epoch": 1.76666259765625e-05, "model_forward_time": 0.025402545928955078, "step": 11578 }, { "epoch": 1.76666259765625e-05, "step": 11578, "training_step_time": 0.10993123054504395 }, { "epoch": 1.766815185546875e-05, "model_forward_time": 0.0252535343170166, "step": 11579 }, { "epoch": 1.766815185546875e-05, "step": 11579, "training_step_time": 0.10754990577697754 }, { "epoch": 1.7669677734375e-05, "grad_norm": 0.44454053044319153, "learning_rate": 7.218237771703921e-05, "loss": 0.026, "step": 11580 }, { "epoch": 1.7669677734375e-05, "model_forward_time": 0.025673627853393555, "step": 11580 }, { "epoch": 1.7669677734375e-05, "step": 11580, "training_step_time": 0.10871028900146484 }, { "epoch": 1.767120361328125e-05, "model_forward_time": 0.025425434112548828, "step": 11581 }, { "epoch": 1.767120361328125e-05, "step": 11581, "training_step_time": 0.13261079788208008 }, { "epoch": 1.76727294921875e-05, "model_forward_time": 0.024957656860351562, "step": 11582 }, { "epoch": 1.76727294921875e-05, "step": 11582, "training_step_time": 0.12287592887878418 }, { "epoch": 1.767425537109375e-05, "model_forward_time": 0.025315046310424805, "step": 11583 }, { "epoch": 1.767425537109375e-05, "step": 11583, "training_step_time": 0.11443495750427246 }, { "epoch": 1.767578125e-05, "model_forward_time": 0.025872468948364258, "step": 11584 }, { "epoch": 1.767578125e-05, "step": 11584, "training_step_time": 0.11126446723937988 }, { "epoch": 1.767730712890625e-05, "model_forward_time": 0.0257112979888916, "step": 11585 }, { "epoch": 1.767730712890625e-05, "step": 11585, "training_step_time": 0.12569880485534668 }, { "epoch": 1.76788330078125e-05, "model_forward_time": 0.025865793228149414, "step": 11586 }, { "epoch": 1.76788330078125e-05, "step": 11586, "training_step_time": 0.20575428009033203 }, { "epoch": 1.768035888671875e-05, "model_forward_time": 0.0246734619140625, "step": 11587 }, { "epoch": 1.768035888671875e-05, "step": 11587, "training_step_time": 0.15195798873901367 }, { "epoch": 1.7681884765625e-05, "model_forward_time": 0.025154590606689453, "step": 11588 }, { "epoch": 1.7681884765625e-05, "step": 11588, "training_step_time": 0.13219165802001953 }, { "epoch": 1.768341064453125e-05, "model_forward_time": 0.024851083755493164, "step": 11589 }, { "epoch": 1.768341064453125e-05, "step": 11589, "training_step_time": 0.1083836555480957 }, { "epoch": 1.76849365234375e-05, "grad_norm": 0.3830711543560028, "learning_rate": 7.213296952115144e-05, "loss": 0.023, "step": 11590 }, { "epoch": 1.76849365234375e-05, "model_forward_time": 0.025335311889648438, "step": 11590 }, { "epoch": 1.76849365234375e-05, "step": 11590, "training_step_time": 0.1896038055419922 }, { "epoch": 1.768646240234375e-05, "model_forward_time": 0.025109052658081055, "step": 11591 }, { "epoch": 1.768646240234375e-05, "step": 11591, "training_step_time": 0.20410871505737305 }, { "epoch": 1.768798828125e-05, "model_forward_time": 0.0247805118560791, "step": 11592 }, { "epoch": 1.768798828125e-05, "step": 11592, "training_step_time": 0.1139223575592041 }, { "epoch": 1.768951416015625e-05, "model_forward_time": 0.02457571029663086, "step": 11593 }, { "epoch": 1.768951416015625e-05, "step": 11593, "training_step_time": 0.14266324043273926 }, { "epoch": 1.76910400390625e-05, "model_forward_time": 0.0252077579498291, "step": 11594 }, { "epoch": 1.76910400390625e-05, "step": 11594, "training_step_time": 0.16104626655578613 }, { "epoch": 1.769256591796875e-05, "model_forward_time": 0.024526357650756836, "step": 11595 }, { "epoch": 1.769256591796875e-05, "step": 11595, "training_step_time": 0.21456432342529297 }, { "epoch": 1.7694091796875e-05, "model_forward_time": 0.025170087814331055, "step": 11596 }, { "epoch": 1.7694091796875e-05, "step": 11596, "training_step_time": 0.10934233665466309 }, { "epoch": 1.769561767578125e-05, "model_forward_time": 0.024762630462646484, "step": 11597 }, { "epoch": 1.769561767578125e-05, "step": 11597, "training_step_time": 0.10715699195861816 }, { "epoch": 1.76971435546875e-05, "model_forward_time": 0.024912595748901367, "step": 11598 }, { "epoch": 1.76971435546875e-05, "step": 11598, "training_step_time": 0.10789823532104492 }, { "epoch": 1.769866943359375e-05, "model_forward_time": 0.026125669479370117, "step": 11599 }, { "epoch": 1.769866943359375e-05, "step": 11599, "training_step_time": 0.11000180244445801 }, { "epoch": 1.77001953125e-05, "grad_norm": 0.24346691370010376, "learning_rate": 7.20835344316187e-05, "loss": 0.016, "step": 11600 }, { "epoch": 1.77001953125e-05, "model_forward_time": 0.02569890022277832, "step": 11600 }, { "epoch": 1.77001953125e-05, "step": 11600, "training_step_time": 0.10920929908752441 }, { "epoch": 1.770172119140625e-05, "model_forward_time": 0.025798320770263672, "step": 11601 }, { "epoch": 1.770172119140625e-05, "step": 11601, "training_step_time": 0.10577130317687988 }, { "epoch": 1.77032470703125e-05, "model_forward_time": 0.02657341957092285, "step": 11602 }, { "epoch": 1.77032470703125e-05, "step": 11602, "training_step_time": 0.10783815383911133 }, { "epoch": 1.770477294921875e-05, "model_forward_time": 0.025250673294067383, "step": 11603 }, { "epoch": 1.770477294921875e-05, "step": 11603, "training_step_time": 0.10725259780883789 }, { "epoch": 1.7706298828125e-05, "model_forward_time": 0.029310226440429688, "step": 11604 }, { "epoch": 1.7706298828125e-05, "step": 11604, "training_step_time": 0.1129915714263916 }, { "epoch": 1.770782470703125e-05, "model_forward_time": 0.025476455688476562, "step": 11605 }, { "epoch": 1.770782470703125e-05, "step": 11605, "training_step_time": 0.10977768898010254 }, { "epoch": 1.77093505859375e-05, "model_forward_time": 0.0253756046295166, "step": 11606 }, { "epoch": 1.77093505859375e-05, "step": 11606, "training_step_time": 0.10576105117797852 }, { "epoch": 1.771087646484375e-05, "model_forward_time": 0.02524423599243164, "step": 11607 }, { "epoch": 1.771087646484375e-05, "step": 11607, "training_step_time": 0.10758042335510254 }, { "epoch": 1.771240234375e-05, "model_forward_time": 0.02527141571044922, "step": 11608 }, { "epoch": 1.771240234375e-05, "step": 11608, "training_step_time": 0.1098628044128418 }, { "epoch": 1.771392822265625e-05, "model_forward_time": 0.025182485580444336, "step": 11609 }, { "epoch": 1.771392822265625e-05, "step": 11609, "training_step_time": 0.10816168785095215 }, { "epoch": 1.77154541015625e-05, "grad_norm": 0.2136213779449463, "learning_rate": 7.203407250850928e-05, "loss": 0.0335, "step": 11610 }, { "epoch": 1.77154541015625e-05, "model_forward_time": 0.024566173553466797, "step": 11610 }, { "epoch": 1.77154541015625e-05, "step": 11610, "training_step_time": 0.11071109771728516 }, { "epoch": 1.771697998046875e-05, "model_forward_time": 0.024147748947143555, "step": 11611 }, { "epoch": 1.771697998046875e-05, "step": 11611, "training_step_time": 0.12538456916809082 }, { "epoch": 1.7718505859375e-05, "model_forward_time": 0.024240493774414062, "step": 11612 }, { "epoch": 1.7718505859375e-05, "step": 11612, "training_step_time": 0.1155402660369873 }, { "epoch": 1.772003173828125e-05, "model_forward_time": 0.02580738067626953, "step": 11613 }, { "epoch": 1.772003173828125e-05, "step": 11613, "training_step_time": 0.1743919849395752 }, { "epoch": 1.77215576171875e-05, "model_forward_time": 0.025368452072143555, "step": 11614 }, { "epoch": 1.77215576171875e-05, "step": 11614, "training_step_time": 0.17767572402954102 }, { "epoch": 1.772308349609375e-05, "model_forward_time": 0.02798604965209961, "step": 11615 }, { "epoch": 1.772308349609375e-05, "step": 11615, "training_step_time": 0.12120676040649414 }, { "epoch": 1.7724609375e-05, "model_forward_time": 0.02402329444885254, "step": 11616 }, { "epoch": 1.7724609375e-05, "step": 11616, "training_step_time": 0.1259171962738037 }, { "epoch": 1.772613525390625e-05, "model_forward_time": 0.02382349967956543, "step": 11617 }, { "epoch": 1.772613525390625e-05, "step": 11617, "training_step_time": 0.1247856616973877 }, { "epoch": 1.77276611328125e-05, "model_forward_time": 0.02445697784423828, "step": 11618 }, { "epoch": 1.77276611328125e-05, "step": 11618, "training_step_time": 0.1168069839477539 }, { "epoch": 1.772918701171875e-05, "model_forward_time": 0.02736973762512207, "step": 11619 }, { "epoch": 1.772918701171875e-05, "step": 11619, "training_step_time": 0.11309957504272461 }, { "epoch": 1.7730712890625e-05, "grad_norm": 0.35636040568351746, "learning_rate": 7.19845838119241e-05, "loss": 0.0323, "step": 11620 }, { "epoch": 1.7730712890625e-05, "model_forward_time": 0.02549433708190918, "step": 11620 }, { "epoch": 1.7730712890625e-05, "step": 11620, "training_step_time": 0.12039041519165039 }, { "epoch": 1.773223876953125e-05, "model_forward_time": 0.02460765838623047, "step": 11621 }, { "epoch": 1.773223876953125e-05, "step": 11621, "training_step_time": 0.11919426918029785 }, { "epoch": 1.77337646484375e-05, "model_forward_time": 0.024448871612548828, "step": 11622 }, { "epoch": 1.77337646484375e-05, "step": 11622, "training_step_time": 0.11164331436157227 }, { "epoch": 1.773529052734375e-05, "model_forward_time": 0.024251222610473633, "step": 11623 }, { "epoch": 1.773529052734375e-05, "step": 11623, "training_step_time": 0.10883402824401855 }, { "epoch": 1.773681640625e-05, "model_forward_time": 0.024695634841918945, "step": 11624 }, { "epoch": 1.773681640625e-05, "step": 11624, "training_step_time": 0.10844802856445312 }, { "epoch": 1.773834228515625e-05, "model_forward_time": 0.025716543197631836, "step": 11625 }, { "epoch": 1.773834228515625e-05, "step": 11625, "training_step_time": 0.10801029205322266 }, { "epoch": 1.77398681640625e-05, "model_forward_time": 0.025342941284179688, "step": 11626 }, { "epoch": 1.77398681640625e-05, "step": 11626, "training_step_time": 0.152634859085083 }, { "epoch": 1.774139404296875e-05, "model_forward_time": 0.02515554428100586, "step": 11627 }, { "epoch": 1.774139404296875e-05, "step": 11627, "training_step_time": 0.10698914527893066 }, { "epoch": 1.7742919921875e-05, "model_forward_time": 0.0254518985748291, "step": 11628 }, { "epoch": 1.7742919921875e-05, "step": 11628, "training_step_time": 0.19255375862121582 }, { "epoch": 1.774444580078125e-05, "model_forward_time": 0.024674415588378906, "step": 11629 }, { "epoch": 1.774444580078125e-05, "step": 11629, "training_step_time": 0.14569759368896484 }, { "epoch": 1.77459716796875e-05, "grad_norm": 0.33946698904037476, "learning_rate": 7.193506840199657e-05, "loss": 0.0382, "step": 11630 }, { "epoch": 1.77459716796875e-05, "model_forward_time": 0.025098323822021484, "step": 11630 }, { "epoch": 1.77459716796875e-05, "step": 11630, "training_step_time": 0.18776917457580566 }, { "epoch": 1.774749755859375e-05, "model_forward_time": 0.024368762969970703, "step": 11631 }, { "epoch": 1.774749755859375e-05, "step": 11631, "training_step_time": 0.17843222618103027 }, { "epoch": 1.77490234375e-05, "model_forward_time": 0.02482771873474121, "step": 11632 }, { "epoch": 1.77490234375e-05, "step": 11632, "training_step_time": 0.11714029312133789 }, { "epoch": 1.775054931640625e-05, "model_forward_time": 0.024992942810058594, "step": 11633 }, { "epoch": 1.775054931640625e-05, "step": 11633, "training_step_time": 0.10330963134765625 }, { "epoch": 1.77520751953125e-05, "model_forward_time": 0.025561809539794922, "step": 11634 }, { "epoch": 1.77520751953125e-05, "step": 11634, "training_step_time": 0.19768619537353516 }, { "epoch": 1.775360107421875e-05, "model_forward_time": 0.02456045150756836, "step": 11635 }, { "epoch": 1.775360107421875e-05, "step": 11635, "training_step_time": 0.127349853515625 }, { "epoch": 1.7755126953125e-05, "model_forward_time": 0.02493429183959961, "step": 11636 }, { "epoch": 1.7755126953125e-05, "step": 11636, "training_step_time": 0.1079869270324707 }, { "epoch": 1.775665283203125e-05, "model_forward_time": 0.024838924407958984, "step": 11637 }, { "epoch": 1.775665283203125e-05, "step": 11637, "training_step_time": 0.143815279006958 }, { "epoch": 1.77581787109375e-05, "model_forward_time": 0.025330066680908203, "step": 11638 }, { "epoch": 1.77581787109375e-05, "step": 11638, "training_step_time": 0.1566317081451416 }, { "epoch": 1.775970458984375e-05, "model_forward_time": 0.024809837341308594, "step": 11639 }, { "epoch": 1.775970458984375e-05, "step": 11639, "training_step_time": 0.23610234260559082 }, { "epoch": 1.776123046875e-05, "grad_norm": 0.26525023579597473, "learning_rate": 7.188552633889259e-05, "loss": 0.0178, "step": 11640 }, { "epoch": 1.776123046875e-05, "model_forward_time": 0.023571491241455078, "step": 11640 }, { "epoch": 1.776123046875e-05, "step": 11640, "training_step_time": 0.12482643127441406 }, { "epoch": 1.776275634765625e-05, "model_forward_time": 0.024907350540161133, "step": 11641 }, { "epoch": 1.776275634765625e-05, "step": 11641, "training_step_time": 0.1228783130645752 }, { "epoch": 1.77642822265625e-05, "model_forward_time": 0.025429487228393555, "step": 11642 }, { "epoch": 1.77642822265625e-05, "step": 11642, "training_step_time": 0.11895179748535156 }, { "epoch": 1.776580810546875e-05, "model_forward_time": 0.025605201721191406, "step": 11643 }, { "epoch": 1.776580810546875e-05, "step": 11643, "training_step_time": 0.11651968955993652 }, { "epoch": 1.7767333984375e-05, "model_forward_time": 0.02673172950744629, "step": 11644 }, { "epoch": 1.7767333984375e-05, "step": 11644, "training_step_time": 0.11143970489501953 }, { "epoch": 1.776885986328125e-05, "model_forward_time": 0.02569580078125, "step": 11645 }, { "epoch": 1.776885986328125e-05, "step": 11645, "training_step_time": 0.10963630676269531 }, { "epoch": 1.77703857421875e-05, "model_forward_time": 0.02579808235168457, "step": 11646 }, { "epoch": 1.77703857421875e-05, "step": 11646, "training_step_time": 0.11155867576599121 }, { "epoch": 1.777191162109375e-05, "model_forward_time": 0.026164531707763672, "step": 11647 }, { "epoch": 1.777191162109375e-05, "step": 11647, "training_step_time": 0.10988450050354004 }, { "epoch": 1.77734375e-05, "model_forward_time": 0.025140762329101562, "step": 11648 }, { "epoch": 1.77734375e-05, "step": 11648, "training_step_time": 0.10914969444274902 }, { "epoch": 1.777496337890625e-05, "model_forward_time": 0.025704145431518555, "step": 11649 }, { "epoch": 1.777496337890625e-05, "step": 11649, "training_step_time": 0.1092679500579834 }, { "epoch": 1.77764892578125e-05, "grad_norm": 0.5020220279693604, "learning_rate": 7.183595768281043e-05, "loss": 0.0212, "step": 11650 }, { "epoch": 1.77764892578125e-05, "model_forward_time": 0.024474382400512695, "step": 11650 }, { "epoch": 1.77764892578125e-05, "step": 11650, "training_step_time": 0.10893607139587402 }, { "epoch": 1.777801513671875e-05, "model_forward_time": 0.02434539794921875, "step": 11651 }, { "epoch": 1.777801513671875e-05, "step": 11651, "training_step_time": 0.1083683967590332 }, { "epoch": 1.7779541015625e-05, "model_forward_time": 0.0255429744720459, "step": 11652 }, { "epoch": 1.7779541015625e-05, "step": 11652, "training_step_time": 0.10792088508605957 }, { "epoch": 1.778106689453125e-05, "model_forward_time": 0.025328636169433594, "step": 11653 }, { "epoch": 1.778106689453125e-05, "step": 11653, "training_step_time": 0.10857510566711426 }, { "epoch": 1.77825927734375e-05, "model_forward_time": 0.025704622268676758, "step": 11654 }, { "epoch": 1.77825927734375e-05, "step": 11654, "training_step_time": 0.10866785049438477 }, { "epoch": 1.778411865234375e-05, "model_forward_time": 0.029287099838256836, "step": 11655 }, { "epoch": 1.778411865234375e-05, "step": 11655, "training_step_time": 0.11653470993041992 }, { "epoch": 1.778564453125e-05, "model_forward_time": 0.025564908981323242, "step": 11656 }, { "epoch": 1.778564453125e-05, "step": 11656, "training_step_time": 0.11843705177307129 }, { "epoch": 1.778717041015625e-05, "model_forward_time": 0.025386810302734375, "step": 11657 }, { "epoch": 1.778717041015625e-05, "step": 11657, "training_step_time": 0.17536282539367676 }, { "epoch": 1.77886962890625e-05, "model_forward_time": 0.024785995483398438, "step": 11658 }, { "epoch": 1.77886962890625e-05, "step": 11658, "training_step_time": 0.1766061782836914 }, { "epoch": 1.779022216796875e-05, "model_forward_time": 0.024843931198120117, "step": 11659 }, { "epoch": 1.779022216796875e-05, "step": 11659, "training_step_time": 0.11484909057617188 }, { "epoch": 1.7791748046875e-05, "grad_norm": 0.2558012008666992, "learning_rate": 7.178636249398072e-05, "loss": 0.0205, "step": 11660 }, { "epoch": 1.7791748046875e-05, "model_forward_time": 0.02511906623840332, "step": 11660 }, { "epoch": 1.7791748046875e-05, "step": 11660, "training_step_time": 0.11955547332763672 }, { "epoch": 1.779327392578125e-05, "model_forward_time": 0.025829553604125977, "step": 11661 }, { "epoch": 1.779327392578125e-05, "step": 11661, "training_step_time": 0.10851550102233887 }, { "epoch": 1.77947998046875e-05, "model_forward_time": 0.026876449584960938, "step": 11662 }, { "epoch": 1.77947998046875e-05, "step": 11662, "training_step_time": 0.10976052284240723 }, { "epoch": 1.779632568359375e-05, "model_forward_time": 0.025194406509399414, "step": 11663 }, { "epoch": 1.779632568359375e-05, "step": 11663, "training_step_time": 0.11402177810668945 }, { "epoch": 1.77978515625e-05, "model_forward_time": 0.025280237197875977, "step": 11664 }, { "epoch": 1.77978515625e-05, "step": 11664, "training_step_time": 0.10649633407592773 }, { "epoch": 1.779937744140625e-05, "model_forward_time": 0.02542853355407715, "step": 11665 }, { "epoch": 1.779937744140625e-05, "step": 11665, "training_step_time": 0.10787367820739746 }, { "epoch": 1.78009033203125e-05, "model_forward_time": 0.025123119354248047, "step": 11666 }, { "epoch": 1.78009033203125e-05, "step": 11666, "training_step_time": 0.10682225227355957 }, { "epoch": 1.780242919921875e-05, "model_forward_time": 0.025790691375732422, "step": 11667 }, { "epoch": 1.780242919921875e-05, "step": 11667, "training_step_time": 0.10874795913696289 }, { "epoch": 1.7803955078125e-05, "model_forward_time": 0.025238037109375, "step": 11668 }, { "epoch": 1.7803955078125e-05, "step": 11668, "training_step_time": 0.10617542266845703 }, { "epoch": 1.780548095703125e-05, "model_forward_time": 0.024748563766479492, "step": 11669 }, { "epoch": 1.780548095703125e-05, "step": 11669, "training_step_time": 0.10572290420532227 }, { "epoch": 1.78070068359375e-05, "grad_norm": 0.24784539639949799, "learning_rate": 7.173674083266624e-05, "loss": 0.0134, "step": 11670 }, { "epoch": 1.78070068359375e-05, "model_forward_time": 0.026944637298583984, "step": 11670 }, { "epoch": 1.78070068359375e-05, "step": 11670, "training_step_time": 0.11552238464355469 }, { "epoch": 1.780853271484375e-05, "model_forward_time": 0.025324106216430664, "step": 11671 }, { "epoch": 1.780853271484375e-05, "step": 11671, "training_step_time": 0.14961695671081543 }, { "epoch": 1.781005859375e-05, "model_forward_time": 0.025301694869995117, "step": 11672 }, { "epoch": 1.781005859375e-05, "step": 11672, "training_step_time": 0.20115900039672852 }, { "epoch": 1.781158447265625e-05, "model_forward_time": 0.024788618087768555, "step": 11673 }, { "epoch": 1.781158447265625e-05, "step": 11673, "training_step_time": 0.19629263877868652 }, { "epoch": 1.78131103515625e-05, "model_forward_time": 0.02545166015625, "step": 11674 }, { "epoch": 1.78131103515625e-05, "step": 11674, "training_step_time": 0.1674516201019287 }, { "epoch": 1.781463623046875e-05, "model_forward_time": 0.025110960006713867, "step": 11675 }, { "epoch": 1.781463623046875e-05, "step": 11675, "training_step_time": 0.15121912956237793 }, { "epoch": 1.7816162109375e-05, "model_forward_time": 0.026407241821289062, "step": 11676 }, { "epoch": 1.7816162109375e-05, "step": 11676, "training_step_time": 0.12232398986816406 }, { "epoch": 1.781768798828125e-05, "model_forward_time": 0.024682044982910156, "step": 11677 }, { "epoch": 1.781768798828125e-05, "step": 11677, "training_step_time": 0.106842041015625 }, { "epoch": 1.78192138671875e-05, "model_forward_time": 0.025202035903930664, "step": 11678 }, { "epoch": 1.78192138671875e-05, "step": 11678, "training_step_time": 0.19699525833129883 }, { "epoch": 1.782073974609375e-05, "model_forward_time": 0.024320125579833984, "step": 11679 }, { "epoch": 1.782073974609375e-05, "step": 11679, "training_step_time": 0.12567377090454102 }, { "epoch": 1.7822265625e-05, "grad_norm": 0.2874007821083069, "learning_rate": 7.1687092759162e-05, "loss": 0.0249, "step": 11680 }, { "epoch": 1.7822265625e-05, "model_forward_time": 0.02434396743774414, "step": 11680 }, { "epoch": 1.7822265625e-05, "step": 11680, "training_step_time": 0.12557053565979004 }, { "epoch": 1.782379150390625e-05, "model_forward_time": 0.0240786075592041, "step": 11681 }, { "epoch": 1.782379150390625e-05, "step": 11681, "training_step_time": 0.11521697044372559 }, { "epoch": 1.78253173828125e-05, "model_forward_time": 0.025308609008789062, "step": 11682 }, { "epoch": 1.78253173828125e-05, "step": 11682, "training_step_time": 0.14262676239013672 }, { "epoch": 1.782684326171875e-05, "model_forward_time": 0.025249481201171875, "step": 11683 }, { "epoch": 1.782684326171875e-05, "step": 11683, "training_step_time": 0.11459207534790039 }, { "epoch": 1.7828369140625e-05, "model_forward_time": 0.02537393569946289, "step": 11684 }, { "epoch": 1.7828369140625e-05, "step": 11684, "training_step_time": 0.12342381477355957 }, { "epoch": 1.782989501953125e-05, "model_forward_time": 0.02558135986328125, "step": 11685 }, { "epoch": 1.782989501953125e-05, "step": 11685, "training_step_time": 0.11874103546142578 }, { "epoch": 1.78314208984375e-05, "model_forward_time": 0.02527594566345215, "step": 11686 }, { "epoch": 1.78314208984375e-05, "step": 11686, "training_step_time": 0.1051032543182373 }, { "epoch": 1.783294677734375e-05, "model_forward_time": 0.02523040771484375, "step": 11687 }, { "epoch": 1.783294677734375e-05, "step": 11687, "training_step_time": 0.10513138771057129 }, { "epoch": 1.783447265625e-05, "model_forward_time": 0.025600910186767578, "step": 11688 }, { "epoch": 1.783447265625e-05, "step": 11688, "training_step_time": 0.11179089546203613 }, { "epoch": 1.783599853515625e-05, "model_forward_time": 0.025002002716064453, "step": 11689 }, { "epoch": 1.783599853515625e-05, "step": 11689, "training_step_time": 0.10659551620483398 }, { "epoch": 1.78375244140625e-05, "grad_norm": 0.30975258350372314, "learning_rate": 7.16374183337951e-05, "loss": 0.0213, "step": 11690 }, { "epoch": 1.78375244140625e-05, "model_forward_time": 0.025231361389160156, "step": 11690 }, { "epoch": 1.78375244140625e-05, "step": 11690, "training_step_time": 0.1071159839630127 }, { "epoch": 1.783905029296875e-05, "model_forward_time": 0.026062726974487305, "step": 11691 }, { "epoch": 1.783905029296875e-05, "step": 11691, "training_step_time": 0.12330913543701172 }, { "epoch": 1.7840576171875e-05, "model_forward_time": 0.024029254913330078, "step": 11692 }, { "epoch": 1.7840576171875e-05, "step": 11692, "training_step_time": 0.17774629592895508 }, { "epoch": 1.784210205078125e-05, "model_forward_time": 0.024976015090942383, "step": 11693 }, { "epoch": 1.784210205078125e-05, "step": 11693, "training_step_time": 0.18052935600280762 }, { "epoch": 1.78436279296875e-05, "model_forward_time": 0.02461981773376465, "step": 11694 }, { "epoch": 1.78436279296875e-05, "step": 11694, "training_step_time": 0.15902400016784668 }, { "epoch": 1.784515380859375e-05, "model_forward_time": 0.02421116828918457, "step": 11695 }, { "epoch": 1.784515380859375e-05, "step": 11695, "training_step_time": 0.15053915977478027 }, { "epoch": 1.78466796875e-05, "model_forward_time": 0.0243990421295166, "step": 11696 }, { "epoch": 1.78466796875e-05, "step": 11696, "training_step_time": 0.1726226806640625 }, { "epoch": 1.784820556640625e-05, "model_forward_time": 0.02503514289855957, "step": 11697 }, { "epoch": 1.784820556640625e-05, "step": 11697, "training_step_time": 0.15319347381591797 }, { "epoch": 1.78497314453125e-05, "model_forward_time": 0.024226665496826172, "step": 11698 }, { "epoch": 1.78497314453125e-05, "step": 11698, "training_step_time": 0.17513108253479004 }, { "epoch": 1.785125732421875e-05, "model_forward_time": 0.02492499351501465, "step": 11699 }, { "epoch": 1.785125732421875e-05, "step": 11699, "training_step_time": 0.12773919105529785 }, { "epoch": 1.7852783203125e-05, "grad_norm": 0.3183434009552002, "learning_rate": 7.158771761692464e-05, "loss": 0.0194, "step": 11700 }, { "epoch": 1.7852783203125e-05, "model_forward_time": 0.02441549301147461, "step": 11700 }, { "epoch": 1.7852783203125e-05, "step": 11700, "training_step_time": 0.12762832641601562 }, { "epoch": 1.785430908203125e-05, "model_forward_time": 0.024658679962158203, "step": 11701 }, { "epoch": 1.785430908203125e-05, "step": 11701, "training_step_time": 0.12233352661132812 }, { "epoch": 1.78558349609375e-05, "model_forward_time": 0.024938344955444336, "step": 11702 }, { "epoch": 1.78558349609375e-05, "step": 11702, "training_step_time": 0.19814038276672363 }, { "epoch": 1.785736083984375e-05, "model_forward_time": 0.02831292152404785, "step": 11703 }, { "epoch": 1.785736083984375e-05, "step": 11703, "training_step_time": 0.11129140853881836 }, { "epoch": 1.785888671875e-05, "model_forward_time": 0.02491450309753418, "step": 11704 }, { "epoch": 1.785888671875e-05, "step": 11704, "training_step_time": 0.11177992820739746 }, { "epoch": 1.786041259765625e-05, "model_forward_time": 0.02515721321105957, "step": 11705 }, { "epoch": 1.786041259765625e-05, "step": 11705, "training_step_time": 0.11127138137817383 }, { "epoch": 1.78619384765625e-05, "model_forward_time": 0.02515864372253418, "step": 11706 }, { "epoch": 1.78619384765625e-05, "step": 11706, "training_step_time": 0.10737419128417969 }, { "epoch": 1.786346435546875e-05, "model_forward_time": 0.025577545166015625, "step": 11707 }, { "epoch": 1.786346435546875e-05, "step": 11707, "training_step_time": 0.10860109329223633 }, { "epoch": 1.7864990234375e-05, "model_forward_time": 0.025356769561767578, "step": 11708 }, { "epoch": 1.7864990234375e-05, "step": 11708, "training_step_time": 0.11063647270202637 }, { "epoch": 1.786651611328125e-05, "model_forward_time": 0.0246732234954834, "step": 11709 }, { "epoch": 1.786651611328125e-05, "step": 11709, "training_step_time": 0.10815548896789551 }, { "epoch": 1.78680419921875e-05, "grad_norm": 0.4140734374523163, "learning_rate": 7.153799066894171e-05, "loss": 0.0188, "step": 11710 }, { "epoch": 1.78680419921875e-05, "model_forward_time": 0.025480985641479492, "step": 11710 }, { "epoch": 1.78680419921875e-05, "step": 11710, "training_step_time": 0.10764598846435547 }, { "epoch": 1.786956787109375e-05, "model_forward_time": 0.028128623962402344, "step": 11711 }, { "epoch": 1.786956787109375e-05, "step": 11711, "training_step_time": 0.15357565879821777 }, { "epoch": 1.787109375e-05, "model_forward_time": 0.024831533432006836, "step": 11712 }, { "epoch": 1.787109375e-05, "step": 11712, "training_step_time": 0.15476202964782715 }, { "epoch": 1.787261962890625e-05, "model_forward_time": 0.024794578552246094, "step": 11713 }, { "epoch": 1.787261962890625e-05, "step": 11713, "training_step_time": 0.11474776268005371 }, { "epoch": 1.78741455078125e-05, "model_forward_time": 0.025244474411010742, "step": 11714 }, { "epoch": 1.78741455078125e-05, "step": 11714, "training_step_time": 0.12900328636169434 }, { "epoch": 1.787567138671875e-05, "model_forward_time": 0.025837182998657227, "step": 11715 }, { "epoch": 1.787567138671875e-05, "step": 11715, "training_step_time": 0.2018749713897705 }, { "epoch": 1.7877197265625e-05, "model_forward_time": 0.024688005447387695, "step": 11716 }, { "epoch": 1.7877197265625e-05, "step": 11716, "training_step_time": 0.11519742012023926 }, { "epoch": 1.787872314453125e-05, "model_forward_time": 0.02411198616027832, "step": 11717 }, { "epoch": 1.787872314453125e-05, "step": 11717, "training_step_time": 0.10938572883605957 }, { "epoch": 1.78802490234375e-05, "model_forward_time": 0.026111364364624023, "step": 11718 }, { "epoch": 1.78802490234375e-05, "step": 11718, "training_step_time": 0.1160573959350586 }, { "epoch": 1.788177490234375e-05, "model_forward_time": 0.02622056007385254, "step": 11719 }, { "epoch": 1.788177490234375e-05, "step": 11719, "training_step_time": 0.11627006530761719 }, { "epoch": 1.788330078125e-05, "grad_norm": 0.4929561913013458, "learning_rate": 7.148823755026921e-05, "loss": 0.0196, "step": 11720 }, { "epoch": 1.788330078125e-05, "model_forward_time": 0.025667190551757812, "step": 11720 }, { "epoch": 1.788330078125e-05, "step": 11720, "training_step_time": 0.10977578163146973 }, { "epoch": 1.788482666015625e-05, "model_forward_time": 0.02518296241760254, "step": 11721 }, { "epoch": 1.788482666015625e-05, "step": 11721, "training_step_time": 0.1930091381072998 }, { "epoch": 1.78863525390625e-05, "model_forward_time": 0.024659156799316406, "step": 11722 }, { "epoch": 1.78863525390625e-05, "step": 11722, "training_step_time": 0.1693124771118164 }, { "epoch": 1.788787841796875e-05, "model_forward_time": 0.024460792541503906, "step": 11723 }, { "epoch": 1.788787841796875e-05, "step": 11723, "training_step_time": 0.1668837070465088 }, { "epoch": 1.7889404296875e-05, "model_forward_time": 0.024379730224609375, "step": 11724 }, { "epoch": 1.7889404296875e-05, "step": 11724, "training_step_time": 0.13912343978881836 }, { "epoch": 1.789093017578125e-05, "model_forward_time": 0.024727582931518555, "step": 11725 }, { "epoch": 1.789093017578125e-05, "step": 11725, "training_step_time": 0.11472392082214355 }, { "epoch": 1.78924560546875e-05, "model_forward_time": 0.025653839111328125, "step": 11726 }, { "epoch": 1.78924560546875e-05, "step": 11726, "training_step_time": 0.12459659576416016 }, { "epoch": 1.789398193359375e-05, "model_forward_time": 0.0254518985748291, "step": 11727 }, { "epoch": 1.789398193359375e-05, "step": 11727, "training_step_time": 0.12235260009765625 }, { "epoch": 1.78955078125e-05, "model_forward_time": 0.025080204010009766, "step": 11728 }, { "epoch": 1.78955078125e-05, "step": 11728, "training_step_time": 0.1136469841003418 }, { "epoch": 1.789703369140625e-05, "model_forward_time": 0.02559971809387207, "step": 11729 }, { "epoch": 1.789703369140625e-05, "step": 11729, "training_step_time": 0.11441326141357422 }, { "epoch": 1.78985595703125e-05, "grad_norm": 0.2370370477437973, "learning_rate": 7.143845832136188e-05, "loss": 0.0137, "step": 11730 }, { "epoch": 1.78985595703125e-05, "model_forward_time": 0.025336503982543945, "step": 11730 }, { "epoch": 1.78985595703125e-05, "step": 11730, "training_step_time": 0.11599612236022949 }, { "epoch": 1.790008544921875e-05, "model_forward_time": 0.0253753662109375, "step": 11731 }, { "epoch": 1.790008544921875e-05, "step": 11731, "training_step_time": 0.11584115028381348 }, { "epoch": 1.7901611328125e-05, "model_forward_time": 0.0266873836517334, "step": 11732 }, { "epoch": 1.7901611328125e-05, "step": 11732, "training_step_time": 0.11408209800720215 }, { "epoch": 1.790313720703125e-05, "model_forward_time": 0.025597572326660156, "step": 11733 }, { "epoch": 1.790313720703125e-05, "step": 11733, "training_step_time": 0.11321783065795898 }, { "epoch": 1.79046630859375e-05, "model_forward_time": 0.025423765182495117, "step": 11734 }, { "epoch": 1.79046630859375e-05, "step": 11734, "training_step_time": 0.11077570915222168 }, { "epoch": 1.790618896484375e-05, "model_forward_time": 0.027895212173461914, "step": 11735 }, { "epoch": 1.790618896484375e-05, "step": 11735, "training_step_time": 0.11109590530395508 }, { "epoch": 1.790771484375e-05, "model_forward_time": 0.025504350662231445, "step": 11736 }, { "epoch": 1.790771484375e-05, "step": 11736, "training_step_time": 0.11061596870422363 }, { "epoch": 1.790924072265625e-05, "model_forward_time": 0.025641918182373047, "step": 11737 }, { "epoch": 1.790924072265625e-05, "step": 11737, "training_step_time": 0.11015653610229492 }, { "epoch": 1.79107666015625e-05, "model_forward_time": 0.02518153190612793, "step": 11738 }, { "epoch": 1.79107666015625e-05, "step": 11738, "training_step_time": 0.10755777359008789 }, { "epoch": 1.791229248046875e-05, "model_forward_time": 0.025189638137817383, "step": 11739 }, { "epoch": 1.791229248046875e-05, "step": 11739, "training_step_time": 0.10906028747558594 }, { "epoch": 1.7913818359375e-05, "grad_norm": 0.3798167407512665, "learning_rate": 7.138865304270616e-05, "loss": 0.0193, "step": 11740 }, { "epoch": 1.7913818359375e-05, "model_forward_time": 0.025629520416259766, "step": 11740 }, { "epoch": 1.7913818359375e-05, "step": 11740, "training_step_time": 0.11115503311157227 }, { "epoch": 1.791534423828125e-05, "model_forward_time": 0.025362730026245117, "step": 11741 }, { "epoch": 1.791534423828125e-05, "step": 11741, "training_step_time": 0.11635255813598633 }, { "epoch": 1.79168701171875e-05, "model_forward_time": 0.027070283889770508, "step": 11742 }, { "epoch": 1.79168701171875e-05, "step": 11742, "training_step_time": 0.20132780075073242 }, { "epoch": 1.791839599609375e-05, "model_forward_time": 0.024280309677124023, "step": 11743 }, { "epoch": 1.791839599609375e-05, "step": 11743, "training_step_time": 0.10520076751708984 }, { "epoch": 1.7919921875e-05, "model_forward_time": 0.0245358943939209, "step": 11744 }, { "epoch": 1.7919921875e-05, "step": 11744, "training_step_time": 0.11055636405944824 }, { "epoch": 1.792144775390625e-05, "model_forward_time": 0.025367259979248047, "step": 11745 }, { "epoch": 1.792144775390625e-05, "step": 11745, "training_step_time": 0.12089133262634277 }, { "epoch": 1.79229736328125e-05, "model_forward_time": 0.025229930877685547, "step": 11746 }, { "epoch": 1.79229736328125e-05, "step": 11746, "training_step_time": 0.13059186935424805 }, { "epoch": 1.792449951171875e-05, "model_forward_time": 0.024973154067993164, "step": 11747 }, { "epoch": 1.792449951171875e-05, "step": 11747, "training_step_time": 0.10904932022094727 }, { "epoch": 1.7926025390625e-05, "model_forward_time": 0.02531147003173828, "step": 11748 }, { "epoch": 1.7926025390625e-05, "step": 11748, "training_step_time": 0.10708236694335938 }, { "epoch": 1.792755126953125e-05, "model_forward_time": 0.025348186492919922, "step": 11749 }, { "epoch": 1.792755126953125e-05, "step": 11749, "training_step_time": 0.11211419105529785 }, { "epoch": 1.79290771484375e-05, "grad_norm": 0.20920109748840332, "learning_rate": 7.133882177482019e-05, "loss": 0.0259, "step": 11750 }, { "epoch": 1.79290771484375e-05, "model_forward_time": 0.02493119239807129, "step": 11750 }, { "epoch": 1.79290771484375e-05, "step": 11750, "training_step_time": 0.10653901100158691 }, { "epoch": 1.793060302734375e-05, "model_forward_time": 0.026560544967651367, "step": 11751 }, { "epoch": 1.793060302734375e-05, "step": 11751, "training_step_time": 0.10820913314819336 }, { "epoch": 1.793212890625e-05, "model_forward_time": 0.025300025939941406, "step": 11752 }, { "epoch": 1.793212890625e-05, "step": 11752, "training_step_time": 0.10830354690551758 }, { "epoch": 1.793365478515625e-05, "model_forward_time": 0.025789976119995117, "step": 11753 }, { "epoch": 1.793365478515625e-05, "step": 11753, "training_step_time": 0.11249709129333496 }, { "epoch": 1.79351806640625e-05, "model_forward_time": 0.02571702003479004, "step": 11754 }, { "epoch": 1.79351806640625e-05, "step": 11754, "training_step_time": 0.10808610916137695 }, { "epoch": 1.793670654296875e-05, "model_forward_time": 0.025809526443481445, "step": 11755 }, { "epoch": 1.793670654296875e-05, "step": 11755, "training_step_time": 0.10536909103393555 }, { "epoch": 1.7938232421875e-05, "model_forward_time": 0.024606704711914062, "step": 11756 }, { "epoch": 1.7938232421875e-05, "step": 11756, "training_step_time": 0.14014959335327148 }, { "epoch": 1.793975830078125e-05, "model_forward_time": 0.025243520736694336, "step": 11757 }, { "epoch": 1.793975830078125e-05, "step": 11757, "training_step_time": 0.11704897880554199 }, { "epoch": 1.79412841796875e-05, "model_forward_time": 0.025142431259155273, "step": 11758 }, { "epoch": 1.79412841796875e-05, "step": 11758, "training_step_time": 0.19900965690612793 }, { "epoch": 1.794281005859375e-05, "model_forward_time": 0.02420520782470703, "step": 11759 }, { "epoch": 1.794281005859375e-05, "step": 11759, "training_step_time": 0.13672661781311035 }, { "epoch": 1.79443359375e-05, "grad_norm": 0.30930909514427185, "learning_rate": 7.128896457825364e-05, "loss": 0.0168, "step": 11760 }, { "epoch": 1.79443359375e-05, "model_forward_time": 0.02392745018005371, "step": 11760 }, { "epoch": 1.79443359375e-05, "step": 11760, "training_step_time": 0.19931745529174805 }, { "epoch": 1.794586181640625e-05, "model_forward_time": 0.024522066116333008, "step": 11761 }, { "epoch": 1.794586181640625e-05, "step": 11761, "training_step_time": 0.18280506134033203 }, { "epoch": 1.79473876953125e-05, "model_forward_time": 0.023884057998657227, "step": 11762 }, { "epoch": 1.79473876953125e-05, "step": 11762, "training_step_time": 0.11298418045043945 }, { "epoch": 1.794891357421875e-05, "model_forward_time": 0.024363994598388672, "step": 11763 }, { "epoch": 1.794891357421875e-05, "step": 11763, "training_step_time": 0.11811637878417969 }, { "epoch": 1.7950439453125e-05, "model_forward_time": 0.025239944458007812, "step": 11764 }, { "epoch": 1.7950439453125e-05, "step": 11764, "training_step_time": 0.11014318466186523 }, { "epoch": 1.795196533203125e-05, "model_forward_time": 0.025240182876586914, "step": 11765 }, { "epoch": 1.795196533203125e-05, "step": 11765, "training_step_time": 0.19793152809143066 }, { "epoch": 1.79534912109375e-05, "model_forward_time": 0.024227619171142578, "step": 11766 }, { "epoch": 1.79534912109375e-05, "step": 11766, "training_step_time": 0.1875934600830078 }, { "epoch": 1.795501708984375e-05, "model_forward_time": 0.024090290069580078, "step": 11767 }, { "epoch": 1.795501708984375e-05, "step": 11767, "training_step_time": 0.14180922508239746 }, { "epoch": 1.795654296875e-05, "model_forward_time": 0.024306297302246094, "step": 11768 }, { "epoch": 1.795654296875e-05, "step": 11768, "training_step_time": 0.1201925277709961 }, { "epoch": 1.795806884765625e-05, "model_forward_time": 0.024745941162109375, "step": 11769 }, { "epoch": 1.795806884765625e-05, "step": 11769, "training_step_time": 0.10758209228515625 }, { "epoch": 1.79595947265625e-05, "grad_norm": 0.3645468056201935, "learning_rate": 7.12390815135877e-05, "loss": 0.0218, "step": 11770 }, { "epoch": 1.79595947265625e-05, "model_forward_time": 0.025122404098510742, "step": 11770 }, { "epoch": 1.79595947265625e-05, "step": 11770, "training_step_time": 0.11304521560668945 }, { "epoch": 1.796112060546875e-05, "model_forward_time": 0.025554656982421875, "step": 11771 }, { "epoch": 1.796112060546875e-05, "step": 11771, "training_step_time": 0.11222124099731445 }, { "epoch": 1.7962646484375e-05, "model_forward_time": 0.02506566047668457, "step": 11772 }, { "epoch": 1.7962646484375e-05, "step": 11772, "training_step_time": 0.10831093788146973 }, { "epoch": 1.796417236328125e-05, "model_forward_time": 0.024943828582763672, "step": 11773 }, { "epoch": 1.796417236328125e-05, "step": 11773, "training_step_time": 0.10556483268737793 }, { "epoch": 1.79656982421875e-05, "model_forward_time": 0.024845361709594727, "step": 11774 }, { "epoch": 1.79656982421875e-05, "step": 11774, "training_step_time": 0.10753655433654785 }, { "epoch": 1.796722412109375e-05, "model_forward_time": 0.025279760360717773, "step": 11775 }, { "epoch": 1.796722412109375e-05, "step": 11775, "training_step_time": 0.11156797409057617 }, { "epoch": 1.796875e-05, "model_forward_time": 0.02526545524597168, "step": 11776 }, { "epoch": 1.796875e-05, "step": 11776, "training_step_time": 0.18631601333618164 }, { "epoch": 1.797027587890625e-05, "model_forward_time": 0.02446126937866211, "step": 11777 }, { "epoch": 1.797027587890625e-05, "step": 11777, "training_step_time": 0.20564889907836914 }, { "epoch": 1.79718017578125e-05, "model_forward_time": 0.024088621139526367, "step": 11778 }, { "epoch": 1.79718017578125e-05, "step": 11778, "training_step_time": 0.1980876922607422 }, { "epoch": 1.797332763671875e-05, "model_forward_time": 0.024466514587402344, "step": 11779 }, { "epoch": 1.797332763671875e-05, "step": 11779, "training_step_time": 0.19675660133361816 }, { "epoch": 1.7974853515625e-05, "grad_norm": 0.5852621793746948, "learning_rate": 7.118917264143501e-05, "loss": 0.0177, "step": 11780 }, { "epoch": 1.7974853515625e-05, "model_forward_time": 0.0243985652923584, "step": 11780 }, { "epoch": 1.7974853515625e-05, "step": 11780, "training_step_time": 0.18271350860595703 }, { "epoch": 1.797637939453125e-05, "model_forward_time": 0.024523496627807617, "step": 11781 }, { "epoch": 1.797637939453125e-05, "step": 11781, "training_step_time": 0.10518002510070801 }, { "epoch": 1.79779052734375e-05, "model_forward_time": 0.025251150131225586, "step": 11782 }, { "epoch": 1.79779052734375e-05, "step": 11782, "training_step_time": 0.1033945083618164 }, { "epoch": 1.797943115234375e-05, "model_forward_time": 0.025688648223876953, "step": 11783 }, { "epoch": 1.797943115234375e-05, "step": 11783, "training_step_time": 0.10590195655822754 }, { "epoch": 1.798095703125e-05, "model_forward_time": 0.025027036666870117, "step": 11784 }, { "epoch": 1.798095703125e-05, "step": 11784, "training_step_time": 0.20040297508239746 }, { "epoch": 1.798248291015625e-05, "model_forward_time": 0.024587154388427734, "step": 11785 }, { "epoch": 1.798248291015625e-05, "step": 11785, "training_step_time": 0.11007285118103027 }, { "epoch": 1.79840087890625e-05, "model_forward_time": 0.024486064910888672, "step": 11786 }, { "epoch": 1.79840087890625e-05, "step": 11786, "training_step_time": 0.10959553718566895 }, { "epoch": 1.798553466796875e-05, "model_forward_time": 0.024246692657470703, "step": 11787 }, { "epoch": 1.798553466796875e-05, "step": 11787, "training_step_time": 0.18020939826965332 }, { "epoch": 1.7987060546875e-05, "model_forward_time": 0.02375340461730957, "step": 11788 }, { "epoch": 1.7987060546875e-05, "step": 11788, "training_step_time": 0.23661470413208008 }, { "epoch": 1.798858642578125e-05, "model_forward_time": 0.024754047393798828, "step": 11789 }, { "epoch": 1.798858642578125e-05, "step": 11789, "training_step_time": 0.2100536823272705 }, { "epoch": 1.79901123046875e-05, "grad_norm": 0.321106880903244, "learning_rate": 7.113923802243957e-05, "loss": 0.0186, "step": 11790 }, { "epoch": 1.79901123046875e-05, "model_forward_time": 0.0238034725189209, "step": 11790 }, { "epoch": 1.79901123046875e-05, "step": 11790, "training_step_time": 0.17432641983032227 }, { "epoch": 1.799163818359375e-05, "model_forward_time": 0.02335953712463379, "step": 11791 }, { "epoch": 1.799163818359375e-05, "step": 11791, "training_step_time": 0.16030454635620117 }, { "epoch": 1.79931640625e-05, "model_forward_time": 0.02325129508972168, "step": 11792 }, { "epoch": 1.79931640625e-05, "step": 11792, "training_step_time": 0.14293956756591797 }, { "epoch": 1.799468994140625e-05, "model_forward_time": 0.02725958824157715, "step": 11793 }, { "epoch": 1.799468994140625e-05, "step": 11793, "training_step_time": 0.11002135276794434 }, { "epoch": 1.79962158203125e-05, "model_forward_time": 0.025141239166259766, "step": 11794 }, { "epoch": 1.79962158203125e-05, "step": 11794, "training_step_time": 0.10535001754760742 }, { "epoch": 1.799774169921875e-05, "model_forward_time": 0.024230480194091797, "step": 11795 }, { "epoch": 1.799774169921875e-05, "step": 11795, "training_step_time": 0.14729523658752441 }, { "epoch": 1.7999267578125e-05, "model_forward_time": 0.02480792999267578, "step": 11796 }, { "epoch": 1.7999267578125e-05, "step": 11796, "training_step_time": 0.1072535514831543 }, { "epoch": 1.800079345703125e-05, "model_forward_time": 0.024956941604614258, "step": 11797 }, { "epoch": 1.800079345703125e-05, "step": 11797, "training_step_time": 0.10697817802429199 }, { "epoch": 1.80023193359375e-05, "model_forward_time": 0.0248563289642334, "step": 11798 }, { "epoch": 1.80023193359375e-05, "step": 11798, "training_step_time": 0.10707211494445801 }, { "epoch": 1.800384521484375e-05, "model_forward_time": 0.026293277740478516, "step": 11799 }, { "epoch": 1.800384521484375e-05, "step": 11799, "training_step_time": 0.12851214408874512 }, { "epoch": 1.800537109375e-05, "grad_norm": 0.3286162316799164, "learning_rate": 7.108927771727661e-05, "loss": 0.0198, "step": 11800 }, { "epoch": 1.800537109375e-05, "model_forward_time": 0.02498602867126465, "step": 11800 }, { "epoch": 1.800537109375e-05, "step": 11800, "training_step_time": 0.21477150917053223 }, { "epoch": 1.800689697265625e-05, "model_forward_time": 0.024430274963378906, "step": 11801 }, { "epoch": 1.800689697265625e-05, "step": 11801, "training_step_time": 0.10818862915039062 }, { "epoch": 1.80084228515625e-05, "model_forward_time": 0.024498462677001953, "step": 11802 }, { "epoch": 1.80084228515625e-05, "step": 11802, "training_step_time": 0.11890983581542969 }, { "epoch": 1.800994873046875e-05, "model_forward_time": 0.025299072265625, "step": 11803 }, { "epoch": 1.800994873046875e-05, "step": 11803, "training_step_time": 0.11484479904174805 }, { "epoch": 1.8011474609375e-05, "model_forward_time": 0.02626514434814453, "step": 11804 }, { "epoch": 1.8011474609375e-05, "step": 11804, "training_step_time": 0.1143801212310791 }, { "epoch": 1.801300048828125e-05, "model_forward_time": 0.02494049072265625, "step": 11805 }, { "epoch": 1.801300048828125e-05, "step": 11805, "training_step_time": 0.19628286361694336 }, { "epoch": 1.80145263671875e-05, "model_forward_time": 0.02511119842529297, "step": 11806 }, { "epoch": 1.80145263671875e-05, "step": 11806, "training_step_time": 0.1489872932434082 }, { "epoch": 1.801605224609375e-05, "model_forward_time": 0.025616168975830078, "step": 11807 }, { "epoch": 1.801605224609375e-05, "step": 11807, "training_step_time": 0.14116692543029785 }, { "epoch": 1.8017578125e-05, "model_forward_time": 0.025133132934570312, "step": 11808 }, { "epoch": 1.8017578125e-05, "step": 11808, "training_step_time": 0.1482248306274414 }, { "epoch": 1.801910400390625e-05, "model_forward_time": 0.026373624801635742, "step": 11809 }, { "epoch": 1.801910400390625e-05, "step": 11809, "training_step_time": 0.15588116645812988 }, { "epoch": 1.80206298828125e-05, "grad_norm": 0.43684709072113037, "learning_rate": 7.103929178665266e-05, "loss": 0.0305, "step": 11810 }, { "epoch": 1.80206298828125e-05, "model_forward_time": 0.024808883666992188, "step": 11810 }, { "epoch": 1.80206298828125e-05, "step": 11810, "training_step_time": 0.22199082374572754 }, { "epoch": 1.802215576171875e-05, "model_forward_time": 0.02487468719482422, "step": 11811 }, { "epoch": 1.802215576171875e-05, "step": 11811, "training_step_time": 0.12003731727600098 }, { "epoch": 1.8023681640625e-05, "model_forward_time": 0.02356243133544922, "step": 11812 }, { "epoch": 1.8023681640625e-05, "step": 11812, "training_step_time": 0.11510682106018066 }, { "epoch": 1.802520751953125e-05, "model_forward_time": 0.02469921112060547, "step": 11813 }, { "epoch": 1.802520751953125e-05, "step": 11813, "training_step_time": 0.11623358726501465 }, { "epoch": 1.80267333984375e-05, "model_forward_time": 0.025042295455932617, "step": 11814 }, { "epoch": 1.80267333984375e-05, "step": 11814, "training_step_time": 0.11191964149475098 }, { "epoch": 1.802825927734375e-05, "model_forward_time": 0.0251467227935791, "step": 11815 }, { "epoch": 1.802825927734375e-05, "step": 11815, "training_step_time": 0.10897064208984375 }, { "epoch": 1.802978515625e-05, "model_forward_time": 0.025455236434936523, "step": 11816 }, { "epoch": 1.802978515625e-05, "step": 11816, "training_step_time": 0.11017823219299316 }, { "epoch": 1.803131103515625e-05, "model_forward_time": 0.02565288543701172, "step": 11817 }, { "epoch": 1.803131103515625e-05, "step": 11817, "training_step_time": 0.10978913307189941 }, { "epoch": 1.80328369140625e-05, "model_forward_time": 0.025326251983642578, "step": 11818 }, { "epoch": 1.80328369140625e-05, "step": 11818, "training_step_time": 0.10839009284973145 }, { "epoch": 1.803436279296875e-05, "model_forward_time": 0.02567315101623535, "step": 11819 }, { "epoch": 1.803436279296875e-05, "step": 11819, "training_step_time": 0.1093289852142334 }, { "epoch": 1.8035888671875e-05, "grad_norm": 0.3068019449710846, "learning_rate": 7.09892802913053e-05, "loss": 0.0223, "step": 11820 }, { "epoch": 1.8035888671875e-05, "model_forward_time": 0.025632143020629883, "step": 11820 }, { "epoch": 1.8035888671875e-05, "step": 11820, "training_step_time": 0.10836362838745117 }, { "epoch": 1.803741455078125e-05, "model_forward_time": 0.025618791580200195, "step": 11821 }, { "epoch": 1.803741455078125e-05, "step": 11821, "training_step_time": 0.11076545715332031 }, { "epoch": 1.80389404296875e-05, "model_forward_time": 0.025796890258789062, "step": 11822 }, { "epoch": 1.80389404296875e-05, "step": 11822, "training_step_time": 0.10805392265319824 }, { "epoch": 1.804046630859375e-05, "model_forward_time": 0.025356531143188477, "step": 11823 }, { "epoch": 1.804046630859375e-05, "step": 11823, "training_step_time": 0.10704660415649414 }, { "epoch": 1.80419921875e-05, "model_forward_time": 0.025439977645874023, "step": 11824 }, { "epoch": 1.80419921875e-05, "step": 11824, "training_step_time": 0.10770082473754883 }, { "epoch": 1.804351806640625e-05, "model_forward_time": 0.025523900985717773, "step": 11825 }, { "epoch": 1.804351806640625e-05, "step": 11825, "training_step_time": 0.10774683952331543 }, { "epoch": 1.80450439453125e-05, "model_forward_time": 0.025355100631713867, "step": 11826 }, { "epoch": 1.80450439453125e-05, "step": 11826, "training_step_time": 0.10748481750488281 }, { "epoch": 1.804656982421875e-05, "model_forward_time": 0.025360822677612305, "step": 11827 }, { "epoch": 1.804656982421875e-05, "step": 11827, "training_step_time": 0.16775155067443848 }, { "epoch": 1.8048095703125e-05, "model_forward_time": 0.02608013153076172, "step": 11828 }, { "epoch": 1.8048095703125e-05, "step": 11828, "training_step_time": 0.11002421379089355 }, { "epoch": 1.804962158203125e-05, "model_forward_time": 0.024909257888793945, "step": 11829 }, { "epoch": 1.804962158203125e-05, "step": 11829, "training_step_time": 0.11364579200744629 }, { "epoch": 1.80511474609375e-05, "grad_norm": 0.5004920959472656, "learning_rate": 7.093924329200321e-05, "loss": 0.0354, "step": 11830 }, { "epoch": 1.80511474609375e-05, "model_forward_time": 0.02544713020324707, "step": 11830 }, { "epoch": 1.80511474609375e-05, "step": 11830, "training_step_time": 0.12013840675354004 }, { "epoch": 1.805267333984375e-05, "model_forward_time": 0.025325775146484375, "step": 11831 }, { "epoch": 1.805267333984375e-05, "step": 11831, "training_step_time": 0.12206459045410156 }, { "epoch": 1.805419921875e-05, "model_forward_time": 0.025445222854614258, "step": 11832 }, { "epoch": 1.805419921875e-05, "step": 11832, "training_step_time": 0.12179994583129883 }, { "epoch": 1.805572509765625e-05, "model_forward_time": 0.025418519973754883, "step": 11833 }, { "epoch": 1.805572509765625e-05, "step": 11833, "training_step_time": 0.13859272003173828 }, { "epoch": 1.80572509765625e-05, "model_forward_time": 0.025030136108398438, "step": 11834 }, { "epoch": 1.80572509765625e-05, "step": 11834, "training_step_time": 0.11196327209472656 }, { "epoch": 1.805877685546875e-05, "model_forward_time": 0.025281667709350586, "step": 11835 }, { "epoch": 1.805877685546875e-05, "step": 11835, "training_step_time": 0.10619878768920898 }, { "epoch": 1.8060302734375e-05, "model_forward_time": 0.0253908634185791, "step": 11836 }, { "epoch": 1.8060302734375e-05, "step": 11836, "training_step_time": 0.10717105865478516 }, { "epoch": 1.806182861328125e-05, "model_forward_time": 0.025280475616455078, "step": 11837 }, { "epoch": 1.806182861328125e-05, "step": 11837, "training_step_time": 0.11041641235351562 }, { "epoch": 1.80633544921875e-05, "model_forward_time": 0.025144338607788086, "step": 11838 }, { "epoch": 1.80633544921875e-05, "step": 11838, "training_step_time": 0.10631942749023438 }, { "epoch": 1.806488037109375e-05, "model_forward_time": 0.025485992431640625, "step": 11839 }, { "epoch": 1.806488037109375e-05, "step": 11839, "training_step_time": 0.10799312591552734 }, { "epoch": 1.806640625e-05, "grad_norm": 0.4685319662094116, "learning_rate": 7.08891808495461e-05, "loss": 0.0223, "step": 11840 }, { "epoch": 1.806640625e-05, "model_forward_time": 0.026576757431030273, "step": 11840 }, { "epoch": 1.806640625e-05, "step": 11840, "training_step_time": 0.10597634315490723 }, { "epoch": 1.806793212890625e-05, "model_forward_time": 0.02459096908569336, "step": 11841 }, { "epoch": 1.806793212890625e-05, "step": 11841, "training_step_time": 0.15087127685546875 }, { "epoch": 1.80694580078125e-05, "model_forward_time": 0.024808883666992188, "step": 11842 }, { "epoch": 1.80694580078125e-05, "step": 11842, "training_step_time": 0.16709494590759277 }, { "epoch": 1.807098388671875e-05, "model_forward_time": 0.02557682991027832, "step": 11843 }, { "epoch": 1.807098388671875e-05, "step": 11843, "training_step_time": 0.10812878608703613 }, { "epoch": 1.8072509765625e-05, "model_forward_time": 0.028900623321533203, "step": 11844 }, { "epoch": 1.8072509765625e-05, "step": 11844, "training_step_time": 0.1749575138092041 }, { "epoch": 1.807403564453125e-05, "model_forward_time": 0.02477431297302246, "step": 11845 }, { "epoch": 1.807403564453125e-05, "step": 11845, "training_step_time": 0.15839695930480957 }, { "epoch": 1.80755615234375e-05, "model_forward_time": 0.024523496627807617, "step": 11846 }, { "epoch": 1.80755615234375e-05, "step": 11846, "training_step_time": 0.20357394218444824 }, { "epoch": 1.807708740234375e-05, "model_forward_time": 0.02464580535888672, "step": 11847 }, { "epoch": 1.807708740234375e-05, "step": 11847, "training_step_time": 0.11320328712463379 }, { "epoch": 1.807861328125e-05, "model_forward_time": 0.024836063385009766, "step": 11848 }, { "epoch": 1.807861328125e-05, "step": 11848, "training_step_time": 0.10975027084350586 }, { "epoch": 1.808013916015625e-05, "model_forward_time": 0.025541067123413086, "step": 11849 }, { "epoch": 1.808013916015625e-05, "step": 11849, "training_step_time": 0.1591174602508545 }, { "epoch": 1.80816650390625e-05, "grad_norm": 0.20132340490818024, "learning_rate": 7.083909302476453e-05, "loss": 0.0198, "step": 11850 }, { "epoch": 1.80816650390625e-05, "model_forward_time": 0.024764537811279297, "step": 11850 }, { "epoch": 1.80816650390625e-05, "step": 11850, "training_step_time": 0.1755080223083496 }, { "epoch": 1.808319091796875e-05, "model_forward_time": 0.025552749633789062, "step": 11851 }, { "epoch": 1.808319091796875e-05, "step": 11851, "training_step_time": 0.12903404235839844 }, { "epoch": 1.8084716796875e-05, "model_forward_time": 0.027469873428344727, "step": 11852 }, { "epoch": 1.8084716796875e-05, "step": 11852, "training_step_time": 0.10930442810058594 }, { "epoch": 1.808624267578125e-05, "model_forward_time": 0.025430679321289062, "step": 11853 }, { "epoch": 1.808624267578125e-05, "step": 11853, "training_step_time": 0.11730742454528809 }, { "epoch": 1.80877685546875e-05, "model_forward_time": 0.02575516700744629, "step": 11854 }, { "epoch": 1.80877685546875e-05, "step": 11854, "training_step_time": 0.10805583000183105 }, { "epoch": 1.808929443359375e-05, "model_forward_time": 0.02515864372253418, "step": 11855 }, { "epoch": 1.808929443359375e-05, "step": 11855, "training_step_time": 0.11034750938415527 }, { "epoch": 1.80908203125e-05, "model_forward_time": 0.0248870849609375, "step": 11856 }, { "epoch": 1.80908203125e-05, "step": 11856, "training_step_time": 0.10940027236938477 }, { "epoch": 1.809234619140625e-05, "model_forward_time": 0.02563166618347168, "step": 11857 }, { "epoch": 1.809234619140625e-05, "step": 11857, "training_step_time": 0.11179566383361816 }, { "epoch": 1.80938720703125e-05, "model_forward_time": 0.027478456497192383, "step": 11858 }, { "epoch": 1.80938720703125e-05, "step": 11858, "training_step_time": 0.10853338241577148 }, { "epoch": 1.809539794921875e-05, "model_forward_time": 0.02540898323059082, "step": 11859 }, { "epoch": 1.809539794921875e-05, "step": 11859, "training_step_time": 0.11035943031311035 }, { "epoch": 1.8096923828125e-05, "grad_norm": 0.2759787142276764, "learning_rate": 7.078897987851993e-05, "loss": 0.0268, "step": 11860 }, { "epoch": 1.8096923828125e-05, "model_forward_time": 0.025503158569335938, "step": 11860 }, { "epoch": 1.8096923828125e-05, "step": 11860, "training_step_time": 0.11227083206176758 }, { "epoch": 1.809844970703125e-05, "model_forward_time": 0.02586817741394043, "step": 11861 }, { "epoch": 1.809844970703125e-05, "step": 11861, "training_step_time": 0.12222790718078613 }, { "epoch": 1.80999755859375e-05, "model_forward_time": 0.025417089462280273, "step": 11862 }, { "epoch": 1.80999755859375e-05, "step": 11862, "training_step_time": 0.12229728698730469 }, { "epoch": 1.810150146484375e-05, "model_forward_time": 0.025314807891845703, "step": 11863 }, { "epoch": 1.810150146484375e-05, "step": 11863, "training_step_time": 0.11486244201660156 }, { "epoch": 1.810302734375e-05, "model_forward_time": 0.02563762664794922, "step": 11864 }, { "epoch": 1.810302734375e-05, "step": 11864, "training_step_time": 0.11915469169616699 }, { "epoch": 1.810455322265625e-05, "model_forward_time": 0.02538013458251953, "step": 11865 }, { "epoch": 1.810455322265625e-05, "step": 11865, "training_step_time": 0.11410689353942871 }, { "epoch": 1.81060791015625e-05, "model_forward_time": 0.025915861129760742, "step": 11866 }, { "epoch": 1.81060791015625e-05, "step": 11866, "training_step_time": 0.10910224914550781 }, { "epoch": 1.810760498046875e-05, "model_forward_time": 0.02537369728088379, "step": 11867 }, { "epoch": 1.810760498046875e-05, "step": 11867, "training_step_time": 0.1100761890411377 }, { "epoch": 1.8109130859375e-05, "model_forward_time": 0.02442002296447754, "step": 11868 }, { "epoch": 1.8109130859375e-05, "step": 11868, "training_step_time": 0.10900139808654785 }, { "epoch": 1.811065673828125e-05, "model_forward_time": 0.024627685546875, "step": 11869 }, { "epoch": 1.811065673828125e-05, "step": 11869, "training_step_time": 0.10837507247924805 }, { "epoch": 1.81121826171875e-05, "grad_norm": 0.40154775977134705, "learning_rate": 7.073884147170452e-05, "loss": 0.02, "step": 11870 }, { "epoch": 1.81121826171875e-05, "model_forward_time": 0.025444507598876953, "step": 11870 }, { "epoch": 1.81121826171875e-05, "step": 11870, "training_step_time": 0.10881567001342773 }, { "epoch": 1.811370849609375e-05, "model_forward_time": 0.025404691696166992, "step": 11871 }, { "epoch": 1.811370849609375e-05, "step": 11871, "training_step_time": 0.10889887809753418 }, { "epoch": 1.8115234375e-05, "model_forward_time": 0.025213003158569336, "step": 11872 }, { "epoch": 1.8115234375e-05, "step": 11872, "training_step_time": 0.1129615306854248 }, { "epoch": 1.811676025390625e-05, "model_forward_time": 0.025122404098510742, "step": 11873 }, { "epoch": 1.811676025390625e-05, "step": 11873, "training_step_time": 0.163055419921875 }, { "epoch": 1.81182861328125e-05, "model_forward_time": 0.025009632110595703, "step": 11874 }, { "epoch": 1.81182861328125e-05, "step": 11874, "training_step_time": 0.11141037940979004 }, { "epoch": 1.811981201171875e-05, "model_forward_time": 0.02526235580444336, "step": 11875 }, { "epoch": 1.811981201171875e-05, "step": 11875, "training_step_time": 0.11639595031738281 }, { "epoch": 1.8121337890625e-05, "model_forward_time": 0.025880098342895508, "step": 11876 }, { "epoch": 1.8121337890625e-05, "step": 11876, "training_step_time": 0.11485481262207031 }, { "epoch": 1.812286376953125e-05, "model_forward_time": 0.02543354034423828, "step": 11877 }, { "epoch": 1.812286376953125e-05, "step": 11877, "training_step_time": 0.1216130256652832 }, { "epoch": 1.81243896484375e-05, "model_forward_time": 0.025363683700561523, "step": 11878 }, { "epoch": 1.81243896484375e-05, "step": 11878, "training_step_time": 0.11167597770690918 }, { "epoch": 1.812591552734375e-05, "model_forward_time": 0.02561020851135254, "step": 11879 }, { "epoch": 1.812591552734375e-05, "step": 11879, "training_step_time": 0.12296438217163086 }, { "epoch": 1.812744140625e-05, "grad_norm": 0.18449322879314423, "learning_rate": 7.068867786524116e-05, "loss": 0.032, "step": 11880 }, { "epoch": 1.812744140625e-05, "model_forward_time": 0.025257587432861328, "step": 11880 }, { "epoch": 1.812744140625e-05, "step": 11880, "training_step_time": 0.11114859580993652 }, { "epoch": 1.812896728515625e-05, "model_forward_time": 0.02570056915283203, "step": 11881 }, { "epoch": 1.812896728515625e-05, "step": 11881, "training_step_time": 0.10738897323608398 }, { "epoch": 1.81304931640625e-05, "model_forward_time": 0.025901317596435547, "step": 11882 }, { "epoch": 1.81304931640625e-05, "step": 11882, "training_step_time": 0.10774636268615723 }, { "epoch": 1.813201904296875e-05, "model_forward_time": 0.025601625442504883, "step": 11883 }, { "epoch": 1.813201904296875e-05, "step": 11883, "training_step_time": 0.11485099792480469 }, { "epoch": 1.8133544921875e-05, "model_forward_time": 0.025133371353149414, "step": 11884 }, { "epoch": 1.8133544921875e-05, "step": 11884, "training_step_time": 0.10707569122314453 }, { "epoch": 1.813507080078125e-05, "model_forward_time": 0.025577068328857422, "step": 11885 }, { "epoch": 1.813507080078125e-05, "step": 11885, "training_step_time": 0.1050560474395752 }, { "epoch": 1.81365966796875e-05, "model_forward_time": 0.025252103805541992, "step": 11886 }, { "epoch": 1.81365966796875e-05, "step": 11886, "training_step_time": 0.12417411804199219 }, { "epoch": 1.813812255859375e-05, "model_forward_time": 0.02517557144165039, "step": 11887 }, { "epoch": 1.813812255859375e-05, "step": 11887, "training_step_time": 0.1231238842010498 }, { "epoch": 1.81396484375e-05, "model_forward_time": 0.025306224822998047, "step": 11888 }, { "epoch": 1.81396484375e-05, "step": 11888, "training_step_time": 0.10798764228820801 }, { "epoch": 1.814117431640625e-05, "model_forward_time": 0.02523326873779297, "step": 11889 }, { "epoch": 1.814117431640625e-05, "step": 11889, "training_step_time": 0.11379218101501465 }, { "epoch": 1.81427001953125e-05, "grad_norm": 0.43687736988067627, "learning_rate": 7.06384891200834e-05, "loss": 0.0194, "step": 11890 }, { "epoch": 1.81427001953125e-05, "model_forward_time": 0.02571249008178711, "step": 11890 }, { "epoch": 1.81427001953125e-05, "step": 11890, "training_step_time": 0.12089157104492188 }, { "epoch": 1.814422607421875e-05, "model_forward_time": 0.025255918502807617, "step": 11891 }, { "epoch": 1.814422607421875e-05, "step": 11891, "training_step_time": 0.12742257118225098 }, { "epoch": 1.8145751953125e-05, "model_forward_time": 0.025601625442504883, "step": 11892 }, { "epoch": 1.8145751953125e-05, "step": 11892, "training_step_time": 0.11220884323120117 }, { "epoch": 1.814727783203125e-05, "model_forward_time": 0.02551126480102539, "step": 11893 }, { "epoch": 1.814727783203125e-05, "step": 11893, "training_step_time": 0.11354947090148926 }, { "epoch": 1.81488037109375e-05, "model_forward_time": 0.02521657943725586, "step": 11894 }, { "epoch": 1.81488037109375e-05, "step": 11894, "training_step_time": 0.11655473709106445 }, { "epoch": 1.815032958984375e-05, "model_forward_time": 0.025279521942138672, "step": 11895 }, { "epoch": 1.815032958984375e-05, "step": 11895, "training_step_time": 0.10974621772766113 }, { "epoch": 1.815185546875e-05, "model_forward_time": 0.025346994400024414, "step": 11896 }, { "epoch": 1.815185546875e-05, "step": 11896, "training_step_time": 0.19293427467346191 }, { "epoch": 1.815338134765625e-05, "model_forward_time": 0.024799108505249023, "step": 11897 }, { "epoch": 1.815338134765625e-05, "step": 11897, "training_step_time": 0.19765734672546387 }, { "epoch": 1.81549072265625e-05, "model_forward_time": 0.02512335777282715, "step": 11898 }, { "epoch": 1.81549072265625e-05, "step": 11898, "training_step_time": 0.12115073204040527 }, { "epoch": 1.815643310546875e-05, "model_forward_time": 0.02482891082763672, "step": 11899 }, { "epoch": 1.815643310546875e-05, "step": 11899, "training_step_time": 0.13337302207946777 }, { "epoch": 1.8157958984375e-05, "grad_norm": 0.2076655775308609, "learning_rate": 7.058827529721525e-05, "loss": 0.0209, "step": 11900 }, { "epoch": 1.8157958984375e-05, "model_forward_time": 0.024956941604614258, "step": 11900 }, { "epoch": 1.8157958984375e-05, "step": 11900, "training_step_time": 0.10858893394470215 }, { "epoch": 1.815948486328125e-05, "model_forward_time": 0.02528238296508789, "step": 11901 }, { "epoch": 1.815948486328125e-05, "step": 11901, "training_step_time": 0.17809653282165527 }, { "epoch": 1.81610107421875e-05, "model_forward_time": 0.024624109268188477, "step": 11902 }, { "epoch": 1.81610107421875e-05, "step": 11902, "training_step_time": 0.1360483169555664 }, { "epoch": 1.816253662109375e-05, "model_forward_time": 0.025101184844970703, "step": 11903 }, { "epoch": 1.816253662109375e-05, "step": 11903, "training_step_time": 0.11453604698181152 }, { "epoch": 1.81640625e-05, "model_forward_time": 0.02535557746887207, "step": 11904 }, { "epoch": 1.81640625e-05, "step": 11904, "training_step_time": 0.11059975624084473 }, { "epoch": 1.816558837890625e-05, "model_forward_time": 0.02505970001220703, "step": 11905 }, { "epoch": 1.816558837890625e-05, "step": 11905, "training_step_time": 0.10806918144226074 }, { "epoch": 1.81671142578125e-05, "model_forward_time": 0.02533578872680664, "step": 11906 }, { "epoch": 1.81671142578125e-05, "step": 11906, "training_step_time": 0.10772705078125 }, { "epoch": 1.816864013671875e-05, "model_forward_time": 0.025324583053588867, "step": 11907 }, { "epoch": 1.816864013671875e-05, "step": 11907, "training_step_time": 0.1111443042755127 }, { "epoch": 1.8170166015625e-05, "model_forward_time": 0.024606943130493164, "step": 11908 }, { "epoch": 1.8170166015625e-05, "step": 11908, "training_step_time": 0.11343693733215332 }, { "epoch": 1.817169189453125e-05, "model_forward_time": 0.024335861206054688, "step": 11909 }, { "epoch": 1.817169189453125e-05, "step": 11909, "training_step_time": 0.1142129898071289 }, { "epoch": 1.81732177734375e-05, "grad_norm": 0.48608723282814026, "learning_rate": 7.053803645765128e-05, "loss": 0.0218, "step": 11910 }, { "epoch": 1.81732177734375e-05, "model_forward_time": 0.0243072509765625, "step": 11910 }, { "epoch": 1.81732177734375e-05, "step": 11910, "training_step_time": 0.11305618286132812 }, { "epoch": 1.817474365234375e-05, "model_forward_time": 0.026094913482666016, "step": 11911 }, { "epoch": 1.817474365234375e-05, "step": 11911, "training_step_time": 0.11337161064147949 }, { "epoch": 1.817626953125e-05, "model_forward_time": 0.02553081512451172, "step": 11912 }, { "epoch": 1.817626953125e-05, "step": 11912, "training_step_time": 0.11325335502624512 }, { "epoch": 1.817779541015625e-05, "model_forward_time": 0.025168895721435547, "step": 11913 }, { "epoch": 1.817779541015625e-05, "step": 11913, "training_step_time": 0.10983967781066895 }, { "epoch": 1.81793212890625e-05, "model_forward_time": 0.02547287940979004, "step": 11914 }, { "epoch": 1.81793212890625e-05, "step": 11914, "training_step_time": 0.11479926109313965 }, { "epoch": 1.818084716796875e-05, "model_forward_time": 0.025351285934448242, "step": 11915 }, { "epoch": 1.818084716796875e-05, "step": 11915, "training_step_time": 0.11152219772338867 }, { "epoch": 1.8182373046875e-05, "model_forward_time": 0.025853633880615234, "step": 11916 }, { "epoch": 1.8182373046875e-05, "step": 11916, "training_step_time": 0.11192202568054199 }, { "epoch": 1.818389892578125e-05, "model_forward_time": 0.025203227996826172, "step": 11917 }, { "epoch": 1.818389892578125e-05, "step": 11917, "training_step_time": 0.10853385925292969 }, { "epoch": 1.81854248046875e-05, "model_forward_time": 0.025252580642700195, "step": 11918 }, { "epoch": 1.81854248046875e-05, "step": 11918, "training_step_time": 0.11115026473999023 }, { "epoch": 1.818695068359375e-05, "model_forward_time": 0.025269508361816406, "step": 11919 }, { "epoch": 1.818695068359375e-05, "step": 11919, "training_step_time": 0.18666505813598633 }, { "epoch": 1.81884765625e-05, "grad_norm": 0.2840731143951416, "learning_rate": 7.04877726624364e-05, "loss": 0.0183, "step": 11920 }, { "epoch": 1.81884765625e-05, "model_forward_time": 0.024741649627685547, "step": 11920 }, { "epoch": 1.81884765625e-05, "step": 11920, "training_step_time": 0.1402902603149414 }, { "epoch": 1.819000244140625e-05, "model_forward_time": 0.024918317794799805, "step": 11921 }, { "epoch": 1.819000244140625e-05, "step": 11921, "training_step_time": 0.11569976806640625 }, { "epoch": 1.81915283203125e-05, "model_forward_time": 0.024883508682250977, "step": 11922 }, { "epoch": 1.81915283203125e-05, "step": 11922, "training_step_time": 0.1259171962738037 }, { "epoch": 1.819305419921875e-05, "model_forward_time": 0.025394439697265625, "step": 11923 }, { "epoch": 1.819305419921875e-05, "step": 11923, "training_step_time": 0.11852478981018066 }, { "epoch": 1.8194580078125e-05, "model_forward_time": 0.025624990463256836, "step": 11924 }, { "epoch": 1.8194580078125e-05, "step": 11924, "training_step_time": 0.13027596473693848 }, { "epoch": 1.819610595703125e-05, "model_forward_time": 0.025112390518188477, "step": 11925 }, { "epoch": 1.819610595703125e-05, "step": 11925, "training_step_time": 0.11115908622741699 }, { "epoch": 1.81976318359375e-05, "model_forward_time": 0.025536537170410156, "step": 11926 }, { "epoch": 1.81976318359375e-05, "step": 11926, "training_step_time": 0.11501097679138184 }, { "epoch": 1.819915771484375e-05, "model_forward_time": 0.025500774383544922, "step": 11927 }, { "epoch": 1.819915771484375e-05, "step": 11927, "training_step_time": 0.11031556129455566 }, { "epoch": 1.820068359375e-05, "model_forward_time": 0.025410175323486328, "step": 11928 }, { "epoch": 1.820068359375e-05, "step": 11928, "training_step_time": 0.10895919799804688 }, { "epoch": 1.820220947265625e-05, "model_forward_time": 0.024275541305541992, "step": 11929 }, { "epoch": 1.820220947265625e-05, "step": 11929, "training_step_time": 0.1081991195678711 }, { "epoch": 1.82037353515625e-05, "grad_norm": 0.3231754004955292, "learning_rate": 7.043748397264587e-05, "loss": 0.0208, "step": 11930 }, { "epoch": 1.82037353515625e-05, "model_forward_time": 0.02547287940979004, "step": 11930 }, { "epoch": 1.82037353515625e-05, "step": 11930, "training_step_time": 0.10900688171386719 }, { "epoch": 1.820526123046875e-05, "model_forward_time": 0.02678704261779785, "step": 11931 }, { "epoch": 1.820526123046875e-05, "step": 11931, "training_step_time": 0.11053705215454102 }, { "epoch": 1.8206787109375e-05, "model_forward_time": 0.026015520095825195, "step": 11932 }, { "epoch": 1.8206787109375e-05, "step": 11932, "training_step_time": 0.10813021659851074 }, { "epoch": 1.820831298828125e-05, "model_forward_time": 0.025629758834838867, "step": 11933 }, { "epoch": 1.820831298828125e-05, "step": 11933, "training_step_time": 0.14456939697265625 }, { "epoch": 1.82098388671875e-05, "model_forward_time": 0.02581048011779785, "step": 11934 }, { "epoch": 1.82098388671875e-05, "step": 11934, "training_step_time": 0.1088559627532959 }, { "epoch": 1.821136474609375e-05, "model_forward_time": 0.026561737060546875, "step": 11935 }, { "epoch": 1.821136474609375e-05, "step": 11935, "training_step_time": 0.198958158493042 }, { "epoch": 1.8212890625e-05, "model_forward_time": 0.024384498596191406, "step": 11936 }, { "epoch": 1.8212890625e-05, "step": 11936, "training_step_time": 0.18587398529052734 }, { "epoch": 1.821441650390625e-05, "model_forward_time": 0.025376081466674805, "step": 11937 }, { "epoch": 1.821441650390625e-05, "step": 11937, "training_step_time": 0.15593957901000977 }, { "epoch": 1.82159423828125e-05, "model_forward_time": 0.0261688232421875, "step": 11938 }, { "epoch": 1.82159423828125e-05, "step": 11938, "training_step_time": 0.18075871467590332 }, { "epoch": 1.821746826171875e-05, "model_forward_time": 0.024759531021118164, "step": 11939 }, { "epoch": 1.821746826171875e-05, "step": 11939, "training_step_time": 0.10346627235412598 }, { "epoch": 1.8218994140625e-05, "grad_norm": 0.21150803565979004, "learning_rate": 7.038717044938519e-05, "loss": 0.0185, "step": 11940 }, { "epoch": 1.8218994140625e-05, "model_forward_time": 0.02482318878173828, "step": 11940 }, { "epoch": 1.8218994140625e-05, "step": 11940, "training_step_time": 0.10776424407958984 }, { "epoch": 1.822052001953125e-05, "model_forward_time": 0.025606155395507812, "step": 11941 }, { "epoch": 1.822052001953125e-05, "step": 11941, "training_step_time": 0.10492753982543945 }, { "epoch": 1.82220458984375e-05, "model_forward_time": 0.02588629722595215, "step": 11942 }, { "epoch": 1.82220458984375e-05, "step": 11942, "training_step_time": 0.1085667610168457 }, { "epoch": 1.822357177734375e-05, "model_forward_time": 0.02530646324157715, "step": 11943 }, { "epoch": 1.822357177734375e-05, "step": 11943, "training_step_time": 0.15528392791748047 }, { "epoch": 1.822509765625e-05, "model_forward_time": 0.024695873260498047, "step": 11944 }, { "epoch": 1.822509765625e-05, "step": 11944, "training_step_time": 0.11482357978820801 }, { "epoch": 1.822662353515625e-05, "model_forward_time": 0.024913549423217773, "step": 11945 }, { "epoch": 1.822662353515625e-05, "step": 11945, "training_step_time": 0.1322178840637207 }, { "epoch": 1.82281494140625e-05, "model_forward_time": 0.025041580200195312, "step": 11946 }, { "epoch": 1.82281494140625e-05, "step": 11946, "training_step_time": 0.13593602180480957 }, { "epoch": 1.822967529296875e-05, "model_forward_time": 0.02477741241455078, "step": 11947 }, { "epoch": 1.822967529296875e-05, "step": 11947, "training_step_time": 0.1152200698852539 }, { "epoch": 1.8231201171875e-05, "model_forward_time": 0.025507688522338867, "step": 11948 }, { "epoch": 1.8231201171875e-05, "step": 11948, "training_step_time": 0.12780261039733887 }, { "epoch": 1.823272705078125e-05, "model_forward_time": 0.025537967681884766, "step": 11949 }, { "epoch": 1.823272705078125e-05, "step": 11949, "training_step_time": 0.11397361755371094 }, { "epoch": 1.82342529296875e-05, "grad_norm": 0.4072251319885254, "learning_rate": 7.033683215379002e-05, "loss": 0.014, "step": 11950 }, { "epoch": 1.82342529296875e-05, "model_forward_time": 0.025521278381347656, "step": 11950 }, { "epoch": 1.82342529296875e-05, "step": 11950, "training_step_time": 0.10644054412841797 }, { "epoch": 1.823577880859375e-05, "model_forward_time": 0.02577829360961914, "step": 11951 }, { "epoch": 1.823577880859375e-05, "step": 11951, "training_step_time": 0.12633109092712402 }, { "epoch": 1.82373046875e-05, "model_forward_time": 0.0243985652923584, "step": 11952 }, { "epoch": 1.82373046875e-05, "step": 11952, "training_step_time": 0.16186285018920898 }, { "epoch": 1.823883056640625e-05, "model_forward_time": 0.02521061897277832, "step": 11953 }, { "epoch": 1.823883056640625e-05, "step": 11953, "training_step_time": 0.1615769863128662 }, { "epoch": 1.82403564453125e-05, "model_forward_time": 0.024550914764404297, "step": 11954 }, { "epoch": 1.82403564453125e-05, "step": 11954, "training_step_time": 0.16106295585632324 }, { "epoch": 1.824188232421875e-05, "model_forward_time": 0.02491593360900879, "step": 11955 }, { "epoch": 1.824188232421875e-05, "step": 11955, "training_step_time": 0.13564062118530273 }, { "epoch": 1.8243408203125e-05, "model_forward_time": 0.024775028228759766, "step": 11956 }, { "epoch": 1.8243408203125e-05, "step": 11956, "training_step_time": 0.1397690773010254 }, { "epoch": 1.824493408203125e-05, "model_forward_time": 0.024669885635375977, "step": 11957 }, { "epoch": 1.824493408203125e-05, "step": 11957, "training_step_time": 0.12720513343811035 }, { "epoch": 1.82464599609375e-05, "model_forward_time": 0.025741100311279297, "step": 11958 }, { "epoch": 1.82464599609375e-05, "step": 11958, "training_step_time": 0.12394833564758301 }, { "epoch": 1.824798583984375e-05, "model_forward_time": 0.027563810348510742, "step": 11959 }, { "epoch": 1.824798583984375e-05, "step": 11959, "training_step_time": 0.12021398544311523 }, { "epoch": 1.824951171875e-05, "grad_norm": 0.36168545484542847, "learning_rate": 7.028646914702614e-05, "loss": 0.017, "step": 11960 }, { "epoch": 1.824951171875e-05, "model_forward_time": 0.024377107620239258, "step": 11960 }, { "epoch": 1.824951171875e-05, "step": 11960, "training_step_time": 0.11315059661865234 }, { "epoch": 1.825103759765625e-05, "model_forward_time": 0.02544093132019043, "step": 11961 }, { "epoch": 1.825103759765625e-05, "step": 11961, "training_step_time": 0.11243128776550293 }, { "epoch": 1.82525634765625e-05, "model_forward_time": 0.026935577392578125, "step": 11962 }, { "epoch": 1.82525634765625e-05, "step": 11962, "training_step_time": 0.11411857604980469 }, { "epoch": 1.825408935546875e-05, "model_forward_time": 0.02548694610595703, "step": 11963 }, { "epoch": 1.825408935546875e-05, "step": 11963, "training_step_time": 0.11361551284790039 }, { "epoch": 1.8255615234375e-05, "model_forward_time": 0.026876449584960938, "step": 11964 }, { "epoch": 1.8255615234375e-05, "step": 11964, "training_step_time": 0.1275477409362793 }, { "epoch": 1.825714111328125e-05, "model_forward_time": 0.025558948516845703, "step": 11965 }, { "epoch": 1.825714111328125e-05, "step": 11965, "training_step_time": 0.1113595962524414 }, { "epoch": 1.82586669921875e-05, "model_forward_time": 0.025162220001220703, "step": 11966 }, { "epoch": 1.82586669921875e-05, "step": 11966, "training_step_time": 0.10969996452331543 }, { "epoch": 1.826019287109375e-05, "model_forward_time": 0.02564406394958496, "step": 11967 }, { "epoch": 1.826019287109375e-05, "step": 11967, "training_step_time": 0.1226496696472168 }, { "epoch": 1.826171875e-05, "model_forward_time": 0.025851011276245117, "step": 11968 }, { "epoch": 1.826171875e-05, "step": 11968, "training_step_time": 0.12860870361328125 }, { "epoch": 1.826324462890625e-05, "model_forward_time": 0.02527642250061035, "step": 11969 }, { "epoch": 1.826324462890625e-05, "step": 11969, "training_step_time": 0.10725736618041992 }, { "epoch": 1.82647705078125e-05, "grad_norm": 0.3516203761100769, "learning_rate": 7.023608149028937e-05, "loss": 0.0215, "step": 11970 }, { "epoch": 1.82647705078125e-05, "model_forward_time": 0.027187585830688477, "step": 11970 }, { "epoch": 1.82647705078125e-05, "step": 11970, "training_step_time": 0.1186378002166748 }, { "epoch": 1.826629638671875e-05, "model_forward_time": 0.025493860244750977, "step": 11971 }, { "epoch": 1.826629638671875e-05, "step": 11971, "training_step_time": 0.10960865020751953 }, { "epoch": 1.8267822265625e-05, "model_forward_time": 0.025491952896118164, "step": 11972 }, { "epoch": 1.8267822265625e-05, "step": 11972, "training_step_time": 0.10711383819580078 }, { "epoch": 1.826934814453125e-05, "model_forward_time": 0.025420188903808594, "step": 11973 }, { "epoch": 1.826934814453125e-05, "step": 11973, "training_step_time": 0.10788583755493164 }, { "epoch": 1.82708740234375e-05, "model_forward_time": 0.02543807029724121, "step": 11974 }, { "epoch": 1.82708740234375e-05, "step": 11974, "training_step_time": 0.1073615550994873 }, { "epoch": 1.827239990234375e-05, "model_forward_time": 0.02555704116821289, "step": 11975 }, { "epoch": 1.827239990234375e-05, "step": 11975, "training_step_time": 0.10794305801391602 }, { "epoch": 1.827392578125e-05, "model_forward_time": 0.025316476821899414, "step": 11976 }, { "epoch": 1.827392578125e-05, "step": 11976, "training_step_time": 0.10689735412597656 }, { "epoch": 1.827545166015625e-05, "model_forward_time": 0.025530338287353516, "step": 11977 }, { "epoch": 1.827545166015625e-05, "step": 11977, "training_step_time": 0.10715746879577637 }, { "epoch": 1.82769775390625e-05, "model_forward_time": 0.02518749237060547, "step": 11978 }, { "epoch": 1.82769775390625e-05, "step": 11978, "training_step_time": 0.14312481880187988 }, { "epoch": 1.827850341796875e-05, "model_forward_time": 0.025446176528930664, "step": 11979 }, { "epoch": 1.827850341796875e-05, "step": 11979, "training_step_time": 0.1727156639099121 }, { "epoch": 1.8280029296875e-05, "grad_norm": 0.47945356369018555, "learning_rate": 7.018566924480543e-05, "loss": 0.0236, "step": 11980 }, { "epoch": 1.8280029296875e-05, "model_forward_time": 0.025285005569458008, "step": 11980 }, { "epoch": 1.8280029296875e-05, "step": 11980, "training_step_time": 0.1599712371826172 }, { "epoch": 1.828155517578125e-05, "model_forward_time": 0.02505350112915039, "step": 11981 }, { "epoch": 1.828155517578125e-05, "step": 11981, "training_step_time": 0.17855405807495117 }, { "epoch": 1.82830810546875e-05, "model_forward_time": 0.024480819702148438, "step": 11982 }, { "epoch": 1.82830810546875e-05, "step": 11982, "training_step_time": 0.18897771835327148 }, { "epoch": 1.828460693359375e-05, "model_forward_time": 0.024651765823364258, "step": 11983 }, { "epoch": 1.828460693359375e-05, "step": 11983, "training_step_time": 0.10640764236450195 }, { "epoch": 1.82861328125e-05, "model_forward_time": 0.02447819709777832, "step": 11984 }, { "epoch": 1.82861328125e-05, "step": 11984, "training_step_time": 0.11172819137573242 }, { "epoch": 1.828765869140625e-05, "model_forward_time": 0.0251615047454834, "step": 11985 }, { "epoch": 1.828765869140625e-05, "step": 11985, "training_step_time": 0.19770359992980957 }, { "epoch": 1.82891845703125e-05, "model_forward_time": 0.024591922760009766, "step": 11986 }, { "epoch": 1.82891845703125e-05, "step": 11986, "training_step_time": 0.10596251487731934 }, { "epoch": 1.829071044921875e-05, "model_forward_time": 0.02465057373046875, "step": 11987 }, { "epoch": 1.829071044921875e-05, "step": 11987, "training_step_time": 0.1787106990814209 }, { "epoch": 1.8292236328125e-05, "model_forward_time": 0.02493739128112793, "step": 11988 }, { "epoch": 1.8292236328125e-05, "step": 11988, "training_step_time": 0.13008356094360352 }, { "epoch": 1.829376220703125e-05, "model_forward_time": 0.02479410171508789, "step": 11989 }, { "epoch": 1.829376220703125e-05, "step": 11989, "training_step_time": 0.1316540241241455 }, { "epoch": 1.82952880859375e-05, "grad_norm": 0.2896246910095215, "learning_rate": 7.013523247183e-05, "loss": 0.0228, "step": 11990 }, { "epoch": 1.82952880859375e-05, "model_forward_time": 0.02452993392944336, "step": 11990 }, { "epoch": 1.82952880859375e-05, "step": 11990, "training_step_time": 0.14707183837890625 }, { "epoch": 1.829681396484375e-05, "model_forward_time": 0.024994611740112305, "step": 11991 }, { "epoch": 1.829681396484375e-05, "step": 11991, "training_step_time": 0.23108363151550293 }, { "epoch": 1.829833984375e-05, "model_forward_time": 0.024966955184936523, "step": 11992 }, { "epoch": 1.829833984375e-05, "step": 11992, "training_step_time": 0.11815857887268066 }, { "epoch": 1.829986572265625e-05, "model_forward_time": 0.0237424373626709, "step": 11993 }, { "epoch": 1.829986572265625e-05, "step": 11993, "training_step_time": 0.11592936515808105 }, { "epoch": 1.83013916015625e-05, "model_forward_time": 0.024388790130615234, "step": 11994 }, { "epoch": 1.83013916015625e-05, "step": 11994, "training_step_time": 0.11292243003845215 }, { "epoch": 1.830291748046875e-05, "model_forward_time": 0.024291515350341797, "step": 11995 }, { "epoch": 1.830291748046875e-05, "step": 11995, "training_step_time": 0.11221694946289062 }, { "epoch": 1.8304443359375e-05, "model_forward_time": 0.025256872177124023, "step": 11996 }, { "epoch": 1.8304443359375e-05, "step": 11996, "training_step_time": 0.10759568214416504 }, { "epoch": 1.830596923828125e-05, "model_forward_time": 0.025072574615478516, "step": 11997 }, { "epoch": 1.830596923828125e-05, "step": 11997, "training_step_time": 0.1103677749633789 }, { "epoch": 1.83074951171875e-05, "model_forward_time": 0.025444746017456055, "step": 11998 }, { "epoch": 1.83074951171875e-05, "step": 11998, "training_step_time": 0.11180949211120605 }, { "epoch": 1.830902099609375e-05, "model_forward_time": 0.02547001838684082, "step": 11999 }, { "epoch": 1.830902099609375e-05, "step": 11999, "training_step_time": 0.11112141609191895 }, { "epoch": 1.8310546875e-05, "grad_norm": 0.4744170308113098, "learning_rate": 7.008477123264848e-05, "loss": 0.0324, "step": 12000 }, { "epoch": 1.8310546875e-05, "model_forward_time": 0.024791479110717773, "step": 12000 }, { "epoch": 1.8310546875e-05, "step": 12000, "training_step_time": 0.10333871841430664 }, { "epoch": 1.831207275390625e-05, "model_forward_time": 0.023221254348754883, "step": 12001 }, { "epoch": 1.831207275390625e-05, "step": 12001, "training_step_time": 0.10272741317749023 }, { "epoch": 1.83135986328125e-05, "model_forward_time": 0.024647951126098633, "step": 12002 }, { "epoch": 1.83135986328125e-05, "step": 12002, "training_step_time": 0.13118982315063477 }, { "epoch": 1.831512451171875e-05, "model_forward_time": 0.025197982788085938, "step": 12003 }, { "epoch": 1.831512451171875e-05, "step": 12003, "training_step_time": 0.11956167221069336 }, { "epoch": 1.8316650390625e-05, "model_forward_time": 0.024960041046142578, "step": 12004 }, { "epoch": 1.8316650390625e-05, "step": 12004, "training_step_time": 0.10664987564086914 }, { "epoch": 1.831817626953125e-05, "model_forward_time": 0.02516341209411621, "step": 12005 }, { "epoch": 1.831817626953125e-05, "step": 12005, "training_step_time": 0.106658935546875 }, { "epoch": 1.83197021484375e-05, "model_forward_time": 0.025056838989257812, "step": 12006 }, { "epoch": 1.83197021484375e-05, "step": 12006, "training_step_time": 0.10868263244628906 }, { "epoch": 1.832122802734375e-05, "model_forward_time": 0.025405406951904297, "step": 12007 }, { "epoch": 1.832122802734375e-05, "step": 12007, "training_step_time": 0.10533976554870605 }, { "epoch": 1.832275390625e-05, "model_forward_time": 0.02851414680480957, "step": 12008 }, { "epoch": 1.832275390625e-05, "step": 12008, "training_step_time": 0.11095070838928223 }, { "epoch": 1.832427978515625e-05, "model_forward_time": 0.025354385375976562, "step": 12009 }, { "epoch": 1.832427978515625e-05, "step": 12009, "training_step_time": 0.11118721961975098 }, { "epoch": 1.83258056640625e-05, "grad_norm": 0.5417850613594055, "learning_rate": 7.003428558857604e-05, "loss": 0.0235, "step": 12010 }, { "epoch": 1.83258056640625e-05, "model_forward_time": 0.024917125701904297, "step": 12010 }, { "epoch": 1.83258056640625e-05, "step": 12010, "training_step_time": 0.10479569435119629 }, { "epoch": 1.832733154296875e-05, "model_forward_time": 0.02581787109375, "step": 12011 }, { "epoch": 1.832733154296875e-05, "step": 12011, "training_step_time": 0.10652303695678711 }, { "epoch": 1.8328857421875e-05, "model_forward_time": 0.0252072811126709, "step": 12012 }, { "epoch": 1.8328857421875e-05, "step": 12012, "training_step_time": 0.10690808296203613 }, { "epoch": 1.833038330078125e-05, "model_forward_time": 0.02561473846435547, "step": 12013 }, { "epoch": 1.833038330078125e-05, "step": 12013, "training_step_time": 0.13345026969909668 }, { "epoch": 1.83319091796875e-05, "model_forward_time": 0.025255918502807617, "step": 12014 }, { "epoch": 1.83319091796875e-05, "step": 12014, "training_step_time": 0.11126542091369629 }, { "epoch": 1.833343505859375e-05, "model_forward_time": 0.025852441787719727, "step": 12015 }, { "epoch": 1.833343505859375e-05, "step": 12015, "training_step_time": 0.1119697093963623 }, { "epoch": 1.83349609375e-05, "model_forward_time": 0.027318954467773438, "step": 12016 }, { "epoch": 1.83349609375e-05, "step": 12016, "training_step_time": 0.11923694610595703 }, { "epoch": 1.833648681640625e-05, "model_forward_time": 0.025828123092651367, "step": 12017 }, { "epoch": 1.833648681640625e-05, "step": 12017, "training_step_time": 0.11551046371459961 }, { "epoch": 1.83380126953125e-05, "model_forward_time": 0.027040719985961914, "step": 12018 }, { "epoch": 1.83380126953125e-05, "step": 12018, "training_step_time": 0.11421847343444824 }, { "epoch": 1.833953857421875e-05, "model_forward_time": 0.025554656982421875, "step": 12019 }, { "epoch": 1.833953857421875e-05, "step": 12019, "training_step_time": 0.16489005088806152 }, { "epoch": 1.8341064453125e-05, "grad_norm": 0.4069768190383911, "learning_rate": 6.99837756009575e-05, "loss": 0.0192, "step": 12020 }, { "epoch": 1.8341064453125e-05, "model_forward_time": 0.02474355697631836, "step": 12020 }, { "epoch": 1.8341064453125e-05, "step": 12020, "training_step_time": 0.11671948432922363 }, { "epoch": 1.834259033203125e-05, "model_forward_time": 0.02435612678527832, "step": 12021 }, { "epoch": 1.834259033203125e-05, "step": 12021, "training_step_time": 0.21508049964904785 }, { "epoch": 1.83441162109375e-05, "model_forward_time": 0.02411198616027832, "step": 12022 }, { "epoch": 1.83441162109375e-05, "step": 12022, "training_step_time": 0.13992547988891602 }, { "epoch": 1.834564208984375e-05, "model_forward_time": 0.024952411651611328, "step": 12023 }, { "epoch": 1.834564208984375e-05, "step": 12023, "training_step_time": 0.11423468589782715 }, { "epoch": 1.834716796875e-05, "model_forward_time": 0.02573871612548828, "step": 12024 }, { "epoch": 1.834716796875e-05, "step": 12024, "training_step_time": 0.12189459800720215 }, { "epoch": 1.834869384765625e-05, "model_forward_time": 0.024405956268310547, "step": 12025 }, { "epoch": 1.834869384765625e-05, "step": 12025, "training_step_time": 0.11147475242614746 }, { "epoch": 1.83502197265625e-05, "model_forward_time": 0.025376081466674805, "step": 12026 }, { "epoch": 1.83502197265625e-05, "step": 12026, "training_step_time": 0.11270976066589355 }, { "epoch": 1.835174560546875e-05, "model_forward_time": 0.02550220489501953, "step": 12027 }, { "epoch": 1.835174560546875e-05, "step": 12027, "training_step_time": 0.10857820510864258 }, { "epoch": 1.8353271484375e-05, "model_forward_time": 0.02524089813232422, "step": 12028 }, { "epoch": 1.8353271484375e-05, "step": 12028, "training_step_time": 0.10954976081848145 }, { "epoch": 1.835479736328125e-05, "model_forward_time": 0.024864673614501953, "step": 12029 }, { "epoch": 1.835479736328125e-05, "step": 12029, "training_step_time": 0.11189651489257812 }, { "epoch": 1.83563232421875e-05, "grad_norm": 0.46286314725875854, "learning_rate": 6.993324133116726e-05, "loss": 0.0186, "step": 12030 }, { "epoch": 1.83563232421875e-05, "model_forward_time": 0.02532505989074707, "step": 12030 }, { "epoch": 1.83563232421875e-05, "step": 12030, "training_step_time": 0.10837435722351074 }, { "epoch": 1.835784912109375e-05, "model_forward_time": 0.024984359741210938, "step": 12031 }, { "epoch": 1.835784912109375e-05, "step": 12031, "training_step_time": 0.10553956031799316 }, { "epoch": 1.8359375e-05, "model_forward_time": 0.02545166015625, "step": 12032 }, { "epoch": 1.8359375e-05, "step": 12032, "training_step_time": 0.1087334156036377 }, { "epoch": 1.836090087890625e-05, "model_forward_time": 0.025705814361572266, "step": 12033 }, { "epoch": 1.836090087890625e-05, "step": 12033, "training_step_time": 0.1954329013824463 }, { "epoch": 1.83624267578125e-05, "model_forward_time": 0.024984121322631836, "step": 12034 }, { "epoch": 1.83624267578125e-05, "step": 12034, "training_step_time": 0.13936972618103027 }, { "epoch": 1.836395263671875e-05, "model_forward_time": 0.024842023849487305, "step": 12035 }, { "epoch": 1.836395263671875e-05, "step": 12035, "training_step_time": 0.162977933883667 }, { "epoch": 1.8365478515625e-05, "model_forward_time": 0.02440619468688965, "step": 12036 }, { "epoch": 1.8365478515625e-05, "step": 12036, "training_step_time": 0.11282873153686523 }, { "epoch": 1.836700439453125e-05, "model_forward_time": 0.025048255920410156, "step": 12037 }, { "epoch": 1.836700439453125e-05, "step": 12037, "training_step_time": 0.10887646675109863 }, { "epoch": 1.83685302734375e-05, "model_forward_time": 0.02525186538696289, "step": 12038 }, { "epoch": 1.83685302734375e-05, "step": 12038, "training_step_time": 0.10628604888916016 }, { "epoch": 1.837005615234375e-05, "model_forward_time": 0.025298595428466797, "step": 12039 }, { "epoch": 1.837005615234375e-05, "step": 12039, "training_step_time": 0.1114356517791748 }, { "epoch": 1.837158203125e-05, "grad_norm": 0.31339505314826965, "learning_rate": 6.988268284060922e-05, "loss": 0.0231, "step": 12040 }, { "epoch": 1.837158203125e-05, "model_forward_time": 0.025748729705810547, "step": 12040 }, { "epoch": 1.837158203125e-05, "step": 12040, "training_step_time": 0.22601866722106934 }, { "epoch": 1.837310791015625e-05, "model_forward_time": 0.02509307861328125, "step": 12041 }, { "epoch": 1.837310791015625e-05, "step": 12041, "training_step_time": 0.1063692569732666 }, { "epoch": 1.83746337890625e-05, "model_forward_time": 0.024160385131835938, "step": 12042 }, { "epoch": 1.83746337890625e-05, "step": 12042, "training_step_time": 0.17446517944335938 }, { "epoch": 1.837615966796875e-05, "model_forward_time": 0.024640560150146484, "step": 12043 }, { "epoch": 1.837615966796875e-05, "step": 12043, "training_step_time": 0.11757159233093262 }, { "epoch": 1.8377685546875e-05, "model_forward_time": 0.024425029754638672, "step": 12044 }, { "epoch": 1.8377685546875e-05, "step": 12044, "training_step_time": 0.10908699035644531 }, { "epoch": 1.837921142578125e-05, "model_forward_time": 0.02488088607788086, "step": 12045 }, { "epoch": 1.837921142578125e-05, "step": 12045, "training_step_time": 0.11651110649108887 }, { "epoch": 1.83807373046875e-05, "model_forward_time": 0.025441646575927734, "step": 12046 }, { "epoch": 1.83807373046875e-05, "step": 12046, "training_step_time": 0.1179807186126709 }, { "epoch": 1.838226318359375e-05, "model_forward_time": 0.024173736572265625, "step": 12047 }, { "epoch": 1.838226318359375e-05, "step": 12047, "training_step_time": 0.11464452743530273 }, { "epoch": 1.83837890625e-05, "model_forward_time": 0.025205373764038086, "step": 12048 }, { "epoch": 1.83837890625e-05, "step": 12048, "training_step_time": 0.12032604217529297 }, { "epoch": 1.838531494140625e-05, "model_forward_time": 0.025057077407836914, "step": 12049 }, { "epoch": 1.838531494140625e-05, "step": 12049, "training_step_time": 0.11566925048828125 }, { "epoch": 1.83868408203125e-05, "grad_norm": 0.1898471564054489, "learning_rate": 6.98321001907167e-05, "loss": 0.0227, "step": 12050 }, { "epoch": 1.83868408203125e-05, "model_forward_time": 0.02514791488647461, "step": 12050 }, { "epoch": 1.83868408203125e-05, "step": 12050, "training_step_time": 0.11374044418334961 }, { "epoch": 1.838836669921875e-05, "model_forward_time": 0.02511906623840332, "step": 12051 }, { "epoch": 1.838836669921875e-05, "step": 12051, "training_step_time": 0.11185145378112793 }, { "epoch": 1.8389892578125e-05, "model_forward_time": 0.0259554386138916, "step": 12052 }, { "epoch": 1.8389892578125e-05, "step": 12052, "training_step_time": 0.11596989631652832 }, { "epoch": 1.839141845703125e-05, "model_forward_time": 0.024854183197021484, "step": 12053 }, { "epoch": 1.839141845703125e-05, "step": 12053, "training_step_time": 0.11325240135192871 }, { "epoch": 1.83929443359375e-05, "model_forward_time": 0.026573896408081055, "step": 12054 }, { "epoch": 1.83929443359375e-05, "step": 12054, "training_step_time": 0.10871315002441406 }, { "epoch": 1.839447021484375e-05, "model_forward_time": 0.024114608764648438, "step": 12055 }, { "epoch": 1.839447021484375e-05, "step": 12055, "training_step_time": 0.11040806770324707 }, { "epoch": 1.839599609375e-05, "model_forward_time": 0.02606654167175293, "step": 12056 }, { "epoch": 1.839599609375e-05, "step": 12056, "training_step_time": 0.1094658374786377 }, { "epoch": 1.839752197265625e-05, "model_forward_time": 0.025458335876464844, "step": 12057 }, { "epoch": 1.839752197265625e-05, "step": 12057, "training_step_time": 0.1075904369354248 }, { "epoch": 1.83990478515625e-05, "model_forward_time": 0.025005340576171875, "step": 12058 }, { "epoch": 1.83990478515625e-05, "step": 12058, "training_step_time": 0.1155710220336914 }, { "epoch": 1.840057373046875e-05, "model_forward_time": 0.02527594566345215, "step": 12059 }, { "epoch": 1.840057373046875e-05, "step": 12059, "training_step_time": 0.11220669746398926 }, { "epoch": 1.8402099609375e-05, "grad_norm": 0.28267902135849, "learning_rate": 6.978149344295242e-05, "loss": 0.0201, "step": 12060 }, { "epoch": 1.8402099609375e-05, "model_forward_time": 0.025480270385742188, "step": 12060 }, { "epoch": 1.8402099609375e-05, "step": 12060, "training_step_time": 0.11409258842468262 }, { "epoch": 1.840362548828125e-05, "model_forward_time": 0.025754690170288086, "step": 12061 }, { "epoch": 1.840362548828125e-05, "step": 12061, "training_step_time": 0.10948038101196289 }, { "epoch": 1.84051513671875e-05, "model_forward_time": 0.025450468063354492, "step": 12062 }, { "epoch": 1.84051513671875e-05, "step": 12062, "training_step_time": 0.11128354072570801 }, { "epoch": 1.840667724609375e-05, "model_forward_time": 0.025153160095214844, "step": 12063 }, { "epoch": 1.840667724609375e-05, "step": 12063, "training_step_time": 0.10832738876342773 }, { "epoch": 1.8408203125e-05, "model_forward_time": 0.025362014770507812, "step": 12064 }, { "epoch": 1.8408203125e-05, "step": 12064, "training_step_time": 0.10732388496398926 }, { "epoch": 1.840972900390625e-05, "model_forward_time": 0.02524590492248535, "step": 12065 }, { "epoch": 1.840972900390625e-05, "step": 12065, "training_step_time": 0.10705780982971191 }, { "epoch": 1.84112548828125e-05, "model_forward_time": 0.0252230167388916, "step": 12066 }, { "epoch": 1.84112548828125e-05, "step": 12066, "training_step_time": 0.10941743850708008 }, { "epoch": 1.841278076171875e-05, "model_forward_time": 0.02524733543395996, "step": 12067 }, { "epoch": 1.841278076171875e-05, "step": 12067, "training_step_time": 0.1461181640625 }, { "epoch": 1.8414306640625e-05, "model_forward_time": 0.02521347999572754, "step": 12068 }, { "epoch": 1.8414306640625e-05, "step": 12068, "training_step_time": 0.11349010467529297 }, { "epoch": 1.841583251953125e-05, "model_forward_time": 0.025375843048095703, "step": 12069 }, { "epoch": 1.841583251953125e-05, "step": 12069, "training_step_time": 0.1832740306854248 }, { "epoch": 1.84173583984375e-05, "grad_norm": 0.1628815084695816, "learning_rate": 6.973086265880833e-05, "loss": 0.0341, "step": 12070 }, { "epoch": 1.84173583984375e-05, "model_forward_time": 0.026209115982055664, "step": 12070 }, { "epoch": 1.84173583984375e-05, "step": 12070, "training_step_time": 0.17536640167236328 }, { "epoch": 1.841888427734375e-05, "model_forward_time": 0.02425694465637207, "step": 12071 }, { "epoch": 1.841888427734375e-05, "step": 12071, "training_step_time": 0.11565947532653809 }, { "epoch": 1.842041015625e-05, "model_forward_time": 0.024743080139160156, "step": 12072 }, { "epoch": 1.842041015625e-05, "step": 12072, "training_step_time": 0.12029480934143066 }, { "epoch": 1.842193603515625e-05, "model_forward_time": 0.025445222854614258, "step": 12073 }, { "epoch": 1.842193603515625e-05, "step": 12073, "training_step_time": 0.10524106025695801 }, { "epoch": 1.84234619140625e-05, "model_forward_time": 0.02561807632446289, "step": 12074 }, { "epoch": 1.84234619140625e-05, "step": 12074, "training_step_time": 0.10607767105102539 }, { "epoch": 1.842498779296875e-05, "model_forward_time": 0.02587413787841797, "step": 12075 }, { "epoch": 1.842498779296875e-05, "step": 12075, "training_step_time": 0.10817146301269531 }, { "epoch": 1.8426513671875e-05, "model_forward_time": 0.02539801597595215, "step": 12076 }, { "epoch": 1.8426513671875e-05, "step": 12076, "training_step_time": 0.10470080375671387 }, { "epoch": 1.842803955078125e-05, "model_forward_time": 0.024849891662597656, "step": 12077 }, { "epoch": 1.842803955078125e-05, "step": 12077, "training_step_time": 0.10503196716308594 }, { "epoch": 1.84295654296875e-05, "model_forward_time": 0.02556300163269043, "step": 12078 }, { "epoch": 1.84295654296875e-05, "step": 12078, "training_step_time": 0.11263847351074219 }, { "epoch": 1.843109130859375e-05, "model_forward_time": 0.025606393814086914, "step": 12079 }, { "epoch": 1.843109130859375e-05, "step": 12079, "training_step_time": 0.11065435409545898 }, { "epoch": 1.84326171875e-05, "grad_norm": 0.4551478326320648, "learning_rate": 6.968020789980562e-05, "loss": 0.0202, "step": 12080 }, { "epoch": 1.84326171875e-05, "model_forward_time": 0.025592327117919922, "step": 12080 }, { "epoch": 1.84326171875e-05, "step": 12080, "training_step_time": 0.12091970443725586 }, { "epoch": 1.843414306640625e-05, "model_forward_time": 0.025911808013916016, "step": 12081 }, { "epoch": 1.843414306640625e-05, "step": 12081, "training_step_time": 0.1156303882598877 }, { "epoch": 1.84356689453125e-05, "model_forward_time": 0.025475740432739258, "step": 12082 }, { "epoch": 1.84356689453125e-05, "step": 12082, "training_step_time": 0.12085914611816406 }, { "epoch": 1.843719482421875e-05, "model_forward_time": 0.02576160430908203, "step": 12083 }, { "epoch": 1.843719482421875e-05, "step": 12083, "training_step_time": 0.13818883895874023 }, { "epoch": 1.8438720703125e-05, "model_forward_time": 0.02576160430908203, "step": 12084 }, { "epoch": 1.8438720703125e-05, "step": 12084, "training_step_time": 0.11002945899963379 }, { "epoch": 1.844024658203125e-05, "model_forward_time": 0.024962663650512695, "step": 12085 }, { "epoch": 1.844024658203125e-05, "step": 12085, "training_step_time": 0.128340482711792 }, { "epoch": 1.84417724609375e-05, "model_forward_time": 0.025377988815307617, "step": 12086 }, { "epoch": 1.84417724609375e-05, "step": 12086, "training_step_time": 0.12563443183898926 }, { "epoch": 1.844329833984375e-05, "model_forward_time": 0.025313615798950195, "step": 12087 }, { "epoch": 1.844329833984375e-05, "step": 12087, "training_step_time": 0.10725092887878418 }, { "epoch": 1.844482421875e-05, "model_forward_time": 0.02515578269958496, "step": 12088 }, { "epoch": 1.844482421875e-05, "step": 12088, "training_step_time": 0.11072826385498047 }, { "epoch": 1.844635009765625e-05, "model_forward_time": 0.02522730827331543, "step": 12089 }, { "epoch": 1.844635009765625e-05, "step": 12089, "training_step_time": 0.11376070976257324 }, { "epoch": 1.84478759765625e-05, "grad_norm": 0.43498626351356506, "learning_rate": 6.962952922749457e-05, "loss": 0.0161, "step": 12090 }, { "epoch": 1.84478759765625e-05, "model_forward_time": 0.025298357009887695, "step": 12090 }, { "epoch": 1.84478759765625e-05, "step": 12090, "training_step_time": 0.11504173278808594 }, { "epoch": 1.844940185546875e-05, "model_forward_time": 0.025652408599853516, "step": 12091 }, { "epoch": 1.844940185546875e-05, "step": 12091, "training_step_time": 0.11237835884094238 }, { "epoch": 1.8450927734375e-05, "model_forward_time": 0.025173425674438477, "step": 12092 }, { "epoch": 1.8450927734375e-05, "step": 12092, "training_step_time": 0.1069028377532959 }, { "epoch": 1.845245361328125e-05, "model_forward_time": 0.02529764175415039, "step": 12093 }, { "epoch": 1.845245361328125e-05, "step": 12093, "training_step_time": 0.11280584335327148 }, { "epoch": 1.84539794921875e-05, "model_forward_time": 0.025171518325805664, "step": 12094 }, { "epoch": 1.84539794921875e-05, "step": 12094, "training_step_time": 0.17817163467407227 }, { "epoch": 1.845550537109375e-05, "model_forward_time": 0.02435445785522461, "step": 12095 }, { "epoch": 1.845550537109375e-05, "step": 12095, "training_step_time": 0.12177467346191406 }, { "epoch": 1.845703125e-05, "model_forward_time": 0.024264097213745117, "step": 12096 }, { "epoch": 1.845703125e-05, "step": 12096, "training_step_time": 0.18004798889160156 }, { "epoch": 1.845855712890625e-05, "model_forward_time": 0.02441883087158203, "step": 12097 }, { "epoch": 1.845855712890625e-05, "step": 12097, "training_step_time": 0.19928359985351562 }, { "epoch": 1.84600830078125e-05, "model_forward_time": 0.024761438369750977, "step": 12098 }, { "epoch": 1.84600830078125e-05, "step": 12098, "training_step_time": 0.1907656192779541 }, { "epoch": 1.846160888671875e-05, "model_forward_time": 0.027601242065429688, "step": 12099 }, { "epoch": 1.846160888671875e-05, "step": 12099, "training_step_time": 0.18149161338806152 }, { "epoch": 1.8463134765625e-05, "grad_norm": 0.3368780016899109, "learning_rate": 6.957882670345458e-05, "loss": 0.0264, "step": 12100 }, { "epoch": 1.8463134765625e-05, "model_forward_time": 0.024541139602661133, "step": 12100 }, { "epoch": 1.8463134765625e-05, "step": 12100, "training_step_time": 0.15851855278015137 }, { "epoch": 1.846466064453125e-05, "model_forward_time": 0.024677753448486328, "step": 12101 }, { "epoch": 1.846466064453125e-05, "step": 12101, "training_step_time": 0.16266083717346191 }, { "epoch": 1.84661865234375e-05, "model_forward_time": 0.02415299415588379, "step": 12102 }, { "epoch": 1.84661865234375e-05, "step": 12102, "training_step_time": 0.14628338813781738 }, { "epoch": 1.846771240234375e-05, "model_forward_time": 0.024596691131591797, "step": 12103 }, { "epoch": 1.846771240234375e-05, "step": 12103, "training_step_time": 0.13750195503234863 }, { "epoch": 1.846923828125e-05, "model_forward_time": 0.02510666847229004, "step": 12104 }, { "epoch": 1.846923828125e-05, "step": 12104, "training_step_time": 0.10332322120666504 }, { "epoch": 1.847076416015625e-05, "model_forward_time": 0.025179147720336914, "step": 12105 }, { "epoch": 1.847076416015625e-05, "step": 12105, "training_step_time": 0.12081432342529297 }, { "epoch": 1.84722900390625e-05, "model_forward_time": 0.025976896286010742, "step": 12106 }, { "epoch": 1.84722900390625e-05, "step": 12106, "training_step_time": 0.1088707447052002 }, { "epoch": 1.847381591796875e-05, "model_forward_time": 0.025481224060058594, "step": 12107 }, { "epoch": 1.847381591796875e-05, "step": 12107, "training_step_time": 0.11642813682556152 }, { "epoch": 1.8475341796875e-05, "model_forward_time": 0.02563929557800293, "step": 12108 }, { "epoch": 1.8475341796875e-05, "step": 12108, "training_step_time": 0.10981416702270508 }, { "epoch": 1.847686767578125e-05, "model_forward_time": 0.02545952796936035, "step": 12109 }, { "epoch": 1.847686767578125e-05, "step": 12109, "training_step_time": 0.10700488090515137 }, { "epoch": 1.84783935546875e-05, "grad_norm": 0.2847748100757599, "learning_rate": 6.952810038929397e-05, "loss": 0.02, "step": 12110 }, { "epoch": 1.84783935546875e-05, "model_forward_time": 0.026195764541625977, "step": 12110 }, { "epoch": 1.84783935546875e-05, "step": 12110, "training_step_time": 0.10912251472473145 }, { "epoch": 1.847991943359375e-05, "model_forward_time": 0.025635957717895508, "step": 12111 }, { "epoch": 1.847991943359375e-05, "step": 12111, "training_step_time": 0.15905380249023438 }, { "epoch": 1.84814453125e-05, "model_forward_time": 0.02497577667236328, "step": 12112 }, { "epoch": 1.84814453125e-05, "step": 12112, "training_step_time": 0.11531567573547363 }, { "epoch": 1.848297119140625e-05, "model_forward_time": 0.024684906005859375, "step": 12113 }, { "epoch": 1.848297119140625e-05, "step": 12113, "training_step_time": 0.17767906188964844 }, { "epoch": 1.84844970703125e-05, "model_forward_time": 0.024710655212402344, "step": 12114 }, { "epoch": 1.84844970703125e-05, "step": 12114, "training_step_time": 0.1831378936767578 }, { "epoch": 1.848602294921875e-05, "model_forward_time": 0.024237871170043945, "step": 12115 }, { "epoch": 1.848602294921875e-05, "step": 12115, "training_step_time": 0.11511063575744629 }, { "epoch": 1.8487548828125e-05, "model_forward_time": 0.024487733840942383, "step": 12116 }, { "epoch": 1.8487548828125e-05, "step": 12116, "training_step_time": 0.11063528060913086 }, { "epoch": 1.848907470703125e-05, "model_forward_time": 0.025523900985717773, "step": 12117 }, { "epoch": 1.848907470703125e-05, "step": 12117, "training_step_time": 0.1070411205291748 }, { "epoch": 1.84906005859375e-05, "model_forward_time": 0.02556014060974121, "step": 12118 }, { "epoch": 1.84906005859375e-05, "step": 12118, "training_step_time": 0.11070895195007324 }, { "epoch": 1.849212646484375e-05, "model_forward_time": 0.025222301483154297, "step": 12119 }, { "epoch": 1.849212646484375e-05, "step": 12119, "training_step_time": 0.10846853256225586 }, { "epoch": 1.849365234375e-05, "grad_norm": 0.28749608993530273, "learning_rate": 6.947735034665002e-05, "loss": 0.014, "step": 12120 }, { "epoch": 1.849365234375e-05, "model_forward_time": 0.025959253311157227, "step": 12120 }, { "epoch": 1.849365234375e-05, "step": 12120, "training_step_time": 0.10850644111633301 }, { "epoch": 1.849517822265625e-05, "model_forward_time": 0.024909019470214844, "step": 12121 }, { "epoch": 1.849517822265625e-05, "step": 12121, "training_step_time": 0.10658764839172363 }, { "epoch": 1.84967041015625e-05, "model_forward_time": 0.02747201919555664, "step": 12122 }, { "epoch": 1.84967041015625e-05, "step": 12122, "training_step_time": 0.11704349517822266 }, { "epoch": 1.849822998046875e-05, "model_forward_time": 0.024912595748901367, "step": 12123 }, { "epoch": 1.849822998046875e-05, "step": 12123, "training_step_time": 0.20823168754577637 }, { "epoch": 1.8499755859375e-05, "model_forward_time": 0.024495363235473633, "step": 12124 }, { "epoch": 1.8499755859375e-05, "step": 12124, "training_step_time": 0.1333141326904297 }, { "epoch": 1.850128173828125e-05, "model_forward_time": 0.02456808090209961, "step": 12125 }, { "epoch": 1.850128173828125e-05, "step": 12125, "training_step_time": 0.15640497207641602 }, { "epoch": 1.85028076171875e-05, "model_forward_time": 0.024996519088745117, "step": 12126 }, { "epoch": 1.85028076171875e-05, "step": 12126, "training_step_time": 0.14251399040222168 }, { "epoch": 1.850433349609375e-05, "model_forward_time": 0.02455925941467285, "step": 12127 }, { "epoch": 1.850433349609375e-05, "step": 12127, "training_step_time": 0.11280488967895508 }, { "epoch": 1.8505859375e-05, "model_forward_time": 0.024810314178466797, "step": 12128 }, { "epoch": 1.8505859375e-05, "step": 12128, "training_step_time": 0.10538744926452637 }, { "epoch": 1.850738525390625e-05, "model_forward_time": 0.025376558303833008, "step": 12129 }, { "epoch": 1.850738525390625e-05, "step": 12129, "training_step_time": 0.12279915809631348 }, { "epoch": 1.85089111328125e-05, "grad_norm": 0.37205004692077637, "learning_rate": 6.942657663718879e-05, "loss": 0.0213, "step": 12130 }, { "epoch": 1.85089111328125e-05, "model_forward_time": 0.025405168533325195, "step": 12130 }, { "epoch": 1.85089111328125e-05, "step": 12130, "training_step_time": 0.15722894668579102 }, { "epoch": 1.851043701171875e-05, "model_forward_time": 0.024766206741333008, "step": 12131 }, { "epoch": 1.851043701171875e-05, "step": 12131, "training_step_time": 0.15096163749694824 }, { "epoch": 1.8511962890625e-05, "model_forward_time": 0.025306224822998047, "step": 12132 }, { "epoch": 1.8511962890625e-05, "step": 12132, "training_step_time": 0.15341806411743164 }, { "epoch": 1.851348876953125e-05, "model_forward_time": 0.02415633201599121, "step": 12133 }, { "epoch": 1.851348876953125e-05, "step": 12133, "training_step_time": 0.13424420356750488 }, { "epoch": 1.85150146484375e-05, "model_forward_time": 0.02494049072265625, "step": 12134 }, { "epoch": 1.85150146484375e-05, "step": 12134, "training_step_time": 0.13079428672790527 }, { "epoch": 1.851654052734375e-05, "model_forward_time": 0.025194168090820312, "step": 12135 }, { "epoch": 1.851654052734375e-05, "step": 12135, "training_step_time": 0.1319594383239746 }, { "epoch": 1.851806640625e-05, "model_forward_time": 0.024608373641967773, "step": 12136 }, { "epoch": 1.851806640625e-05, "step": 12136, "training_step_time": 0.21648597717285156 }, { "epoch": 1.851959228515625e-05, "model_forward_time": 0.02446579933166504, "step": 12137 }, { "epoch": 1.851959228515625e-05, "step": 12137, "training_step_time": 0.1127159595489502 }, { "epoch": 1.85211181640625e-05, "model_forward_time": 0.02412724494934082, "step": 12138 }, { "epoch": 1.85211181640625e-05, "step": 12138, "training_step_time": 0.10964059829711914 }, { "epoch": 1.852264404296875e-05, "model_forward_time": 0.02886343002319336, "step": 12139 }, { "epoch": 1.852264404296875e-05, "step": 12139, "training_step_time": 0.12058329582214355 }, { "epoch": 1.8524169921875e-05, "grad_norm": 0.3380381762981415, "learning_rate": 6.937577932260515e-05, "loss": 0.0153, "step": 12140 }, { "epoch": 1.8524169921875e-05, "model_forward_time": 0.025558948516845703, "step": 12140 }, { "epoch": 1.8524169921875e-05, "step": 12140, "training_step_time": 0.11328268051147461 }, { "epoch": 1.852569580078125e-05, "model_forward_time": 0.025569677352905273, "step": 12141 }, { "epoch": 1.852569580078125e-05, "step": 12141, "training_step_time": 0.12047195434570312 }, { "epoch": 1.85272216796875e-05, "model_forward_time": 0.02518606185913086, "step": 12142 }, { "epoch": 1.85272216796875e-05, "step": 12142, "training_step_time": 0.10771536827087402 }, { "epoch": 1.852874755859375e-05, "model_forward_time": 0.02576589584350586, "step": 12143 }, { "epoch": 1.852874755859375e-05, "step": 12143, "training_step_time": 0.11299729347229004 }, { "epoch": 1.85302734375e-05, "model_forward_time": 0.02525925636291504, "step": 12144 }, { "epoch": 1.85302734375e-05, "step": 12144, "training_step_time": 0.10834002494812012 }, { "epoch": 1.853179931640625e-05, "model_forward_time": 0.025398731231689453, "step": 12145 }, { "epoch": 1.853179931640625e-05, "step": 12145, "training_step_time": 0.10854387283325195 }, { "epoch": 1.85333251953125e-05, "model_forward_time": 0.025328636169433594, "step": 12146 }, { "epoch": 1.85333251953125e-05, "step": 12146, "training_step_time": 0.1081233024597168 }, { "epoch": 1.853485107421875e-05, "model_forward_time": 0.024988889694213867, "step": 12147 }, { "epoch": 1.853485107421875e-05, "step": 12147, "training_step_time": 0.10960221290588379 }, { "epoch": 1.8536376953125e-05, "model_forward_time": 0.02519822120666504, "step": 12148 }, { "epoch": 1.8536376953125e-05, "step": 12148, "training_step_time": 0.8354175090789795 }, { "epoch": 1.853790283203125e-05, "model_forward_time": 0.022663354873657227, "step": 12149 }, { "epoch": 1.853790283203125e-05, "step": 12149, "training_step_time": 0.10273098945617676 }, { "epoch": 1.85394287109375e-05, "grad_norm": 0.39317330718040466, "learning_rate": 6.932495846462261e-05, "loss": 0.0173, "step": 12150 }, { "epoch": 1.85394287109375e-05, "model_forward_time": 0.0239410400390625, "step": 12150 }, { "epoch": 1.85394287109375e-05, "step": 12150, "training_step_time": 0.17058658599853516 }, { "epoch": 1.854095458984375e-05, "model_forward_time": 0.024542808532714844, "step": 12151 }, { "epoch": 1.854095458984375e-05, "step": 12151, "training_step_time": 0.10926365852355957 }, { "epoch": 1.854248046875e-05, "model_forward_time": 0.02427816390991211, "step": 12152 }, { "epoch": 1.854248046875e-05, "step": 12152, "training_step_time": 0.11043620109558105 }, { "epoch": 1.854400634765625e-05, "model_forward_time": 0.025025129318237305, "step": 12153 }, { "epoch": 1.854400634765625e-05, "step": 12153, "training_step_time": 0.12153434753417969 }, { "epoch": 1.85455322265625e-05, "model_forward_time": 0.025335073471069336, "step": 12154 }, { "epoch": 1.85455322265625e-05, "step": 12154, "training_step_time": 0.1292552947998047 }, { "epoch": 1.854705810546875e-05, "model_forward_time": 0.02577519416809082, "step": 12155 }, { "epoch": 1.854705810546875e-05, "step": 12155, "training_step_time": 0.11459016799926758 }, { "epoch": 1.8548583984375e-05, "model_forward_time": 0.0253298282623291, "step": 12156 }, { "epoch": 1.8548583984375e-05, "step": 12156, "training_step_time": 0.11096453666687012 }, { "epoch": 1.855010986328125e-05, "model_forward_time": 0.025742292404174805, "step": 12157 }, { "epoch": 1.855010986328125e-05, "step": 12157, "training_step_time": 0.10995149612426758 }, { "epoch": 1.85516357421875e-05, "model_forward_time": 0.025273799896240234, "step": 12158 }, { "epoch": 1.85516357421875e-05, "step": 12158, "training_step_time": 0.10606193542480469 }, { "epoch": 1.855316162109375e-05, "model_forward_time": 0.02552056312561035, "step": 12159 }, { "epoch": 1.855316162109375e-05, "step": 12159, "training_step_time": 0.10692667961120605 }, { "epoch": 1.85546875e-05, "grad_norm": 0.28868332505226135, "learning_rate": 6.927411412499332e-05, "loss": 0.0146, "step": 12160 }, { "epoch": 1.85546875e-05, "model_forward_time": 0.02510809898376465, "step": 12160 }, { "epoch": 1.85546875e-05, "step": 12160, "training_step_time": 0.10293769836425781 }, { "epoch": 1.855621337890625e-05, "model_forward_time": 0.02461838722229004, "step": 12161 }, { "epoch": 1.855621337890625e-05, "step": 12161, "training_step_time": 0.14577412605285645 }, { "epoch": 1.85577392578125e-05, "model_forward_time": 0.024579286575317383, "step": 12162 }, { "epoch": 1.85577392578125e-05, "step": 12162, "training_step_time": 0.10670280456542969 }, { "epoch": 1.855926513671875e-05, "model_forward_time": 0.02532172203063965, "step": 12163 }, { "epoch": 1.855926513671875e-05, "step": 12163, "training_step_time": 0.1863081455230713 }, { "epoch": 1.8560791015625e-05, "model_forward_time": 0.024344921112060547, "step": 12164 }, { "epoch": 1.8560791015625e-05, "step": 12164, "training_step_time": 0.13602018356323242 }, { "epoch": 1.856231689453125e-05, "model_forward_time": 0.024878978729248047, "step": 12165 }, { "epoch": 1.856231689453125e-05, "step": 12165, "training_step_time": 0.1930091381072998 }, { "epoch": 1.85638427734375e-05, "model_forward_time": 0.024596214294433594, "step": 12166 }, { "epoch": 1.85638427734375e-05, "step": 12166, "training_step_time": 0.14045000076293945 }, { "epoch": 1.856536865234375e-05, "model_forward_time": 0.024628400802612305, "step": 12167 }, { "epoch": 1.856536865234375e-05, "step": 12167, "training_step_time": 0.11679720878601074 }, { "epoch": 1.856689453125e-05, "model_forward_time": 0.02660655975341797, "step": 12168 }, { "epoch": 1.856689453125e-05, "step": 12168, "training_step_time": 0.11935281753540039 }, { "epoch": 1.856842041015625e-05, "model_forward_time": 0.02539682388305664, "step": 12169 }, { "epoch": 1.856842041015625e-05, "step": 12169, "training_step_time": 0.10825419425964355 }, { "epoch": 1.85699462890625e-05, "grad_norm": 0.24523653090000153, "learning_rate": 6.922324636549795e-05, "loss": 0.0156, "step": 12170 }, { "epoch": 1.85699462890625e-05, "model_forward_time": 0.025346994400024414, "step": 12170 }, { "epoch": 1.85699462890625e-05, "step": 12170, "training_step_time": 0.1883068084716797 }, { "epoch": 1.857147216796875e-05, "model_forward_time": 0.027129411697387695, "step": 12171 }, { "epoch": 1.857147216796875e-05, "step": 12171, "training_step_time": 0.1811690330505371 }, { "epoch": 1.8572998046875e-05, "model_forward_time": 0.023925065994262695, "step": 12172 }, { "epoch": 1.8572998046875e-05, "step": 12172, "training_step_time": 0.12838196754455566 }, { "epoch": 1.857452392578125e-05, "model_forward_time": 0.024210691452026367, "step": 12173 }, { "epoch": 1.857452392578125e-05, "step": 12173, "training_step_time": 0.10532450675964355 }, { "epoch": 1.85760498046875e-05, "model_forward_time": 0.025958776473999023, "step": 12174 }, { "epoch": 1.85760498046875e-05, "step": 12174, "training_step_time": 0.12211942672729492 }, { "epoch": 1.857757568359375e-05, "model_forward_time": 0.024934053421020508, "step": 12175 }, { "epoch": 1.857757568359375e-05, "step": 12175, "training_step_time": 0.10917043685913086 }, { "epoch": 1.85791015625e-05, "model_forward_time": 0.025083303451538086, "step": 12176 }, { "epoch": 1.85791015625e-05, "step": 12176, "training_step_time": 0.12683629989624023 }, { "epoch": 1.858062744140625e-05, "model_forward_time": 0.02460622787475586, "step": 12177 }, { "epoch": 1.858062744140625e-05, "step": 12177, "training_step_time": 0.1219024658203125 }, { "epoch": 1.85821533203125e-05, "model_forward_time": 0.025339365005493164, "step": 12178 }, { "epoch": 1.85821533203125e-05, "step": 12178, "training_step_time": 0.11470603942871094 }, { "epoch": 1.858367919921875e-05, "model_forward_time": 0.025956153869628906, "step": 12179 }, { "epoch": 1.858367919921875e-05, "step": 12179, "training_step_time": 0.1099996566772461 }, { "epoch": 1.8585205078125e-05, "grad_norm": 0.4693124294281006, "learning_rate": 6.917235524794558e-05, "loss": 0.0299, "step": 12180 }, { "epoch": 1.8585205078125e-05, "model_forward_time": 0.025193214416503906, "step": 12180 }, { "epoch": 1.8585205078125e-05, "step": 12180, "training_step_time": 0.10866832733154297 }, { "epoch": 1.858673095703125e-05, "model_forward_time": 0.025312423706054688, "step": 12181 }, { "epoch": 1.858673095703125e-05, "step": 12181, "training_step_time": 0.10819029808044434 }, { "epoch": 1.85882568359375e-05, "model_forward_time": 0.025712251663208008, "step": 12182 }, { "epoch": 1.85882568359375e-05, "step": 12182, "training_step_time": 0.10832548141479492 }, { "epoch": 1.858978271484375e-05, "model_forward_time": 0.025365591049194336, "step": 12183 }, { "epoch": 1.858978271484375e-05, "step": 12183, "training_step_time": 0.10812616348266602 }, { "epoch": 1.859130859375e-05, "model_forward_time": 0.025095701217651367, "step": 12184 }, { "epoch": 1.859130859375e-05, "step": 12184, "training_step_time": 0.10722160339355469 }, { "epoch": 1.859283447265625e-05, "model_forward_time": 0.02508258819580078, "step": 12185 }, { "epoch": 1.859283447265625e-05, "step": 12185, "training_step_time": 0.10984325408935547 }, { "epoch": 1.85943603515625e-05, "model_forward_time": 0.0254666805267334, "step": 12186 }, { "epoch": 1.85943603515625e-05, "step": 12186, "training_step_time": 0.10843181610107422 }, { "epoch": 1.859588623046875e-05, "model_forward_time": 0.02558612823486328, "step": 12187 }, { "epoch": 1.859588623046875e-05, "step": 12187, "training_step_time": 0.10842323303222656 }, { "epoch": 1.8597412109375e-05, "model_forward_time": 0.02507305145263672, "step": 12188 }, { "epoch": 1.8597412109375e-05, "step": 12188, "training_step_time": 0.11646866798400879 }, { "epoch": 1.859893798828125e-05, "model_forward_time": 0.025184154510498047, "step": 12189 }, { "epoch": 1.859893798828125e-05, "step": 12189, "training_step_time": 0.1075582504272461 }, { "epoch": 1.86004638671875e-05, "grad_norm": 0.3445988893508911, "learning_rate": 6.912144083417376e-05, "loss": 0.0154, "step": 12190 }, { "epoch": 1.86004638671875e-05, "model_forward_time": 0.025242328643798828, "step": 12190 }, { "epoch": 1.86004638671875e-05, "step": 12190, "training_step_time": 0.10817742347717285 }, { "epoch": 1.860198974609375e-05, "model_forward_time": 0.024753332138061523, "step": 12191 }, { "epoch": 1.860198974609375e-05, "step": 12191, "training_step_time": 0.10774350166320801 }, { "epoch": 1.8603515625e-05, "model_forward_time": 0.02552509307861328, "step": 12192 }, { "epoch": 1.8603515625e-05, "step": 12192, "training_step_time": 0.11028528213500977 }, { "epoch": 1.860504150390625e-05, "model_forward_time": 0.02488851547241211, "step": 12193 }, { "epoch": 1.860504150390625e-05, "step": 12193, "training_step_time": 0.10948014259338379 }, { "epoch": 1.86065673828125e-05, "model_forward_time": 0.02522730827331543, "step": 12194 }, { "epoch": 1.86065673828125e-05, "step": 12194, "training_step_time": 0.11433005332946777 }, { "epoch": 1.860809326171875e-05, "model_forward_time": 0.028829336166381836, "step": 12195 }, { "epoch": 1.860809326171875e-05, "step": 12195, "training_step_time": 0.11514568328857422 }, { "epoch": 1.8609619140625e-05, "model_forward_time": 0.025232553482055664, "step": 12196 }, { "epoch": 1.8609619140625e-05, "step": 12196, "training_step_time": 0.11287474632263184 }, { "epoch": 1.861114501953125e-05, "model_forward_time": 0.025613069534301758, "step": 12197 }, { "epoch": 1.861114501953125e-05, "step": 12197, "training_step_time": 0.12331390380859375 }, { "epoch": 1.86126708984375e-05, "model_forward_time": 0.025481462478637695, "step": 12198 }, { "epoch": 1.86126708984375e-05, "step": 12198, "training_step_time": 0.11653327941894531 }, { "epoch": 1.861419677734375e-05, "model_forward_time": 0.025342941284179688, "step": 12199 }, { "epoch": 1.861419677734375e-05, "step": 12199, "training_step_time": 0.22729110717773438 }, { "epoch": 1.861572265625e-05, "grad_norm": 0.2358154058456421, "learning_rate": 6.90705031860483e-05, "loss": 0.0249, "step": 12200 }, { "epoch": 1.861572265625e-05, "model_forward_time": 0.02457141876220703, "step": 12200 }, { "epoch": 1.861572265625e-05, "step": 12200, "training_step_time": 0.1245582103729248 }, { "epoch": 1.861724853515625e-05, "model_forward_time": 0.024930953979492188, "step": 12201 }, { "epoch": 1.861724853515625e-05, "step": 12201, "training_step_time": 0.11001896858215332 }, { "epoch": 1.86187744140625e-05, "model_forward_time": 0.028508424758911133, "step": 12202 }, { "epoch": 1.86187744140625e-05, "step": 12202, "training_step_time": 0.11352682113647461 }, { "epoch": 1.862030029296875e-05, "model_forward_time": 0.025401830673217773, "step": 12203 }, { "epoch": 1.862030029296875e-05, "step": 12203, "training_step_time": 0.11102414131164551 }, { "epoch": 1.8621826171875e-05, "model_forward_time": 0.0267031192779541, "step": 12204 }, { "epoch": 1.8621826171875e-05, "step": 12204, "training_step_time": 0.10894393920898438 }, { "epoch": 1.862335205078125e-05, "model_forward_time": 0.025269269943237305, "step": 12205 }, { "epoch": 1.862335205078125e-05, "step": 12205, "training_step_time": 0.1085808277130127 }, { "epoch": 1.86248779296875e-05, "model_forward_time": 0.02559947967529297, "step": 12206 }, { "epoch": 1.86248779296875e-05, "step": 12206, "training_step_time": 0.10754179954528809 }, { "epoch": 1.862640380859375e-05, "model_forward_time": 0.024886369705200195, "step": 12207 }, { "epoch": 1.862640380859375e-05, "step": 12207, "training_step_time": 0.14352679252624512 }, { "epoch": 1.86279296875e-05, "model_forward_time": 0.024336576461791992, "step": 12208 }, { "epoch": 1.86279296875e-05, "step": 12208, "training_step_time": 0.1642289161682129 }, { "epoch": 1.862945556640625e-05, "model_forward_time": 0.028010845184326172, "step": 12209 }, { "epoch": 1.862945556640625e-05, "step": 12209, "training_step_time": 0.11157894134521484 }, { "epoch": 1.86309814453125e-05, "grad_norm": 0.27985885739326477, "learning_rate": 6.901954236546323e-05, "loss": 0.0307, "step": 12210 }, { "epoch": 1.86309814453125e-05, "model_forward_time": 0.024744033813476562, "step": 12210 }, { "epoch": 1.86309814453125e-05, "step": 12210, "training_step_time": 0.1626591682434082 }, { "epoch": 1.863250732421875e-05, "model_forward_time": 0.024492979049682617, "step": 12211 }, { "epoch": 1.863250732421875e-05, "step": 12211, "training_step_time": 0.1714036464691162 }, { "epoch": 1.8634033203125e-05, "model_forward_time": 0.0251007080078125, "step": 12212 }, { "epoch": 1.8634033203125e-05, "step": 12212, "training_step_time": 0.19283533096313477 }, { "epoch": 1.863555908203125e-05, "model_forward_time": 0.024848461151123047, "step": 12213 }, { "epoch": 1.863555908203125e-05, "step": 12213, "training_step_time": 0.11724138259887695 }, { "epoch": 1.86370849609375e-05, "model_forward_time": 0.025547266006469727, "step": 12214 }, { "epoch": 1.86370849609375e-05, "step": 12214, "training_step_time": 0.10605788230895996 }, { "epoch": 1.863861083984375e-05, "model_forward_time": 0.024709463119506836, "step": 12215 }, { "epoch": 1.863861083984375e-05, "step": 12215, "training_step_time": 0.19892477989196777 }, { "epoch": 1.864013671875e-05, "model_forward_time": 0.024607181549072266, "step": 12216 }, { "epoch": 1.864013671875e-05, "step": 12216, "training_step_time": 0.10834670066833496 }, { "epoch": 1.864166259765625e-05, "model_forward_time": 0.024436235427856445, "step": 12217 }, { "epoch": 1.864166259765625e-05, "step": 12217, "training_step_time": 0.1727888584136963 }, { "epoch": 1.86431884765625e-05, "model_forward_time": 0.024678468704223633, "step": 12218 }, { "epoch": 1.86431884765625e-05, "step": 12218, "training_step_time": 0.11142873764038086 }, { "epoch": 1.864471435546875e-05, "model_forward_time": 0.024569988250732422, "step": 12219 }, { "epoch": 1.864471435546875e-05, "step": 12219, "training_step_time": 0.1053457260131836 }, { "epoch": 1.8646240234375e-05, "grad_norm": 0.5920787453651428, "learning_rate": 6.896855843434078e-05, "loss": 0.0179, "step": 12220 }, { "epoch": 1.8646240234375e-05, "model_forward_time": 0.025516748428344727, "step": 12220 }, { "epoch": 1.8646240234375e-05, "step": 12220, "training_step_time": 0.12272787094116211 }, { "epoch": 1.864776611328125e-05, "model_forward_time": 0.024941682815551758, "step": 12221 }, { "epoch": 1.864776611328125e-05, "step": 12221, "training_step_time": 0.10534191131591797 }, { "epoch": 1.86492919921875e-05, "model_forward_time": 0.02530694007873535, "step": 12222 }, { "epoch": 1.86492919921875e-05, "step": 12222, "training_step_time": 0.12171316146850586 }, { "epoch": 1.865081787109375e-05, "model_forward_time": 0.02616715431213379, "step": 12223 }, { "epoch": 1.865081787109375e-05, "step": 12223, "training_step_time": 0.10560345649719238 }, { "epoch": 1.865234375e-05, "model_forward_time": 0.025304317474365234, "step": 12224 }, { "epoch": 1.865234375e-05, "step": 12224, "training_step_time": 0.11466836929321289 }, { "epoch": 1.865386962890625e-05, "model_forward_time": 0.02510809898376465, "step": 12225 }, { "epoch": 1.865386962890625e-05, "step": 12225, "training_step_time": 0.10394167900085449 }, { "epoch": 1.86553955078125e-05, "model_forward_time": 0.025491714477539062, "step": 12226 }, { "epoch": 1.86553955078125e-05, "step": 12226, "training_step_time": 0.10468292236328125 }, { "epoch": 1.865692138671875e-05, "model_forward_time": 0.02528214454650879, "step": 12227 }, { "epoch": 1.865692138671875e-05, "step": 12227, "training_step_time": 0.10531830787658691 }, { "epoch": 1.8658447265625e-05, "model_forward_time": 0.02516794204711914, "step": 12228 }, { "epoch": 1.8658447265625e-05, "step": 12228, "training_step_time": 0.10705089569091797 }, { "epoch": 1.865997314453125e-05, "model_forward_time": 0.02544116973876953, "step": 12229 }, { "epoch": 1.865997314453125e-05, "step": 12229, "training_step_time": 0.10489082336425781 }, { "epoch": 1.86614990234375e-05, "grad_norm": 0.38967519998550415, "learning_rate": 6.89175514546312e-05, "loss": 0.0238, "step": 12230 }, { "epoch": 1.86614990234375e-05, "model_forward_time": 0.02520465850830078, "step": 12230 }, { "epoch": 1.86614990234375e-05, "step": 12230, "training_step_time": 0.10487127304077148 }, { "epoch": 1.866302490234375e-05, "model_forward_time": 0.02511119842529297, "step": 12231 }, { "epoch": 1.866302490234375e-05, "step": 12231, "training_step_time": 0.10860204696655273 }, { "epoch": 1.866455078125e-05, "model_forward_time": 0.0253448486328125, "step": 12232 }, { "epoch": 1.866455078125e-05, "step": 12232, "training_step_time": 0.10773730278015137 }, { "epoch": 1.866607666015625e-05, "model_forward_time": 0.025053024291992188, "step": 12233 }, { "epoch": 1.866607666015625e-05, "step": 12233, "training_step_time": 0.11473274230957031 }, { "epoch": 1.86676025390625e-05, "model_forward_time": 0.02533435821533203, "step": 12234 }, { "epoch": 1.86676025390625e-05, "step": 12234, "training_step_time": 0.10961723327636719 }, { "epoch": 1.866912841796875e-05, "model_forward_time": 0.025982141494750977, "step": 12235 }, { "epoch": 1.866912841796875e-05, "step": 12235, "training_step_time": 0.10794568061828613 }, { "epoch": 1.8670654296875e-05, "model_forward_time": 0.024712324142456055, "step": 12236 }, { "epoch": 1.8670654296875e-05, "step": 12236, "training_step_time": 0.10839700698852539 }, { "epoch": 1.867218017578125e-05, "model_forward_time": 0.024310827255249023, "step": 12237 }, { "epoch": 1.867218017578125e-05, "step": 12237, "training_step_time": 0.10738158226013184 }, { "epoch": 1.86737060546875e-05, "model_forward_time": 0.024081945419311523, "step": 12238 }, { "epoch": 1.86737060546875e-05, "step": 12238, "training_step_time": 0.11095452308654785 }, { "epoch": 1.867523193359375e-05, "model_forward_time": 0.025173187255859375, "step": 12239 }, { "epoch": 1.867523193359375e-05, "step": 12239, "training_step_time": 0.1131296157836914 }, { "epoch": 1.86767578125e-05, "grad_norm": 0.4197467565536499, "learning_rate": 6.886652148831279e-05, "loss": 0.017, "step": 12240 }, { "epoch": 1.86767578125e-05, "model_forward_time": 0.025347232818603516, "step": 12240 }, { "epoch": 1.86767578125e-05, "step": 12240, "training_step_time": 0.10682916641235352 }, { "epoch": 1.867828369140625e-05, "model_forward_time": 0.025358915328979492, "step": 12241 }, { "epoch": 1.867828369140625e-05, "step": 12241, "training_step_time": 0.10932016372680664 }, { "epoch": 1.86798095703125e-05, "model_forward_time": 0.025012493133544922, "step": 12242 }, { "epoch": 1.86798095703125e-05, "step": 12242, "training_step_time": 0.10710310935974121 }, { "epoch": 1.868133544921875e-05, "model_forward_time": 0.025623321533203125, "step": 12243 }, { "epoch": 1.868133544921875e-05, "step": 12243, "training_step_time": 0.16004395484924316 }, { "epoch": 1.8682861328125e-05, "model_forward_time": 0.024783849716186523, "step": 12244 }, { "epoch": 1.8682861328125e-05, "step": 12244, "training_step_time": 0.10759615898132324 }, { "epoch": 1.868438720703125e-05, "model_forward_time": 0.0251162052154541, "step": 12245 }, { "epoch": 1.868438720703125e-05, "step": 12245, "training_step_time": 0.11182188987731934 }, { "epoch": 1.86859130859375e-05, "model_forward_time": 0.025276899337768555, "step": 12246 }, { "epoch": 1.86859130859375e-05, "step": 12246, "training_step_time": 0.12452292442321777 }, { "epoch": 1.868743896484375e-05, "model_forward_time": 0.02532339096069336, "step": 12247 }, { "epoch": 1.868743896484375e-05, "step": 12247, "training_step_time": 0.13311219215393066 }, { "epoch": 1.868896484375e-05, "model_forward_time": 0.025262832641601562, "step": 12248 }, { "epoch": 1.868896484375e-05, "step": 12248, "training_step_time": 0.11025476455688477 }, { "epoch": 1.869049072265625e-05, "model_forward_time": 0.025403499603271484, "step": 12249 }, { "epoch": 1.869049072265625e-05, "step": 12249, "training_step_time": 0.11726641654968262 }, { "epoch": 1.86920166015625e-05, "grad_norm": 0.3409532904624939, "learning_rate": 6.881546859739179e-05, "loss": 0.0286, "step": 12250 }, { "epoch": 1.86920166015625e-05, "model_forward_time": 0.02521657943725586, "step": 12250 }, { "epoch": 1.86920166015625e-05, "step": 12250, "training_step_time": 0.10749268531799316 }, { "epoch": 1.869354248046875e-05, "model_forward_time": 0.02523040771484375, "step": 12251 }, { "epoch": 1.869354248046875e-05, "step": 12251, "training_step_time": 0.10775041580200195 }, { "epoch": 1.8695068359375e-05, "model_forward_time": 0.025638341903686523, "step": 12252 }, { "epoch": 1.8695068359375e-05, "step": 12252, "training_step_time": 0.1044301986694336 }, { "epoch": 1.869659423828125e-05, "model_forward_time": 0.02445220947265625, "step": 12253 }, { "epoch": 1.869659423828125e-05, "step": 12253, "training_step_time": 0.1233832836151123 }, { "epoch": 1.86981201171875e-05, "model_forward_time": 0.02466130256652832, "step": 12254 }, { "epoch": 1.86981201171875e-05, "step": 12254, "training_step_time": 0.12880825996398926 }, { "epoch": 1.869964599609375e-05, "model_forward_time": 0.025395631790161133, "step": 12255 }, { "epoch": 1.869964599609375e-05, "step": 12255, "training_step_time": 0.10640430450439453 }, { "epoch": 1.8701171875e-05, "model_forward_time": 0.025504350662231445, "step": 12256 }, { "epoch": 1.8701171875e-05, "step": 12256, "training_step_time": 0.12272858619689941 }, { "epoch": 1.870269775390625e-05, "model_forward_time": 0.02539205551147461, "step": 12257 }, { "epoch": 1.870269775390625e-05, "step": 12257, "training_step_time": 0.1430068016052246 }, { "epoch": 1.87042236328125e-05, "model_forward_time": 0.025111913681030273, "step": 12258 }, { "epoch": 1.87042236328125e-05, "step": 12258, "training_step_time": 0.2016453742980957 }, { "epoch": 1.870574951171875e-05, "model_forward_time": 0.02498006820678711, "step": 12259 }, { "epoch": 1.870574951171875e-05, "step": 12259, "training_step_time": 0.1595308780670166 }, { "epoch": 1.8707275390625e-05, "grad_norm": 0.43211686611175537, "learning_rate": 6.876439284390223e-05, "loss": 0.0281, "step": 12260 }, { "epoch": 1.8707275390625e-05, "model_forward_time": 0.024408340454101562, "step": 12260 }, { "epoch": 1.8707275390625e-05, "step": 12260, "training_step_time": 0.11379694938659668 }, { "epoch": 1.870880126953125e-05, "model_forward_time": 0.024810314178466797, "step": 12261 }, { "epoch": 1.870880126953125e-05, "step": 12261, "training_step_time": 0.1100618839263916 }, { "epoch": 1.87103271484375e-05, "model_forward_time": 0.02516651153564453, "step": 12262 }, { "epoch": 1.87103271484375e-05, "step": 12262, "training_step_time": 0.19472813606262207 }, { "epoch": 1.871185302734375e-05, "model_forward_time": 0.02423572540283203, "step": 12263 }, { "epoch": 1.871185302734375e-05, "step": 12263, "training_step_time": 0.10750007629394531 }, { "epoch": 1.871337890625e-05, "model_forward_time": 0.023868560791015625, "step": 12264 }, { "epoch": 1.871337890625e-05, "step": 12264, "training_step_time": 0.1659398078918457 }, { "epoch": 1.871490478515625e-05, "model_forward_time": 0.024478435516357422, "step": 12265 }, { "epoch": 1.871490478515625e-05, "step": 12265, "training_step_time": 0.12684178352355957 }, { "epoch": 1.87164306640625e-05, "model_forward_time": 0.02469158172607422, "step": 12266 }, { "epoch": 1.87164306640625e-05, "step": 12266, "training_step_time": 0.16400361061096191 }, { "epoch": 1.871795654296875e-05, "model_forward_time": 0.02426433563232422, "step": 12267 }, { "epoch": 1.871795654296875e-05, "step": 12267, "training_step_time": 0.11981987953186035 }, { "epoch": 1.8719482421875e-05, "model_forward_time": 0.027681350708007812, "step": 12268 }, { "epoch": 1.8719482421875e-05, "step": 12268, "training_step_time": 0.10977530479431152 }, { "epoch": 1.872100830078125e-05, "model_forward_time": 0.025437355041503906, "step": 12269 }, { "epoch": 1.872100830078125e-05, "step": 12269, "training_step_time": 0.13452506065368652 }, { "epoch": 1.87225341796875e-05, "grad_norm": 0.24758252501487732, "learning_rate": 6.871329428990602e-05, "loss": 0.0216, "step": 12270 }, { "epoch": 1.87225341796875e-05, "model_forward_time": 0.02554917335510254, "step": 12270 }, { "epoch": 1.87225341796875e-05, "step": 12270, "training_step_time": 0.10578608512878418 }, { "epoch": 1.872406005859375e-05, "model_forward_time": 0.02552628517150879, "step": 12271 }, { "epoch": 1.872406005859375e-05, "step": 12271, "training_step_time": 0.10780692100524902 }, { "epoch": 1.87255859375e-05, "model_forward_time": 0.02537679672241211, "step": 12272 }, { "epoch": 1.87255859375e-05, "step": 12272, "training_step_time": 0.11155557632446289 }, { "epoch": 1.872711181640625e-05, "model_forward_time": 0.025572776794433594, "step": 12273 }, { "epoch": 1.872711181640625e-05, "step": 12273, "training_step_time": 0.1747570037841797 }, { "epoch": 1.87286376953125e-05, "model_forward_time": 0.024780988693237305, "step": 12274 }, { "epoch": 1.87286376953125e-05, "step": 12274, "training_step_time": 0.20663881301879883 }, { "epoch": 1.873016357421875e-05, "model_forward_time": 0.02562093734741211, "step": 12275 }, { "epoch": 1.873016357421875e-05, "step": 12275, "training_step_time": 0.1924285888671875 }, { "epoch": 1.8731689453125e-05, "model_forward_time": 0.02427196502685547, "step": 12276 }, { "epoch": 1.8731689453125e-05, "step": 12276, "training_step_time": 0.1919260025024414 }, { "epoch": 1.873321533203125e-05, "model_forward_time": 0.024675846099853516, "step": 12277 }, { "epoch": 1.873321533203125e-05, "step": 12277, "training_step_time": 0.17362666130065918 }, { "epoch": 1.87347412109375e-05, "model_forward_time": 0.023099660873413086, "step": 12278 }, { "epoch": 1.87347412109375e-05, "step": 12278, "training_step_time": 0.1619856357574463 }, { "epoch": 1.873626708984375e-05, "model_forward_time": 0.0247800350189209, "step": 12279 }, { "epoch": 1.873626708984375e-05, "step": 12279, "training_step_time": 0.1556997299194336 }, { "epoch": 1.873779296875e-05, "grad_norm": 0.20096886157989502, "learning_rate": 6.86621729974927e-05, "loss": 0.0179, "step": 12280 }, { "epoch": 1.873779296875e-05, "model_forward_time": 0.024496793746948242, "step": 12280 }, { "epoch": 1.873779296875e-05, "step": 12280, "training_step_time": 0.1393299102783203 }, { "epoch": 1.873931884765625e-05, "model_forward_time": 0.024519681930541992, "step": 12281 }, { "epoch": 1.873931884765625e-05, "step": 12281, "training_step_time": 0.12804031372070312 }, { "epoch": 1.87408447265625e-05, "model_forward_time": 0.024983644485473633, "step": 12282 }, { "epoch": 1.87408447265625e-05, "step": 12282, "training_step_time": 0.12546658515930176 }, { "epoch": 1.874237060546875e-05, "model_forward_time": 0.024938106536865234, "step": 12283 }, { "epoch": 1.874237060546875e-05, "step": 12283, "training_step_time": 0.12483000755310059 }, { "epoch": 1.8743896484375e-05, "model_forward_time": 0.024898290634155273, "step": 12284 }, { "epoch": 1.8743896484375e-05, "step": 12284, "training_step_time": 0.12295699119567871 }, { "epoch": 1.874542236328125e-05, "model_forward_time": 0.025463581085205078, "step": 12285 }, { "epoch": 1.874542236328125e-05, "step": 12285, "training_step_time": 0.11552762985229492 }, { "epoch": 1.87469482421875e-05, "model_forward_time": 0.025156497955322266, "step": 12286 }, { "epoch": 1.87469482421875e-05, "step": 12286, "training_step_time": 0.15613532066345215 }, { "epoch": 1.874847412109375e-05, "model_forward_time": 0.02501535415649414, "step": 12287 }, { "epoch": 1.874847412109375e-05, "step": 12287, "training_step_time": 0.16799545288085938 }, { "epoch": 1.875e-05, "model_forward_time": 0.02463984489440918, "step": 12288 }, { "epoch": 1.875e-05, "step": 12288, "training_step_time": 0.1384732723236084 }, { "epoch": 1.875152587890625e-05, "model_forward_time": 0.02461719512939453, "step": 12289 }, { "epoch": 1.875152587890625e-05, "step": 12289, "training_step_time": 0.1095285415649414 }, { "epoch": 1.87530517578125e-05, "grad_norm": 0.3073076903820038, "learning_rate": 6.861102902877946e-05, "loss": 0.0256, "step": 12290 }, { "epoch": 1.87530517578125e-05, "model_forward_time": 0.024844884872436523, "step": 12290 }, { "epoch": 1.87530517578125e-05, "step": 12290, "training_step_time": 0.1140146255493164 }, { "epoch": 1.875457763671875e-05, "model_forward_time": 0.02497720718383789, "step": 12291 }, { "epoch": 1.875457763671875e-05, "step": 12291, "training_step_time": 0.13397526741027832 }, { "epoch": 1.8756103515625e-05, "model_forward_time": 0.024916648864746094, "step": 12292 }, { "epoch": 1.8756103515625e-05, "step": 12292, "training_step_time": 0.10604643821716309 }, { "epoch": 1.875762939453125e-05, "model_forward_time": 0.02440166473388672, "step": 12293 }, { "epoch": 1.875762939453125e-05, "step": 12293, "training_step_time": 0.10359025001525879 }, { "epoch": 1.87591552734375e-05, "model_forward_time": 0.026600360870361328, "step": 12294 }, { "epoch": 1.87591552734375e-05, "step": 12294, "training_step_time": 0.1537926197052002 }, { "epoch": 1.876068115234375e-05, "model_forward_time": 0.0252988338470459, "step": 12295 }, { "epoch": 1.876068115234375e-05, "step": 12295, "training_step_time": 0.11036539077758789 }, { "epoch": 1.876220703125e-05, "model_forward_time": 0.025516986846923828, "step": 12296 }, { "epoch": 1.876220703125e-05, "step": 12296, "training_step_time": 0.19042229652404785 }, { "epoch": 1.876373291015625e-05, "model_forward_time": 0.02401447296142578, "step": 12297 }, { "epoch": 1.876373291015625e-05, "step": 12297, "training_step_time": 0.14038825035095215 }, { "epoch": 1.87652587890625e-05, "model_forward_time": 0.024471044540405273, "step": 12298 }, { "epoch": 1.87652587890625e-05, "step": 12298, "training_step_time": 0.19980883598327637 }, { "epoch": 1.876678466796875e-05, "model_forward_time": 0.02475738525390625, "step": 12299 }, { "epoch": 1.876678466796875e-05, "step": 12299, "training_step_time": 0.13838839530944824 }, { "epoch": 1.8768310546875e-05, "grad_norm": 0.3325486481189728, "learning_rate": 6.855986244591104e-05, "loss": 0.014, "step": 12300 }, { "epoch": 1.8768310546875e-05, "model_forward_time": 0.024495363235473633, "step": 12300 }, { "epoch": 1.8768310546875e-05, "step": 12300, "training_step_time": 0.1112680435180664 }, { "epoch": 1.876983642578125e-05, "model_forward_time": 0.025615215301513672, "step": 12301 }, { "epoch": 1.876983642578125e-05, "step": 12301, "training_step_time": 0.10787606239318848 }, { "epoch": 1.87713623046875e-05, "model_forward_time": 0.025152921676635742, "step": 12302 }, { "epoch": 1.87713623046875e-05, "step": 12302, "training_step_time": 0.19803667068481445 }, { "epoch": 1.877288818359375e-05, "model_forward_time": 0.024358510971069336, "step": 12303 }, { "epoch": 1.877288818359375e-05, "step": 12303, "training_step_time": 0.10419797897338867 }, { "epoch": 1.87744140625e-05, "model_forward_time": 0.02458047866821289, "step": 12304 }, { "epoch": 1.87744140625e-05, "step": 12304, "training_step_time": 0.1511688232421875 }, { "epoch": 1.877593994140625e-05, "model_forward_time": 0.02506422996520996, "step": 12305 }, { "epoch": 1.877593994140625e-05, "step": 12305, "training_step_time": 0.11256766319274902 }, { "epoch": 1.87774658203125e-05, "model_forward_time": 0.024837017059326172, "step": 12306 }, { "epoch": 1.87774658203125e-05, "step": 12306, "training_step_time": 0.21063733100891113 }, { "epoch": 1.877899169921875e-05, "model_forward_time": 0.029025793075561523, "step": 12307 }, { "epoch": 1.877899169921875e-05, "step": 12307, "training_step_time": 0.10807490348815918 }, { "epoch": 1.8780517578125e-05, "model_forward_time": 0.02538013458251953, "step": 12308 }, { "epoch": 1.8780517578125e-05, "step": 12308, "training_step_time": 0.10535120964050293 }, { "epoch": 1.878204345703125e-05, "model_forward_time": 0.025409698486328125, "step": 12309 }, { "epoch": 1.878204345703125e-05, "step": 12309, "training_step_time": 0.12454891204833984 }, { "epoch": 1.87835693359375e-05, "grad_norm": 0.23103386163711548, "learning_rate": 6.850867331105967e-05, "loss": 0.0171, "step": 12310 }, { "epoch": 1.87835693359375e-05, "model_forward_time": 0.02554154396057129, "step": 12310 }, { "epoch": 1.87835693359375e-05, "step": 12310, "training_step_time": 0.10951566696166992 }, { "epoch": 1.878509521484375e-05, "model_forward_time": 0.025292634963989258, "step": 12311 }, { "epoch": 1.878509521484375e-05, "step": 12311, "training_step_time": 0.11287808418273926 }, { "epoch": 1.878662109375e-05, "model_forward_time": 0.02538466453552246, "step": 12312 }, { "epoch": 1.878662109375e-05, "step": 12312, "training_step_time": 0.10770463943481445 }, { "epoch": 1.878814697265625e-05, "model_forward_time": 0.02527475357055664, "step": 12313 }, { "epoch": 1.878814697265625e-05, "step": 12313, "training_step_time": 0.10830116271972656 }, { "epoch": 1.87896728515625e-05, "model_forward_time": 0.025318145751953125, "step": 12314 }, { "epoch": 1.87896728515625e-05, "step": 12314, "training_step_time": 0.10912108421325684 }, { "epoch": 1.879119873046875e-05, "model_forward_time": 0.024888277053833008, "step": 12315 }, { "epoch": 1.879119873046875e-05, "step": 12315, "training_step_time": 0.1120748519897461 }, { "epoch": 1.8792724609375e-05, "model_forward_time": 0.025374174118041992, "step": 12316 }, { "epoch": 1.8792724609375e-05, "step": 12316, "training_step_time": 0.11240935325622559 }, { "epoch": 1.879425048828125e-05, "model_forward_time": 0.025285720825195312, "step": 12317 }, { "epoch": 1.879425048828125e-05, "step": 12317, "training_step_time": 0.10642027854919434 }, { "epoch": 1.87957763671875e-05, "model_forward_time": 0.025480985641479492, "step": 12318 }, { "epoch": 1.87957763671875e-05, "step": 12318, "training_step_time": 0.10683417320251465 }, { "epoch": 1.879730224609375e-05, "model_forward_time": 0.02534770965576172, "step": 12319 }, { "epoch": 1.879730224609375e-05, "step": 12319, "training_step_time": 0.10824346542358398 }, { "epoch": 1.8798828125e-05, "grad_norm": 0.3637978136539459, "learning_rate": 6.845746168642497e-05, "loss": 0.0225, "step": 12320 }, { "epoch": 1.8798828125e-05, "model_forward_time": 0.02551412582397461, "step": 12320 }, { "epoch": 1.8798828125e-05, "step": 12320, "training_step_time": 0.1079092025756836 }, { "epoch": 1.880035400390625e-05, "model_forward_time": 0.025127172470092773, "step": 12321 }, { "epoch": 1.880035400390625e-05, "step": 12321, "training_step_time": 0.10747361183166504 }, { "epoch": 1.88018798828125e-05, "model_forward_time": 0.025167465209960938, "step": 12322 }, { "epoch": 1.88018798828125e-05, "step": 12322, "training_step_time": 0.10764551162719727 }, { "epoch": 1.880340576171875e-05, "model_forward_time": 0.025203943252563477, "step": 12323 }, { "epoch": 1.880340576171875e-05, "step": 12323, "training_step_time": 0.11088705062866211 }, { "epoch": 1.8804931640625e-05, "model_forward_time": 0.025739431381225586, "step": 12324 }, { "epoch": 1.8804931640625e-05, "step": 12324, "training_step_time": 0.10899090766906738 }, { "epoch": 1.880645751953125e-05, "model_forward_time": 0.025204896926879883, "step": 12325 }, { "epoch": 1.880645751953125e-05, "step": 12325, "training_step_time": 0.1083824634552002 }, { "epoch": 1.88079833984375e-05, "model_forward_time": 0.02506232261657715, "step": 12326 }, { "epoch": 1.88079833984375e-05, "step": 12326, "training_step_time": 0.10599541664123535 }, { "epoch": 1.880950927734375e-05, "model_forward_time": 0.025223731994628906, "step": 12327 }, { "epoch": 1.880950927734375e-05, "step": 12327, "training_step_time": 0.10683751106262207 }, { "epoch": 1.881103515625e-05, "model_forward_time": 0.026131391525268555, "step": 12328 }, { "epoch": 1.881103515625e-05, "step": 12328, "training_step_time": 0.10608911514282227 }, { "epoch": 1.881256103515625e-05, "model_forward_time": 0.02544426918029785, "step": 12329 }, { "epoch": 1.881256103515625e-05, "step": 12329, "training_step_time": 0.10672307014465332 }, { "epoch": 1.88140869140625e-05, "grad_norm": 0.1840587705373764, "learning_rate": 6.840622763423391e-05, "loss": 0.0171, "step": 12330 }, { "epoch": 1.88140869140625e-05, "model_forward_time": 0.025277137756347656, "step": 12330 }, { "epoch": 1.88140869140625e-05, "step": 12330, "training_step_time": 0.18488264083862305 }, { "epoch": 1.881561279296875e-05, "model_forward_time": 0.02463221549987793, "step": 12331 }, { "epoch": 1.881561279296875e-05, "step": 12331, "training_step_time": 0.10830545425415039 }, { "epoch": 1.8817138671875e-05, "model_forward_time": 0.025836467742919922, "step": 12332 }, { "epoch": 1.8817138671875e-05, "step": 12332, "training_step_time": 0.1074824333190918 }, { "epoch": 1.881866455078125e-05, "model_forward_time": 0.025452852249145508, "step": 12333 }, { "epoch": 1.881866455078125e-05, "step": 12333, "training_step_time": 0.12705016136169434 }, { "epoch": 1.88201904296875e-05, "model_forward_time": 0.02521491050720215, "step": 12334 }, { "epoch": 1.88201904296875e-05, "step": 12334, "training_step_time": 0.12420487403869629 }, { "epoch": 1.882171630859375e-05, "model_forward_time": 0.02533435821533203, "step": 12335 }, { "epoch": 1.882171630859375e-05, "step": 12335, "training_step_time": 0.11873888969421387 }, { "epoch": 1.88232421875e-05, "model_forward_time": 0.02522873878479004, "step": 12336 }, { "epoch": 1.88232421875e-05, "step": 12336, "training_step_time": 0.11315345764160156 }, { "epoch": 1.882476806640625e-05, "model_forward_time": 0.02526116371154785, "step": 12337 }, { "epoch": 1.882476806640625e-05, "step": 12337, "training_step_time": 0.11102008819580078 }, { "epoch": 1.88262939453125e-05, "model_forward_time": 0.025408029556274414, "step": 12338 }, { "epoch": 1.88262939453125e-05, "step": 12338, "training_step_time": 0.10679817199707031 }, { "epoch": 1.882781982421875e-05, "model_forward_time": 0.0257565975189209, "step": 12339 }, { "epoch": 1.882781982421875e-05, "step": 12339, "training_step_time": 0.10509538650512695 }, { "epoch": 1.8829345703125e-05, "grad_norm": 0.34008651971817017, "learning_rate": 6.835497121674066e-05, "loss": 0.0246, "step": 12340 }, { "epoch": 1.8829345703125e-05, "model_forward_time": 0.024199962615966797, "step": 12340 }, { "epoch": 1.8829345703125e-05, "step": 12340, "training_step_time": 0.1422128677368164 }, { "epoch": 1.883087158203125e-05, "model_forward_time": 0.023710250854492188, "step": 12341 }, { "epoch": 1.883087158203125e-05, "step": 12341, "training_step_time": 0.16194796562194824 }, { "epoch": 1.88323974609375e-05, "model_forward_time": 0.02458047866821289, "step": 12342 }, { "epoch": 1.88323974609375e-05, "step": 12342, "training_step_time": 0.11478424072265625 }, { "epoch": 1.883392333984375e-05, "model_forward_time": 0.024599313735961914, "step": 12343 }, { "epoch": 1.883392333984375e-05, "step": 12343, "training_step_time": 0.1451280117034912 }, { "epoch": 1.883544921875e-05, "model_forward_time": 0.024982690811157227, "step": 12344 }, { "epoch": 1.883544921875e-05, "step": 12344, "training_step_time": 0.21150898933410645 }, { "epoch": 1.883697509765625e-05, "model_forward_time": 0.025720596313476562, "step": 12345 }, { "epoch": 1.883697509765625e-05, "step": 12345, "training_step_time": 0.13385963439941406 }, { "epoch": 1.88385009765625e-05, "model_forward_time": 0.02490544319152832, "step": 12346 }, { "epoch": 1.88385009765625e-05, "step": 12346, "training_step_time": 0.1907804012298584 }, { "epoch": 1.884002685546875e-05, "model_forward_time": 0.023786544799804688, "step": 12347 }, { "epoch": 1.884002685546875e-05, "step": 12347, "training_step_time": 0.12320876121520996 }, { "epoch": 1.8841552734375e-05, "model_forward_time": 0.02321457862854004, "step": 12348 }, { "epoch": 1.8841552734375e-05, "step": 12348, "training_step_time": 0.11755156517028809 }, { "epoch": 1.884307861328125e-05, "model_forward_time": 0.02467942237854004, "step": 12349 }, { "epoch": 1.884307861328125e-05, "step": 12349, "training_step_time": 0.20057892799377441 }, { "epoch": 1.88446044921875e-05, "grad_norm": 0.37574952840805054, "learning_rate": 6.830369249622662e-05, "loss": 0.0208, "step": 12350 }, { "epoch": 1.88446044921875e-05, "model_forward_time": 0.02397608757019043, "step": 12350 }, { "epoch": 1.88446044921875e-05, "step": 12350, "training_step_time": 0.12459111213684082 }, { "epoch": 1.884613037109375e-05, "model_forward_time": 0.024800777435302734, "step": 12351 }, { "epoch": 1.884613037109375e-05, "step": 12351, "training_step_time": 0.13967084884643555 }, { "epoch": 1.884765625e-05, "model_forward_time": 0.026838064193725586, "step": 12352 }, { "epoch": 1.884765625e-05, "step": 12352, "training_step_time": 0.10840272903442383 }, { "epoch": 1.884918212890625e-05, "model_forward_time": 0.02546525001525879, "step": 12353 }, { "epoch": 1.884918212890625e-05, "step": 12353, "training_step_time": 0.15772199630737305 }, { "epoch": 1.88507080078125e-05, "model_forward_time": 0.025690317153930664, "step": 12354 }, { "epoch": 1.88507080078125e-05, "step": 12354, "training_step_time": 0.1327216625213623 }, { "epoch": 1.885223388671875e-05, "model_forward_time": 0.024763107299804688, "step": 12355 }, { "epoch": 1.885223388671875e-05, "step": 12355, "training_step_time": 0.11109471321105957 }, { "epoch": 1.8853759765625e-05, "model_forward_time": 0.025065183639526367, "step": 12356 }, { "epoch": 1.8853759765625e-05, "step": 12356, "training_step_time": 0.10661697387695312 }, { "epoch": 1.885528564453125e-05, "model_forward_time": 0.025434494018554688, "step": 12357 }, { "epoch": 1.885528564453125e-05, "step": 12357, "training_step_time": 0.10590958595275879 }, { "epoch": 1.88568115234375e-05, "model_forward_time": 0.025129079818725586, "step": 12358 }, { "epoch": 1.88568115234375e-05, "step": 12358, "training_step_time": 0.10525918006896973 }, { "epoch": 1.885833740234375e-05, "model_forward_time": 0.025310039520263672, "step": 12359 }, { "epoch": 1.885833740234375e-05, "step": 12359, "training_step_time": 0.10958719253540039 }, { "epoch": 1.885986328125e-05, "grad_norm": 0.19731856882572174, "learning_rate": 6.825239153500029e-05, "loss": 0.0218, "step": 12360 }, { "epoch": 1.885986328125e-05, "model_forward_time": 0.025300264358520508, "step": 12360 }, { "epoch": 1.885986328125e-05, "step": 12360, "training_step_time": 0.10790109634399414 }, { "epoch": 1.886138916015625e-05, "model_forward_time": 0.025275468826293945, "step": 12361 }, { "epoch": 1.886138916015625e-05, "step": 12361, "training_step_time": 0.10804510116577148 }, { "epoch": 1.88629150390625e-05, "model_forward_time": 0.02518486976623535, "step": 12362 }, { "epoch": 1.88629150390625e-05, "step": 12362, "training_step_time": 0.10567140579223633 }, { "epoch": 1.886444091796875e-05, "model_forward_time": 0.025133132934570312, "step": 12363 }, { "epoch": 1.886444091796875e-05, "step": 12363, "training_step_time": 0.10894584655761719 }, { "epoch": 1.8865966796875e-05, "model_forward_time": 0.02524566650390625, "step": 12364 }, { "epoch": 1.8865966796875e-05, "step": 12364, "training_step_time": 0.10762310028076172 }, { "epoch": 1.886749267578125e-05, "model_forward_time": 0.02541327476501465, "step": 12365 }, { "epoch": 1.886749267578125e-05, "step": 12365, "training_step_time": 0.1047513484954834 }, { "epoch": 1.88690185546875e-05, "model_forward_time": 0.025325536727905273, "step": 12366 }, { "epoch": 1.88690185546875e-05, "step": 12366, "training_step_time": 0.10537528991699219 }, { "epoch": 1.887054443359375e-05, "model_forward_time": 0.025299787521362305, "step": 12367 }, { "epoch": 1.887054443359375e-05, "step": 12367, "training_step_time": 0.10898709297180176 }, { "epoch": 1.88720703125e-05, "model_forward_time": 0.02564525604248047, "step": 12368 }, { "epoch": 1.88720703125e-05, "step": 12368, "training_step_time": 0.10590147972106934 }, { "epoch": 1.887359619140625e-05, "model_forward_time": 0.02532482147216797, "step": 12369 }, { "epoch": 1.887359619140625e-05, "step": 12369, "training_step_time": 0.10904717445373535 }, { "epoch": 1.88751220703125e-05, "grad_norm": 0.29167595505714417, "learning_rate": 6.820106839539715e-05, "loss": 0.0181, "step": 12370 }, { "epoch": 1.88751220703125e-05, "model_forward_time": 0.025223731994628906, "step": 12370 }, { "epoch": 1.88751220703125e-05, "step": 12370, "training_step_time": 0.10453557968139648 }, { "epoch": 1.887664794921875e-05, "model_forward_time": 0.02516651153564453, "step": 12371 }, { "epoch": 1.887664794921875e-05, "step": 12371, "training_step_time": 0.10536074638366699 }, { "epoch": 1.8878173828125e-05, "model_forward_time": 0.02500629425048828, "step": 12372 }, { "epoch": 1.8878173828125e-05, "step": 12372, "training_step_time": 0.10740852355957031 }, { "epoch": 1.887969970703125e-05, "model_forward_time": 0.0249783992767334, "step": 12373 }, { "epoch": 1.887969970703125e-05, "step": 12373, "training_step_time": 0.10741233825683594 }, { "epoch": 1.88812255859375e-05, "model_forward_time": 0.025302648544311523, "step": 12374 }, { "epoch": 1.88812255859375e-05, "step": 12374, "training_step_time": 0.10703158378601074 }, { "epoch": 1.888275146484375e-05, "model_forward_time": 0.02522134780883789, "step": 12375 }, { "epoch": 1.888275146484375e-05, "step": 12375, "training_step_time": 0.10625791549682617 }, { "epoch": 1.888427734375e-05, "model_forward_time": 0.02533245086669922, "step": 12376 }, { "epoch": 1.888427734375e-05, "step": 12376, "training_step_time": 0.1524808406829834 }, { "epoch": 1.888580322265625e-05, "model_forward_time": 0.024506807327270508, "step": 12377 }, { "epoch": 1.888580322265625e-05, "step": 12377, "training_step_time": 0.11447715759277344 }, { "epoch": 1.88873291015625e-05, "model_forward_time": 0.02473306655883789, "step": 12378 }, { "epoch": 1.88873291015625e-05, "step": 12378, "training_step_time": 0.14330244064331055 }, { "epoch": 1.888885498046875e-05, "model_forward_time": 0.024526357650756836, "step": 12379 }, { "epoch": 1.888885498046875e-05, "step": 12379, "training_step_time": 0.12606072425842285 }, { "epoch": 1.8890380859375e-05, "grad_norm": 0.4402039647102356, "learning_rate": 6.814972313977967e-05, "loss": 0.018, "step": 12380 }, { "epoch": 1.8890380859375e-05, "model_forward_time": 0.0242919921875, "step": 12380 }, { "epoch": 1.8890380859375e-05, "step": 12380, "training_step_time": 0.12673258781433105 }, { "epoch": 1.889190673828125e-05, "model_forward_time": 0.024732589721679688, "step": 12381 }, { "epoch": 1.889190673828125e-05, "step": 12381, "training_step_time": 0.10816407203674316 }, { "epoch": 1.88934326171875e-05, "model_forward_time": 0.025115251541137695, "step": 12382 }, { "epoch": 1.88934326171875e-05, "step": 12382, "training_step_time": 0.12321949005126953 }, { "epoch": 1.889495849609375e-05, "model_forward_time": 0.02582526206970215, "step": 12383 }, { "epoch": 1.889495849609375e-05, "step": 12383, "training_step_time": 0.10942721366882324 }, { "epoch": 1.8896484375e-05, "model_forward_time": 0.026068687438964844, "step": 12384 }, { "epoch": 1.8896484375e-05, "step": 12384, "training_step_time": 0.10623717308044434 }, { "epoch": 1.889801025390625e-05, "model_forward_time": 0.02541971206665039, "step": 12385 }, { "epoch": 1.889801025390625e-05, "step": 12385, "training_step_time": 0.10899567604064941 }, { "epoch": 1.88995361328125e-05, "model_forward_time": 0.02564263343811035, "step": 12386 }, { "epoch": 1.88995361328125e-05, "step": 12386, "training_step_time": 0.11678028106689453 }, { "epoch": 1.890106201171875e-05, "model_forward_time": 0.02530193328857422, "step": 12387 }, { "epoch": 1.890106201171875e-05, "step": 12387, "training_step_time": 0.16473865509033203 }, { "epoch": 1.8902587890625e-05, "model_forward_time": 0.0247039794921875, "step": 12388 }, { "epoch": 1.8902587890625e-05, "step": 12388, "training_step_time": 0.19936466217041016 }, { "epoch": 1.890411376953125e-05, "model_forward_time": 0.02464914321899414, "step": 12389 }, { "epoch": 1.890411376953125e-05, "step": 12389, "training_step_time": 0.1688237190246582 }, { "epoch": 1.89056396484375e-05, "grad_norm": 0.3412000834941864, "learning_rate": 6.809835583053715e-05, "loss": 0.0229, "step": 12390 }, { "epoch": 1.89056396484375e-05, "model_forward_time": 0.024265527725219727, "step": 12390 }, { "epoch": 1.89056396484375e-05, "step": 12390, "training_step_time": 0.19477415084838867 }, { "epoch": 1.890716552734375e-05, "model_forward_time": 0.024587392807006836, "step": 12391 }, { "epoch": 1.890716552734375e-05, "step": 12391, "training_step_time": 0.14510011672973633 }, { "epoch": 1.890869140625e-05, "model_forward_time": 0.02468085289001465, "step": 12392 }, { "epoch": 1.890869140625e-05, "step": 12392, "training_step_time": 0.13998651504516602 }, { "epoch": 1.891021728515625e-05, "model_forward_time": 0.0245053768157959, "step": 12393 }, { "epoch": 1.891021728515625e-05, "step": 12393, "training_step_time": 0.16028904914855957 }, { "epoch": 1.89117431640625e-05, "model_forward_time": 0.024384737014770508, "step": 12394 }, { "epoch": 1.89117431640625e-05, "step": 12394, "training_step_time": 0.12204265594482422 }, { "epoch": 1.891326904296875e-05, "model_forward_time": 0.024790287017822266, "step": 12395 }, { "epoch": 1.891326904296875e-05, "step": 12395, "training_step_time": 0.15529417991638184 }, { "epoch": 1.8914794921875e-05, "model_forward_time": 0.024407148361206055, "step": 12396 }, { "epoch": 1.8914794921875e-05, "step": 12396, "training_step_time": 0.10825872421264648 }, { "epoch": 1.891632080078125e-05, "model_forward_time": 0.02492070198059082, "step": 12397 }, { "epoch": 1.891632080078125e-05, "step": 12397, "training_step_time": 0.12683939933776855 }, { "epoch": 1.89178466796875e-05, "model_forward_time": 0.025208711624145508, "step": 12398 }, { "epoch": 1.89178466796875e-05, "step": 12398, "training_step_time": 0.10716462135314941 }, { "epoch": 1.891937255859375e-05, "model_forward_time": 0.025267362594604492, "step": 12399 }, { "epoch": 1.891937255859375e-05, "step": 12399, "training_step_time": 0.14610719680786133 }, { "epoch": 1.89208984375e-05, "grad_norm": 0.4185691773891449, "learning_rate": 6.804696653008575e-05, "loss": 0.026, "step": 12400 }, { "epoch": 1.89208984375e-05, "model_forward_time": 0.02498483657836914, "step": 12400 }, { "epoch": 1.89208984375e-05, "step": 12400, "training_step_time": 0.12882542610168457 }, { "epoch": 1.892242431640625e-05, "model_forward_time": 0.02451777458190918, "step": 12401 }, { "epoch": 1.892242431640625e-05, "step": 12401, "training_step_time": 0.11398959159851074 }, { "epoch": 1.89239501953125e-05, "model_forward_time": 0.025174617767333984, "step": 12402 }, { "epoch": 1.89239501953125e-05, "step": 12402, "training_step_time": 0.10223889350891113 }, { "epoch": 1.892547607421875e-05, "model_forward_time": 0.025710105895996094, "step": 12403 }, { "epoch": 1.892547607421875e-05, "step": 12403, "training_step_time": 0.11376714706420898 }, { "epoch": 1.8927001953125e-05, "model_forward_time": 0.02510380744934082, "step": 12404 }, { "epoch": 1.8927001953125e-05, "step": 12404, "training_step_time": 0.11000561714172363 }, { "epoch": 1.892852783203125e-05, "model_forward_time": 0.025359392166137695, "step": 12405 }, { "epoch": 1.892852783203125e-05, "step": 12405, "training_step_time": 0.1058349609375 }, { "epoch": 1.89300537109375e-05, "model_forward_time": 0.025488853454589844, "step": 12406 }, { "epoch": 1.89300537109375e-05, "step": 12406, "training_step_time": 0.10675549507141113 }, { "epoch": 1.893157958984375e-05, "model_forward_time": 0.025101184844970703, "step": 12407 }, { "epoch": 1.893157958984375e-05, "step": 12407, "training_step_time": 0.10515546798706055 }, { "epoch": 1.893310546875e-05, "model_forward_time": 0.02524423599243164, "step": 12408 }, { "epoch": 1.893310546875e-05, "step": 12408, "training_step_time": 0.10808348655700684 }, { "epoch": 1.893463134765625e-05, "model_forward_time": 0.025549650192260742, "step": 12409 }, { "epoch": 1.893463134765625e-05, "step": 12409, "training_step_time": 1.0445003509521484 }, { "epoch": 1.89361572265625e-05, "grad_norm": 0.48495861887931824, "learning_rate": 6.799555530086828e-05, "loss": 0.0246, "step": 12410 }, { "epoch": 1.89361572265625e-05, "model_forward_time": 0.02288508415222168, "step": 12410 }, { "epoch": 1.89361572265625e-05, "step": 12410, "training_step_time": 0.09758305549621582 }, { "epoch": 1.893768310546875e-05, "model_forward_time": 0.024596691131591797, "step": 12411 }, { "epoch": 1.893768310546875e-05, "step": 12411, "training_step_time": 0.10153460502624512 }, { "epoch": 1.8939208984375e-05, "model_forward_time": 0.025188922882080078, "step": 12412 }, { "epoch": 1.8939208984375e-05, "step": 12412, "training_step_time": 0.104034423828125 }, { "epoch": 1.894073486328125e-05, "model_forward_time": 0.025454998016357422, "step": 12413 }, { "epoch": 1.894073486328125e-05, "step": 12413, "training_step_time": 0.1116485595703125 }, { "epoch": 1.89422607421875e-05, "model_forward_time": 0.02714228630065918, "step": 12414 }, { "epoch": 1.89422607421875e-05, "step": 12414, "training_step_time": 0.10633015632629395 }, { "epoch": 1.894378662109375e-05, "model_forward_time": 0.02537226676940918, "step": 12415 }, { "epoch": 1.894378662109375e-05, "step": 12415, "training_step_time": 0.17282557487487793 }, { "epoch": 1.89453125e-05, "model_forward_time": 0.025146961212158203, "step": 12416 }, { "epoch": 1.89453125e-05, "step": 12416, "training_step_time": 0.12714505195617676 }, { "epoch": 1.894683837890625e-05, "model_forward_time": 0.024581432342529297, "step": 12417 }, { "epoch": 1.894683837890625e-05, "step": 12417, "training_step_time": 0.12929415702819824 }, { "epoch": 1.89483642578125e-05, "model_forward_time": 0.024431943893432617, "step": 12418 }, { "epoch": 1.89483642578125e-05, "step": 12418, "training_step_time": 0.12660908699035645 }, { "epoch": 1.894989013671875e-05, "model_forward_time": 0.02477717399597168, "step": 12419 }, { "epoch": 1.894989013671875e-05, "step": 12419, "training_step_time": 0.12832999229431152 }, { "epoch": 1.8951416015625e-05, "grad_norm": 0.317827433347702, "learning_rate": 6.794412220535426e-05, "loss": 0.0161, "step": 12420 }, { "epoch": 1.8951416015625e-05, "model_forward_time": 0.024704456329345703, "step": 12420 }, { "epoch": 1.8951416015625e-05, "step": 12420, "training_step_time": 0.2005174160003662 }, { "epoch": 1.895294189453125e-05, "model_forward_time": 0.024624347686767578, "step": 12421 }, { "epoch": 1.895294189453125e-05, "step": 12421, "training_step_time": 0.12208342552185059 }, { "epoch": 1.89544677734375e-05, "model_forward_time": 0.024376630783081055, "step": 12422 }, { "epoch": 1.89544677734375e-05, "step": 12422, "training_step_time": 0.11449456214904785 }, { "epoch": 1.895599365234375e-05, "model_forward_time": 0.025345325469970703, "step": 12423 }, { "epoch": 1.895599365234375e-05, "step": 12423, "training_step_time": 0.19217419624328613 }, { "epoch": 1.895751953125e-05, "model_forward_time": 0.0243682861328125, "step": 12424 }, { "epoch": 1.895751953125e-05, "step": 12424, "training_step_time": 0.12922215461730957 }, { "epoch": 1.895904541015625e-05, "model_forward_time": 0.024464130401611328, "step": 12425 }, { "epoch": 1.895904541015625e-05, "step": 12425, "training_step_time": 0.10964369773864746 }, { "epoch": 1.89605712890625e-05, "model_forward_time": 0.02536153793334961, "step": 12426 }, { "epoch": 1.89605712890625e-05, "step": 12426, "training_step_time": 0.1135101318359375 }, { "epoch": 1.896209716796875e-05, "model_forward_time": 0.0253145694732666, "step": 12427 }, { "epoch": 1.896209716796875e-05, "step": 12427, "training_step_time": 0.13592147827148438 }, { "epoch": 1.8963623046875e-05, "model_forward_time": 0.02542877197265625, "step": 12428 }, { "epoch": 1.8963623046875e-05, "step": 12428, "training_step_time": 0.11644196510314941 }, { "epoch": 1.896514892578125e-05, "model_forward_time": 0.025406837463378906, "step": 12429 }, { "epoch": 1.896514892578125e-05, "step": 12429, "training_step_time": 0.20539259910583496 }, { "epoch": 1.89666748046875e-05, "grad_norm": 0.4247405529022217, "learning_rate": 6.789266730603974e-05, "loss": 0.0322, "step": 12430 }, { "epoch": 1.89666748046875e-05, "model_forward_time": 0.024506330490112305, "step": 12430 }, { "epoch": 1.89666748046875e-05, "step": 12430, "training_step_time": 0.10954165458679199 }, { "epoch": 1.896820068359375e-05, "model_forward_time": 0.02474379539489746, "step": 12431 }, { "epoch": 1.896820068359375e-05, "step": 12431, "training_step_time": 0.10731339454650879 }, { "epoch": 1.89697265625e-05, "model_forward_time": 0.026944875717163086, "step": 12432 }, { "epoch": 1.89697265625e-05, "step": 12432, "training_step_time": 0.16124701499938965 }, { "epoch": 1.897125244140625e-05, "model_forward_time": 0.024966001510620117, "step": 12433 }, { "epoch": 1.897125244140625e-05, "step": 12433, "training_step_time": 0.2124950885772705 }, { "epoch": 1.89727783203125e-05, "model_forward_time": 0.024837255477905273, "step": 12434 }, { "epoch": 1.89727783203125e-05, "step": 12434, "training_step_time": 0.12279462814331055 }, { "epoch": 1.897430419921875e-05, "model_forward_time": 0.023987293243408203, "step": 12435 }, { "epoch": 1.897430419921875e-05, "step": 12435, "training_step_time": 0.10796785354614258 }, { "epoch": 1.8975830078125e-05, "model_forward_time": 0.02544426918029785, "step": 12436 }, { "epoch": 1.8975830078125e-05, "step": 12436, "training_step_time": 0.10940217971801758 }, { "epoch": 1.897735595703125e-05, "model_forward_time": 0.025070667266845703, "step": 12437 }, { "epoch": 1.897735595703125e-05, "step": 12437, "training_step_time": 0.16007423400878906 }, { "epoch": 1.89788818359375e-05, "model_forward_time": 0.02470707893371582, "step": 12438 }, { "epoch": 1.89788818359375e-05, "step": 12438, "training_step_time": 0.1268634796142578 }, { "epoch": 1.898040771484375e-05, "model_forward_time": 0.02463555335998535, "step": 12439 }, { "epoch": 1.898040771484375e-05, "step": 12439, "training_step_time": 0.11903238296508789 }, { "epoch": 1.898193359375e-05, "grad_norm": 0.5766150951385498, "learning_rate": 6.784119066544727e-05, "loss": 0.0265, "step": 12440 }, { "epoch": 1.898193359375e-05, "model_forward_time": 0.024762868881225586, "step": 12440 }, { "epoch": 1.898193359375e-05, "step": 12440, "training_step_time": 0.10688614845275879 }, { "epoch": 1.898345947265625e-05, "model_forward_time": 0.02472209930419922, "step": 12441 }, { "epoch": 1.898345947265625e-05, "step": 12441, "training_step_time": 0.10756731033325195 }, { "epoch": 1.89849853515625e-05, "model_forward_time": 0.02513885498046875, "step": 12442 }, { "epoch": 1.89849853515625e-05, "step": 12442, "training_step_time": 0.10503816604614258 }, { "epoch": 1.898651123046875e-05, "model_forward_time": 0.025223255157470703, "step": 12443 }, { "epoch": 1.898651123046875e-05, "step": 12443, "training_step_time": 0.10659146308898926 }, { "epoch": 1.8988037109375e-05, "model_forward_time": 0.02519702911376953, "step": 12444 }, { "epoch": 1.8988037109375e-05, "step": 12444, "training_step_time": 0.1053168773651123 }, { "epoch": 1.898956298828125e-05, "model_forward_time": 0.02677631378173828, "step": 12445 }, { "epoch": 1.898956298828125e-05, "step": 12445, "training_step_time": 0.12026596069335938 }, { "epoch": 1.89910888671875e-05, "model_forward_time": 0.026805877685546875, "step": 12446 }, { "epoch": 1.89910888671875e-05, "step": 12446, "training_step_time": 0.11909794807434082 }, { "epoch": 1.899261474609375e-05, "model_forward_time": 0.025559425354003906, "step": 12447 }, { "epoch": 1.899261474609375e-05, "step": 12447, "training_step_time": 0.1145484447479248 }, { "epoch": 1.8994140625e-05, "model_forward_time": 0.02417159080505371, "step": 12448 }, { "epoch": 1.8994140625e-05, "step": 12448, "training_step_time": 0.1136021614074707 }, { "epoch": 1.899566650390625e-05, "model_forward_time": 0.0243222713470459, "step": 12449 }, { "epoch": 1.899566650390625e-05, "step": 12449, "training_step_time": 0.11375093460083008 }, { "epoch": 1.89971923828125e-05, "grad_norm": 0.4734836518764496, "learning_rate": 6.778969234612584e-05, "loss": 0.0205, "step": 12450 }, { "epoch": 1.89971923828125e-05, "model_forward_time": 0.025812864303588867, "step": 12450 }, { "epoch": 1.89971923828125e-05, "step": 12450, "training_step_time": 0.10823345184326172 }, { "epoch": 1.899871826171875e-05, "model_forward_time": 0.025609970092773438, "step": 12451 }, { "epoch": 1.899871826171875e-05, "step": 12451, "training_step_time": 0.11190342903137207 }, { "epoch": 1.9000244140625e-05, "model_forward_time": 0.025358915328979492, "step": 12452 }, { "epoch": 1.9000244140625e-05, "step": 12452, "training_step_time": 0.10784769058227539 }, { "epoch": 1.900177001953125e-05, "model_forward_time": 0.025796890258789062, "step": 12453 }, { "epoch": 1.900177001953125e-05, "step": 12453, "training_step_time": 0.10739588737487793 }, { "epoch": 1.90032958984375e-05, "model_forward_time": 0.025723695755004883, "step": 12454 }, { "epoch": 1.90032958984375e-05, "step": 12454, "training_step_time": 0.10760998725891113 }, { "epoch": 1.900482177734375e-05, "model_forward_time": 0.0256805419921875, "step": 12455 }, { "epoch": 1.900482177734375e-05, "step": 12455, "training_step_time": 0.10752534866333008 }, { "epoch": 1.900634765625e-05, "model_forward_time": 0.025437593460083008, "step": 12456 }, { "epoch": 1.900634765625e-05, "step": 12456, "training_step_time": 0.1102454662322998 }, { "epoch": 1.900787353515625e-05, "model_forward_time": 0.025408029556274414, "step": 12457 }, { "epoch": 1.900787353515625e-05, "step": 12457, "training_step_time": 0.10683226585388184 }, { "epoch": 1.90093994140625e-05, "model_forward_time": 0.028138399124145508, "step": 12458 }, { "epoch": 1.90093994140625e-05, "step": 12458, "training_step_time": 0.10922384262084961 }, { "epoch": 1.901092529296875e-05, "model_forward_time": 0.02608513832092285, "step": 12459 }, { "epoch": 1.901092529296875e-05, "step": 12459, "training_step_time": 0.11196565628051758 }, { "epoch": 1.9012451171875e-05, "grad_norm": 0.39418792724609375, "learning_rate": 6.773817241065072e-05, "loss": 0.024, "step": 12460 }, { "epoch": 1.9012451171875e-05, "model_forward_time": 0.024403095245361328, "step": 12460 }, { "epoch": 1.9012451171875e-05, "step": 12460, "training_step_time": 0.10802555084228516 }, { "epoch": 1.901397705078125e-05, "model_forward_time": 0.024373531341552734, "step": 12461 }, { "epoch": 1.901397705078125e-05, "step": 12461, "training_step_time": 0.1495351791381836 }, { "epoch": 1.90155029296875e-05, "model_forward_time": 0.025109291076660156, "step": 12462 }, { "epoch": 1.90155029296875e-05, "step": 12462, "training_step_time": 0.11609768867492676 }, { "epoch": 1.901702880859375e-05, "model_forward_time": 0.02511310577392578, "step": 12463 }, { "epoch": 1.901702880859375e-05, "step": 12463, "training_step_time": 0.10685181617736816 }, { "epoch": 1.90185546875e-05, "model_forward_time": 0.025152921676635742, "step": 12464 }, { "epoch": 1.90185546875e-05, "step": 12464, "training_step_time": 0.12397575378417969 }, { "epoch": 1.902008056640625e-05, "model_forward_time": 0.024967670440673828, "step": 12465 }, { "epoch": 1.902008056640625e-05, "step": 12465, "training_step_time": 0.13022232055664062 }, { "epoch": 1.90216064453125e-05, "model_forward_time": 0.025221586227416992, "step": 12466 }, { "epoch": 1.90216064453125e-05, "step": 12466, "training_step_time": 0.11591005325317383 }, { "epoch": 1.902313232421875e-05, "model_forward_time": 0.025347232818603516, "step": 12467 }, { "epoch": 1.902313232421875e-05, "step": 12467, "training_step_time": 0.11409664154052734 }, { "epoch": 1.9024658203125e-05, "model_forward_time": 0.025150537490844727, "step": 12468 }, { "epoch": 1.9024658203125e-05, "step": 12468, "training_step_time": 0.10904955863952637 }, { "epoch": 1.902618408203125e-05, "model_forward_time": 0.025264263153076172, "step": 12469 }, { "epoch": 1.902618408203125e-05, "step": 12469, "training_step_time": 0.10419416427612305 }, { "epoch": 1.90277099609375e-05, "grad_norm": 0.2917688488960266, "learning_rate": 6.768663092162356e-05, "loss": 0.0172, "step": 12470 }, { "epoch": 1.90277099609375e-05, "model_forward_time": 0.025104999542236328, "step": 12470 }, { "epoch": 1.90277099609375e-05, "step": 12470, "training_step_time": 0.11041045188903809 }, { "epoch": 1.902923583984375e-05, "model_forward_time": 0.02505183219909668, "step": 12471 }, { "epoch": 1.902923583984375e-05, "step": 12471, "training_step_time": 0.1162574291229248 }, { "epoch": 1.903076171875e-05, "model_forward_time": 0.025197505950927734, "step": 12472 }, { "epoch": 1.903076171875e-05, "step": 12472, "training_step_time": 0.20747780799865723 }, { "epoch": 1.903228759765625e-05, "model_forward_time": 0.025223493576049805, "step": 12473 }, { "epoch": 1.903228759765625e-05, "step": 12473, "training_step_time": 0.14615249633789062 }, { "epoch": 1.90338134765625e-05, "model_forward_time": 0.02490234375, "step": 12474 }, { "epoch": 1.90338134765625e-05, "step": 12474, "training_step_time": 0.19183659553527832 }, { "epoch": 1.903533935546875e-05, "model_forward_time": 0.024422407150268555, "step": 12475 }, { "epoch": 1.903533935546875e-05, "step": 12475, "training_step_time": 0.1274125576019287 }, { "epoch": 1.9036865234375e-05, "model_forward_time": 0.024765729904174805, "step": 12476 }, { "epoch": 1.9036865234375e-05, "step": 12476, "training_step_time": 0.10905838012695312 }, { "epoch": 1.903839111328125e-05, "model_forward_time": 0.025257587432861328, "step": 12477 }, { "epoch": 1.903839111328125e-05, "step": 12477, "training_step_time": 0.11559414863586426 }, { "epoch": 1.90399169921875e-05, "model_forward_time": 0.02515101432800293, "step": 12478 }, { "epoch": 1.90399169921875e-05, "step": 12478, "training_step_time": 0.873516321182251 }, { "epoch": 1.904144287109375e-05, "model_forward_time": 0.023021459579467773, "step": 12479 }, { "epoch": 1.904144287109375e-05, "step": 12479, "training_step_time": 0.13878536224365234 }, { "epoch": 1.904296875e-05, "grad_norm": 0.19898393750190735, "learning_rate": 6.763506794167208e-05, "loss": 0.013, "step": 12480 }, { "epoch": 1.904296875e-05, "model_forward_time": 0.02435135841369629, "step": 12480 }, { "epoch": 1.904296875e-05, "step": 12480, "training_step_time": 0.10429859161376953 }, { "epoch": 1.904449462890625e-05, "model_forward_time": 0.024643898010253906, "step": 12481 }, { "epoch": 1.904449462890625e-05, "step": 12481, "training_step_time": 0.10291409492492676 }, { "epoch": 1.90460205078125e-05, "model_forward_time": 0.028352022171020508, "step": 12482 }, { "epoch": 1.90460205078125e-05, "step": 12482, "training_step_time": 0.10936307907104492 }, { "epoch": 1.904754638671875e-05, "model_forward_time": 0.02559375762939453, "step": 12483 }, { "epoch": 1.904754638671875e-05, "step": 12483, "training_step_time": 0.10857033729553223 }, { "epoch": 1.9049072265625e-05, "model_forward_time": 0.02539348602294922, "step": 12484 }, { "epoch": 1.9049072265625e-05, "step": 12484, "training_step_time": 0.10625982284545898 }, { "epoch": 1.905059814453125e-05, "model_forward_time": 0.025647401809692383, "step": 12485 }, { "epoch": 1.905059814453125e-05, "step": 12485, "training_step_time": 0.10619020462036133 }, { "epoch": 1.90521240234375e-05, "model_forward_time": 0.025593996047973633, "step": 12486 }, { "epoch": 1.90521240234375e-05, "step": 12486, "training_step_time": 0.10822057723999023 }, { "epoch": 1.905364990234375e-05, "model_forward_time": 0.0254824161529541, "step": 12487 }, { "epoch": 1.905364990234375e-05, "step": 12487, "training_step_time": 0.10751771926879883 }, { "epoch": 1.905517578125e-05, "model_forward_time": 0.025308609008789062, "step": 12488 }, { "epoch": 1.905517578125e-05, "step": 12488, "training_step_time": 0.1058659553527832 }, { "epoch": 1.905670166015625e-05, "model_forward_time": 0.02500176429748535, "step": 12489 }, { "epoch": 1.905670166015625e-05, "step": 12489, "training_step_time": 0.10617542266845703 }, { "epoch": 1.90582275390625e-05, "grad_norm": 0.37793731689453125, "learning_rate": 6.758348353345014e-05, "loss": 0.0155, "step": 12490 }, { "epoch": 1.90582275390625e-05, "model_forward_time": 0.025348186492919922, "step": 12490 }, { "epoch": 1.90582275390625e-05, "step": 12490, "training_step_time": 0.1160585880279541 }, { "epoch": 1.905975341796875e-05, "model_forward_time": 0.025263071060180664, "step": 12491 }, { "epoch": 1.905975341796875e-05, "step": 12491, "training_step_time": 0.10581755638122559 }, { "epoch": 1.9061279296875e-05, "model_forward_time": 0.02572178840637207, "step": 12492 }, { "epoch": 1.9061279296875e-05, "step": 12492, "training_step_time": 0.10743045806884766 }, { "epoch": 1.906280517578125e-05, "model_forward_time": 0.025246143341064453, "step": 12493 }, { "epoch": 1.906280517578125e-05, "step": 12493, "training_step_time": 0.10716652870178223 }, { "epoch": 1.90643310546875e-05, "model_forward_time": 0.0251767635345459, "step": 12494 }, { "epoch": 1.90643310546875e-05, "step": 12494, "training_step_time": 0.10807228088378906 }, { "epoch": 1.906585693359375e-05, "model_forward_time": 0.02521347999572754, "step": 12495 }, { "epoch": 1.906585693359375e-05, "step": 12495, "training_step_time": 0.10840344429016113 }, { "epoch": 1.90673828125e-05, "model_forward_time": 0.025959491729736328, "step": 12496 }, { "epoch": 1.90673828125e-05, "step": 12496, "training_step_time": 0.11014533042907715 }, { "epoch": 1.906890869140625e-05, "model_forward_time": 0.02523016929626465, "step": 12497 }, { "epoch": 1.906890869140625e-05, "step": 12497, "training_step_time": 0.10666704177856445 }, { "epoch": 1.90704345703125e-05, "model_forward_time": 0.025559663772583008, "step": 12498 }, { "epoch": 1.90704345703125e-05, "step": 12498, "training_step_time": 0.10705804824829102 }, { "epoch": 1.907196044921875e-05, "model_forward_time": 0.028780221939086914, "step": 12499 }, { "epoch": 1.907196044921875e-05, "step": 12499, "training_step_time": 0.11050248146057129 }, { "epoch": 1.9073486328125e-05, "grad_norm": 0.24092067778110504, "learning_rate": 6.753187775963773e-05, "loss": 0.0207, "step": 12500 }, { "epoch": 1.9073486328125e-05, "model_forward_time": 0.025239944458007812, "step": 12500 }, { "epoch": 1.9073486328125e-05, "step": 12500, "training_step_time": 0.10977792739868164 }, { "epoch": 1.907501220703125e-05, "model_forward_time": 0.025148630142211914, "step": 12501 }, { "epoch": 1.907501220703125e-05, "step": 12501, "training_step_time": 0.10672736167907715 }, { "epoch": 1.90765380859375e-05, "model_forward_time": 0.02518010139465332, "step": 12502 }, { "epoch": 1.90765380859375e-05, "step": 12502, "training_step_time": 0.10593819618225098 }, { "epoch": 1.907806396484375e-05, "model_forward_time": 0.025443077087402344, "step": 12503 }, { "epoch": 1.907806396484375e-05, "step": 12503, "training_step_time": 0.19675660133361816 }, { "epoch": 1.907958984375e-05, "model_forward_time": 0.02471303939819336, "step": 12504 }, { "epoch": 1.907958984375e-05, "step": 12504, "training_step_time": 0.11966705322265625 }, { "epoch": 1.908111572265625e-05, "model_forward_time": 0.026642799377441406, "step": 12505 }, { "epoch": 1.908111572265625e-05, "step": 12505, "training_step_time": 0.10705161094665527 }, { "epoch": 1.90826416015625e-05, "model_forward_time": 0.025485992431640625, "step": 12506 }, { "epoch": 1.90826416015625e-05, "step": 12506, "training_step_time": 0.11581897735595703 }, { "epoch": 1.908416748046875e-05, "model_forward_time": 0.026882171630859375, "step": 12507 }, { "epoch": 1.908416748046875e-05, "step": 12507, "training_step_time": 0.13390302658081055 }, { "epoch": 1.9085693359375e-05, "model_forward_time": 0.025402545928955078, "step": 12508 }, { "epoch": 1.9085693359375e-05, "step": 12508, "training_step_time": 0.1069033145904541 }, { "epoch": 1.908721923828125e-05, "model_forward_time": 0.025069713592529297, "step": 12509 }, { "epoch": 1.908721923828125e-05, "step": 12509, "training_step_time": 0.12042713165283203 }, { "epoch": 1.90887451171875e-05, "grad_norm": 0.3779175877571106, "learning_rate": 6.748025068294067e-05, "loss": 0.0332, "step": 12510 }, { "epoch": 1.90887451171875e-05, "model_forward_time": 0.02490520477294922, "step": 12510 }, { "epoch": 1.90887451171875e-05, "step": 12510, "training_step_time": 0.11036229133605957 }, { "epoch": 1.909027099609375e-05, "model_forward_time": 0.025593280792236328, "step": 12511 }, { "epoch": 1.909027099609375e-05, "step": 12511, "training_step_time": 0.10973262786865234 }, { "epoch": 1.9091796875e-05, "model_forward_time": 0.024927377700805664, "step": 12512 }, { "epoch": 1.9091796875e-05, "step": 12512, "training_step_time": 0.1047067642211914 }, { "epoch": 1.909332275390625e-05, "model_forward_time": 0.025954484939575195, "step": 12513 }, { "epoch": 1.909332275390625e-05, "step": 12513, "training_step_time": 0.11730122566223145 }, { "epoch": 1.90948486328125e-05, "model_forward_time": 0.02887725830078125, "step": 12514 }, { "epoch": 1.90948486328125e-05, "step": 12514, "training_step_time": 0.10994791984558105 }, { "epoch": 1.909637451171875e-05, "model_forward_time": 0.025658607482910156, "step": 12515 }, { "epoch": 1.909637451171875e-05, "step": 12515, "training_step_time": 0.12349677085876465 }, { "epoch": 1.9097900390625e-05, "model_forward_time": 0.025382518768310547, "step": 12516 }, { "epoch": 1.9097900390625e-05, "step": 12516, "training_step_time": 0.12978315353393555 }, { "epoch": 1.909942626953125e-05, "model_forward_time": 0.025622844696044922, "step": 12517 }, { "epoch": 1.909942626953125e-05, "step": 12517, "training_step_time": 0.11504077911376953 }, { "epoch": 1.91009521484375e-05, "model_forward_time": 0.025829315185546875, "step": 12518 }, { "epoch": 1.91009521484375e-05, "step": 12518, "training_step_time": 0.11781692504882812 }, { "epoch": 1.910247802734375e-05, "model_forward_time": 0.025525331497192383, "step": 12519 }, { "epoch": 1.910247802734375e-05, "step": 12519, "training_step_time": 0.11563420295715332 }, { "epoch": 1.910400390625e-05, "grad_norm": 0.3728472590446472, "learning_rate": 6.742860236609077e-05, "loss": 0.0192, "step": 12520 }, { "epoch": 1.910400390625e-05, "model_forward_time": 0.025527000427246094, "step": 12520 }, { "epoch": 1.910400390625e-05, "step": 12520, "training_step_time": 0.15437936782836914 }, { "epoch": 1.910552978515625e-05, "model_forward_time": 0.024718046188354492, "step": 12521 }, { "epoch": 1.910552978515625e-05, "step": 12521, "training_step_time": 0.14941644668579102 }, { "epoch": 1.91070556640625e-05, "model_forward_time": 0.024363279342651367, "step": 12522 }, { "epoch": 1.91070556640625e-05, "step": 12522, "training_step_time": 0.11107635498046875 }, { "epoch": 1.910858154296875e-05, "model_forward_time": 0.025347471237182617, "step": 12523 }, { "epoch": 1.910858154296875e-05, "step": 12523, "training_step_time": 0.10961794853210449 }, { "epoch": 1.9110107421875e-05, "model_forward_time": 0.025721311569213867, "step": 12524 }, { "epoch": 1.9110107421875e-05, "step": 12524, "training_step_time": 0.10968255996704102 }, { "epoch": 1.911163330078125e-05, "model_forward_time": 0.0254209041595459, "step": 12525 }, { "epoch": 1.911163330078125e-05, "step": 12525, "training_step_time": 0.12099289894104004 }, { "epoch": 1.91131591796875e-05, "model_forward_time": 0.02539682388305664, "step": 12526 }, { "epoch": 1.91131591796875e-05, "step": 12526, "training_step_time": 0.10647821426391602 }, { "epoch": 1.911468505859375e-05, "model_forward_time": 0.0251924991607666, "step": 12527 }, { "epoch": 1.911468505859375e-05, "step": 12527, "training_step_time": 0.21776556968688965 }, { "epoch": 1.91162109375e-05, "model_forward_time": 0.024297475814819336, "step": 12528 }, { "epoch": 1.91162109375e-05, "step": 12528, "training_step_time": 0.12176632881164551 }, { "epoch": 1.911773681640625e-05, "model_forward_time": 0.02496480941772461, "step": 12529 }, { "epoch": 1.911773681640625e-05, "step": 12529, "training_step_time": 0.10559654235839844 }, { "epoch": 1.91192626953125e-05, "grad_norm": 0.2121896892786026, "learning_rate": 6.737693287184557e-05, "loss": 0.0179, "step": 12530 }, { "epoch": 1.91192626953125e-05, "model_forward_time": 0.02516317367553711, "step": 12530 }, { "epoch": 1.91192626953125e-05, "step": 12530, "training_step_time": 0.1066582202911377 }, { "epoch": 1.912078857421875e-05, "model_forward_time": 0.025281667709350586, "step": 12531 }, { "epoch": 1.912078857421875e-05, "step": 12531, "training_step_time": 0.10891985893249512 }, { "epoch": 1.9122314453125e-05, "model_forward_time": 0.025794029235839844, "step": 12532 }, { "epoch": 1.9122314453125e-05, "step": 12532, "training_step_time": 0.13487768173217773 }, { "epoch": 1.912384033203125e-05, "model_forward_time": 0.025496482849121094, "step": 12533 }, { "epoch": 1.912384033203125e-05, "step": 12533, "training_step_time": 0.18004727363586426 }, { "epoch": 1.91253662109375e-05, "model_forward_time": 0.02506566047668457, "step": 12534 }, { "epoch": 1.91253662109375e-05, "step": 12534, "training_step_time": 0.186295747756958 }, { "epoch": 1.912689208984375e-05, "model_forward_time": 0.02442026138305664, "step": 12535 }, { "epoch": 1.912689208984375e-05, "step": 12535, "training_step_time": 0.17704057693481445 }, { "epoch": 1.912841796875e-05, "model_forward_time": 0.024864912033081055, "step": 12536 }, { "epoch": 1.912841796875e-05, "step": 12536, "training_step_time": 0.1671915054321289 }, { "epoch": 1.912994384765625e-05, "model_forward_time": 0.024955272674560547, "step": 12537 }, { "epoch": 1.912994384765625e-05, "step": 12537, "training_step_time": 0.15191650390625 }, { "epoch": 1.91314697265625e-05, "model_forward_time": 0.024456501007080078, "step": 12538 }, { "epoch": 1.91314697265625e-05, "step": 12538, "training_step_time": 0.14144349098205566 }, { "epoch": 1.913299560546875e-05, "model_forward_time": 0.024447202682495117, "step": 12539 }, { "epoch": 1.913299560546875e-05, "step": 12539, "training_step_time": 0.13255047798156738 }, { "epoch": 1.9134521484375e-05, "grad_norm": 0.29751724004745483, "learning_rate": 6.732524226298841e-05, "loss": 0.0273, "step": 12540 }, { "epoch": 1.9134521484375e-05, "model_forward_time": 0.02472543716430664, "step": 12540 }, { "epoch": 1.9134521484375e-05, "step": 12540, "training_step_time": 0.124664306640625 }, { "epoch": 1.913604736328125e-05, "model_forward_time": 0.024416208267211914, "step": 12541 }, { "epoch": 1.913604736328125e-05, "step": 12541, "training_step_time": 0.1258559226989746 }, { "epoch": 1.91375732421875e-05, "model_forward_time": 0.025309324264526367, "step": 12542 }, { "epoch": 1.91375732421875e-05, "step": 12542, "training_step_time": 0.12025666236877441 }, { "epoch": 1.913909912109375e-05, "model_forward_time": 0.0252993106842041, "step": 12543 }, { "epoch": 1.913909912109375e-05, "step": 12543, "training_step_time": 0.11723017692565918 }, { "epoch": 1.9140625e-05, "model_forward_time": 0.02551436424255371, "step": 12544 }, { "epoch": 1.9140625e-05, "step": 12544, "training_step_time": 0.11181402206420898 }, { "epoch": 1.914215087890625e-05, "model_forward_time": 0.02577495574951172, "step": 12545 }, { "epoch": 1.914215087890625e-05, "step": 12545, "training_step_time": 0.10963749885559082 }, { "epoch": 1.91436767578125e-05, "model_forward_time": 0.024930953979492188, "step": 12546 }, { "epoch": 1.91436767578125e-05, "step": 12546, "training_step_time": 0.1128089427947998 }, { "epoch": 1.914520263671875e-05, "model_forward_time": 0.0251920223236084, "step": 12547 }, { "epoch": 1.914520263671875e-05, "step": 12547, "training_step_time": 0.17506790161132812 }, { "epoch": 1.9146728515625e-05, "model_forward_time": 0.024571657180786133, "step": 12548 }, { "epoch": 1.9146728515625e-05, "step": 12548, "training_step_time": 0.1097719669342041 }, { "epoch": 1.914825439453125e-05, "model_forward_time": 0.02416396141052246, "step": 12549 }, { "epoch": 1.914825439453125e-05, "step": 12549, "training_step_time": 0.18019437789916992 }, { "epoch": 1.91497802734375e-05, "grad_norm": 0.4422711431980133, "learning_rate": 6.727353060232822e-05, "loss": 0.023, "step": 12550 }, { "epoch": 1.91497802734375e-05, "model_forward_time": 0.02497076988220215, "step": 12550 }, { "epoch": 1.91497802734375e-05, "step": 12550, "training_step_time": 0.793179988861084 }, { "epoch": 1.915130615234375e-05, "model_forward_time": 0.022542715072631836, "step": 12551 }, { "epoch": 1.915130615234375e-05, "step": 12551, "training_step_time": 0.1301567554473877 }, { "epoch": 1.915283203125e-05, "model_forward_time": 0.0247805118560791, "step": 12552 }, { "epoch": 1.915283203125e-05, "step": 12552, "training_step_time": 0.11929750442504883 }, { "epoch": 1.915435791015625e-05, "model_forward_time": 0.025209426879882812, "step": 12553 }, { "epoch": 1.915435791015625e-05, "step": 12553, "training_step_time": 0.18669486045837402 }, { "epoch": 1.91558837890625e-05, "model_forward_time": 0.024862051010131836, "step": 12554 }, { "epoch": 1.91558837890625e-05, "step": 12554, "training_step_time": 0.14656853675842285 }, { "epoch": 1.915740966796875e-05, "model_forward_time": 0.024719715118408203, "step": 12555 }, { "epoch": 1.915740966796875e-05, "step": 12555, "training_step_time": 0.11053109169006348 }, { "epoch": 1.9158935546875e-05, "model_forward_time": 0.025652647018432617, "step": 12556 }, { "epoch": 1.9158935546875e-05, "step": 12556, "training_step_time": 0.12047815322875977 }, { "epoch": 1.916046142578125e-05, "model_forward_time": 0.025519847869873047, "step": 12557 }, { "epoch": 1.916046142578125e-05, "step": 12557, "training_step_time": 0.10713863372802734 }, { "epoch": 1.91619873046875e-05, "model_forward_time": 0.02430891990661621, "step": 12558 }, { "epoch": 1.91619873046875e-05, "step": 12558, "training_step_time": 0.1086878776550293 }, { "epoch": 1.916351318359375e-05, "model_forward_time": 0.0254976749420166, "step": 12559 }, { "epoch": 1.916351318359375e-05, "step": 12559, "training_step_time": 0.19531679153442383 }, { "epoch": 1.91650390625e-05, "grad_norm": 0.43292972445487976, "learning_rate": 6.722179795269956e-05, "loss": 0.0179, "step": 12560 }, { "epoch": 1.91650390625e-05, "model_forward_time": 0.02424025535583496, "step": 12560 }, { "epoch": 1.91650390625e-05, "step": 12560, "training_step_time": 0.13731884956359863 }, { "epoch": 1.916656494140625e-05, "model_forward_time": 0.024560928344726562, "step": 12561 }, { "epoch": 1.916656494140625e-05, "step": 12561, "training_step_time": 0.12201690673828125 }, { "epoch": 1.91680908203125e-05, "model_forward_time": 0.0245816707611084, "step": 12562 }, { "epoch": 1.91680908203125e-05, "step": 12562, "training_step_time": 0.14358949661254883 }, { "epoch": 1.916961669921875e-05, "model_forward_time": 0.024641990661621094, "step": 12563 }, { "epoch": 1.916961669921875e-05, "step": 12563, "training_step_time": 0.11072993278503418 }, { "epoch": 1.9171142578125e-05, "model_forward_time": 0.02481532096862793, "step": 12564 }, { "epoch": 1.9171142578125e-05, "step": 12564, "training_step_time": 0.16074919700622559 }, { "epoch": 1.917266845703125e-05, "model_forward_time": 0.024419546127319336, "step": 12565 }, { "epoch": 1.917266845703125e-05, "step": 12565, "training_step_time": 0.1379694938659668 }, { "epoch": 1.91741943359375e-05, "model_forward_time": 0.02414560317993164, "step": 12566 }, { "epoch": 1.91741943359375e-05, "step": 12566, "training_step_time": 0.11108088493347168 }, { "epoch": 1.917572021484375e-05, "model_forward_time": 0.025241374969482422, "step": 12567 }, { "epoch": 1.917572021484375e-05, "step": 12567, "training_step_time": 0.10595464706420898 }, { "epoch": 1.917724609375e-05, "model_forward_time": 0.02493119239807129, "step": 12568 }, { "epoch": 1.917724609375e-05, "step": 12568, "training_step_time": 0.10948395729064941 }, { "epoch": 1.917877197265625e-05, "model_forward_time": 0.02502298355102539, "step": 12569 }, { "epoch": 1.917877197265625e-05, "step": 12569, "training_step_time": 0.10489702224731445 }, { "epoch": 1.91802978515625e-05, "grad_norm": 0.3948003351688385, "learning_rate": 6.71700443769625e-05, "loss": 0.0184, "step": 12570 }, { "epoch": 1.91802978515625e-05, "model_forward_time": 0.025157690048217773, "step": 12570 }, { "epoch": 1.91802978515625e-05, "step": 12570, "training_step_time": 0.10632872581481934 }, { "epoch": 1.918182373046875e-05, "model_forward_time": 0.025172710418701172, "step": 12571 }, { "epoch": 1.918182373046875e-05, "step": 12571, "training_step_time": 0.10798835754394531 }, { "epoch": 1.9183349609375e-05, "model_forward_time": 0.02557659149169922, "step": 12572 }, { "epoch": 1.9183349609375e-05, "step": 12572, "training_step_time": 0.10526680946350098 }, { "epoch": 1.918487548828125e-05, "model_forward_time": 0.02683544158935547, "step": 12573 }, { "epoch": 1.918487548828125e-05, "step": 12573, "training_step_time": 0.10949969291687012 }, { "epoch": 1.91864013671875e-05, "model_forward_time": 0.025493621826171875, "step": 12574 }, { "epoch": 1.91864013671875e-05, "step": 12574, "training_step_time": 0.10431742668151855 }, { "epoch": 1.918792724609375e-05, "model_forward_time": 0.025062084197998047, "step": 12575 }, { "epoch": 1.918792724609375e-05, "step": 12575, "training_step_time": 0.1097726821899414 }, { "epoch": 1.9189453125e-05, "model_forward_time": 0.025292158126831055, "step": 12576 }, { "epoch": 1.9189453125e-05, "step": 12576, "training_step_time": 0.10596513748168945 }, { "epoch": 1.919097900390625e-05, "model_forward_time": 0.02477264404296875, "step": 12577 }, { "epoch": 1.919097900390625e-05, "step": 12577, "training_step_time": 0.10839176177978516 }, { "epoch": 1.91925048828125e-05, "model_forward_time": 0.025288105010986328, "step": 12578 }, { "epoch": 1.91925048828125e-05, "step": 12578, "training_step_time": 0.10384416580200195 }, { "epoch": 1.919403076171875e-05, "model_forward_time": 0.02502918243408203, "step": 12579 }, { "epoch": 1.919403076171875e-05, "step": 12579, "training_step_time": 0.10681438446044922 }, { "epoch": 1.9195556640625e-05, "grad_norm": 0.2294338196516037, "learning_rate": 6.711826993800248e-05, "loss": 0.0181, "step": 12580 }, { "epoch": 1.9195556640625e-05, "model_forward_time": 0.023891687393188477, "step": 12580 }, { "epoch": 1.9195556640625e-05, "step": 12580, "training_step_time": 0.10378861427307129 }, { "epoch": 1.919708251953125e-05, "model_forward_time": 0.023502111434936523, "step": 12581 }, { "epoch": 1.919708251953125e-05, "step": 12581, "training_step_time": 0.10587358474731445 }, { "epoch": 1.91986083984375e-05, "model_forward_time": 0.025188684463500977, "step": 12582 }, { "epoch": 1.91986083984375e-05, "step": 12582, "training_step_time": 0.10768985748291016 }, { "epoch": 1.920013427734375e-05, "model_forward_time": 0.025043487548828125, "step": 12583 }, { "epoch": 1.920013427734375e-05, "step": 12583, "training_step_time": 0.10391616821289062 }, { "epoch": 1.920166015625e-05, "model_forward_time": 0.025211811065673828, "step": 12584 }, { "epoch": 1.920166015625e-05, "step": 12584, "training_step_time": 0.1078484058380127 }, { "epoch": 1.920318603515625e-05, "model_forward_time": 0.025295257568359375, "step": 12585 }, { "epoch": 1.920318603515625e-05, "step": 12585, "training_step_time": 0.10500144958496094 }, { "epoch": 1.92047119140625e-05, "model_forward_time": 0.025437116622924805, "step": 12586 }, { "epoch": 1.92047119140625e-05, "step": 12586, "training_step_time": 0.11049842834472656 }, { "epoch": 1.920623779296875e-05, "model_forward_time": 0.025125503540039062, "step": 12587 }, { "epoch": 1.920623779296875e-05, "step": 12587, "training_step_time": 0.10646724700927734 }, { "epoch": 1.9207763671875e-05, "model_forward_time": 0.025173187255859375, "step": 12588 }, { "epoch": 1.9207763671875e-05, "step": 12588, "training_step_time": 0.10889649391174316 }, { "epoch": 1.920928955078125e-05, "model_forward_time": 0.025471210479736328, "step": 12589 }, { "epoch": 1.920928955078125e-05, "step": 12589, "training_step_time": 0.18343424797058105 }, { "epoch": 1.92108154296875e-05, "grad_norm": 0.21814358234405518, "learning_rate": 6.706647469873031e-05, "loss": 0.0257, "step": 12590 }, { "epoch": 1.92108154296875e-05, "model_forward_time": 0.024784088134765625, "step": 12590 }, { "epoch": 1.92108154296875e-05, "step": 12590, "training_step_time": 0.10801053047180176 }, { "epoch": 1.921234130859375e-05, "model_forward_time": 0.024697542190551758, "step": 12591 }, { "epoch": 1.921234130859375e-05, "step": 12591, "training_step_time": 0.11011719703674316 }, { "epoch": 1.92138671875e-05, "model_forward_time": 0.025035619735717773, "step": 12592 }, { "epoch": 1.92138671875e-05, "step": 12592, "training_step_time": 0.1288747787475586 }, { "epoch": 1.921539306640625e-05, "model_forward_time": 0.0251615047454834, "step": 12593 }, { "epoch": 1.921539306640625e-05, "step": 12593, "training_step_time": 0.131819486618042 }, { "epoch": 1.92169189453125e-05, "model_forward_time": 0.024675369262695312, "step": 12594 }, { "epoch": 1.92169189453125e-05, "step": 12594, "training_step_time": 0.10813689231872559 }, { "epoch": 1.921844482421875e-05, "model_forward_time": 0.02522587776184082, "step": 12595 }, { "epoch": 1.921844482421875e-05, "step": 12595, "training_step_time": 0.11674618721008301 }, { "epoch": 1.9219970703125e-05, "model_forward_time": 0.02489495277404785, "step": 12596 }, { "epoch": 1.9219970703125e-05, "step": 12596, "training_step_time": 0.10559916496276855 }, { "epoch": 1.922149658203125e-05, "model_forward_time": 0.024866104125976562, "step": 12597 }, { "epoch": 1.922149658203125e-05, "step": 12597, "training_step_time": 0.10707879066467285 }, { "epoch": 1.92230224609375e-05, "model_forward_time": 0.024941205978393555, "step": 12598 }, { "epoch": 1.92230224609375e-05, "step": 12598, "training_step_time": 0.10922574996948242 }, { "epoch": 1.922454833984375e-05, "model_forward_time": 0.02507162094116211, "step": 12599 }, { "epoch": 1.922454833984375e-05, "step": 12599, "training_step_time": 0.11738848686218262 }, { "epoch": 1.922607421875e-05, "grad_norm": 0.46139249205589294, "learning_rate": 6.701465872208216e-05, "loss": 0.0186, "step": 12600 }, { "epoch": 1.922607421875e-05, "model_forward_time": 0.025429725646972656, "step": 12600 }, { "epoch": 1.922607421875e-05, "step": 12600, "training_step_time": 0.10998749732971191 }, { "epoch": 1.922760009765625e-05, "model_forward_time": 0.02521228790283203, "step": 12601 }, { "epoch": 1.922760009765625e-05, "step": 12601, "training_step_time": 0.13446402549743652 }, { "epoch": 1.92291259765625e-05, "model_forward_time": 0.025613069534301758, "step": 12602 }, { "epoch": 1.92291259765625e-05, "step": 12602, "training_step_time": 0.1969621181488037 }, { "epoch": 1.923065185546875e-05, "model_forward_time": 0.0245358943939209, "step": 12603 }, { "epoch": 1.923065185546875e-05, "step": 12603, "training_step_time": 0.1727464199066162 }, { "epoch": 1.9232177734375e-05, "model_forward_time": 0.024669647216796875, "step": 12604 }, { "epoch": 1.9232177734375e-05, "step": 12604, "training_step_time": 0.1159052848815918 }, { "epoch": 1.923370361328125e-05, "model_forward_time": 0.025000333786010742, "step": 12605 }, { "epoch": 1.923370361328125e-05, "step": 12605, "training_step_time": 0.10856866836547852 }, { "epoch": 1.92352294921875e-05, "model_forward_time": 0.02567005157470703, "step": 12606 }, { "epoch": 1.92352294921875e-05, "step": 12606, "training_step_time": 0.19438481330871582 }, { "epoch": 1.923675537109375e-05, "model_forward_time": 0.024843692779541016, "step": 12607 }, { "epoch": 1.923675537109375e-05, "step": 12607, "training_step_time": 0.13912272453308105 }, { "epoch": 1.923828125e-05, "model_forward_time": 0.02520895004272461, "step": 12608 }, { "epoch": 1.923828125e-05, "step": 12608, "training_step_time": 0.11437821388244629 }, { "epoch": 1.923980712890625e-05, "model_forward_time": 0.025266408920288086, "step": 12609 }, { "epoch": 1.923980712890625e-05, "step": 12609, "training_step_time": 0.1401810646057129 }, { "epoch": 1.92413330078125e-05, "grad_norm": 0.2110971063375473, "learning_rate": 6.696282207101928e-05, "loss": 0.0126, "step": 12610 }, { "epoch": 1.92413330078125e-05, "model_forward_time": 0.0250704288482666, "step": 12610 }, { "epoch": 1.92413330078125e-05, "step": 12610, "training_step_time": 0.17672300338745117 }, { "epoch": 1.924285888671875e-05, "model_forward_time": 0.024567842483520508, "step": 12611 }, { "epoch": 1.924285888671875e-05, "step": 12611, "training_step_time": 0.12393832206726074 }, { "epoch": 1.9244384765625e-05, "model_forward_time": 0.02484750747680664, "step": 12612 }, { "epoch": 1.9244384765625e-05, "step": 12612, "training_step_time": 0.11827278137207031 }, { "epoch": 1.924591064453125e-05, "model_forward_time": 0.02541041374206543, "step": 12613 }, { "epoch": 1.924591064453125e-05, "step": 12613, "training_step_time": 0.10392117500305176 }, { "epoch": 1.92474365234375e-05, "model_forward_time": 0.025450468063354492, "step": 12614 }, { "epoch": 1.92474365234375e-05, "step": 12614, "training_step_time": 0.1038973331451416 }, { "epoch": 1.924896240234375e-05, "model_forward_time": 0.025098562240600586, "step": 12615 }, { "epoch": 1.924896240234375e-05, "step": 12615, "training_step_time": 0.10679221153259277 }, { "epoch": 1.925048828125e-05, "model_forward_time": 0.025987625122070312, "step": 12616 }, { "epoch": 1.925048828125e-05, "step": 12616, "training_step_time": 0.10654354095458984 }, { "epoch": 1.925201416015625e-05, "model_forward_time": 0.026123523712158203, "step": 12617 }, { "epoch": 1.925201416015625e-05, "step": 12617, "training_step_time": 0.10748744010925293 }, { "epoch": 1.92535400390625e-05, "model_forward_time": 0.02560734748840332, "step": 12618 }, { "epoch": 1.92535400390625e-05, "step": 12618, "training_step_time": 0.10573768615722656 }, { "epoch": 1.925506591796875e-05, "model_forward_time": 0.025284767150878906, "step": 12619 }, { "epoch": 1.925506591796875e-05, "step": 12619, "training_step_time": 0.11049270629882812 }, { "epoch": 1.9256591796875e-05, "grad_norm": 0.341896116733551, "learning_rate": 6.691096480852808e-05, "loss": 0.0133, "step": 12620 }, { "epoch": 1.9256591796875e-05, "model_forward_time": 0.025957822799682617, "step": 12620 }, { "epoch": 1.9256591796875e-05, "step": 12620, "training_step_time": 0.11049723625183105 }, { "epoch": 1.925811767578125e-05, "model_forward_time": 0.025624990463256836, "step": 12621 }, { "epoch": 1.925811767578125e-05, "step": 12621, "training_step_time": 0.10804581642150879 }, { "epoch": 1.92596435546875e-05, "model_forward_time": 0.024890422821044922, "step": 12622 }, { "epoch": 1.92596435546875e-05, "step": 12622, "training_step_time": 0.10812187194824219 }, { "epoch": 1.926116943359375e-05, "model_forward_time": 0.02484297752380371, "step": 12623 }, { "epoch": 1.926116943359375e-05, "step": 12623, "training_step_time": 0.10878157615661621 }, { "epoch": 1.92626953125e-05, "model_forward_time": 0.025377511978149414, "step": 12624 }, { "epoch": 1.92626953125e-05, "step": 12624, "training_step_time": 0.10845375061035156 }, { "epoch": 1.926422119140625e-05, "model_forward_time": 0.02545309066772461, "step": 12625 }, { "epoch": 1.926422119140625e-05, "step": 12625, "training_step_time": 0.10585904121398926 }, { "epoch": 1.92657470703125e-05, "model_forward_time": 0.025503158569335938, "step": 12626 }, { "epoch": 1.92657470703125e-05, "step": 12626, "training_step_time": 0.10647988319396973 }, { "epoch": 1.926727294921875e-05, "model_forward_time": 0.025379419326782227, "step": 12627 }, { "epoch": 1.926727294921875e-05, "step": 12627, "training_step_time": 0.10555768013000488 }, { "epoch": 1.9268798828125e-05, "model_forward_time": 0.025585651397705078, "step": 12628 }, { "epoch": 1.9268798828125e-05, "step": 12628, "training_step_time": 0.10747885704040527 }, { "epoch": 1.927032470703125e-05, "model_forward_time": 0.025168418884277344, "step": 12629 }, { "epoch": 1.927032470703125e-05, "step": 12629, "training_step_time": 0.1109018325805664 }, { "epoch": 1.92718505859375e-05, "grad_norm": 0.2618494927883148, "learning_rate": 6.685908699762002e-05, "loss": 0.0129, "step": 12630 }, { "epoch": 1.92718505859375e-05, "model_forward_time": 0.02467060089111328, "step": 12630 }, { "epoch": 1.92718505859375e-05, "step": 12630, "training_step_time": 0.10836100578308105 }, { "epoch": 1.927337646484375e-05, "model_forward_time": 0.025249242782592773, "step": 12631 }, { "epoch": 1.927337646484375e-05, "step": 12631, "training_step_time": 0.10479235649108887 }, { "epoch": 1.927490234375e-05, "model_forward_time": 0.02525615692138672, "step": 12632 }, { "epoch": 1.927490234375e-05, "step": 12632, "training_step_time": 0.10565781593322754 }, { "epoch": 1.927642822265625e-05, "model_forward_time": 0.025009870529174805, "step": 12633 }, { "epoch": 1.927642822265625e-05, "step": 12633, "training_step_time": 0.10599398612976074 }, { "epoch": 1.92779541015625e-05, "model_forward_time": 0.0249631404876709, "step": 12634 }, { "epoch": 1.92779541015625e-05, "step": 12634, "training_step_time": 0.10808539390563965 }, { "epoch": 1.927947998046875e-05, "model_forward_time": 0.025622844696044922, "step": 12635 }, { "epoch": 1.927947998046875e-05, "step": 12635, "training_step_time": 0.10614228248596191 }, { "epoch": 1.9281005859375e-05, "model_forward_time": 0.025485515594482422, "step": 12636 }, { "epoch": 1.9281005859375e-05, "step": 12636, "training_step_time": 0.1859593391418457 }, { "epoch": 1.928253173828125e-05, "model_forward_time": 0.024666786193847656, "step": 12637 }, { "epoch": 1.928253173828125e-05, "step": 12637, "training_step_time": 0.11871767044067383 }, { "epoch": 1.92840576171875e-05, "model_forward_time": 0.024733304977416992, "step": 12638 }, { "epoch": 1.92840576171875e-05, "step": 12638, "training_step_time": 0.10988140106201172 }, { "epoch": 1.928558349609375e-05, "model_forward_time": 0.024190187454223633, "step": 12639 }, { "epoch": 1.928558349609375e-05, "step": 12639, "training_step_time": 0.11555123329162598 }, { "epoch": 1.9287109375e-05, "grad_norm": 0.6126453280448914, "learning_rate": 6.680718870133156e-05, "loss": 0.0267, "step": 12640 }, { "epoch": 1.9287109375e-05, "model_forward_time": 0.024391889572143555, "step": 12640 }, { "epoch": 1.9287109375e-05, "step": 12640, "training_step_time": 0.1274721622467041 }, { "epoch": 1.928863525390625e-05, "model_forward_time": 0.02519989013671875, "step": 12641 }, { "epoch": 1.928863525390625e-05, "step": 12641, "training_step_time": 0.1184241771697998 }, { "epoch": 1.92901611328125e-05, "model_forward_time": 0.024925947189331055, "step": 12642 }, { "epoch": 1.92901611328125e-05, "step": 12642, "training_step_time": 0.11610698699951172 }, { "epoch": 1.929168701171875e-05, "model_forward_time": 0.025300025939941406, "step": 12643 }, { "epoch": 1.929168701171875e-05, "step": 12643, "training_step_time": 0.10991692543029785 }, { "epoch": 1.9293212890625e-05, "model_forward_time": 0.025727272033691406, "step": 12644 }, { "epoch": 1.9293212890625e-05, "step": 12644, "training_step_time": 0.10796976089477539 }, { "epoch": 1.929473876953125e-05, "model_forward_time": 0.025136470794677734, "step": 12645 }, { "epoch": 1.929473876953125e-05, "step": 12645, "training_step_time": 0.15371084213256836 }, { "epoch": 1.92962646484375e-05, "model_forward_time": 0.02484273910522461, "step": 12646 }, { "epoch": 1.92962646484375e-05, "step": 12646, "training_step_time": 0.11310195922851562 }, { "epoch": 1.929779052734375e-05, "model_forward_time": 0.025235652923583984, "step": 12647 }, { "epoch": 1.929779052734375e-05, "step": 12647, "training_step_time": 0.2005300521850586 }, { "epoch": 1.929931640625e-05, "model_forward_time": 0.024328947067260742, "step": 12648 }, { "epoch": 1.929931640625e-05, "step": 12648, "training_step_time": 0.18968605995178223 }, { "epoch": 1.930084228515625e-05, "model_forward_time": 0.02456045150756836, "step": 12649 }, { "epoch": 1.930084228515625e-05, "step": 12649, "training_step_time": 0.15802597999572754 }, { "epoch": 1.93023681640625e-05, "grad_norm": 0.24479682743549347, "learning_rate": 6.675526998272405e-05, "loss": 0.0157, "step": 12650 }, { "epoch": 1.93023681640625e-05, "model_forward_time": 0.0250399112701416, "step": 12650 }, { "epoch": 1.93023681640625e-05, "step": 12650, "training_step_time": 0.17844200134277344 }, { "epoch": 1.930389404296875e-05, "model_forward_time": 0.024821996688842773, "step": 12651 }, { "epoch": 1.930389404296875e-05, "step": 12651, "training_step_time": 0.10304903984069824 }, { "epoch": 1.9305419921875e-05, "model_forward_time": 0.025290727615356445, "step": 12652 }, { "epoch": 1.9305419921875e-05, "step": 12652, "training_step_time": 0.10643911361694336 }, { "epoch": 1.930694580078125e-05, "model_forward_time": 0.02562713623046875, "step": 12653 }, { "epoch": 1.930694580078125e-05, "step": 12653, "training_step_time": 0.18957948684692383 }, { "epoch": 1.93084716796875e-05, "model_forward_time": 0.024721622467041016, "step": 12654 }, { "epoch": 1.93084716796875e-05, "step": 12654, "training_step_time": 0.11517858505249023 }, { "epoch": 1.930999755859375e-05, "model_forward_time": 0.024723291397094727, "step": 12655 }, { "epoch": 1.930999755859375e-05, "step": 12655, "training_step_time": 0.11389827728271484 }, { "epoch": 1.93115234375e-05, "model_forward_time": 0.0255429744720459, "step": 12656 }, { "epoch": 1.93115234375e-05, "step": 12656, "training_step_time": 0.12598848342895508 }, { "epoch": 1.931304931640625e-05, "model_forward_time": 0.025797605514526367, "step": 12657 }, { "epoch": 1.931304931640625e-05, "step": 12657, "training_step_time": 0.1098930835723877 }, { "epoch": 1.93145751953125e-05, "model_forward_time": 0.025637388229370117, "step": 12658 }, { "epoch": 1.93145751953125e-05, "step": 12658, "training_step_time": 0.11049389839172363 }, { "epoch": 1.931610107421875e-05, "model_forward_time": 0.025377273559570312, "step": 12659 }, { "epoch": 1.931610107421875e-05, "step": 12659, "training_step_time": 0.1252450942993164 }, { "epoch": 1.9317626953125e-05, "grad_norm": 0.3344530165195465, "learning_rate": 6.670333090488356e-05, "loss": 0.0267, "step": 12660 }, { "epoch": 1.9317626953125e-05, "model_forward_time": 0.025220155715942383, "step": 12660 }, { "epoch": 1.9317626953125e-05, "step": 12660, "training_step_time": 0.11451268196105957 }, { "epoch": 1.931915283203125e-05, "model_forward_time": 0.025136232376098633, "step": 12661 }, { "epoch": 1.931915283203125e-05, "step": 12661, "training_step_time": 0.10765194892883301 }, { "epoch": 1.93206787109375e-05, "model_forward_time": 0.025594472885131836, "step": 12662 }, { "epoch": 1.93206787109375e-05, "step": 12662, "training_step_time": 0.10930633544921875 }, { "epoch": 1.932220458984375e-05, "model_forward_time": 0.025304317474365234, "step": 12663 }, { "epoch": 1.932220458984375e-05, "step": 12663, "training_step_time": 0.10629510879516602 }, { "epoch": 1.932373046875e-05, "model_forward_time": 0.02521800994873047, "step": 12664 }, { "epoch": 1.932373046875e-05, "step": 12664, "training_step_time": 0.1077733039855957 }, { "epoch": 1.932525634765625e-05, "model_forward_time": 0.027035951614379883, "step": 12665 }, { "epoch": 1.932525634765625e-05, "step": 12665, "training_step_time": 0.10764312744140625 }, { "epoch": 1.93267822265625e-05, "model_forward_time": 0.025460004806518555, "step": 12666 }, { "epoch": 1.93267822265625e-05, "step": 12666, "training_step_time": 0.11117243766784668 }, { "epoch": 1.932830810546875e-05, "model_forward_time": 0.02533864974975586, "step": 12667 }, { "epoch": 1.932830810546875e-05, "step": 12667, "training_step_time": 0.10635733604431152 }, { "epoch": 1.9329833984375e-05, "model_forward_time": 0.025207042694091797, "step": 12668 }, { "epoch": 1.9329833984375e-05, "step": 12668, "training_step_time": 0.11119651794433594 }, { "epoch": 1.933135986328125e-05, "model_forward_time": 0.024214506149291992, "step": 12669 }, { "epoch": 1.933135986328125e-05, "step": 12669, "training_step_time": 0.10557246208190918 }, { "epoch": 1.93328857421875e-05, "grad_norm": 0.387548565864563, "learning_rate": 6.6651371530921e-05, "loss": 0.0176, "step": 12670 }, { "epoch": 1.93328857421875e-05, "model_forward_time": 0.02405834197998047, "step": 12670 }, { "epoch": 1.93328857421875e-05, "step": 12670, "training_step_time": 0.10777544975280762 }, { "epoch": 1.933441162109375e-05, "model_forward_time": 0.024535417556762695, "step": 12671 }, { "epoch": 1.933441162109375e-05, "step": 12671, "training_step_time": 0.10795474052429199 }, { "epoch": 1.93359375e-05, "model_forward_time": 0.025798559188842773, "step": 12672 }, { "epoch": 1.93359375e-05, "step": 12672, "training_step_time": 0.10553407669067383 }, { "epoch": 1.933746337890625e-05, "model_forward_time": 0.02546238899230957, "step": 12673 }, { "epoch": 1.933746337890625e-05, "step": 12673, "training_step_time": 0.10803055763244629 }, { "epoch": 1.93389892578125e-05, "model_forward_time": 0.025255441665649414, "step": 12674 }, { "epoch": 1.93389892578125e-05, "step": 12674, "training_step_time": 0.10548663139343262 }, { "epoch": 1.934051513671875e-05, "model_forward_time": 0.025194644927978516, "step": 12675 }, { "epoch": 1.934051513671875e-05, "step": 12675, "training_step_time": 0.10522794723510742 }, { "epoch": 1.9342041015625e-05, "model_forward_time": 0.02551436424255371, "step": 12676 }, { "epoch": 1.9342041015625e-05, "step": 12676, "training_step_time": 0.10499906539916992 }, { "epoch": 1.934356689453125e-05, "model_forward_time": 0.025441408157348633, "step": 12677 }, { "epoch": 1.934356689453125e-05, "step": 12677, "training_step_time": 0.10617899894714355 }, { "epoch": 1.93450927734375e-05, "model_forward_time": 0.025286436080932617, "step": 12678 }, { "epoch": 1.93450927734375e-05, "step": 12678, "training_step_time": 0.10955572128295898 }, { "epoch": 1.934661865234375e-05, "model_forward_time": 0.02510809898376465, "step": 12679 }, { "epoch": 1.934661865234375e-05, "step": 12679, "training_step_time": 0.10473370552062988 }, { "epoch": 1.934814453125e-05, "grad_norm": 0.23371650278568268, "learning_rate": 6.659939192397192e-05, "loss": 0.0208, "step": 12680 }, { "epoch": 1.934814453125e-05, "model_forward_time": 0.024955272674560547, "step": 12680 }, { "epoch": 1.934814453125e-05, "step": 12680, "training_step_time": 0.10444879531860352 }, { "epoch": 1.934967041015625e-05, "model_forward_time": 0.02525162696838379, "step": 12681 }, { "epoch": 1.934967041015625e-05, "step": 12681, "training_step_time": 0.10809993743896484 }, { "epoch": 1.93511962890625e-05, "model_forward_time": 0.027800321578979492, "step": 12682 }, { "epoch": 1.93511962890625e-05, "step": 12682, "training_step_time": 0.10878992080688477 }, { "epoch": 1.935272216796875e-05, "model_forward_time": 0.02567887306213379, "step": 12683 }, { "epoch": 1.935272216796875e-05, "step": 12683, "training_step_time": 0.13672995567321777 }, { "epoch": 1.9354248046875e-05, "model_forward_time": 0.025494813919067383, "step": 12684 }, { "epoch": 1.9354248046875e-05, "step": 12684, "training_step_time": 0.1049489974975586 }, { "epoch": 1.935577392578125e-05, "model_forward_time": 0.025285959243774414, "step": 12685 }, { "epoch": 1.935577392578125e-05, "step": 12685, "training_step_time": 0.10925078392028809 }, { "epoch": 1.93572998046875e-05, "model_forward_time": 0.025732994079589844, "step": 12686 }, { "epoch": 1.93572998046875e-05, "step": 12686, "training_step_time": 0.12573981285095215 }, { "epoch": 1.935882568359375e-05, "model_forward_time": 0.025373458862304688, "step": 12687 }, { "epoch": 1.935882568359375e-05, "step": 12687, "training_step_time": 0.12750506401062012 }, { "epoch": 1.93603515625e-05, "model_forward_time": 0.02517080307006836, "step": 12688 }, { "epoch": 1.93603515625e-05, "step": 12688, "training_step_time": 0.11224007606506348 }, { "epoch": 1.936187744140625e-05, "model_forward_time": 0.02629232406616211, "step": 12689 }, { "epoch": 1.936187744140625e-05, "step": 12689, "training_step_time": 0.12698793411254883 }, { "epoch": 1.93634033203125e-05, "grad_norm": 0.3474064767360687, "learning_rate": 6.654739214719641e-05, "loss": 0.0207, "step": 12690 }, { "epoch": 1.93634033203125e-05, "model_forward_time": 0.02561163902282715, "step": 12690 }, { "epoch": 1.93634033203125e-05, "step": 12690, "training_step_time": 0.10907292366027832 }, { "epoch": 1.936492919921875e-05, "model_forward_time": 0.025131702423095703, "step": 12691 }, { "epoch": 1.936492919921875e-05, "step": 12691, "training_step_time": 0.10292792320251465 }, { "epoch": 1.9366455078125e-05, "model_forward_time": 0.024382591247558594, "step": 12692 }, { "epoch": 1.9366455078125e-05, "step": 12692, "training_step_time": 0.15087056159973145 }, { "epoch": 1.936798095703125e-05, "model_forward_time": 0.024495363235473633, "step": 12693 }, { "epoch": 1.936798095703125e-05, "step": 12693, "training_step_time": 0.19289565086364746 }, { "epoch": 1.93695068359375e-05, "model_forward_time": 0.02473306655883789, "step": 12694 }, { "epoch": 1.93695068359375e-05, "step": 12694, "training_step_time": 0.21766972541809082 }, { "epoch": 1.937103271484375e-05, "model_forward_time": 0.024669408798217773, "step": 12695 }, { "epoch": 1.937103271484375e-05, "step": 12695, "training_step_time": 0.15679335594177246 }, { "epoch": 1.937255859375e-05, "model_forward_time": 0.02606940269470215, "step": 12696 }, { "epoch": 1.937255859375e-05, "step": 12696, "training_step_time": 0.11690235137939453 }, { "epoch": 1.937408447265625e-05, "model_forward_time": 0.024926185607910156, "step": 12697 }, { "epoch": 1.937408447265625e-05, "step": 12697, "training_step_time": 0.11148786544799805 }, { "epoch": 1.93756103515625e-05, "model_forward_time": 0.025763750076293945, "step": 12698 }, { "epoch": 1.93756103515625e-05, "step": 12698, "training_step_time": 0.19720196723937988 }, { "epoch": 1.937713623046875e-05, "model_forward_time": 0.025014638900756836, "step": 12699 }, { "epoch": 1.937713623046875e-05, "step": 12699, "training_step_time": 0.10392260551452637 }, { "epoch": 1.9378662109375e-05, "grad_norm": 0.44094225764274597, "learning_rate": 6.649537226377915e-05, "loss": 0.0189, "step": 12700 }, { "epoch": 1.9378662109375e-05, "model_forward_time": 0.024032115936279297, "step": 12700 }, { "epoch": 1.9378662109375e-05, "step": 12700, "training_step_time": 0.1922895908355713 }, { "epoch": 1.938018798828125e-05, "model_forward_time": 0.024451494216918945, "step": 12701 }, { "epoch": 1.938018798828125e-05, "step": 12701, "training_step_time": 0.12868714332580566 }, { "epoch": 1.93817138671875e-05, "model_forward_time": 0.025012969970703125, "step": 12702 }, { "epoch": 1.93817138671875e-05, "step": 12702, "training_step_time": 0.1278972625732422 }, { "epoch": 1.938323974609375e-05, "model_forward_time": 0.024950504302978516, "step": 12703 }, { "epoch": 1.938323974609375e-05, "step": 12703, "training_step_time": 0.11070775985717773 }, { "epoch": 1.9384765625e-05, "model_forward_time": 0.025105953216552734, "step": 12704 }, { "epoch": 1.9384765625e-05, "step": 12704, "training_step_time": 0.1747570037841797 }, { "epoch": 1.938629150390625e-05, "model_forward_time": 0.02465200424194336, "step": 12705 }, { "epoch": 1.938629150390625e-05, "step": 12705, "training_step_time": 0.13484907150268555 }, { "epoch": 1.93878173828125e-05, "model_forward_time": 0.02415299415588379, "step": 12706 }, { "epoch": 1.93878173828125e-05, "step": 12706, "training_step_time": 0.11393952369689941 }, { "epoch": 1.938934326171875e-05, "model_forward_time": 0.025377750396728516, "step": 12707 }, { "epoch": 1.938934326171875e-05, "step": 12707, "training_step_time": 0.10611414909362793 }, { "epoch": 1.9390869140625e-05, "model_forward_time": 0.02612018585205078, "step": 12708 }, { "epoch": 1.9390869140625e-05, "step": 12708, "training_step_time": 0.17444610595703125 }, { "epoch": 1.939239501953125e-05, "model_forward_time": 0.026726245880126953, "step": 12709 }, { "epoch": 1.939239501953125e-05, "step": 12709, "training_step_time": 0.19596290588378906 }, { "epoch": 1.93939208984375e-05, "grad_norm": 0.510518491268158, "learning_rate": 6.644333233692916e-05, "loss": 0.0166, "step": 12710 }, { "epoch": 1.93939208984375e-05, "model_forward_time": 0.02432847023010254, "step": 12710 }, { "epoch": 1.93939208984375e-05, "step": 12710, "training_step_time": 0.1941695213317871 }, { "epoch": 1.939544677734375e-05, "model_forward_time": 0.02467179298400879, "step": 12711 }, { "epoch": 1.939544677734375e-05, "step": 12711, "training_step_time": 0.2008965015411377 }, { "epoch": 1.939697265625e-05, "model_forward_time": 0.024685382843017578, "step": 12712 }, { "epoch": 1.939697265625e-05, "step": 12712, "training_step_time": 0.18499112129211426 }, { "epoch": 1.939849853515625e-05, "model_forward_time": 0.024480581283569336, "step": 12713 }, { "epoch": 1.939849853515625e-05, "step": 12713, "training_step_time": 0.16906523704528809 }, { "epoch": 1.94000244140625e-05, "model_forward_time": 0.028041601181030273, "step": 12714 }, { "epoch": 1.94000244140625e-05, "step": 12714, "training_step_time": 0.16216421127319336 }, { "epoch": 1.940155029296875e-05, "model_forward_time": 0.024445295333862305, "step": 12715 }, { "epoch": 1.940155029296875e-05, "step": 12715, "training_step_time": 0.15805435180664062 }, { "epoch": 1.9403076171875e-05, "model_forward_time": 0.024680614471435547, "step": 12716 }, { "epoch": 1.9403076171875e-05, "step": 12716, "training_step_time": 0.13758087158203125 }, { "epoch": 1.940460205078125e-05, "model_forward_time": 0.024912595748901367, "step": 12717 }, { "epoch": 1.940460205078125e-05, "step": 12717, "training_step_time": 0.13077497482299805 }, { "epoch": 1.94061279296875e-05, "model_forward_time": 0.02513718605041504, "step": 12718 }, { "epoch": 1.94061279296875e-05, "step": 12718, "training_step_time": 0.1228799819946289 }, { "epoch": 1.940765380859375e-05, "model_forward_time": 0.025346994400024414, "step": 12719 }, { "epoch": 1.940765380859375e-05, "step": 12719, "training_step_time": 0.11973333358764648 }, { "epoch": 1.94091796875e-05, "grad_norm": 0.3286479115486145, "learning_rate": 6.639127242987988e-05, "loss": 0.0189, "step": 12720 }, { "epoch": 1.94091796875e-05, "model_forward_time": 0.025264501571655273, "step": 12720 }, { "epoch": 1.94091796875e-05, "step": 12720, "training_step_time": 0.11438894271850586 }, { "epoch": 1.941070556640625e-05, "model_forward_time": 0.028886795043945312, "step": 12721 }, { "epoch": 1.941070556640625e-05, "step": 12721, "training_step_time": 0.11502599716186523 }, { "epoch": 1.94122314453125e-05, "model_forward_time": 0.02595210075378418, "step": 12722 }, { "epoch": 1.94122314453125e-05, "step": 12722, "training_step_time": 0.12808537483215332 }, { "epoch": 1.941375732421875e-05, "model_forward_time": 0.025502920150756836, "step": 12723 }, { "epoch": 1.941375732421875e-05, "step": 12723, "training_step_time": 0.16045022010803223 }, { "epoch": 1.9415283203125e-05, "model_forward_time": 0.025359153747558594, "step": 12724 }, { "epoch": 1.9415283203125e-05, "step": 12724, "training_step_time": 0.11385941505432129 }, { "epoch": 1.941680908203125e-05, "model_forward_time": 0.025402545928955078, "step": 12725 }, { "epoch": 1.941680908203125e-05, "step": 12725, "training_step_time": 0.10817670822143555 }, { "epoch": 1.94183349609375e-05, "model_forward_time": 0.02525019645690918, "step": 12726 }, { "epoch": 1.94183349609375e-05, "step": 12726, "training_step_time": 0.12291574478149414 }, { "epoch": 1.941986083984375e-05, "model_forward_time": 0.025476694107055664, "step": 12727 }, { "epoch": 1.941986083984375e-05, "step": 12727, "training_step_time": 0.12531304359436035 }, { "epoch": 1.942138671875e-05, "model_forward_time": 0.025182247161865234, "step": 12728 }, { "epoch": 1.942138671875e-05, "step": 12728, "training_step_time": 0.11971092224121094 }, { "epoch": 1.942291259765625e-05, "model_forward_time": 0.025836944580078125, "step": 12729 }, { "epoch": 1.942291259765625e-05, "step": 12729, "training_step_time": 0.11362171173095703 }, { "epoch": 1.94244384765625e-05, "grad_norm": 0.4129391610622406, "learning_rate": 6.6339192605889e-05, "loss": 0.017, "step": 12730 }, { "epoch": 1.94244384765625e-05, "model_forward_time": 0.02591848373413086, "step": 12730 }, { "epoch": 1.94244384765625e-05, "step": 12730, "training_step_time": 0.10706138610839844 }, { "epoch": 1.942596435546875e-05, "model_forward_time": 0.02567744255065918, "step": 12731 }, { "epoch": 1.942596435546875e-05, "step": 12731, "training_step_time": 0.10634374618530273 }, { "epoch": 1.9427490234375e-05, "model_forward_time": 0.025088071823120117, "step": 12732 }, { "epoch": 1.9427490234375e-05, "step": 12732, "training_step_time": 0.1469581127166748 }, { "epoch": 1.942901611328125e-05, "model_forward_time": 0.024851560592651367, "step": 12733 }, { "epoch": 1.942901611328125e-05, "step": 12733, "training_step_time": 0.17246031761169434 }, { "epoch": 1.94305419921875e-05, "model_forward_time": 0.025064468383789062, "step": 12734 }, { "epoch": 1.94305419921875e-05, "step": 12734, "training_step_time": 0.13810062408447266 }, { "epoch": 1.943206787109375e-05, "model_forward_time": 0.025342464447021484, "step": 12735 }, { "epoch": 1.943206787109375e-05, "step": 12735, "training_step_time": 0.10831689834594727 }, { "epoch": 1.943359375e-05, "model_forward_time": 0.026210784912109375, "step": 12736 }, { "epoch": 1.943359375e-05, "step": 12736, "training_step_time": 0.19332408905029297 }, { "epoch": 1.943511962890625e-05, "model_forward_time": 0.024960756301879883, "step": 12737 }, { "epoch": 1.943511962890625e-05, "step": 12737, "training_step_time": 0.14487051963806152 }, { "epoch": 1.94366455078125e-05, "model_forward_time": 0.02463984489440918, "step": 12738 }, { "epoch": 1.94366455078125e-05, "step": 12738, "training_step_time": 0.10111284255981445 }, { "epoch": 1.943817138671875e-05, "model_forward_time": 0.025391340255737305, "step": 12739 }, { "epoch": 1.943817138671875e-05, "step": 12739, "training_step_time": 0.10775971412658691 }, { "epoch": 1.9439697265625e-05, "grad_norm": 0.3393997251987457, "learning_rate": 6.628709292823844e-05, "loss": 0.0206, "step": 12740 }, { "epoch": 1.9439697265625e-05, "model_forward_time": 0.0257875919342041, "step": 12740 }, { "epoch": 1.9439697265625e-05, "step": 12740, "training_step_time": 0.10635638236999512 }, { "epoch": 1.944122314453125e-05, "model_forward_time": 0.02565622329711914, "step": 12741 }, { "epoch": 1.944122314453125e-05, "step": 12741, "training_step_time": 0.20505642890930176 }, { "epoch": 1.94427490234375e-05, "model_forward_time": 0.025096893310546875, "step": 12742 }, { "epoch": 1.94427490234375e-05, "step": 12742, "training_step_time": 0.12585091590881348 }, { "epoch": 1.944427490234375e-05, "model_forward_time": 0.024597883224487305, "step": 12743 }, { "epoch": 1.944427490234375e-05, "step": 12743, "training_step_time": 0.12032365798950195 }, { "epoch": 1.944580078125e-05, "model_forward_time": 0.025203943252563477, "step": 12744 }, { "epoch": 1.944580078125e-05, "step": 12744, "training_step_time": 0.13687372207641602 }, { "epoch": 1.944732666015625e-05, "model_forward_time": 0.024367332458496094, "step": 12745 }, { "epoch": 1.944732666015625e-05, "step": 12745, "training_step_time": 0.12261390686035156 }, { "epoch": 1.94488525390625e-05, "model_forward_time": 0.025084733963012695, "step": 12746 }, { "epoch": 1.94488525390625e-05, "step": 12746, "training_step_time": 0.12470579147338867 }, { "epoch": 1.945037841796875e-05, "model_forward_time": 0.0253145694732666, "step": 12747 }, { "epoch": 1.945037841796875e-05, "step": 12747, "training_step_time": 0.10892963409423828 }, { "epoch": 1.9451904296875e-05, "model_forward_time": 0.02574634552001953, "step": 12748 }, { "epoch": 1.9451904296875e-05, "step": 12748, "training_step_time": 0.10658383369445801 }, { "epoch": 1.945343017578125e-05, "model_forward_time": 0.025261402130126953, "step": 12749 }, { "epoch": 1.945343017578125e-05, "step": 12749, "training_step_time": 0.1090092658996582 }, { "epoch": 1.94549560546875e-05, "grad_norm": 0.28004905581474304, "learning_rate": 6.623497346023418e-05, "loss": 0.0123, "step": 12750 }, { "epoch": 1.94549560546875e-05, "model_forward_time": 0.02552509307861328, "step": 12750 }, { "epoch": 1.94549560546875e-05, "step": 12750, "training_step_time": 0.10732436180114746 }, { "epoch": 1.945648193359375e-05, "model_forward_time": 0.025625228881835938, "step": 12751 }, { "epoch": 1.945648193359375e-05, "step": 12751, "training_step_time": 0.11023306846618652 }, { "epoch": 1.94580078125e-05, "model_forward_time": 0.025531291961669922, "step": 12752 }, { "epoch": 1.94580078125e-05, "step": 12752, "training_step_time": 0.10707283020019531 }, { "epoch": 1.945953369140625e-05, "model_forward_time": 0.025151491165161133, "step": 12753 }, { "epoch": 1.945953369140625e-05, "step": 12753, "training_step_time": 0.11092758178710938 }, { "epoch": 1.94610595703125e-05, "model_forward_time": 0.025544166564941406, "step": 12754 }, { "epoch": 1.94610595703125e-05, "step": 12754, "training_step_time": 0.10715937614440918 }, { "epoch": 1.946258544921875e-05, "model_forward_time": 0.025353431701660156, "step": 12755 }, { "epoch": 1.946258544921875e-05, "step": 12755, "training_step_time": 0.10713982582092285 }, { "epoch": 1.9464111328125e-05, "model_forward_time": 0.024909019470214844, "step": 12756 }, { "epoch": 1.9464111328125e-05, "step": 12756, "training_step_time": 0.1053171157836914 }, { "epoch": 1.946563720703125e-05, "model_forward_time": 0.02499699592590332, "step": 12757 }, { "epoch": 1.946563720703125e-05, "step": 12757, "training_step_time": 0.10581326484680176 }, { "epoch": 1.94671630859375e-05, "model_forward_time": 0.027768850326538086, "step": 12758 }, { "epoch": 1.94671630859375e-05, "step": 12758, "training_step_time": 0.10840296745300293 }, { "epoch": 1.946868896484375e-05, "model_forward_time": 0.025197267532348633, "step": 12759 }, { "epoch": 1.946868896484375e-05, "step": 12759, "training_step_time": 0.1060795783996582 }, { "epoch": 1.947021484375e-05, "grad_norm": 0.44285303354263306, "learning_rate": 6.61828342652063e-05, "loss": 0.0274, "step": 12760 }, { "epoch": 1.947021484375e-05, "model_forward_time": 0.025188922882080078, "step": 12760 }, { "epoch": 1.947021484375e-05, "step": 12760, "training_step_time": 0.10631084442138672 }, { "epoch": 1.947174072265625e-05, "model_forward_time": 0.02573227882385254, "step": 12761 }, { "epoch": 1.947174072265625e-05, "step": 12761, "training_step_time": 0.10662961006164551 }, { "epoch": 1.94732666015625e-05, "model_forward_time": 0.025211095809936523, "step": 12762 }, { "epoch": 1.94732666015625e-05, "step": 12762, "training_step_time": 0.10442638397216797 }, { "epoch": 1.947479248046875e-05, "model_forward_time": 0.025521278381347656, "step": 12763 }, { "epoch": 1.947479248046875e-05, "step": 12763, "training_step_time": 0.10545754432678223 }, { "epoch": 1.9476318359375e-05, "model_forward_time": 0.02527928352355957, "step": 12764 }, { "epoch": 1.9476318359375e-05, "step": 12764, "training_step_time": 0.10514378547668457 }, { "epoch": 1.947784423828125e-05, "model_forward_time": 0.025167465209960938, "step": 12765 }, { "epoch": 1.947784423828125e-05, "step": 12765, "training_step_time": 0.10724425315856934 }, { "epoch": 1.94793701171875e-05, "model_forward_time": 0.025214195251464844, "step": 12766 }, { "epoch": 1.94793701171875e-05, "step": 12766, "training_step_time": 0.1123056411743164 }, { "epoch": 1.948089599609375e-05, "model_forward_time": 0.025623559951782227, "step": 12767 }, { "epoch": 1.948089599609375e-05, "step": 12767, "training_step_time": 0.11095952987670898 }, { "epoch": 1.9482421875e-05, "model_forward_time": 0.025394201278686523, "step": 12768 }, { "epoch": 1.9482421875e-05, "step": 12768, "training_step_time": 0.11238384246826172 }, { "epoch": 1.948394775390625e-05, "model_forward_time": 0.025046110153198242, "step": 12769 }, { "epoch": 1.948394775390625e-05, "step": 12769, "training_step_time": 0.11199808120727539 }, { "epoch": 1.94854736328125e-05, "grad_norm": 0.2459578514099121, "learning_rate": 6.613067540650886e-05, "loss": 0.0272, "step": 12770 }, { "epoch": 1.94854736328125e-05, "model_forward_time": 0.02509021759033203, "step": 12770 }, { "epoch": 1.94854736328125e-05, "step": 12770, "training_step_time": 0.17148971557617188 }, { "epoch": 1.948699951171875e-05, "model_forward_time": 0.02474212646484375, "step": 12771 }, { "epoch": 1.948699951171875e-05, "step": 12771, "training_step_time": 0.1065976619720459 }, { "epoch": 1.9488525390625e-05, "model_forward_time": 0.024770021438598633, "step": 12772 }, { "epoch": 1.9488525390625e-05, "step": 12772, "training_step_time": 0.11105108261108398 }, { "epoch": 1.949005126953125e-05, "model_forward_time": 0.02567124366760254, "step": 12773 }, { "epoch": 1.949005126953125e-05, "step": 12773, "training_step_time": 0.13022065162658691 }, { "epoch": 1.94915771484375e-05, "model_forward_time": 0.0250551700592041, "step": 12774 }, { "epoch": 1.94915771484375e-05, "step": 12774, "training_step_time": 0.12306451797485352 }, { "epoch": 1.949310302734375e-05, "model_forward_time": 0.025131702423095703, "step": 12775 }, { "epoch": 1.949310302734375e-05, "step": 12775, "training_step_time": 0.13038158416748047 }, { "epoch": 1.949462890625e-05, "model_forward_time": 0.025133132934570312, "step": 12776 }, { "epoch": 1.949462890625e-05, "step": 12776, "training_step_time": 0.858898401260376 }, { "epoch": 1.949615478515625e-05, "model_forward_time": 0.023734569549560547, "step": 12777 }, { "epoch": 1.949615478515625e-05, "step": 12777, "training_step_time": 0.20022892951965332 }, { "epoch": 1.94976806640625e-05, "model_forward_time": 0.025213241577148438, "step": 12778 }, { "epoch": 1.94976806640625e-05, "step": 12778, "training_step_time": 0.1919097900390625 }, { "epoch": 1.949920654296875e-05, "model_forward_time": 0.02719855308532715, "step": 12779 }, { "epoch": 1.949920654296875e-05, "step": 12779, "training_step_time": 0.11533904075622559 }, { "epoch": 1.9500732421875e-05, "grad_norm": 0.4277324974536896, "learning_rate": 6.607849694751977e-05, "loss": 0.0221, "step": 12780 }, { "epoch": 1.9500732421875e-05, "model_forward_time": 0.02475571632385254, "step": 12780 }, { "epoch": 1.9500732421875e-05, "step": 12780, "training_step_time": 0.10977649688720703 }, { "epoch": 1.950225830078125e-05, "model_forward_time": 0.02614569664001465, "step": 12781 }, { "epoch": 1.950225830078125e-05, "step": 12781, "training_step_time": 0.10756063461303711 }, { "epoch": 1.95037841796875e-05, "model_forward_time": 0.0253143310546875, "step": 12782 }, { "epoch": 1.95037841796875e-05, "step": 12782, "training_step_time": 0.10864901542663574 }, { "epoch": 1.950531005859375e-05, "model_forward_time": 0.025280475616455078, "step": 12783 }, { "epoch": 1.950531005859375e-05, "step": 12783, "training_step_time": 0.17014741897583008 }, { "epoch": 1.95068359375e-05, "model_forward_time": 0.024578094482421875, "step": 12784 }, { "epoch": 1.95068359375e-05, "step": 12784, "training_step_time": 0.12132477760314941 }, { "epoch": 1.950836181640625e-05, "model_forward_time": 0.024729013442993164, "step": 12785 }, { "epoch": 1.950836181640625e-05, "step": 12785, "training_step_time": 0.12656760215759277 }, { "epoch": 1.95098876953125e-05, "model_forward_time": 0.025323867797851562, "step": 12786 }, { "epoch": 1.95098876953125e-05, "step": 12786, "training_step_time": 0.1218869686126709 }, { "epoch": 1.951141357421875e-05, "model_forward_time": 0.02502918243408203, "step": 12787 }, { "epoch": 1.951141357421875e-05, "step": 12787, "training_step_time": 0.13781094551086426 }, { "epoch": 1.9512939453125e-05, "model_forward_time": 0.025066614151000977, "step": 12788 }, { "epoch": 1.9512939453125e-05, "step": 12788, "training_step_time": 0.11885905265808105 }, { "epoch": 1.951446533203125e-05, "model_forward_time": 0.02532362937927246, "step": 12789 }, { "epoch": 1.951446533203125e-05, "step": 12789, "training_step_time": 0.11675190925598145 }, { "epoch": 1.95159912109375e-05, "grad_norm": 0.517057478427887, "learning_rate": 6.602629895164081e-05, "loss": 0.026, "step": 12790 }, { "epoch": 1.95159912109375e-05, "model_forward_time": 0.02506852149963379, "step": 12790 }, { "epoch": 1.95159912109375e-05, "step": 12790, "training_step_time": 0.10750007629394531 }, { "epoch": 1.951751708984375e-05, "model_forward_time": 0.025145769119262695, "step": 12791 }, { "epoch": 1.951751708984375e-05, "step": 12791, "training_step_time": 0.10510873794555664 }, { "epoch": 1.951904296875e-05, "model_forward_time": 0.025026798248291016, "step": 12792 }, { "epoch": 1.951904296875e-05, "step": 12792, "training_step_time": 0.10835647583007812 }, { "epoch": 1.952056884765625e-05, "model_forward_time": 0.02512836456298828, "step": 12793 }, { "epoch": 1.952056884765625e-05, "step": 12793, "training_step_time": 0.10707235336303711 }, { "epoch": 1.95220947265625e-05, "model_forward_time": 0.025365829467773438, "step": 12794 }, { "epoch": 1.95220947265625e-05, "step": 12794, "training_step_time": 0.11254453659057617 }, { "epoch": 1.952362060546875e-05, "model_forward_time": 0.025501012802124023, "step": 12795 }, { "epoch": 1.952362060546875e-05, "step": 12795, "training_step_time": 0.10904836654663086 }, { "epoch": 1.9525146484375e-05, "model_forward_time": 0.025204181671142578, "step": 12796 }, { "epoch": 1.9525146484375e-05, "step": 12796, "training_step_time": 0.1091923713684082 }, { "epoch": 1.952667236328125e-05, "model_forward_time": 0.025196313858032227, "step": 12797 }, { "epoch": 1.952667236328125e-05, "step": 12797, "training_step_time": 0.10581684112548828 }, { "epoch": 1.95281982421875e-05, "model_forward_time": 0.02559971809387207, "step": 12798 }, { "epoch": 1.95281982421875e-05, "step": 12798, "training_step_time": 0.10824298858642578 }, { "epoch": 1.952972412109375e-05, "model_forward_time": 0.025490283966064453, "step": 12799 }, { "epoch": 1.952972412109375e-05, "step": 12799, "training_step_time": 0.10611939430236816 }, { "epoch": 1.953125e-05, "grad_norm": 0.2915656566619873, "learning_rate": 6.59740814822974e-05, "loss": 0.0198, "step": 12800 }, { "epoch": 1.953125e-05, "model_forward_time": 0.02503204345703125, "step": 12800 }, { "epoch": 1.953125e-05, "step": 12800, "training_step_time": 0.11137032508850098 }, { "epoch": 1.953277587890625e-05, "model_forward_time": 0.025264501571655273, "step": 12801 }, { "epoch": 1.953277587890625e-05, "step": 12801, "training_step_time": 0.10472679138183594 }, { "epoch": 1.95343017578125e-05, "model_forward_time": 0.024953603744506836, "step": 12802 }, { "epoch": 1.95343017578125e-05, "step": 12802, "training_step_time": 0.11614847183227539 }, { "epoch": 1.953582763671875e-05, "model_forward_time": 0.02570629119873047, "step": 12803 }, { "epoch": 1.953582763671875e-05, "step": 12803, "training_step_time": 0.10995006561279297 }, { "epoch": 1.9537353515625e-05, "model_forward_time": 0.025226593017578125, "step": 12804 }, { "epoch": 1.9537353515625e-05, "step": 12804, "training_step_time": 0.10994553565979004 }, { "epoch": 1.953887939453125e-05, "model_forward_time": 0.02480602264404297, "step": 12805 }, { "epoch": 1.953887939453125e-05, "step": 12805, "training_step_time": 0.10703444480895996 }, { "epoch": 1.95404052734375e-05, "model_forward_time": 0.025394439697265625, "step": 12806 }, { "epoch": 1.95404052734375e-05, "step": 12806, "training_step_time": 0.10557842254638672 }, { "epoch": 1.954193115234375e-05, "model_forward_time": 0.025169849395751953, "step": 12807 }, { "epoch": 1.954193115234375e-05, "step": 12807, "training_step_time": 0.10796713829040527 }, { "epoch": 1.954345703125e-05, "model_forward_time": 0.025191545486450195, "step": 12808 }, { "epoch": 1.954345703125e-05, "step": 12808, "training_step_time": 0.10591793060302734 }, { "epoch": 1.954498291015625e-05, "model_forward_time": 0.025400161743164062, "step": 12809 }, { "epoch": 1.954498291015625e-05, "step": 12809, "training_step_time": 0.10566401481628418 }, { "epoch": 1.95465087890625e-05, "grad_norm": 0.4159059524536133, "learning_rate": 6.592184460293877e-05, "loss": 0.0226, "step": 12810 }, { "epoch": 1.95465087890625e-05, "model_forward_time": 0.024941444396972656, "step": 12810 }, { "epoch": 1.95465087890625e-05, "step": 12810, "training_step_time": 0.10492515563964844 }, { "epoch": 1.954803466796875e-05, "model_forward_time": 0.02572798728942871, "step": 12811 }, { "epoch": 1.954803466796875e-05, "step": 12811, "training_step_time": 0.10578727722167969 }, { "epoch": 1.9549560546875e-05, "model_forward_time": 0.025561094284057617, "step": 12812 }, { "epoch": 1.9549560546875e-05, "step": 12812, "training_step_time": 0.13389849662780762 }, { "epoch": 1.955108642578125e-05, "model_forward_time": 0.0256807804107666, "step": 12813 }, { "epoch": 1.955108642578125e-05, "step": 12813, "training_step_time": 0.11229777336120605 }, { "epoch": 1.95526123046875e-05, "model_forward_time": 0.025319576263427734, "step": 12814 }, { "epoch": 1.95526123046875e-05, "step": 12814, "training_step_time": 0.13170266151428223 }, { "epoch": 1.955413818359375e-05, "model_forward_time": 0.024202823638916016, "step": 12815 }, { "epoch": 1.955413818359375e-05, "step": 12815, "training_step_time": 0.1668996810913086 }, { "epoch": 1.95556640625e-05, "model_forward_time": 0.024346590042114258, "step": 12816 }, { "epoch": 1.95556640625e-05, "step": 12816, "training_step_time": 0.18709206581115723 }, { "epoch": 1.955718994140625e-05, "model_forward_time": 0.024864912033081055, "step": 12817 }, { "epoch": 1.955718994140625e-05, "step": 12817, "training_step_time": 0.14777493476867676 }, { "epoch": 1.95587158203125e-05, "model_forward_time": 0.02342700958251953, "step": 12818 }, { "epoch": 1.95587158203125e-05, "step": 12818, "training_step_time": 0.13246893882751465 }, { "epoch": 1.956024169921875e-05, "model_forward_time": 0.025235652923583984, "step": 12819 }, { "epoch": 1.956024169921875e-05, "step": 12819, "training_step_time": 0.10498809814453125 }, { "epoch": 1.9561767578125e-05, "grad_norm": 0.42813757061958313, "learning_rate": 6.586958837703759e-05, "loss": 0.0253, "step": 12820 }, { "epoch": 1.9561767578125e-05, "model_forward_time": 0.02482295036315918, "step": 12820 }, { "epoch": 1.9561767578125e-05, "step": 12820, "training_step_time": 0.13930201530456543 }, { "epoch": 1.956329345703125e-05, "model_forward_time": 0.025035381317138672, "step": 12821 }, { "epoch": 1.956329345703125e-05, "step": 12821, "training_step_time": 0.1259157657623291 }, { "epoch": 1.95648193359375e-05, "model_forward_time": 0.025003910064697266, "step": 12822 }, { "epoch": 1.95648193359375e-05, "step": 12822, "training_step_time": 0.11384224891662598 }, { "epoch": 1.956634521484375e-05, "model_forward_time": 0.025055646896362305, "step": 12823 }, { "epoch": 1.956634521484375e-05, "step": 12823, "training_step_time": 0.16033697128295898 }, { "epoch": 1.956787109375e-05, "model_forward_time": 0.024910926818847656, "step": 12824 }, { "epoch": 1.956787109375e-05, "step": 12824, "training_step_time": 0.17279911041259766 }, { "epoch": 1.956939697265625e-05, "model_forward_time": 0.024599552154541016, "step": 12825 }, { "epoch": 1.956939697265625e-05, "step": 12825, "training_step_time": 0.11660957336425781 }, { "epoch": 1.95709228515625e-05, "model_forward_time": 0.024692535400390625, "step": 12826 }, { "epoch": 1.95709228515625e-05, "step": 12826, "training_step_time": 0.19796490669250488 }, { "epoch": 1.957244873046875e-05, "model_forward_time": 0.024659395217895508, "step": 12827 }, { "epoch": 1.957244873046875e-05, "step": 12827, "training_step_time": 0.10709738731384277 }, { "epoch": 1.9573974609375e-05, "model_forward_time": 0.025322914123535156, "step": 12828 }, { "epoch": 1.9573974609375e-05, "step": 12828, "training_step_time": 0.10874319076538086 }, { "epoch": 1.957550048828125e-05, "model_forward_time": 0.024451017379760742, "step": 12829 }, { "epoch": 1.957550048828125e-05, "step": 12829, "training_step_time": 0.15697884559631348 }, { "epoch": 1.95770263671875e-05, "grad_norm": 0.4591871500015259, "learning_rate": 6.581731286809014e-05, "loss": 0.0215, "step": 12830 }, { "epoch": 1.95770263671875e-05, "model_forward_time": 0.024854183197021484, "step": 12830 }, { "epoch": 1.95770263671875e-05, "step": 12830, "training_step_time": 0.1105952262878418 }, { "epoch": 1.957855224609375e-05, "model_forward_time": 0.025530099868774414, "step": 12831 }, { "epoch": 1.957855224609375e-05, "step": 12831, "training_step_time": 0.13998842239379883 }, { "epoch": 1.9580078125e-05, "model_forward_time": 0.026187896728515625, "step": 12832 }, { "epoch": 1.9580078125e-05, "step": 12832, "training_step_time": 0.15935873985290527 }, { "epoch": 1.958160400390625e-05, "model_forward_time": 0.024337053298950195, "step": 12833 }, { "epoch": 1.958160400390625e-05, "step": 12833, "training_step_time": 0.2170407772064209 }, { "epoch": 1.95831298828125e-05, "model_forward_time": 0.024619102478027344, "step": 12834 }, { "epoch": 1.95831298828125e-05, "step": 12834, "training_step_time": 0.1117548942565918 }, { "epoch": 1.958465576171875e-05, "model_forward_time": 0.02537822723388672, "step": 12835 }, { "epoch": 1.958465576171875e-05, "step": 12835, "training_step_time": 0.10610413551330566 }, { "epoch": 1.9586181640625e-05, "model_forward_time": 0.025581836700439453, "step": 12836 }, { "epoch": 1.9586181640625e-05, "step": 12836, "training_step_time": 0.10433006286621094 }, { "epoch": 1.958770751953125e-05, "model_forward_time": 0.025313854217529297, "step": 12837 }, { "epoch": 1.958770751953125e-05, "step": 12837, "training_step_time": 0.10630679130554199 }, { "epoch": 1.95892333984375e-05, "model_forward_time": 0.025336503982543945, "step": 12838 }, { "epoch": 1.95892333984375e-05, "step": 12838, "training_step_time": 0.10774564743041992 }, { "epoch": 1.959075927734375e-05, "model_forward_time": 0.025085926055908203, "step": 12839 }, { "epoch": 1.959075927734375e-05, "step": 12839, "training_step_time": 0.10956311225891113 }, { "epoch": 1.959228515625e-05, "grad_norm": 0.20965513586997986, "learning_rate": 6.576501813961609e-05, "loss": 0.0146, "step": 12840 }, { "epoch": 1.959228515625e-05, "model_forward_time": 0.025368213653564453, "step": 12840 }, { "epoch": 1.959228515625e-05, "step": 12840, "training_step_time": 0.10538387298583984 }, { "epoch": 1.959381103515625e-05, "model_forward_time": 0.025298118591308594, "step": 12841 }, { "epoch": 1.959381103515625e-05, "step": 12841, "training_step_time": 0.10614824295043945 }, { "epoch": 1.95953369140625e-05, "model_forward_time": 0.025177955627441406, "step": 12842 }, { "epoch": 1.95953369140625e-05, "step": 12842, "training_step_time": 0.10755681991577148 }, { "epoch": 1.959686279296875e-05, "model_forward_time": 0.025674819946289062, "step": 12843 }, { "epoch": 1.959686279296875e-05, "step": 12843, "training_step_time": 0.10666608810424805 }, { "epoch": 1.9598388671875e-05, "model_forward_time": 0.02536177635192871, "step": 12844 }, { "epoch": 1.9598388671875e-05, "step": 12844, "training_step_time": 0.1089329719543457 }, { "epoch": 1.959991455078125e-05, "model_forward_time": 0.02554941177368164, "step": 12845 }, { "epoch": 1.959991455078125e-05, "step": 12845, "training_step_time": 0.1053915023803711 }, { "epoch": 1.96014404296875e-05, "model_forward_time": 0.02528834342956543, "step": 12846 }, { "epoch": 1.96014404296875e-05, "step": 12846, "training_step_time": 0.10483837127685547 }, { "epoch": 1.960296630859375e-05, "model_forward_time": 0.02777695655822754, "step": 12847 }, { "epoch": 1.960296630859375e-05, "step": 12847, "training_step_time": 0.1075289249420166 }, { "epoch": 1.96044921875e-05, "model_forward_time": 0.02497386932373047, "step": 12848 }, { "epoch": 1.96044921875e-05, "step": 12848, "training_step_time": 0.10898089408874512 }, { "epoch": 1.960601806640625e-05, "model_forward_time": 0.025057077407836914, "step": 12849 }, { "epoch": 1.960601806640625e-05, "step": 12849, "training_step_time": 0.1041414737701416 }, { "epoch": 1.96075439453125e-05, "grad_norm": 0.2478886991739273, "learning_rate": 6.571270425515843e-05, "loss": 0.0159, "step": 12850 }, { "epoch": 1.96075439453125e-05, "model_forward_time": 0.02523636817932129, "step": 12850 }, { "epoch": 1.96075439453125e-05, "step": 12850, "training_step_time": 0.10432600975036621 }, { "epoch": 1.960906982421875e-05, "model_forward_time": 0.024936437606811523, "step": 12851 }, { "epoch": 1.960906982421875e-05, "step": 12851, "training_step_time": 0.10858535766601562 }, { "epoch": 1.9610595703125e-05, "model_forward_time": 0.025166988372802734, "step": 12852 }, { "epoch": 1.9610595703125e-05, "step": 12852, "training_step_time": 0.10373783111572266 }, { "epoch": 1.961212158203125e-05, "model_forward_time": 0.027532100677490234, "step": 12853 }, { "epoch": 1.961212158203125e-05, "step": 12853, "training_step_time": 0.1087651252746582 }, { "epoch": 1.96136474609375e-05, "model_forward_time": 0.025132417678833008, "step": 12854 }, { "epoch": 1.96136474609375e-05, "step": 12854, "training_step_time": 0.1066291332244873 }, { "epoch": 1.961517333984375e-05, "model_forward_time": 0.026541948318481445, "step": 12855 }, { "epoch": 1.961517333984375e-05, "step": 12855, "training_step_time": 0.10902881622314453 }, { "epoch": 1.961669921875e-05, "model_forward_time": 0.02532339096069336, "step": 12856 }, { "epoch": 1.961669921875e-05, "step": 12856, "training_step_time": 0.10817360877990723 }, { "epoch": 1.961822509765625e-05, "model_forward_time": 0.02491903305053711, "step": 12857 }, { "epoch": 1.961822509765625e-05, "step": 12857, "training_step_time": 0.2093358039855957 }, { "epoch": 1.96197509765625e-05, "model_forward_time": 0.025052309036254883, "step": 12858 }, { "epoch": 1.96197509765625e-05, "step": 12858, "training_step_time": 0.11860346794128418 }, { "epoch": 1.962127685546875e-05, "model_forward_time": 0.02716660499572754, "step": 12859 }, { "epoch": 1.962127685546875e-05, "step": 12859, "training_step_time": 0.10867762565612793 }, { "epoch": 1.9622802734375e-05, "grad_norm": 0.467568039894104, "learning_rate": 6.56603712782835e-05, "loss": 0.0229, "step": 12860 }, { "epoch": 1.9622802734375e-05, "model_forward_time": 0.025312185287475586, "step": 12860 }, { "epoch": 1.9622802734375e-05, "step": 12860, "training_step_time": 0.11243152618408203 }, { "epoch": 1.962432861328125e-05, "model_forward_time": 0.025675535202026367, "step": 12861 }, { "epoch": 1.962432861328125e-05, "step": 12861, "training_step_time": 0.13793659210205078 }, { "epoch": 1.96258544921875e-05, "model_forward_time": 0.02503514289855957, "step": 12862 }, { "epoch": 1.96258544921875e-05, "step": 12862, "training_step_time": 0.11337804794311523 }, { "epoch": 1.962738037109375e-05, "model_forward_time": 0.025648832321166992, "step": 12863 }, { "epoch": 1.962738037109375e-05, "step": 12863, "training_step_time": 0.11484026908874512 }, { "epoch": 1.962890625e-05, "model_forward_time": 0.02514338493347168, "step": 12864 }, { "epoch": 1.962890625e-05, "step": 12864, "training_step_time": 0.10955953598022461 }, { "epoch": 1.963043212890625e-05, "model_forward_time": 0.024783849716186523, "step": 12865 }, { "epoch": 1.963043212890625e-05, "step": 12865, "training_step_time": 0.10452628135681152 }, { "epoch": 1.96319580078125e-05, "model_forward_time": 0.024631738662719727, "step": 12866 }, { "epoch": 1.96319580078125e-05, "step": 12866, "training_step_time": 0.1206357479095459 }, { "epoch": 1.963348388671875e-05, "model_forward_time": 0.024579286575317383, "step": 12867 }, { "epoch": 1.963348388671875e-05, "step": 12867, "training_step_time": 0.22048115730285645 }, { "epoch": 1.9635009765625e-05, "model_forward_time": 0.025398731231689453, "step": 12868 }, { "epoch": 1.9635009765625e-05, "step": 12868, "training_step_time": 0.12003278732299805 }, { "epoch": 1.963653564453125e-05, "model_forward_time": 0.025040626525878906, "step": 12869 }, { "epoch": 1.963653564453125e-05, "step": 12869, "training_step_time": 0.11315274238586426 }, { "epoch": 1.96380615234375e-05, "grad_norm": 0.36412978172302246, "learning_rate": 6.56080192725808e-05, "loss": 0.0156, "step": 12870 }, { "epoch": 1.96380615234375e-05, "model_forward_time": 0.025540590286254883, "step": 12870 }, { "epoch": 1.96380615234375e-05, "step": 12870, "training_step_time": 0.10921645164489746 }, { "epoch": 1.963958740234375e-05, "model_forward_time": 0.02552938461303711, "step": 12871 }, { "epoch": 1.963958740234375e-05, "step": 12871, "training_step_time": 0.10552024841308594 }, { "epoch": 1.964111328125e-05, "model_forward_time": 0.025791645050048828, "step": 12872 }, { "epoch": 1.964111328125e-05, "step": 12872, "training_step_time": 0.1988506317138672 }, { "epoch": 1.964263916015625e-05, "model_forward_time": 0.024656295776367188, "step": 12873 }, { "epoch": 1.964263916015625e-05, "step": 12873, "training_step_time": 0.10237288475036621 }, { "epoch": 1.96441650390625e-05, "model_forward_time": 0.024573564529418945, "step": 12874 }, { "epoch": 1.96441650390625e-05, "step": 12874, "training_step_time": 0.1059579849243164 }, { "epoch": 1.964569091796875e-05, "model_forward_time": 0.025198698043823242, "step": 12875 }, { "epoch": 1.964569091796875e-05, "step": 12875, "training_step_time": 0.16658949851989746 }, { "epoch": 1.9647216796875e-05, "model_forward_time": 0.024290084838867188, "step": 12876 }, { "epoch": 1.9647216796875e-05, "step": 12876, "training_step_time": 0.1685466766357422 }, { "epoch": 1.964874267578125e-05, "model_forward_time": 0.024353504180908203, "step": 12877 }, { "epoch": 1.964874267578125e-05, "step": 12877, "training_step_time": 0.1091301441192627 }, { "epoch": 1.96502685546875e-05, "model_forward_time": 0.024603605270385742, "step": 12878 }, { "epoch": 1.96502685546875e-05, "step": 12878, "training_step_time": 0.12238740921020508 }, { "epoch": 1.965179443359375e-05, "model_forward_time": 0.025193214416503906, "step": 12879 }, { "epoch": 1.965179443359375e-05, "step": 12879, "training_step_time": 0.10371565818786621 }, { "epoch": 1.96533203125e-05, "grad_norm": 0.31761202216148376, "learning_rate": 6.555564830166293e-05, "loss": 0.0142, "step": 12880 }, { "epoch": 1.96533203125e-05, "model_forward_time": 0.025135278701782227, "step": 12880 }, { "epoch": 1.96533203125e-05, "step": 12880, "training_step_time": 0.11572647094726562 }, { "epoch": 1.965484619140625e-05, "model_forward_time": 0.025266408920288086, "step": 12881 }, { "epoch": 1.965484619140625e-05, "step": 12881, "training_step_time": 0.1117708683013916 }, { "epoch": 1.96563720703125e-05, "model_forward_time": 0.02540874481201172, "step": 12882 }, { "epoch": 1.96563720703125e-05, "step": 12882, "training_step_time": 0.18756508827209473 }, { "epoch": 1.965789794921875e-05, "model_forward_time": 0.024507761001586914, "step": 12883 }, { "epoch": 1.965789794921875e-05, "step": 12883, "training_step_time": 0.2094886302947998 }, { "epoch": 1.9659423828125e-05, "model_forward_time": 0.024311065673828125, "step": 12884 }, { "epoch": 1.9659423828125e-05, "step": 12884, "training_step_time": 0.20440292358398438 }, { "epoch": 1.966094970703125e-05, "model_forward_time": 0.02422308921813965, "step": 12885 }, { "epoch": 1.966094970703125e-05, "step": 12885, "training_step_time": 0.19777631759643555 }, { "epoch": 1.96624755859375e-05, "model_forward_time": 0.02491903305053711, "step": 12886 }, { "epoch": 1.96624755859375e-05, "step": 12886, "training_step_time": 0.18086981773376465 }, { "epoch": 1.966400146484375e-05, "model_forward_time": 0.02459096908569336, "step": 12887 }, { "epoch": 1.966400146484375e-05, "step": 12887, "training_step_time": 0.1700887680053711 }, { "epoch": 1.966552734375e-05, "model_forward_time": 0.024539709091186523, "step": 12888 }, { "epoch": 1.966552734375e-05, "step": 12888, "training_step_time": 0.16908884048461914 }, { "epoch": 1.966705322265625e-05, "model_forward_time": 0.024845123291015625, "step": 12889 }, { "epoch": 1.966705322265625e-05, "step": 12889, "training_step_time": 0.10823464393615723 }, { "epoch": 1.96685791015625e-05, "grad_norm": 0.3218054473400116, "learning_rate": 6.550325842916559e-05, "loss": 0.0189, "step": 12890 }, { "epoch": 1.96685791015625e-05, "model_forward_time": 0.024412155151367188, "step": 12890 }, { "epoch": 1.96685791015625e-05, "step": 12890, "training_step_time": 0.10080862045288086 }, { "epoch": 1.967010498046875e-05, "model_forward_time": 0.025099992752075195, "step": 12891 }, { "epoch": 1.967010498046875e-05, "step": 12891, "training_step_time": 0.10512280464172363 }, { "epoch": 1.9671630859375e-05, "model_forward_time": 0.025232553482055664, "step": 12892 }, { "epoch": 1.9671630859375e-05, "step": 12892, "training_step_time": 0.10602259635925293 }, { "epoch": 1.967315673828125e-05, "model_forward_time": 0.02545332908630371, "step": 12893 }, { "epoch": 1.967315673828125e-05, "step": 12893, "training_step_time": 0.10567760467529297 }, { "epoch": 1.96746826171875e-05, "model_forward_time": 0.02527165412902832, "step": 12894 }, { "epoch": 1.96746826171875e-05, "step": 12894, "training_step_time": 0.10685157775878906 }, { "epoch": 1.967620849609375e-05, "model_forward_time": 0.025189638137817383, "step": 12895 }, { "epoch": 1.967620849609375e-05, "step": 12895, "training_step_time": 0.10503411293029785 }, { "epoch": 1.9677734375e-05, "model_forward_time": 0.02530384063720703, "step": 12896 }, { "epoch": 1.9677734375e-05, "step": 12896, "training_step_time": 0.1059272289276123 }, { "epoch": 1.967926025390625e-05, "model_forward_time": 0.024991989135742188, "step": 12897 }, { "epoch": 1.967926025390625e-05, "step": 12897, "training_step_time": 0.10413217544555664 }, { "epoch": 1.96807861328125e-05, "model_forward_time": 0.02576732635498047, "step": 12898 }, { "epoch": 1.96807861328125e-05, "step": 12898, "training_step_time": 0.10917425155639648 }, { "epoch": 1.968231201171875e-05, "model_forward_time": 0.025289535522460938, "step": 12899 }, { "epoch": 1.968231201171875e-05, "step": 12899, "training_step_time": 0.10906529426574707 }, { "epoch": 1.9683837890625e-05, "grad_norm": 0.30722227692604065, "learning_rate": 6.545084971874738e-05, "loss": 0.0278, "step": 12900 }, { "epoch": 1.9683837890625e-05, "model_forward_time": 0.025529146194458008, "step": 12900 }, { "epoch": 1.9683837890625e-05, "step": 12900, "training_step_time": 0.10883545875549316 }, { "epoch": 1.968536376953125e-05, "model_forward_time": 0.026594161987304688, "step": 12901 }, { "epoch": 1.968536376953125e-05, "step": 12901, "training_step_time": 0.11978411674499512 }, { "epoch": 1.96868896484375e-05, "model_forward_time": 0.025186538696289062, "step": 12902 }, { "epoch": 1.96868896484375e-05, "step": 12902, "training_step_time": 0.10829877853393555 }, { "epoch": 1.968841552734375e-05, "model_forward_time": 0.02528524398803711, "step": 12903 }, { "epoch": 1.968841552734375e-05, "step": 12903, "training_step_time": 0.1163630485534668 }, { "epoch": 1.968994140625e-05, "model_forward_time": 0.025546789169311523, "step": 12904 }, { "epoch": 1.968994140625e-05, "step": 12904, "training_step_time": 0.1234285831451416 }, { "epoch": 1.969146728515625e-05, "model_forward_time": 0.024251461029052734, "step": 12905 }, { "epoch": 1.969146728515625e-05, "step": 12905, "training_step_time": 0.11604690551757812 }, { "epoch": 1.96929931640625e-05, "model_forward_time": 0.02507758140563965, "step": 12906 }, { "epoch": 1.96929931640625e-05, "step": 12906, "training_step_time": 0.11973786354064941 }, { "epoch": 1.969451904296875e-05, "model_forward_time": 0.025205135345458984, "step": 12907 }, { "epoch": 1.969451904296875e-05, "step": 12907, "training_step_time": 0.11592221260070801 }, { "epoch": 1.9696044921875e-05, "model_forward_time": 0.02539229393005371, "step": 12908 }, { "epoch": 1.9696044921875e-05, "step": 12908, "training_step_time": 0.10541582107543945 }, { "epoch": 1.969757080078125e-05, "model_forward_time": 0.028443098068237305, "step": 12909 }, { "epoch": 1.969757080078125e-05, "step": 12909, "training_step_time": 0.1079854965209961 }, { "epoch": 1.96990966796875e-05, "grad_norm": 0.2507993280887604, "learning_rate": 6.539842223408984e-05, "loss": 0.0146, "step": 12910 }, { "epoch": 1.96990966796875e-05, "model_forward_time": 0.025532960891723633, "step": 12910 }, { "epoch": 1.96990966796875e-05, "step": 12910, "training_step_time": 0.1952366828918457 }, { "epoch": 1.970062255859375e-05, "model_forward_time": 0.024492263793945312, "step": 12911 }, { "epoch": 1.970062255859375e-05, "step": 12911, "training_step_time": 0.1786959171295166 }, { "epoch": 1.97021484375e-05, "model_forward_time": 0.02436089515686035, "step": 12912 }, { "epoch": 1.97021484375e-05, "step": 12912, "training_step_time": 0.17003345489501953 }, { "epoch": 1.970367431640625e-05, "model_forward_time": 0.02440667152404785, "step": 12913 }, { "epoch": 1.970367431640625e-05, "step": 12913, "training_step_time": 0.1667921543121338 }, { "epoch": 1.97052001953125e-05, "model_forward_time": 0.02492809295654297, "step": 12914 }, { "epoch": 1.97052001953125e-05, "step": 12914, "training_step_time": 0.17603302001953125 }, { "epoch": 1.970672607421875e-05, "model_forward_time": 0.024754047393798828, "step": 12915 }, { "epoch": 1.970672607421875e-05, "step": 12915, "training_step_time": 0.10221266746520996 }, { "epoch": 1.9708251953125e-05, "model_forward_time": 0.024989843368530273, "step": 12916 }, { "epoch": 1.9708251953125e-05, "step": 12916, "training_step_time": 0.10599684715270996 }, { "epoch": 1.970977783203125e-05, "model_forward_time": 0.0256960391998291, "step": 12917 }, { "epoch": 1.970977783203125e-05, "step": 12917, "training_step_time": 0.10503268241882324 }, { "epoch": 1.97113037109375e-05, "model_forward_time": 0.025318622589111328, "step": 12918 }, { "epoch": 1.97113037109375e-05, "step": 12918, "training_step_time": 0.10567975044250488 }, { "epoch": 1.971282958984375e-05, "model_forward_time": 0.02509903907775879, "step": 12919 }, { "epoch": 1.971282958984375e-05, "step": 12919, "training_step_time": 0.1442263126373291 }, { "epoch": 1.971435546875e-05, "grad_norm": 0.22766125202178955, "learning_rate": 6.534597603889732e-05, "loss": 0.01, "step": 12920 }, { "epoch": 1.971435546875e-05, "model_forward_time": 0.024898290634155273, "step": 12920 }, { "epoch": 1.971435546875e-05, "step": 12920, "training_step_time": 0.12909150123596191 }, { "epoch": 1.971588134765625e-05, "model_forward_time": 0.024796247482299805, "step": 12921 }, { "epoch": 1.971588134765625e-05, "step": 12921, "training_step_time": 0.12498784065246582 }, { "epoch": 1.97174072265625e-05, "model_forward_time": 0.02506399154663086, "step": 12922 }, { "epoch": 1.97174072265625e-05, "step": 12922, "training_step_time": 0.11887478828430176 }, { "epoch": 1.971893310546875e-05, "model_forward_time": 0.025819063186645508, "step": 12923 }, { "epoch": 1.971893310546875e-05, "step": 12923, "training_step_time": 0.1759481430053711 }, { "epoch": 1.9720458984375e-05, "model_forward_time": 0.024941682815551758, "step": 12924 }, { "epoch": 1.9720458984375e-05, "step": 12924, "training_step_time": 0.13159751892089844 }, { "epoch": 1.972198486328125e-05, "model_forward_time": 0.024305105209350586, "step": 12925 }, { "epoch": 1.972198486328125e-05, "step": 12925, "training_step_time": 0.1297473907470703 }, { "epoch": 1.97235107421875e-05, "model_forward_time": 0.02503490447998047, "step": 12926 }, { "epoch": 1.97235107421875e-05, "step": 12926, "training_step_time": 0.12967920303344727 }, { "epoch": 1.972503662109375e-05, "model_forward_time": 0.023729324340820312, "step": 12927 }, { "epoch": 1.972503662109375e-05, "step": 12927, "training_step_time": 0.10900282859802246 }, { "epoch": 1.97265625e-05, "model_forward_time": 0.02577948570251465, "step": 12928 }, { "epoch": 1.97265625e-05, "step": 12928, "training_step_time": 0.10805416107177734 }, { "epoch": 1.972808837890625e-05, "model_forward_time": 0.025420427322387695, "step": 12929 }, { "epoch": 1.972808837890625e-05, "step": 12929, "training_step_time": 0.10832905769348145 }, { "epoch": 1.97296142578125e-05, "grad_norm": 0.24631604552268982, "learning_rate": 6.529351119689688e-05, "loss": 0.0164, "step": 12930 }, { "epoch": 1.97296142578125e-05, "model_forward_time": 0.025557279586791992, "step": 12930 }, { "epoch": 1.97296142578125e-05, "step": 12930, "training_step_time": 0.10869216918945312 }, { "epoch": 1.973114013671875e-05, "model_forward_time": 0.025218963623046875, "step": 12931 }, { "epoch": 1.973114013671875e-05, "step": 12931, "training_step_time": 0.1116631031036377 }, { "epoch": 1.9732666015625e-05, "model_forward_time": 0.026793718338012695, "step": 12932 }, { "epoch": 1.9732666015625e-05, "step": 12932, "training_step_time": 0.10741233825683594 }, { "epoch": 1.973419189453125e-05, "model_forward_time": 0.02524876594543457, "step": 12933 }, { "epoch": 1.973419189453125e-05, "step": 12933, "training_step_time": 0.10444450378417969 }, { "epoch": 1.97357177734375e-05, "model_forward_time": 0.02561497688293457, "step": 12934 }, { "epoch": 1.97357177734375e-05, "step": 12934, "training_step_time": 0.10614323616027832 }, { "epoch": 1.973724365234375e-05, "model_forward_time": 0.025424957275390625, "step": 12935 }, { "epoch": 1.973724365234375e-05, "step": 12935, "training_step_time": 0.1080465316772461 }, { "epoch": 1.973876953125e-05, "model_forward_time": 0.02511453628540039, "step": 12936 }, { "epoch": 1.973876953125e-05, "step": 12936, "training_step_time": 0.10456657409667969 }, { "epoch": 1.974029541015625e-05, "model_forward_time": 0.025657176971435547, "step": 12937 }, { "epoch": 1.974029541015625e-05, "step": 12937, "training_step_time": 0.10506176948547363 }, { "epoch": 1.97418212890625e-05, "model_forward_time": 0.02539372444152832, "step": 12938 }, { "epoch": 1.97418212890625e-05, "step": 12938, "training_step_time": 0.10872387886047363 }, { "epoch": 1.974334716796875e-05, "model_forward_time": 0.026163578033447266, "step": 12939 }, { "epoch": 1.974334716796875e-05, "step": 12939, "training_step_time": 0.10485124588012695 }, { "epoch": 1.9744873046875e-05, "grad_norm": 0.33000293374061584, "learning_rate": 6.524102777183825e-05, "loss": 0.0172, "step": 12940 }, { "epoch": 1.9744873046875e-05, "model_forward_time": 0.025562286376953125, "step": 12940 }, { "epoch": 1.9744873046875e-05, "step": 12940, "training_step_time": 0.10541343688964844 }, { "epoch": 1.974639892578125e-05, "model_forward_time": 0.02530384063720703, "step": 12941 }, { "epoch": 1.974639892578125e-05, "step": 12941, "training_step_time": 0.10480093955993652 }, { "epoch": 1.97479248046875e-05, "model_forward_time": 0.027099609375, "step": 12942 }, { "epoch": 1.97479248046875e-05, "step": 12942, "training_step_time": 0.17743587493896484 }, { "epoch": 1.974945068359375e-05, "model_forward_time": 0.025362491607666016, "step": 12943 }, { "epoch": 1.974945068359375e-05, "step": 12943, "training_step_time": 0.20627999305725098 }, { "epoch": 1.97509765625e-05, "model_forward_time": 0.025545120239257812, "step": 12944 }, { "epoch": 1.97509765625e-05, "step": 12944, "training_step_time": 0.20525312423706055 }, { "epoch": 1.975250244140625e-05, "model_forward_time": 0.024981021881103516, "step": 12945 }, { "epoch": 1.975250244140625e-05, "step": 12945, "training_step_time": 0.1963975429534912 }, { "epoch": 1.97540283203125e-05, "model_forward_time": 0.024600982666015625, "step": 12946 }, { "epoch": 1.97540283203125e-05, "step": 12946, "training_step_time": 0.1828014850616455 }, { "epoch": 1.975555419921875e-05, "model_forward_time": 0.024325132369995117, "step": 12947 }, { "epoch": 1.975555419921875e-05, "step": 12947, "training_step_time": 0.20746731758117676 }, { "epoch": 1.9757080078125e-05, "model_forward_time": 0.0243527889251709, "step": 12948 }, { "epoch": 1.9757080078125e-05, "step": 12948, "training_step_time": 0.16481661796569824 }, { "epoch": 1.975860595703125e-05, "model_forward_time": 0.024631261825561523, "step": 12949 }, { "epoch": 1.975860595703125e-05, "step": 12949, "training_step_time": 0.18009614944458008 }, { "epoch": 1.97601318359375e-05, "grad_norm": 0.2345770299434662, "learning_rate": 6.518852582749373e-05, "loss": 0.0097, "step": 12950 }, { "epoch": 1.97601318359375e-05, "model_forward_time": 0.0279083251953125, "step": 12950 }, { "epoch": 1.97601318359375e-05, "step": 12950, "training_step_time": 0.14490294456481934 }, { "epoch": 1.976165771484375e-05, "model_forward_time": 0.02529597282409668, "step": 12951 }, { "epoch": 1.976165771484375e-05, "step": 12951, "training_step_time": 0.10426115989685059 }, { "epoch": 1.976318359375e-05, "model_forward_time": 0.025103092193603516, "step": 12952 }, { "epoch": 1.976318359375e-05, "step": 12952, "training_step_time": 0.12754106521606445 }, { "epoch": 1.976470947265625e-05, "model_forward_time": 0.025542736053466797, "step": 12953 }, { "epoch": 1.976470947265625e-05, "step": 12953, "training_step_time": 0.12406206130981445 }, { "epoch": 1.97662353515625e-05, "model_forward_time": 0.024995088577270508, "step": 12954 }, { "epoch": 1.97662353515625e-05, "step": 12954, "training_step_time": 0.1104421615600586 }, { "epoch": 1.976776123046875e-05, "model_forward_time": 0.025180578231811523, "step": 12955 }, { "epoch": 1.976776123046875e-05, "step": 12955, "training_step_time": 0.13296890258789062 }, { "epoch": 1.9769287109375e-05, "model_forward_time": 0.025307416915893555, "step": 12956 }, { "epoch": 1.9769287109375e-05, "step": 12956, "training_step_time": 0.12666916847229004 }, { "epoch": 1.977081298828125e-05, "model_forward_time": 0.02503514289855957, "step": 12957 }, { "epoch": 1.977081298828125e-05, "step": 12957, "training_step_time": 0.19740962982177734 }, { "epoch": 1.97723388671875e-05, "model_forward_time": 0.02440166473388672, "step": 12958 }, { "epoch": 1.97723388671875e-05, "step": 12958, "training_step_time": 0.11558794975280762 }, { "epoch": 1.977386474609375e-05, "model_forward_time": 0.024742603302001953, "step": 12959 }, { "epoch": 1.977386474609375e-05, "step": 12959, "training_step_time": 0.11440348625183105 }, { "epoch": 1.9775390625e-05, "grad_norm": 0.49542027711868286, "learning_rate": 6.513600542765817e-05, "loss": 0.0135, "step": 12960 }, { "epoch": 1.9775390625e-05, "model_forward_time": 0.025341510772705078, "step": 12960 }, { "epoch": 1.9775390625e-05, "step": 12960, "training_step_time": 0.11275434494018555 }, { "epoch": 1.977691650390625e-05, "model_forward_time": 0.02537083625793457, "step": 12961 }, { "epoch": 1.977691650390625e-05, "step": 12961, "training_step_time": 0.11256909370422363 }, { "epoch": 1.97784423828125e-05, "model_forward_time": 0.025336027145385742, "step": 12962 }, { "epoch": 1.97784423828125e-05, "step": 12962, "training_step_time": 0.15708160400390625 }, { "epoch": 1.977996826171875e-05, "model_forward_time": 0.024913787841796875, "step": 12963 }, { "epoch": 1.977996826171875e-05, "step": 12963, "training_step_time": 0.12293410301208496 }, { "epoch": 1.9781494140625e-05, "model_forward_time": 0.024710893630981445, "step": 12964 }, { "epoch": 1.9781494140625e-05, "step": 12964, "training_step_time": 0.13669514656066895 }, { "epoch": 1.978302001953125e-05, "model_forward_time": 0.026203393936157227, "step": 12965 }, { "epoch": 1.978302001953125e-05, "step": 12965, "training_step_time": 0.15183568000793457 }, { "epoch": 1.97845458984375e-05, "model_forward_time": 0.024552106857299805, "step": 12966 }, { "epoch": 1.97845458984375e-05, "step": 12966, "training_step_time": 0.17336392402648926 }, { "epoch": 1.978607177734375e-05, "model_forward_time": 0.025058269500732422, "step": 12967 }, { "epoch": 1.978607177734375e-05, "step": 12967, "training_step_time": 0.16536736488342285 }, { "epoch": 1.978759765625e-05, "model_forward_time": 0.026524782180786133, "step": 12968 }, { "epoch": 1.978759765625e-05, "step": 12968, "training_step_time": 0.11164498329162598 }, { "epoch": 1.978912353515625e-05, "model_forward_time": 0.024999141693115234, "step": 12969 }, { "epoch": 1.978912353515625e-05, "step": 12969, "training_step_time": 0.1055443286895752 }, { "epoch": 1.97906494140625e-05, "grad_norm": 0.3491387665271759, "learning_rate": 6.508346663614878e-05, "loss": 0.0139, "step": 12970 }, { "epoch": 1.97906494140625e-05, "model_forward_time": 0.02531147003173828, "step": 12970 }, { "epoch": 1.97906494140625e-05, "step": 12970, "training_step_time": 0.10434269905090332 }, { "epoch": 1.979217529296875e-05, "model_forward_time": 0.025009870529174805, "step": 12971 }, { "epoch": 1.979217529296875e-05, "step": 12971, "training_step_time": 0.10943722724914551 }, { "epoch": 1.9793701171875e-05, "model_forward_time": 0.025560379028320312, "step": 12972 }, { "epoch": 1.9793701171875e-05, "step": 12972, "training_step_time": 0.10622382164001465 }, { "epoch": 1.979522705078125e-05, "model_forward_time": 0.025025606155395508, "step": 12973 }, { "epoch": 1.979522705078125e-05, "step": 12973, "training_step_time": 0.10638260841369629 }, { "epoch": 1.97967529296875e-05, "model_forward_time": 0.025577545166015625, "step": 12974 }, { "epoch": 1.97967529296875e-05, "step": 12974, "training_step_time": 0.10578012466430664 }, { "epoch": 1.979827880859375e-05, "model_forward_time": 0.02516031265258789, "step": 12975 }, { "epoch": 1.979827880859375e-05, "step": 12975, "training_step_time": 0.1048879623413086 }, { "epoch": 1.97998046875e-05, "model_forward_time": 0.025217771530151367, "step": 12976 }, { "epoch": 1.97998046875e-05, "step": 12976, "training_step_time": 0.10461187362670898 }, { "epoch": 1.980133056640625e-05, "model_forward_time": 0.02544569969177246, "step": 12977 }, { "epoch": 1.980133056640625e-05, "step": 12977, "training_step_time": 0.10429859161376953 }, { "epoch": 1.98028564453125e-05, "model_forward_time": 0.025442123413085938, "step": 12978 }, { "epoch": 1.98028564453125e-05, "step": 12978, "training_step_time": 0.1060633659362793 }, { "epoch": 1.980438232421875e-05, "model_forward_time": 0.025482654571533203, "step": 12979 }, { "epoch": 1.980438232421875e-05, "step": 12979, "training_step_time": 0.10837769508361816 }, { "epoch": 1.9805908203125e-05, "grad_norm": 0.2443506419658661, "learning_rate": 6.503090951680512e-05, "loss": 0.0108, "step": 12980 }, { "epoch": 1.9805908203125e-05, "model_forward_time": 0.025778770446777344, "step": 12980 }, { "epoch": 1.9805908203125e-05, "step": 12980, "training_step_time": 0.10582351684570312 }, { "epoch": 1.980743408203125e-05, "model_forward_time": 0.02543473243713379, "step": 12981 }, { "epoch": 1.980743408203125e-05, "step": 12981, "training_step_time": 0.10823655128479004 }, { "epoch": 1.98089599609375e-05, "model_forward_time": 0.025121212005615234, "step": 12982 }, { "epoch": 1.98089599609375e-05, "step": 12982, "training_step_time": 0.10347366333007812 }, { "epoch": 1.981048583984375e-05, "model_forward_time": 0.025440692901611328, "step": 12983 }, { "epoch": 1.981048583984375e-05, "step": 12983, "training_step_time": 0.11075520515441895 }, { "epoch": 1.981201171875e-05, "model_forward_time": 0.025128841400146484, "step": 12984 }, { "epoch": 1.981201171875e-05, "step": 12984, "training_step_time": 0.11441922187805176 }, { "epoch": 1.981353759765625e-05, "model_forward_time": 0.025208473205566406, "step": 12985 }, { "epoch": 1.981353759765625e-05, "step": 12985, "training_step_time": 0.10868549346923828 }, { "epoch": 1.98150634765625e-05, "model_forward_time": 0.02619147300720215, "step": 12986 }, { "epoch": 1.98150634765625e-05, "step": 12986, "training_step_time": 0.11054706573486328 }, { "epoch": 1.981658935546875e-05, "model_forward_time": 0.0265505313873291, "step": 12987 }, { "epoch": 1.981658935546875e-05, "step": 12987, "training_step_time": 0.10784482955932617 }, { "epoch": 1.9818115234375e-05, "model_forward_time": 0.02614736557006836, "step": 12988 }, { "epoch": 1.9818115234375e-05, "step": 12988, "training_step_time": 0.11139512062072754 }, { "epoch": 1.981964111328125e-05, "model_forward_time": 0.025509119033813477, "step": 12989 }, { "epoch": 1.981964111328125e-05, "step": 12989, "training_step_time": 0.10703849792480469 }, { "epoch": 1.98211669921875e-05, "grad_norm": 0.2788366377353668, "learning_rate": 6.497833413348909e-05, "loss": 0.0148, "step": 12990 }, { "epoch": 1.98211669921875e-05, "model_forward_time": 0.025382041931152344, "step": 12990 }, { "epoch": 1.98211669921875e-05, "step": 12990, "training_step_time": 0.19214177131652832 }, { "epoch": 1.982269287109375e-05, "model_forward_time": 0.02535080909729004, "step": 12991 }, { "epoch": 1.982269287109375e-05, "step": 12991, "training_step_time": 0.13379693031311035 }, { "epoch": 1.982421875e-05, "model_forward_time": 0.02426600456237793, "step": 12992 }, { "epoch": 1.982421875e-05, "step": 12992, "training_step_time": 0.20628976821899414 }, { "epoch": 1.982574462890625e-05, "model_forward_time": 0.025187253952026367, "step": 12993 }, { "epoch": 1.982574462890625e-05, "step": 12993, "training_step_time": 0.13337993621826172 }, { "epoch": 1.98272705078125e-05, "model_forward_time": 0.024390220642089844, "step": 12994 }, { "epoch": 1.98272705078125e-05, "step": 12994, "training_step_time": 0.1152191162109375 }, { "epoch": 1.982879638671875e-05, "model_forward_time": 0.026357412338256836, "step": 12995 }, { "epoch": 1.982879638671875e-05, "step": 12995, "training_step_time": 0.11371779441833496 }, { "epoch": 1.9830322265625e-05, "model_forward_time": 0.025954484939575195, "step": 12996 }, { "epoch": 1.9830322265625e-05, "step": 12996, "training_step_time": 0.11198306083679199 }, { "epoch": 1.983184814453125e-05, "model_forward_time": 0.025496721267700195, "step": 12997 }, { "epoch": 1.983184814453125e-05, "step": 12997, "training_step_time": 0.10410785675048828 }, { "epoch": 1.98333740234375e-05, "model_forward_time": 0.024810791015625, "step": 12998 }, { "epoch": 1.98333740234375e-05, "step": 12998, "training_step_time": 0.17913269996643066 }, { "epoch": 1.983489990234375e-05, "model_forward_time": 0.025715351104736328, "step": 12999 }, { "epoch": 1.983489990234375e-05, "step": 12999, "training_step_time": 0.16986942291259766 }, { "epoch": 1.983642578125e-05, "grad_norm": 0.5717180967330933, "learning_rate": 6.492574055008473e-05, "loss": 0.018, "step": 13000 }, { "epoch": 1.983642578125e-05, "model_forward_time": 0.025163650512695312, "step": 13000 }, { "epoch": 1.983642578125e-05, "step": 13000, "training_step_time": 0.09932637214660645 }, { "epoch": 1.983795166015625e-05, "model_forward_time": 0.023018360137939453, "step": 13001 }, { "epoch": 1.983795166015625e-05, "step": 13001, "training_step_time": 0.11437487602233887 }, { "epoch": 1.98394775390625e-05, "model_forward_time": 0.024817943572998047, "step": 13002 }, { "epoch": 1.98394775390625e-05, "step": 13002, "training_step_time": 0.12758779525756836 }, { "epoch": 1.984100341796875e-05, "model_forward_time": 0.025259971618652344, "step": 13003 }, { "epoch": 1.984100341796875e-05, "step": 13003, "training_step_time": 0.10886621475219727 }, { "epoch": 1.9842529296875e-05, "model_forward_time": 0.0252840518951416, "step": 13004 }, { "epoch": 1.9842529296875e-05, "step": 13004, "training_step_time": 0.12966537475585938 }, { "epoch": 1.984405517578125e-05, "model_forward_time": 0.025537967681884766, "step": 13005 }, { "epoch": 1.984405517578125e-05, "step": 13005, "training_step_time": 0.10333633422851562 }, { "epoch": 1.98455810546875e-05, "model_forward_time": 0.026198863983154297, "step": 13006 }, { "epoch": 1.98455810546875e-05, "step": 13006, "training_step_time": 0.10606074333190918 }, { "epoch": 1.984710693359375e-05, "model_forward_time": 0.025611400604248047, "step": 13007 }, { "epoch": 1.984710693359375e-05, "step": 13007, "training_step_time": 0.14496111869812012 }, { "epoch": 1.98486328125e-05, "model_forward_time": 0.024966955184936523, "step": 13008 }, { "epoch": 1.98486328125e-05, "step": 13008, "training_step_time": 0.15911602973937988 }, { "epoch": 1.985015869140625e-05, "model_forward_time": 0.024559974670410156, "step": 13009 }, { "epoch": 1.985015869140625e-05, "step": 13009, "training_step_time": 0.1760084629058838 }, { "epoch": 1.98516845703125e-05, "grad_norm": 0.2336445450782776, "learning_rate": 6.487312883049819e-05, "loss": 0.0289, "step": 13010 }, { "epoch": 1.98516845703125e-05, "model_forward_time": 0.024847030639648438, "step": 13010 }, { "epoch": 1.98516845703125e-05, "step": 13010, "training_step_time": 0.15555167198181152 }, { "epoch": 1.985321044921875e-05, "model_forward_time": 0.02464604377746582, "step": 13011 }, { "epoch": 1.985321044921875e-05, "step": 13011, "training_step_time": 0.14980840682983398 }, { "epoch": 1.9854736328125e-05, "model_forward_time": 0.025061845779418945, "step": 13012 }, { "epoch": 1.9854736328125e-05, "step": 13012, "training_step_time": 0.10904884338378906 }, { "epoch": 1.985626220703125e-05, "model_forward_time": 0.024730205535888672, "step": 13013 }, { "epoch": 1.985626220703125e-05, "step": 13013, "training_step_time": 0.10241103172302246 }, { "epoch": 1.98577880859375e-05, "model_forward_time": 0.02540111541748047, "step": 13014 }, { "epoch": 1.98577880859375e-05, "step": 13014, "training_step_time": 0.10588288307189941 }, { "epoch": 1.985931396484375e-05, "model_forward_time": 0.025182485580444336, "step": 13015 }, { "epoch": 1.985931396484375e-05, "step": 13015, "training_step_time": 0.13691067695617676 }, { "epoch": 1.986083984375e-05, "model_forward_time": 0.025496959686279297, "step": 13016 }, { "epoch": 1.986083984375e-05, "step": 13016, "training_step_time": 0.1389782428741455 }, { "epoch": 1.986236572265625e-05, "model_forward_time": 0.02418828010559082, "step": 13017 }, { "epoch": 1.986236572265625e-05, "step": 13017, "training_step_time": 0.10822629928588867 }, { "epoch": 1.98638916015625e-05, "model_forward_time": 0.025012493133544922, "step": 13018 }, { "epoch": 1.98638916015625e-05, "step": 13018, "training_step_time": 0.12387681007385254 }, { "epoch": 1.986541748046875e-05, "model_forward_time": 0.02521038055419922, "step": 13019 }, { "epoch": 1.986541748046875e-05, "step": 13019, "training_step_time": 0.1122133731842041 }, { "epoch": 1.9866943359375e-05, "grad_norm": 0.22945524752140045, "learning_rate": 6.48204990386577e-05, "loss": 0.0229, "step": 13020 }, { "epoch": 1.9866943359375e-05, "model_forward_time": 0.025598526000976562, "step": 13020 }, { "epoch": 1.9866943359375e-05, "step": 13020, "training_step_time": 0.11237621307373047 }, { "epoch": 1.986846923828125e-05, "model_forward_time": 0.02527761459350586, "step": 13021 }, { "epoch": 1.986846923828125e-05, "step": 13021, "training_step_time": 0.11441326141357422 }, { "epoch": 1.98699951171875e-05, "model_forward_time": 0.025340795516967773, "step": 13022 }, { "epoch": 1.98699951171875e-05, "step": 13022, "training_step_time": 0.11266541481018066 }, { "epoch": 1.987152099609375e-05, "model_forward_time": 0.0253298282623291, "step": 13023 }, { "epoch": 1.987152099609375e-05, "step": 13023, "training_step_time": 0.10731291770935059 }, { "epoch": 1.9873046875e-05, "model_forward_time": 0.02550363540649414, "step": 13024 }, { "epoch": 1.9873046875e-05, "step": 13024, "training_step_time": 0.10975885391235352 }, { "epoch": 1.987457275390625e-05, "model_forward_time": 0.027916669845581055, "step": 13025 }, { "epoch": 1.987457275390625e-05, "step": 13025, "training_step_time": 0.11066150665283203 }, { "epoch": 1.98760986328125e-05, "model_forward_time": 0.02533555030822754, "step": 13026 }, { "epoch": 1.98760986328125e-05, "step": 13026, "training_step_time": 0.10743546485900879 }, { "epoch": 1.987762451171875e-05, "model_forward_time": 0.025142669677734375, "step": 13027 }, { "epoch": 1.987762451171875e-05, "step": 13027, "training_step_time": 0.10720491409301758 }, { "epoch": 1.9879150390625e-05, "model_forward_time": 0.025131702423095703, "step": 13028 }, { "epoch": 1.9879150390625e-05, "step": 13028, "training_step_time": 0.10606861114501953 }, { "epoch": 1.988067626953125e-05, "model_forward_time": 0.02549433708190918, "step": 13029 }, { "epoch": 1.988067626953125e-05, "step": 13029, "training_step_time": 0.10604166984558105 }, { "epoch": 1.98822021484375e-05, "grad_norm": 0.41425690054893494, "learning_rate": 6.476785123851336e-05, "loss": 0.0299, "step": 13030 }, { "epoch": 1.98822021484375e-05, "model_forward_time": 0.02429938316345215, "step": 13030 }, { "epoch": 1.98822021484375e-05, "step": 13030, "training_step_time": 0.1160440444946289 }, { "epoch": 1.988372802734375e-05, "model_forward_time": 0.02533555030822754, "step": 13031 }, { "epoch": 1.988372802734375e-05, "step": 13031, "training_step_time": 0.1092674732208252 }, { "epoch": 1.988525390625e-05, "model_forward_time": 0.024751663208007812, "step": 13032 }, { "epoch": 1.988525390625e-05, "step": 13032, "training_step_time": 0.10417914390563965 }, { "epoch": 1.988677978515625e-05, "model_forward_time": 0.027908802032470703, "step": 13033 }, { "epoch": 1.988677978515625e-05, "step": 13033, "training_step_time": 0.10971546173095703 }, { "epoch": 1.98883056640625e-05, "model_forward_time": 0.025412321090698242, "step": 13034 }, { "epoch": 1.98883056640625e-05, "step": 13034, "training_step_time": 0.10615777969360352 }, { "epoch": 1.988983154296875e-05, "model_forward_time": 0.025093555450439453, "step": 13035 }, { "epoch": 1.988983154296875e-05, "step": 13035, "training_step_time": 0.10447144508361816 }, { "epoch": 1.9891357421875e-05, "model_forward_time": 0.025583505630493164, "step": 13036 }, { "epoch": 1.9891357421875e-05, "step": 13036, "training_step_time": 0.10522580146789551 }, { "epoch": 1.989288330078125e-05, "model_forward_time": 0.02599620819091797, "step": 13037 }, { "epoch": 1.989288330078125e-05, "step": 13037, "training_step_time": 0.10562658309936523 }, { "epoch": 1.98944091796875e-05, "model_forward_time": 0.025603532791137695, "step": 13038 }, { "epoch": 1.98944091796875e-05, "step": 13038, "training_step_time": 0.10754227638244629 }, { "epoch": 1.989593505859375e-05, "model_forward_time": 0.025469541549682617, "step": 13039 }, { "epoch": 1.989593505859375e-05, "step": 13039, "training_step_time": 0.10546612739562988 }, { "epoch": 1.98974609375e-05, "grad_norm": 0.42252713441848755, "learning_rate": 6.471518549403726e-05, "loss": 0.0168, "step": 13040 }, { "epoch": 1.98974609375e-05, "model_forward_time": 0.025261878967285156, "step": 13040 }, { "epoch": 1.98974609375e-05, "step": 13040, "training_step_time": 0.10693478584289551 }, { "epoch": 1.989898681640625e-05, "model_forward_time": 0.025131702423095703, "step": 13041 }, { "epoch": 1.989898681640625e-05, "step": 13041, "training_step_time": 0.10682201385498047 }, { "epoch": 1.99005126953125e-05, "model_forward_time": 0.025505542755126953, "step": 13042 }, { "epoch": 1.99005126953125e-05, "step": 13042, "training_step_time": 0.10527634620666504 }, { "epoch": 1.990203857421875e-05, "model_forward_time": 0.0250852108001709, "step": 13043 }, { "epoch": 1.990203857421875e-05, "step": 13043, "training_step_time": 0.10788869857788086 }, { "epoch": 1.9903564453125e-05, "model_forward_time": 0.025376081466674805, "step": 13044 }, { "epoch": 1.9903564453125e-05, "step": 13044, "training_step_time": 0.10537195205688477 }, { "epoch": 1.990509033203125e-05, "model_forward_time": 0.025628328323364258, "step": 13045 }, { "epoch": 1.990509033203125e-05, "step": 13045, "training_step_time": 0.10646820068359375 }, { "epoch": 1.99066162109375e-05, "model_forward_time": 0.025632858276367188, "step": 13046 }, { "epoch": 1.99066162109375e-05, "step": 13046, "training_step_time": 0.10693049430847168 }, { "epoch": 1.990814208984375e-05, "model_forward_time": 0.025464296340942383, "step": 13047 }, { "epoch": 1.990814208984375e-05, "step": 13047, "training_step_time": 0.1395723819732666 }, { "epoch": 1.990966796875e-05, "model_forward_time": 0.027755022048950195, "step": 13048 }, { "epoch": 1.990966796875e-05, "step": 13048, "training_step_time": 0.12746572494506836 }, { "epoch": 1.991119384765625e-05, "model_forward_time": 0.024701356887817383, "step": 13049 }, { "epoch": 1.991119384765625e-05, "step": 13049, "training_step_time": 0.23617172241210938 }, { "epoch": 1.99127197265625e-05, "grad_norm": 0.21935215592384338, "learning_rate": 6.466250186922325e-05, "loss": 0.0129, "step": 13050 }, { "epoch": 1.99127197265625e-05, "model_forward_time": 0.024423837661743164, "step": 13050 }, { "epoch": 1.99127197265625e-05, "step": 13050, "training_step_time": 0.16428875923156738 }, { "epoch": 1.991424560546875e-05, "model_forward_time": 0.024848461151123047, "step": 13051 }, { "epoch": 1.991424560546875e-05, "step": 13051, "training_step_time": 0.17467784881591797 }, { "epoch": 1.9915771484375e-05, "model_forward_time": 0.0247344970703125, "step": 13052 }, { "epoch": 1.9915771484375e-05, "step": 13052, "training_step_time": 0.10133123397827148 }, { "epoch": 1.991729736328125e-05, "model_forward_time": 0.02526235580444336, "step": 13053 }, { "epoch": 1.991729736328125e-05, "step": 13053, "training_step_time": 0.17099428176879883 }, { "epoch": 1.99188232421875e-05, "model_forward_time": 0.024710416793823242, "step": 13054 }, { "epoch": 1.99188232421875e-05, "step": 13054, "training_step_time": 0.10448861122131348 }, { "epoch": 1.992034912109375e-05, "model_forward_time": 0.026925325393676758, "step": 13055 }, { "epoch": 1.992034912109375e-05, "step": 13055, "training_step_time": 0.14220309257507324 }, { "epoch": 1.9921875e-05, "model_forward_time": 0.025788545608520508, "step": 13056 }, { "epoch": 1.9921875e-05, "step": 13056, "training_step_time": 0.1923828125 }, { "epoch": 1.992340087890625e-05, "model_forward_time": 0.024652719497680664, "step": 13057 }, { "epoch": 1.992340087890625e-05, "step": 13057, "training_step_time": 0.1302051544189453 }, { "epoch": 1.99249267578125e-05, "model_forward_time": 0.024805307388305664, "step": 13058 }, { "epoch": 1.99249267578125e-05, "step": 13058, "training_step_time": 0.1180565357208252 }, { "epoch": 1.992645263671875e-05, "model_forward_time": 0.025587797164916992, "step": 13059 }, { "epoch": 1.992645263671875e-05, "step": 13059, "training_step_time": 0.10673022270202637 }, { "epoch": 1.9927978515625e-05, "grad_norm": 0.3332976996898651, "learning_rate": 6.460980042808687e-05, "loss": 0.016, "step": 13060 }, { "epoch": 1.9927978515625e-05, "model_forward_time": 0.025459766387939453, "step": 13060 }, { "epoch": 1.9927978515625e-05, "step": 13060, "training_step_time": 0.10619640350341797 }, { "epoch": 1.992950439453125e-05, "model_forward_time": 0.0250241756439209, "step": 13061 }, { "epoch": 1.992950439453125e-05, "step": 13061, "training_step_time": 0.10913586616516113 }, { "epoch": 1.99310302734375e-05, "model_forward_time": 0.024940013885498047, "step": 13062 }, { "epoch": 1.99310302734375e-05, "step": 13062, "training_step_time": 0.1120452880859375 }, { "epoch": 1.993255615234375e-05, "model_forward_time": 0.025316715240478516, "step": 13063 }, { "epoch": 1.993255615234375e-05, "step": 13063, "training_step_time": 0.11233282089233398 }, { "epoch": 1.993408203125e-05, "model_forward_time": 0.024810314178466797, "step": 13064 }, { "epoch": 1.993408203125e-05, "step": 13064, "training_step_time": 0.12602853775024414 }, { "epoch": 1.993560791015625e-05, "model_forward_time": 0.02645587921142578, "step": 13065 }, { "epoch": 1.993560791015625e-05, "step": 13065, "training_step_time": 0.11041903495788574 }, { "epoch": 1.99371337890625e-05, "model_forward_time": 0.02548384666442871, "step": 13066 }, { "epoch": 1.99371337890625e-05, "step": 13066, "training_step_time": 0.17828083038330078 }, { "epoch": 1.993865966796875e-05, "model_forward_time": 0.025374650955200195, "step": 13067 }, { "epoch": 1.993865966796875e-05, "step": 13067, "training_step_time": 0.1350555419921875 }, { "epoch": 1.9940185546875e-05, "model_forward_time": 0.02435016632080078, "step": 13068 }, { "epoch": 1.9940185546875e-05, "step": 13068, "training_step_time": 0.11890387535095215 }, { "epoch": 1.994171142578125e-05, "model_forward_time": 0.024969100952148438, "step": 13069 }, { "epoch": 1.994171142578125e-05, "step": 13069, "training_step_time": 0.10307550430297852 }, { "epoch": 1.99432373046875e-05, "grad_norm": 0.2634739279747009, "learning_rate": 6.455708123466536e-05, "loss": 0.0262, "step": 13070 }, { "epoch": 1.99432373046875e-05, "model_forward_time": 0.025755882263183594, "step": 13070 }, { "epoch": 1.99432373046875e-05, "step": 13070, "training_step_time": 0.10401606559753418 }, { "epoch": 1.994476318359375e-05, "model_forward_time": 0.024999141693115234, "step": 13071 }, { "epoch": 1.994476318359375e-05, "step": 13071, "training_step_time": 0.10678505897521973 }, { "epoch": 1.99462890625e-05, "model_forward_time": 0.025098085403442383, "step": 13072 }, { "epoch": 1.99462890625e-05, "step": 13072, "training_step_time": 0.1051645278930664 }, { "epoch": 1.994781494140625e-05, "model_forward_time": 0.0254514217376709, "step": 13073 }, { "epoch": 1.994781494140625e-05, "step": 13073, "training_step_time": 0.10649538040161133 }, { "epoch": 1.99493408203125e-05, "model_forward_time": 0.02523517608642578, "step": 13074 }, { "epoch": 1.99493408203125e-05, "step": 13074, "training_step_time": 0.10987496376037598 }, { "epoch": 1.995086669921875e-05, "model_forward_time": 0.02665543556213379, "step": 13075 }, { "epoch": 1.995086669921875e-05, "step": 13075, "training_step_time": 0.11060547828674316 }, { "epoch": 1.9952392578125e-05, "model_forward_time": 0.025132417678833008, "step": 13076 }, { "epoch": 1.9952392578125e-05, "step": 13076, "training_step_time": 0.10463261604309082 }, { "epoch": 1.995391845703125e-05, "model_forward_time": 0.024984359741210938, "step": 13077 }, { "epoch": 1.995391845703125e-05, "step": 13077, "training_step_time": 0.10304689407348633 }, { "epoch": 1.99554443359375e-05, "model_forward_time": 0.025263071060180664, "step": 13078 }, { "epoch": 1.99554443359375e-05, "step": 13078, "training_step_time": 0.10876107215881348 }, { "epoch": 1.995697021484375e-05, "model_forward_time": 0.025107622146606445, "step": 13079 }, { "epoch": 1.995697021484375e-05, "step": 13079, "training_step_time": 0.10625338554382324 }, { "epoch": 1.995849609375e-05, "grad_norm": 0.2288821041584015, "learning_rate": 6.450434435301751e-05, "loss": 0.0152, "step": 13080 }, { "epoch": 1.995849609375e-05, "model_forward_time": 0.025149106979370117, "step": 13080 }, { "epoch": 1.995849609375e-05, "step": 13080, "training_step_time": 0.1066596508026123 }, { "epoch": 1.996002197265625e-05, "model_forward_time": 0.025597572326660156, "step": 13081 }, { "epoch": 1.996002197265625e-05, "step": 13081, "training_step_time": 0.10601949691772461 }, { "epoch": 1.99615478515625e-05, "model_forward_time": 0.025180578231811523, "step": 13082 }, { "epoch": 1.99615478515625e-05, "step": 13082, "training_step_time": 0.1060030460357666 }, { "epoch": 1.996307373046875e-05, "model_forward_time": 0.02520132064819336, "step": 13083 }, { "epoch": 1.996307373046875e-05, "step": 13083, "training_step_time": 0.10643792152404785 }, { "epoch": 1.9964599609375e-05, "model_forward_time": 0.025391101837158203, "step": 13084 }, { "epoch": 1.9964599609375e-05, "step": 13084, "training_step_time": 0.10547113418579102 }, { "epoch": 1.996612548828125e-05, "model_forward_time": 0.024657249450683594, "step": 13085 }, { "epoch": 1.996612548828125e-05, "step": 13085, "training_step_time": 0.10745382308959961 }, { "epoch": 1.99676513671875e-05, "model_forward_time": 0.025775671005249023, "step": 13086 }, { "epoch": 1.99676513671875e-05, "step": 13086, "training_step_time": 0.10675930976867676 }, { "epoch": 1.996917724609375e-05, "model_forward_time": 0.027628660202026367, "step": 13087 }, { "epoch": 1.996917724609375e-05, "step": 13087, "training_step_time": 0.1089470386505127 }, { "epoch": 1.9970703125e-05, "model_forward_time": 0.02516961097717285, "step": 13088 }, { "epoch": 1.9970703125e-05, "step": 13088, "training_step_time": 0.11188769340515137 }, { "epoch": 1.997222900390625e-05, "model_forward_time": 0.025101661682128906, "step": 13089 }, { "epoch": 1.997222900390625e-05, "step": 13089, "training_step_time": 0.10859203338623047 }, { "epoch": 1.99737548828125e-05, "grad_norm": 0.27786651253700256, "learning_rate": 6.445158984722358e-05, "loss": 0.0162, "step": 13090 }, { "epoch": 1.99737548828125e-05, "model_forward_time": 0.025038480758666992, "step": 13090 }, { "epoch": 1.99737548828125e-05, "step": 13090, "training_step_time": 0.11224102973937988 }, { "epoch": 1.997528076171875e-05, "model_forward_time": 0.02498149871826172, "step": 13091 }, { "epoch": 1.997528076171875e-05, "step": 13091, "training_step_time": 0.1072235107421875 }, { "epoch": 1.9976806640625e-05, "model_forward_time": 0.0250091552734375, "step": 13092 }, { "epoch": 1.9976806640625e-05, "step": 13092, "training_step_time": 0.11084389686584473 }, { "epoch": 1.997833251953125e-05, "model_forward_time": 0.024289846420288086, "step": 13093 }, { "epoch": 1.997833251953125e-05, "step": 13093, "training_step_time": 0.1074521541595459 }, { "epoch": 1.99798583984375e-05, "model_forward_time": 0.024430036544799805, "step": 13094 }, { "epoch": 1.99798583984375e-05, "step": 13094, "training_step_time": 0.18610835075378418 }, { "epoch": 1.998138427734375e-05, "model_forward_time": 0.025235891342163086, "step": 13095 }, { "epoch": 1.998138427734375e-05, "step": 13095, "training_step_time": 0.11112117767333984 }, { "epoch": 1.998291015625e-05, "model_forward_time": 0.024611949920654297, "step": 13096 }, { "epoch": 1.998291015625e-05, "step": 13096, "training_step_time": 0.11307859420776367 }, { "epoch": 1.998443603515625e-05, "model_forward_time": 0.02549433708190918, "step": 13097 }, { "epoch": 1.998443603515625e-05, "step": 13097, "training_step_time": 0.11691522598266602 }, { "epoch": 1.99859619140625e-05, "model_forward_time": 0.025487661361694336, "step": 13098 }, { "epoch": 1.99859619140625e-05, "step": 13098, "training_step_time": 0.12632012367248535 }, { "epoch": 1.998748779296875e-05, "model_forward_time": 0.025876998901367188, "step": 13099 }, { "epoch": 1.998748779296875e-05, "step": 13099, "training_step_time": 0.11048626899719238 }, { "epoch": 1.9989013671875e-05, "grad_norm": 0.24237234890460968, "learning_rate": 6.439881778138531e-05, "loss": 0.0184, "step": 13100 }, { "epoch": 1.9989013671875e-05, "model_forward_time": 0.025295257568359375, "step": 13100 }, { "epoch": 1.9989013671875e-05, "step": 13100, "training_step_time": 0.16676878929138184 }, { "epoch": 1.999053955078125e-05, "model_forward_time": 0.024645566940307617, "step": 13101 }, { "epoch": 1.999053955078125e-05, "step": 13101, "training_step_time": 0.17843270301818848 }, { "epoch": 1.99920654296875e-05, "model_forward_time": 0.025000810623168945, "step": 13102 }, { "epoch": 1.99920654296875e-05, "step": 13102, "training_step_time": 0.19494295120239258 }, { "epoch": 1.999359130859375e-05, "model_forward_time": 0.024909257888793945, "step": 13103 }, { "epoch": 1.999359130859375e-05, "step": 13103, "training_step_time": 0.15199995040893555 }, { "epoch": 1.99951171875e-05, "model_forward_time": 0.02503228187561035, "step": 13104 }, { "epoch": 1.99951171875e-05, "step": 13104, "training_step_time": 0.16281890869140625 }, { "epoch": 1.999664306640625e-05, "model_forward_time": 0.024399518966674805, "step": 13105 }, { "epoch": 1.999664306640625e-05, "step": 13105, "training_step_time": 0.1760883331298828 }, { "epoch": 1.99981689453125e-05, "model_forward_time": 0.024824142456054688, "step": 13106 }, { "epoch": 1.99981689453125e-05, "step": 13106, "training_step_time": 0.10220789909362793 }, { "epoch": 1.999969482421875e-05, "model_forward_time": 0.024693727493286133, "step": 13107 }, { "epoch": 1.999969482421875e-05, "step": 13107, "training_step_time": 0.10187840461730957 }, { "epoch": 2.0001220703125e-05, "model_forward_time": 0.025020599365234375, "step": 13108 }, { "epoch": 2.0001220703125e-05, "step": 13108, "training_step_time": 0.10901093482971191 }, { "epoch": 2.000274658203125e-05, "model_forward_time": 0.025329113006591797, "step": 13109 }, { "epoch": 2.000274658203125e-05, "step": 13109, "training_step_time": 0.10388827323913574 }, { "epoch": 2.00042724609375e-05, "grad_norm": 0.37235501408576965, "learning_rate": 6.43460282196257e-05, "loss": 0.0128, "step": 13110 }, { "epoch": 2.00042724609375e-05, "model_forward_time": 0.025379657745361328, "step": 13110 }, { "epoch": 2.00042724609375e-05, "step": 13110, "training_step_time": 0.19805121421813965 }, { "epoch": 2.000579833984375e-05, "model_forward_time": 0.024512767791748047, "step": 13111 }, { "epoch": 2.000579833984375e-05, "step": 13111, "training_step_time": 0.1289660930633545 }, { "epoch": 2.000732421875e-05, "model_forward_time": 0.024247169494628906, "step": 13112 }, { "epoch": 2.000732421875e-05, "step": 13112, "training_step_time": 0.1319580078125 }, { "epoch": 2.000885009765625e-05, "model_forward_time": 0.024625539779663086, "step": 13113 }, { "epoch": 2.000885009765625e-05, "step": 13113, "training_step_time": 0.13284516334533691 }, { "epoch": 2.00103759765625e-05, "model_forward_time": 0.024773120880126953, "step": 13114 }, { "epoch": 2.00103759765625e-05, "step": 13114, "training_step_time": 0.11490368843078613 }, { "epoch": 2.001190185546875e-05, "model_forward_time": 0.025012969970703125, "step": 13115 }, { "epoch": 2.001190185546875e-05, "step": 13115, "training_step_time": 0.13335585594177246 }, { "epoch": 2.0013427734375e-05, "model_forward_time": 0.025088787078857422, "step": 13116 }, { "epoch": 2.0013427734375e-05, "step": 13116, "training_step_time": 0.10246515274047852 }, { "epoch": 2.001495361328125e-05, "model_forward_time": 0.02521228790283203, "step": 13117 }, { "epoch": 2.001495361328125e-05, "step": 13117, "training_step_time": 0.10532665252685547 }, { "epoch": 2.00164794921875e-05, "model_forward_time": 0.025324583053588867, "step": 13118 }, { "epoch": 2.00164794921875e-05, "step": 13118, "training_step_time": 0.10660457611083984 }, { "epoch": 2.001800537109375e-05, "model_forward_time": 0.0253603458404541, "step": 13119 }, { "epoch": 2.001800537109375e-05, "step": 13119, "training_step_time": 0.1057138442993164 }, { "epoch": 2.001953125e-05, "grad_norm": 0.3438621759414673, "learning_rate": 6.42932212260891e-05, "loss": 0.0112, "step": 13120 }, { "epoch": 2.001953125e-05, "model_forward_time": 0.025230884552001953, "step": 13120 }, { "epoch": 2.001953125e-05, "step": 13120, "training_step_time": 0.10791587829589844 }, { "epoch": 2.002105712890625e-05, "model_forward_time": 0.02501988410949707, "step": 13121 }, { "epoch": 2.002105712890625e-05, "step": 13121, "training_step_time": 0.10616445541381836 }, { "epoch": 2.00225830078125e-05, "model_forward_time": 0.02545952796936035, "step": 13122 }, { "epoch": 2.00225830078125e-05, "step": 13122, "training_step_time": 0.1059412956237793 }, { "epoch": 2.002410888671875e-05, "model_forward_time": 0.02530694007873535, "step": 13123 }, { "epoch": 2.002410888671875e-05, "step": 13123, "training_step_time": 0.1060936450958252 }, { "epoch": 2.0025634765625e-05, "model_forward_time": 0.025653362274169922, "step": 13124 }, { "epoch": 2.0025634765625e-05, "step": 13124, "training_step_time": 0.10892033576965332 }, { "epoch": 2.002716064453125e-05, "model_forward_time": 0.02498483657836914, "step": 13125 }, { "epoch": 2.002716064453125e-05, "step": 13125, "training_step_time": 0.10973548889160156 }, { "epoch": 2.00286865234375e-05, "model_forward_time": 0.025124073028564453, "step": 13126 }, { "epoch": 2.00286865234375e-05, "step": 13126, "training_step_time": 0.10728907585144043 }, { "epoch": 2.003021240234375e-05, "model_forward_time": 0.0252988338470459, "step": 13127 }, { "epoch": 2.003021240234375e-05, "step": 13127, "training_step_time": 0.10434389114379883 }, { "epoch": 2.003173828125e-05, "model_forward_time": 0.025461912155151367, "step": 13128 }, { "epoch": 2.003173828125e-05, "step": 13128, "training_step_time": 0.10489439964294434 }, { "epoch": 2.003326416015625e-05, "model_forward_time": 0.025616884231567383, "step": 13129 }, { "epoch": 2.003326416015625e-05, "step": 13129, "training_step_time": 0.10916566848754883 }, { "epoch": 2.00347900390625e-05, "grad_norm": 0.5092164278030396, "learning_rate": 6.42403968649409e-05, "loss": 0.0202, "step": 13130 }, { "epoch": 2.00347900390625e-05, "model_forward_time": 0.025549650192260742, "step": 13130 }, { "epoch": 2.00347900390625e-05, "step": 13130, "training_step_time": 0.1053159236907959 }, { "epoch": 2.003631591796875e-05, "model_forward_time": 0.025358200073242188, "step": 13131 }, { "epoch": 2.003631591796875e-05, "step": 13131, "training_step_time": 0.1114804744720459 }, { "epoch": 2.0037841796875e-05, "model_forward_time": 0.02514958381652832, "step": 13132 }, { "epoch": 2.0037841796875e-05, "step": 13132, "training_step_time": 0.10991573333740234 }, { "epoch": 2.003936767578125e-05, "model_forward_time": 0.02521491050720215, "step": 13133 }, { "epoch": 2.003936767578125e-05, "step": 13133, "training_step_time": 0.10701608657836914 }, { "epoch": 2.00408935546875e-05, "model_forward_time": 0.026064157485961914, "step": 13134 }, { "epoch": 2.00408935546875e-05, "step": 13134, "training_step_time": 0.10944151878356934 }, { "epoch": 2.004241943359375e-05, "model_forward_time": 0.02555084228515625, "step": 13135 }, { "epoch": 2.004241943359375e-05, "step": 13135, "training_step_time": 0.1080636978149414 }, { "epoch": 2.00439453125e-05, "model_forward_time": 0.02543044090270996, "step": 13136 }, { "epoch": 2.00439453125e-05, "step": 13136, "training_step_time": 0.10766053199768066 }, { "epoch": 2.004547119140625e-05, "model_forward_time": 0.02535414695739746, "step": 13137 }, { "epoch": 2.004547119140625e-05, "step": 13137, "training_step_time": 0.10640931129455566 }, { "epoch": 2.00469970703125e-05, "model_forward_time": 0.02507495880126953, "step": 13138 }, { "epoch": 2.00469970703125e-05, "step": 13138, "training_step_time": 0.10819196701049805 }, { "epoch": 2.004852294921875e-05, "model_forward_time": 0.025016069412231445, "step": 13139 }, { "epoch": 2.004852294921875e-05, "step": 13139, "training_step_time": 0.135850191116333 }, { "epoch": 2.0050048828125e-05, "grad_norm": 0.23394645750522614, "learning_rate": 6.418755520036775e-05, "loss": 0.0196, "step": 13140 }, { "epoch": 2.0050048828125e-05, "model_forward_time": 0.025213956832885742, "step": 13140 }, { "epoch": 2.0050048828125e-05, "step": 13140, "training_step_time": 0.21664166450500488 }, { "epoch": 2.005157470703125e-05, "model_forward_time": 0.024566173553466797, "step": 13141 }, { "epoch": 2.005157470703125e-05, "step": 13141, "training_step_time": 0.15531229972839355 }, { "epoch": 2.00531005859375e-05, "model_forward_time": 0.023367881774902344, "step": 13142 }, { "epoch": 2.00531005859375e-05, "step": 13142, "training_step_time": 0.1949901580810547 }, { "epoch": 2.005462646484375e-05, "model_forward_time": 0.024398088455200195, "step": 13143 }, { "epoch": 2.005462646484375e-05, "step": 13143, "training_step_time": 0.14475178718566895 }, { "epoch": 2.005615234375e-05, "model_forward_time": 0.024286985397338867, "step": 13144 }, { "epoch": 2.005615234375e-05, "step": 13144, "training_step_time": 0.1433866024017334 }, { "epoch": 2.005767822265625e-05, "model_forward_time": 0.025110244750976562, "step": 13145 }, { "epoch": 2.005767822265625e-05, "step": 13145, "training_step_time": 0.1800978183746338 }, { "epoch": 2.00592041015625e-05, "model_forward_time": 0.025079011917114258, "step": 13146 }, { "epoch": 2.00592041015625e-05, "step": 13146, "training_step_time": 0.1626269817352295 }, { "epoch": 2.006072998046875e-05, "model_forward_time": 0.023705005645751953, "step": 13147 }, { "epoch": 2.006072998046875e-05, "step": 13147, "training_step_time": 0.17360520362854004 }, { "epoch": 2.0062255859375e-05, "model_forward_time": 0.02312779426574707, "step": 13148 }, { "epoch": 2.0062255859375e-05, "step": 13148, "training_step_time": 0.17927098274230957 }, { "epoch": 2.006378173828125e-05, "model_forward_time": 0.02369093894958496, "step": 13149 }, { "epoch": 2.006378173828125e-05, "step": 13149, "training_step_time": 0.14154863357543945 }, { "epoch": 2.00653076171875e-05, "grad_norm": 0.42746293544769287, "learning_rate": 6.413469629657723e-05, "loss": 0.02, "step": 13150 }, { "epoch": 2.00653076171875e-05, "model_forward_time": 0.023860692977905273, "step": 13150 }, { "epoch": 2.00653076171875e-05, "step": 13150, "training_step_time": 0.1055762767791748 }, { "epoch": 2.006683349609375e-05, "model_forward_time": 0.02430438995361328, "step": 13151 }, { "epoch": 2.006683349609375e-05, "step": 13151, "training_step_time": 0.10465550422668457 }, { "epoch": 2.0068359375e-05, "model_forward_time": 0.027730226516723633, "step": 13152 }, { "epoch": 2.0068359375e-05, "step": 13152, "training_step_time": 0.10910868644714355 }, { "epoch": 2.006988525390625e-05, "model_forward_time": 0.024867534637451172, "step": 13153 }, { "epoch": 2.006988525390625e-05, "step": 13153, "training_step_time": 0.10611414909362793 }, { "epoch": 2.00714111328125e-05, "model_forward_time": 0.02453923225402832, "step": 13154 }, { "epoch": 2.00714111328125e-05, "step": 13154, "training_step_time": 0.10519623756408691 }, { "epoch": 2.007293701171875e-05, "model_forward_time": 0.024723052978515625, "step": 13155 }, { "epoch": 2.007293701171875e-05, "step": 13155, "training_step_time": 0.20570826530456543 }, { "epoch": 2.0074462890625e-05, "model_forward_time": 0.023334741592407227, "step": 13156 }, { "epoch": 2.0074462890625e-05, "step": 13156, "training_step_time": 0.12289237976074219 }, { "epoch": 2.007598876953125e-05, "model_forward_time": 0.023724794387817383, "step": 13157 }, { "epoch": 2.007598876953125e-05, "step": 13157, "training_step_time": 0.12497711181640625 }, { "epoch": 2.00775146484375e-05, "model_forward_time": 0.024933338165283203, "step": 13158 }, { "epoch": 2.00775146484375e-05, "step": 13158, "training_step_time": 0.10424065589904785 }, { "epoch": 2.007904052734375e-05, "model_forward_time": 0.02471470832824707, "step": 13159 }, { "epoch": 2.007904052734375e-05, "step": 13159, "training_step_time": 0.15917348861694336 }, { "epoch": 2.008056640625e-05, "grad_norm": 0.20946824550628662, "learning_rate": 6.408182021779791e-05, "loss": 0.0104, "step": 13160 }, { "epoch": 2.008056640625e-05, "model_forward_time": 0.024190425872802734, "step": 13160 }, { "epoch": 2.008056640625e-05, "step": 13160, "training_step_time": 0.12352418899536133 }, { "epoch": 2.008209228515625e-05, "model_forward_time": 0.0235750675201416, "step": 13161 }, { "epoch": 2.008209228515625e-05, "step": 13161, "training_step_time": 0.10820388793945312 }, { "epoch": 2.00836181640625e-05, "model_forward_time": 0.02458810806274414, "step": 13162 }, { "epoch": 2.00836181640625e-05, "step": 13162, "training_step_time": 0.10415434837341309 }, { "epoch": 2.008514404296875e-05, "model_forward_time": 0.02469635009765625, "step": 13163 }, { "epoch": 2.008514404296875e-05, "step": 13163, "training_step_time": 0.10478973388671875 }, { "epoch": 2.0086669921875e-05, "model_forward_time": 0.024752378463745117, "step": 13164 }, { "epoch": 2.0086669921875e-05, "step": 13164, "training_step_time": 0.10426998138427734 }, { "epoch": 2.008819580078125e-05, "model_forward_time": 0.02406787872314453, "step": 13165 }, { "epoch": 2.008819580078125e-05, "step": 13165, "training_step_time": 0.10357475280761719 }, { "epoch": 2.00897216796875e-05, "model_forward_time": 0.02452874183654785, "step": 13166 }, { "epoch": 2.00897216796875e-05, "step": 13166, "training_step_time": 0.10468626022338867 }, { "epoch": 2.009124755859375e-05, "model_forward_time": 0.0242156982421875, "step": 13167 }, { "epoch": 2.009124755859375e-05, "step": 13167, "training_step_time": 0.10726714134216309 }, { "epoch": 2.00927734375e-05, "model_forward_time": 0.024643659591674805, "step": 13168 }, { "epoch": 2.00927734375e-05, "step": 13168, "training_step_time": 0.10453391075134277 }, { "epoch": 2.009429931640625e-05, "model_forward_time": 0.02452850341796875, "step": 13169 }, { "epoch": 2.009429931640625e-05, "step": 13169, "training_step_time": 0.10692954063415527 }, { "epoch": 2.00958251953125e-05, "grad_norm": 0.13432158529758453, "learning_rate": 6.402892702827916e-05, "loss": 0.0161, "step": 13170 }, { "epoch": 2.00958251953125e-05, "model_forward_time": 0.024339914321899414, "step": 13170 }, { "epoch": 2.00958251953125e-05, "step": 13170, "training_step_time": 0.10939669609069824 }, { "epoch": 2.009735107421875e-05, "model_forward_time": 0.024614334106445312, "step": 13171 }, { "epoch": 2.009735107421875e-05, "step": 13171, "training_step_time": 0.10586047172546387 }, { "epoch": 2.0098876953125e-05, "model_forward_time": 0.02441263198852539, "step": 13172 }, { "epoch": 2.0098876953125e-05, "step": 13172, "training_step_time": 0.10708117485046387 }, { "epoch": 2.010040283203125e-05, "model_forward_time": 0.024289369583129883, "step": 13173 }, { "epoch": 2.010040283203125e-05, "step": 13173, "training_step_time": 0.1089174747467041 }, { "epoch": 2.01019287109375e-05, "model_forward_time": 0.02441692352294922, "step": 13174 }, { "epoch": 2.01019287109375e-05, "step": 13174, "training_step_time": 0.11598014831542969 }, { "epoch": 2.010345458984375e-05, "model_forward_time": 0.02412581443786621, "step": 13175 }, { "epoch": 2.010345458984375e-05, "step": 13175, "training_step_time": 0.112091064453125 }, { "epoch": 2.010498046875e-05, "model_forward_time": 0.028087615966796875, "step": 13176 }, { "epoch": 2.010498046875e-05, "step": 13176, "training_step_time": 0.11748075485229492 }, { "epoch": 2.010650634765625e-05, "model_forward_time": 0.02450084686279297, "step": 13177 }, { "epoch": 2.010650634765625e-05, "step": 13177, "training_step_time": 0.10859346389770508 }, { "epoch": 2.01080322265625e-05, "model_forward_time": 0.024389266967773438, "step": 13178 }, { "epoch": 2.01080322265625e-05, "step": 13178, "training_step_time": 0.1066434383392334 }, { "epoch": 2.010955810546875e-05, "model_forward_time": 0.024564743041992188, "step": 13179 }, { "epoch": 2.010955810546875e-05, "step": 13179, "training_step_time": 0.10689163208007812 }, { "epoch": 2.0111083984375e-05, "grad_norm": 0.23071473836898804, "learning_rate": 6.397601679229126e-05, "loss": 0.0166, "step": 13180 }, { "epoch": 2.0111083984375e-05, "model_forward_time": 0.024986982345581055, "step": 13180 }, { "epoch": 2.0111083984375e-05, "step": 13180, "training_step_time": 0.1074364185333252 }, { "epoch": 2.011260986328125e-05, "model_forward_time": 0.024399995803833008, "step": 13181 }, { "epoch": 2.011260986328125e-05, "step": 13181, "training_step_time": 0.10699701309204102 }, { "epoch": 2.01141357421875e-05, "model_forward_time": 0.024799108505249023, "step": 13182 }, { "epoch": 2.01141357421875e-05, "step": 13182, "training_step_time": 0.1033930778503418 }, { "epoch": 2.011566162109375e-05, "model_forward_time": 0.02440667152404785, "step": 13183 }, { "epoch": 2.011566162109375e-05, "step": 13183, "training_step_time": 0.10511016845703125 }, { "epoch": 2.01171875e-05, "model_forward_time": 0.02459883689880371, "step": 13184 }, { "epoch": 2.01171875e-05, "step": 13184, "training_step_time": 0.10967278480529785 }, { "epoch": 2.011871337890625e-05, "model_forward_time": 0.0257260799407959, "step": 13185 }, { "epoch": 2.011871337890625e-05, "step": 13185, "training_step_time": 0.1060800552368164 }, { "epoch": 2.01202392578125e-05, "model_forward_time": 0.025269746780395508, "step": 13186 }, { "epoch": 2.01202392578125e-05, "step": 13186, "training_step_time": 0.12652015686035156 }, { "epoch": 2.012176513671875e-05, "model_forward_time": 0.025461673736572266, "step": 13187 }, { "epoch": 2.012176513671875e-05, "step": 13187, "training_step_time": 0.10809564590454102 }, { "epoch": 2.0123291015625e-05, "model_forward_time": 0.025513887405395508, "step": 13188 }, { "epoch": 2.0123291015625e-05, "step": 13188, "training_step_time": 0.11003375053405762 }, { "epoch": 2.012481689453125e-05, "model_forward_time": 0.02524089813232422, "step": 13189 }, { "epoch": 2.012481689453125e-05, "step": 13189, "training_step_time": 0.12632155418395996 }, { "epoch": 2.01263427734375e-05, "grad_norm": 0.1875467747449875, "learning_rate": 6.39230895741251e-05, "loss": 0.011, "step": 13190 }, { "epoch": 2.01263427734375e-05, "model_forward_time": 0.025649070739746094, "step": 13190 }, { "epoch": 2.01263427734375e-05, "step": 13190, "training_step_time": 0.11919975280761719 }, { "epoch": 2.012786865234375e-05, "model_forward_time": 0.02534008026123047, "step": 13191 }, { "epoch": 2.012786865234375e-05, "step": 13191, "training_step_time": 0.11327195167541504 }, { "epoch": 2.012939453125e-05, "model_forward_time": 0.025333642959594727, "step": 13192 }, { "epoch": 2.012939453125e-05, "step": 13192, "training_step_time": 0.1025545597076416 }, { "epoch": 2.013092041015625e-05, "model_forward_time": 0.023714542388916016, "step": 13193 }, { "epoch": 2.013092041015625e-05, "step": 13193, "training_step_time": 0.1510303020477295 }, { "epoch": 2.01324462890625e-05, "model_forward_time": 0.024546384811401367, "step": 13194 }, { "epoch": 2.01324462890625e-05, "step": 13194, "training_step_time": 0.15325355529785156 }, { "epoch": 2.013397216796875e-05, "model_forward_time": 0.025468826293945312, "step": 13195 }, { "epoch": 2.013397216796875e-05, "step": 13195, "training_step_time": 0.14933514595031738 }, { "epoch": 2.0135498046875e-05, "model_forward_time": 0.02483201026916504, "step": 13196 }, { "epoch": 2.0135498046875e-05, "step": 13196, "training_step_time": 0.11266231536865234 }, { "epoch": 2.013702392578125e-05, "model_forward_time": 0.025164127349853516, "step": 13197 }, { "epoch": 2.013702392578125e-05, "step": 13197, "training_step_time": 0.13440418243408203 }, { "epoch": 2.01385498046875e-05, "model_forward_time": 0.024837732315063477, "step": 13198 }, { "epoch": 2.01385498046875e-05, "step": 13198, "training_step_time": 0.12651562690734863 }, { "epoch": 2.014007568359375e-05, "model_forward_time": 0.02491021156311035, "step": 13199 }, { "epoch": 2.014007568359375e-05, "step": 13199, "training_step_time": 0.11913371086120605 }, { "epoch": 2.01416015625e-05, "grad_norm": 0.4467898905277252, "learning_rate": 6.387014543809223e-05, "loss": 0.0182, "step": 13200 }, { "epoch": 2.01416015625e-05, "model_forward_time": 0.025460004806518555, "step": 13200 }, { "epoch": 2.01416015625e-05, "step": 13200, "training_step_time": 0.12027096748352051 }, { "epoch": 2.014312744140625e-05, "model_forward_time": 0.02543497085571289, "step": 13201 }, { "epoch": 2.014312744140625e-05, "step": 13201, "training_step_time": 0.11974430084228516 }, { "epoch": 2.01446533203125e-05, "model_forward_time": 0.025207996368408203, "step": 13202 }, { "epoch": 2.01446533203125e-05, "step": 13202, "training_step_time": 0.1846599578857422 }, { "epoch": 2.014617919921875e-05, "model_forward_time": 0.024797439575195312, "step": 13203 }, { "epoch": 2.014617919921875e-05, "step": 13203, "training_step_time": 0.11417365074157715 }, { "epoch": 2.0147705078125e-05, "model_forward_time": 0.024991750717163086, "step": 13204 }, { "epoch": 2.0147705078125e-05, "step": 13204, "training_step_time": 0.10914206504821777 }, { "epoch": 2.014923095703125e-05, "model_forward_time": 0.02550816535949707, "step": 13205 }, { "epoch": 2.014923095703125e-05, "step": 13205, "training_step_time": 0.12216520309448242 }, { "epoch": 2.01507568359375e-05, "model_forward_time": 0.02749490737915039, "step": 13206 }, { "epoch": 2.01507568359375e-05, "step": 13206, "training_step_time": 0.10899519920349121 }, { "epoch": 2.015228271484375e-05, "model_forward_time": 0.02510833740234375, "step": 13207 }, { "epoch": 2.015228271484375e-05, "step": 13207, "training_step_time": 0.1255331039428711 }, { "epoch": 2.015380859375e-05, "model_forward_time": 0.025784015655517578, "step": 13208 }, { "epoch": 2.015380859375e-05, "step": 13208, "training_step_time": 0.11221504211425781 }, { "epoch": 2.015533447265625e-05, "model_forward_time": 0.025271892547607422, "step": 13209 }, { "epoch": 2.015533447265625e-05, "step": 13209, "training_step_time": 0.1057744026184082 }, { "epoch": 2.01568603515625e-05, "grad_norm": 0.2989524006843567, "learning_rate": 6.38171844485248e-05, "loss": 0.0084, "step": 13210 }, { "epoch": 2.01568603515625e-05, "model_forward_time": 0.025249242782592773, "step": 13210 }, { "epoch": 2.01568603515625e-05, "step": 13210, "training_step_time": 0.1072232723236084 }, { "epoch": 2.015838623046875e-05, "model_forward_time": 0.02526116371154785, "step": 13211 }, { "epoch": 2.015838623046875e-05, "step": 13211, "training_step_time": 0.10965657234191895 }, { "epoch": 2.0159912109375e-05, "model_forward_time": 0.02570962905883789, "step": 13212 }, { "epoch": 2.0159912109375e-05, "step": 13212, "training_step_time": 0.10574460029602051 }, { "epoch": 2.016143798828125e-05, "model_forward_time": 0.024075031280517578, "step": 13213 }, { "epoch": 2.016143798828125e-05, "step": 13213, "training_step_time": 0.10756444931030273 }, { "epoch": 2.01629638671875e-05, "model_forward_time": 0.024388551712036133, "step": 13214 }, { "epoch": 2.01629638671875e-05, "step": 13214, "training_step_time": 0.10751819610595703 }, { "epoch": 2.016448974609375e-05, "model_forward_time": 0.0251007080078125, "step": 13215 }, { "epoch": 2.016448974609375e-05, "step": 13215, "training_step_time": 0.1071474552154541 }, { "epoch": 2.0166015625e-05, "model_forward_time": 0.02499699592590332, "step": 13216 }, { "epoch": 2.0166015625e-05, "step": 13216, "training_step_time": 0.10649704933166504 }, { "epoch": 2.016754150390625e-05, "model_forward_time": 0.02580571174621582, "step": 13217 }, { "epoch": 2.016754150390625e-05, "step": 13217, "training_step_time": 0.10537600517272949 }, { "epoch": 2.01690673828125e-05, "model_forward_time": 0.025492429733276367, "step": 13218 }, { "epoch": 2.01690673828125e-05, "step": 13218, "training_step_time": 0.10483741760253906 }, { "epoch": 2.017059326171875e-05, "model_forward_time": 0.025397062301635742, "step": 13219 }, { "epoch": 2.017059326171875e-05, "step": 13219, "training_step_time": 0.1060481071472168 }, { "epoch": 2.0172119140625e-05, "grad_norm": 0.2838421165943146, "learning_rate": 6.376420666977538e-05, "loss": 0.0117, "step": 13220 }, { "epoch": 2.0172119140625e-05, "model_forward_time": 0.02544403076171875, "step": 13220 }, { "epoch": 2.0172119140625e-05, "step": 13220, "training_step_time": 0.10497164726257324 }, { "epoch": 2.017364501953125e-05, "model_forward_time": 0.02568340301513672, "step": 13221 }, { "epoch": 2.017364501953125e-05, "step": 13221, "training_step_time": 0.10411953926086426 }, { "epoch": 2.01751708984375e-05, "model_forward_time": 0.025105714797973633, "step": 13222 }, { "epoch": 2.01751708984375e-05, "step": 13222, "training_step_time": 0.10828781127929688 }, { "epoch": 2.017669677734375e-05, "model_forward_time": 0.027075529098510742, "step": 13223 }, { "epoch": 2.017669677734375e-05, "step": 13223, "training_step_time": 0.7103557586669922 }, { "epoch": 2.017822265625e-05, "model_forward_time": 0.023116111755371094, "step": 13224 }, { "epoch": 2.017822265625e-05, "step": 13224, "training_step_time": 0.09629511833190918 }, { "epoch": 2.017974853515625e-05, "model_forward_time": 0.026126384735107422, "step": 13225 }, { "epoch": 2.017974853515625e-05, "step": 13225, "training_step_time": 0.10521626472473145 }, { "epoch": 2.01812744140625e-05, "model_forward_time": 0.02551412582397461, "step": 13226 }, { "epoch": 2.01812744140625e-05, "step": 13226, "training_step_time": 0.10938262939453125 }, { "epoch": 2.018280029296875e-05, "model_forward_time": 0.026064634323120117, "step": 13227 }, { "epoch": 2.018280029296875e-05, "step": 13227, "training_step_time": 0.10818791389465332 }, { "epoch": 2.0184326171875e-05, "model_forward_time": 0.024653911590576172, "step": 13228 }, { "epoch": 2.0184326171875e-05, "step": 13228, "training_step_time": 0.10693049430847168 }, { "epoch": 2.018585205078125e-05, "model_forward_time": 0.024796247482299805, "step": 13229 }, { "epoch": 2.018585205078125e-05, "step": 13229, "training_step_time": 0.18159723281860352 }, { "epoch": 2.01873779296875e-05, "grad_norm": 0.4310348927974701, "learning_rate": 6.371121216621698e-05, "loss": 0.0168, "step": 13230 }, { "epoch": 2.01873779296875e-05, "model_forward_time": 0.02542877197265625, "step": 13230 }, { "epoch": 2.01873779296875e-05, "step": 13230, "training_step_time": 0.10765886306762695 }, { "epoch": 2.018890380859375e-05, "model_forward_time": 0.02446746826171875, "step": 13231 }, { "epoch": 2.018890380859375e-05, "step": 13231, "training_step_time": 0.10975313186645508 }, { "epoch": 2.01904296875e-05, "model_forward_time": 0.024953126907348633, "step": 13232 }, { "epoch": 2.01904296875e-05, "step": 13232, "training_step_time": 0.12446308135986328 }, { "epoch": 2.019195556640625e-05, "model_forward_time": 0.025537967681884766, "step": 13233 }, { "epoch": 2.019195556640625e-05, "step": 13233, "training_step_time": 0.1285398006439209 }, { "epoch": 2.01934814453125e-05, "model_forward_time": 0.027690649032592773, "step": 13234 }, { "epoch": 2.01934814453125e-05, "step": 13234, "training_step_time": 0.1398179531097412 }, { "epoch": 2.019500732421875e-05, "model_forward_time": 0.026032447814941406, "step": 13235 }, { "epoch": 2.019500732421875e-05, "step": 13235, "training_step_time": 0.11769485473632812 }, { "epoch": 2.0196533203125e-05, "model_forward_time": 0.02501058578491211, "step": 13236 }, { "epoch": 2.0196533203125e-05, "step": 13236, "training_step_time": 0.17729401588439941 }, { "epoch": 2.019805908203125e-05, "model_forward_time": 0.0249631404876709, "step": 13237 }, { "epoch": 2.019805908203125e-05, "step": 13237, "training_step_time": 0.17268109321594238 }, { "epoch": 2.01995849609375e-05, "model_forward_time": 0.02484726905822754, "step": 13238 }, { "epoch": 2.01995849609375e-05, "step": 13238, "training_step_time": 0.16028761863708496 }, { "epoch": 2.020111083984375e-05, "model_forward_time": 0.025167226791381836, "step": 13239 }, { "epoch": 2.020111083984375e-05, "step": 13239, "training_step_time": 0.17432355880737305 }, { "epoch": 2.020263671875e-05, "grad_norm": 0.3545497953891754, "learning_rate": 6.365820100224292e-05, "loss": 0.0225, "step": 13240 }, { "epoch": 2.020263671875e-05, "model_forward_time": 0.02816939353942871, "step": 13240 }, { "epoch": 2.020263671875e-05, "step": 13240, "training_step_time": 0.12832283973693848 }, { "epoch": 2.020416259765625e-05, "model_forward_time": 0.024519681930541992, "step": 13241 }, { "epoch": 2.020416259765625e-05, "step": 13241, "training_step_time": 0.12159442901611328 }, { "epoch": 2.02056884765625e-05, "model_forward_time": 0.025325775146484375, "step": 13242 }, { "epoch": 2.02056884765625e-05, "step": 13242, "training_step_time": 0.1211247444152832 }, { "epoch": 2.020721435546875e-05, "model_forward_time": 0.025580644607543945, "step": 13243 }, { "epoch": 2.020721435546875e-05, "step": 13243, "training_step_time": 0.1179358959197998 }, { "epoch": 2.0208740234375e-05, "model_forward_time": 0.024941444396972656, "step": 13244 }, { "epoch": 2.0208740234375e-05, "step": 13244, "training_step_time": 0.1675574779510498 }, { "epoch": 2.021026611328125e-05, "model_forward_time": 0.02480483055114746, "step": 13245 }, { "epoch": 2.021026611328125e-05, "step": 13245, "training_step_time": 0.11046195030212402 }, { "epoch": 2.02117919921875e-05, "model_forward_time": 0.024949312210083008, "step": 13246 }, { "epoch": 2.02117919921875e-05, "step": 13246, "training_step_time": 0.2097158432006836 }, { "epoch": 2.021331787109375e-05, "model_forward_time": 0.024770259857177734, "step": 13247 }, { "epoch": 2.021331787109375e-05, "step": 13247, "training_step_time": 0.1073751449584961 }, { "epoch": 2.021484375e-05, "model_forward_time": 0.0247802734375, "step": 13248 }, { "epoch": 2.021484375e-05, "step": 13248, "training_step_time": 0.11640620231628418 }, { "epoch": 2.021636962890625e-05, "model_forward_time": 0.02523040771484375, "step": 13249 }, { "epoch": 2.021636962890625e-05, "step": 13249, "training_step_time": 0.13392162322998047 }, { "epoch": 2.02178955078125e-05, "grad_norm": 0.4664561152458191, "learning_rate": 6.360517324226676e-05, "loss": 0.0128, "step": 13250 }, { "epoch": 2.02178955078125e-05, "model_forward_time": 0.025210857391357422, "step": 13250 }, { "epoch": 2.02178955078125e-05, "step": 13250, "training_step_time": 0.11370420455932617 }, { "epoch": 2.021942138671875e-05, "model_forward_time": 0.025385141372680664, "step": 13251 }, { "epoch": 2.021942138671875e-05, "step": 13251, "training_step_time": 0.1073293685913086 }, { "epoch": 2.0220947265625e-05, "model_forward_time": 0.025185108184814453, "step": 13252 }, { "epoch": 2.0220947265625e-05, "step": 13252, "training_step_time": 0.10932445526123047 }, { "epoch": 2.022247314453125e-05, "model_forward_time": 0.02559041976928711, "step": 13253 }, { "epoch": 2.022247314453125e-05, "step": 13253, "training_step_time": 0.10640096664428711 }, { "epoch": 2.02239990234375e-05, "model_forward_time": 0.024692535400390625, "step": 13254 }, { "epoch": 2.02239990234375e-05, "step": 13254, "training_step_time": 0.10883784294128418 }, { "epoch": 2.022552490234375e-05, "model_forward_time": 0.026071548461914062, "step": 13255 }, { "epoch": 2.022552490234375e-05, "step": 13255, "training_step_time": 0.10596847534179688 }, { "epoch": 2.022705078125e-05, "model_forward_time": 0.025192737579345703, "step": 13256 }, { "epoch": 2.022705078125e-05, "step": 13256, "training_step_time": 0.10589456558227539 }, { "epoch": 2.022857666015625e-05, "model_forward_time": 0.025281190872192383, "step": 13257 }, { "epoch": 2.022857666015625e-05, "step": 13257, "training_step_time": 0.10729265213012695 }, { "epoch": 2.02301025390625e-05, "model_forward_time": 0.028850317001342773, "step": 13258 }, { "epoch": 2.02301025390625e-05, "step": 13258, "training_step_time": 0.10833501815795898 }, { "epoch": 2.023162841796875e-05, "model_forward_time": 0.025326967239379883, "step": 13259 }, { "epoch": 2.023162841796875e-05, "step": 13259, "training_step_time": 0.10670685768127441 }, { "epoch": 2.0233154296875e-05, "grad_norm": 0.48605209589004517, "learning_rate": 6.355212895072223e-05, "loss": 0.0154, "step": 13260 }, { "epoch": 2.0233154296875e-05, "model_forward_time": 0.026866436004638672, "step": 13260 }, { "epoch": 2.0233154296875e-05, "step": 13260, "training_step_time": 0.10517621040344238 }, { "epoch": 2.023468017578125e-05, "model_forward_time": 0.025850534439086914, "step": 13261 }, { "epoch": 2.023468017578125e-05, "step": 13261, "training_step_time": 0.10683059692382812 }, { "epoch": 2.02362060546875e-05, "model_forward_time": 0.02562117576599121, "step": 13262 }, { "epoch": 2.02362060546875e-05, "step": 13262, "training_step_time": 0.10760855674743652 }, { "epoch": 2.023773193359375e-05, "model_forward_time": 0.02621173858642578, "step": 13263 }, { "epoch": 2.023773193359375e-05, "step": 13263, "training_step_time": 0.12128186225891113 }, { "epoch": 2.02392578125e-05, "model_forward_time": 0.025330305099487305, "step": 13264 }, { "epoch": 2.02392578125e-05, "step": 13264, "training_step_time": 0.1379563808441162 }, { "epoch": 2.024078369140625e-05, "model_forward_time": 0.025113582611083984, "step": 13265 }, { "epoch": 2.024078369140625e-05, "step": 13265, "training_step_time": 0.1284654140472412 }, { "epoch": 2.02423095703125e-05, "model_forward_time": 0.02457118034362793, "step": 13266 }, { "epoch": 2.02423095703125e-05, "step": 13266, "training_step_time": 0.11966109275817871 }, { "epoch": 2.024383544921875e-05, "model_forward_time": 0.02575230598449707, "step": 13267 }, { "epoch": 2.024383544921875e-05, "step": 13267, "training_step_time": 0.12421107292175293 }, { "epoch": 2.0245361328125e-05, "model_forward_time": 0.025345563888549805, "step": 13268 }, { "epoch": 2.0245361328125e-05, "step": 13268, "training_step_time": 0.11509895324707031 }, { "epoch": 2.024688720703125e-05, "model_forward_time": 0.02538013458251953, "step": 13269 }, { "epoch": 2.024688720703125e-05, "step": 13269, "training_step_time": 0.1108243465423584 }, { "epoch": 2.02484130859375e-05, "grad_norm": 0.20968252420425415, "learning_rate": 6.349906819206313e-05, "loss": 0.0154, "step": 13270 }, { "epoch": 2.02484130859375e-05, "model_forward_time": 0.024813175201416016, "step": 13270 }, { "epoch": 2.02484130859375e-05, "step": 13270, "training_step_time": 0.1151738166809082 }, { "epoch": 2.024993896484375e-05, "model_forward_time": 0.02462291717529297, "step": 13271 }, { "epoch": 2.024993896484375e-05, "step": 13271, "training_step_time": 0.11022472381591797 }, { "epoch": 2.025146484375e-05, "model_forward_time": 0.025240182876586914, "step": 13272 }, { "epoch": 2.025146484375e-05, "step": 13272, "training_step_time": 0.10874605178833008 }, { "epoch": 2.025299072265625e-05, "model_forward_time": 0.025382041931152344, "step": 13273 }, { "epoch": 2.025299072265625e-05, "step": 13273, "training_step_time": 0.10761761665344238 }, { "epoch": 2.02545166015625e-05, "model_forward_time": 0.02546215057373047, "step": 13274 }, { "epoch": 2.02545166015625e-05, "step": 13274, "training_step_time": 0.2008051872253418 }, { "epoch": 2.025604248046875e-05, "model_forward_time": 0.024811267852783203, "step": 13275 }, { "epoch": 2.025604248046875e-05, "step": 13275, "training_step_time": 0.11292719841003418 }, { "epoch": 2.0257568359375e-05, "model_forward_time": 0.025092601776123047, "step": 13276 }, { "epoch": 2.0257568359375e-05, "step": 13276, "training_step_time": 0.112457275390625 }, { "epoch": 2.025909423828125e-05, "model_forward_time": 0.025597333908081055, "step": 13277 }, { "epoch": 2.025909423828125e-05, "step": 13277, "training_step_time": 0.11565947532653809 }, { "epoch": 2.02606201171875e-05, "model_forward_time": 0.028142929077148438, "step": 13278 }, { "epoch": 2.02606201171875e-05, "step": 13278, "training_step_time": 0.13158774375915527 }, { "epoch": 2.026214599609375e-05, "model_forward_time": 0.025293827056884766, "step": 13279 }, { "epoch": 2.026214599609375e-05, "step": 13279, "training_step_time": 0.22027158737182617 }, { "epoch": 2.0263671875e-05, "grad_norm": 0.47196853160858154, "learning_rate": 6.344599103076329e-05, "loss": 0.0145, "step": 13280 }, { "epoch": 2.0263671875e-05, "model_forward_time": 0.025439739227294922, "step": 13280 }, { "epoch": 2.0263671875e-05, "step": 13280, "training_step_time": 0.1041116714477539 }, { "epoch": 2.026519775390625e-05, "model_forward_time": 0.02497720718383789, "step": 13281 }, { "epoch": 2.026519775390625e-05, "step": 13281, "training_step_time": 0.10393500328063965 }, { "epoch": 2.02667236328125e-05, "model_forward_time": 0.025792837142944336, "step": 13282 }, { "epoch": 2.02667236328125e-05, "step": 13282, "training_step_time": 0.1177985668182373 }, { "epoch": 2.026824951171875e-05, "model_forward_time": 0.025362253189086914, "step": 13283 }, { "epoch": 2.026824951171875e-05, "step": 13283, "training_step_time": 0.1530294418334961 }, { "epoch": 2.0269775390625e-05, "model_forward_time": 0.025170087814331055, "step": 13284 }, { "epoch": 2.0269775390625e-05, "step": 13284, "training_step_time": 0.17938518524169922 }, { "epoch": 2.027130126953125e-05, "model_forward_time": 0.02484917640686035, "step": 13285 }, { "epoch": 2.027130126953125e-05, "step": 13285, "training_step_time": 0.11914205551147461 }, { "epoch": 2.02728271484375e-05, "model_forward_time": 0.024804115295410156, "step": 13286 }, { "epoch": 2.02728271484375e-05, "step": 13286, "training_step_time": 0.11117434501647949 }, { "epoch": 2.027435302734375e-05, "model_forward_time": 0.025751829147338867, "step": 13287 }, { "epoch": 2.027435302734375e-05, "step": 13287, "training_step_time": 0.1049811840057373 }, { "epoch": 2.027587890625e-05, "model_forward_time": 0.025020122528076172, "step": 13288 }, { "epoch": 2.027587890625e-05, "step": 13288, "training_step_time": 0.10649561882019043 }, { "epoch": 2.027740478515625e-05, "model_forward_time": 0.02551436424255371, "step": 13289 }, { "epoch": 2.027740478515625e-05, "step": 13289, "training_step_time": 0.10687065124511719 }, { "epoch": 2.02789306640625e-05, "grad_norm": 0.24185921251773834, "learning_rate": 6.339289753131649e-05, "loss": 0.0146, "step": 13290 }, { "epoch": 2.02789306640625e-05, "model_forward_time": 0.025755643844604492, "step": 13290 }, { "epoch": 2.02789306640625e-05, "step": 13290, "training_step_time": 0.10640978813171387 }, { "epoch": 2.028045654296875e-05, "model_forward_time": 0.025796890258789062, "step": 13291 }, { "epoch": 2.028045654296875e-05, "step": 13291, "training_step_time": 0.14030838012695312 }, { "epoch": 2.0281982421875e-05, "model_forward_time": 0.026367664337158203, "step": 13292 }, { "epoch": 2.0281982421875e-05, "step": 13292, "training_step_time": 0.1271061897277832 }, { "epoch": 2.028350830078125e-05, "model_forward_time": 0.025219202041625977, "step": 13293 }, { "epoch": 2.028350830078125e-05, "step": 13293, "training_step_time": 0.1328415870666504 }, { "epoch": 2.02850341796875e-05, "model_forward_time": 0.024710655212402344, "step": 13294 }, { "epoch": 2.02850341796875e-05, "step": 13294, "training_step_time": 0.10700201988220215 }, { "epoch": 2.028656005859375e-05, "model_forward_time": 0.025164365768432617, "step": 13295 }, { "epoch": 2.028656005859375e-05, "step": 13295, "training_step_time": 0.17856693267822266 }, { "epoch": 2.02880859375e-05, "model_forward_time": 0.02461695671081543, "step": 13296 }, { "epoch": 2.02880859375e-05, "step": 13296, "training_step_time": 0.13098669052124023 }, { "epoch": 2.028961181640625e-05, "model_forward_time": 0.024326324462890625, "step": 13297 }, { "epoch": 2.028961181640625e-05, "step": 13297, "training_step_time": 0.11051321029663086 }, { "epoch": 2.02911376953125e-05, "model_forward_time": 0.025365829467773438, "step": 13298 }, { "epoch": 2.02911376953125e-05, "step": 13298, "training_step_time": 0.10321927070617676 }, { "epoch": 2.029266357421875e-05, "model_forward_time": 0.02542400360107422, "step": 13299 }, { "epoch": 2.029266357421875e-05, "step": 13299, "training_step_time": 0.10504651069641113 }, { "epoch": 2.0294189453125e-05, "grad_norm": 0.2074599266052246, "learning_rate": 6.333978775823631e-05, "loss": 0.0129, "step": 13300 }, { "epoch": 2.0294189453125e-05, "model_forward_time": 0.025163650512695312, "step": 13300 }, { "epoch": 2.0294189453125e-05, "step": 13300, "training_step_time": 0.10434365272521973 }, { "epoch": 2.029571533203125e-05, "model_forward_time": 0.02515554428100586, "step": 13301 }, { "epoch": 2.029571533203125e-05, "step": 13301, "training_step_time": 0.10779452323913574 }, { "epoch": 2.02972412109375e-05, "model_forward_time": 0.02542591094970703, "step": 13302 }, { "epoch": 2.02972412109375e-05, "step": 13302, "training_step_time": 0.10377287864685059 }, { "epoch": 2.029876708984375e-05, "model_forward_time": 0.025277137756347656, "step": 13303 }, { "epoch": 2.029876708984375e-05, "step": 13303, "training_step_time": 0.10571765899658203 }, { "epoch": 2.030029296875e-05, "model_forward_time": 0.02493882179260254, "step": 13304 }, { "epoch": 2.030029296875e-05, "step": 13304, "training_step_time": 0.10590672492980957 }, { "epoch": 2.030181884765625e-05, "model_forward_time": 0.02521491050720215, "step": 13305 }, { "epoch": 2.030181884765625e-05, "step": 13305, "training_step_time": 0.10861349105834961 }, { "epoch": 2.03033447265625e-05, "model_forward_time": 0.025342702865600586, "step": 13306 }, { "epoch": 2.03033447265625e-05, "step": 13306, "training_step_time": 0.10641336441040039 }, { "epoch": 2.030487060546875e-05, "model_forward_time": 0.025427579879760742, "step": 13307 }, { "epoch": 2.030487060546875e-05, "step": 13307, "training_step_time": 0.10676693916320801 }, { "epoch": 2.0306396484375e-05, "model_forward_time": 0.02509760856628418, "step": 13308 }, { "epoch": 2.0306396484375e-05, "step": 13308, "training_step_time": 0.10972356796264648 }, { "epoch": 2.030792236328125e-05, "model_forward_time": 0.024326562881469727, "step": 13309 }, { "epoch": 2.030792236328125e-05, "step": 13309, "training_step_time": 0.1069328784942627 }, { "epoch": 2.03094482421875e-05, "grad_norm": 0.23471605777740479, "learning_rate": 6.328666177605616e-05, "loss": 0.0193, "step": 13310 }, { "epoch": 2.03094482421875e-05, "model_forward_time": 0.024066925048828125, "step": 13310 }, { "epoch": 2.03094482421875e-05, "step": 13310, "training_step_time": 0.11478137969970703 }, { "epoch": 2.031097412109375e-05, "model_forward_time": 0.0251619815826416, "step": 13311 }, { "epoch": 2.031097412109375e-05, "step": 13311, "training_step_time": 0.10899877548217773 }, { "epoch": 2.03125e-05, "model_forward_time": 0.02590155601501465, "step": 13312 }, { "epoch": 2.03125e-05, "step": 13312, "training_step_time": 0.10541939735412598 }, { "epoch": 2.031402587890625e-05, "model_forward_time": 0.02649545669555664, "step": 13313 }, { "epoch": 2.031402587890625e-05, "step": 13313, "training_step_time": 0.10709834098815918 }, { "epoch": 2.03155517578125e-05, "model_forward_time": 0.025274991989135742, "step": 13314 }, { "epoch": 2.03155517578125e-05, "step": 13314, "training_step_time": 0.10619544982910156 }, { "epoch": 2.031707763671875e-05, "model_forward_time": 0.025475740432739258, "step": 13315 }, { "epoch": 2.031707763671875e-05, "step": 13315, "training_step_time": 0.11225461959838867 }, { "epoch": 2.0318603515625e-05, "model_forward_time": 0.02570366859436035, "step": 13316 }, { "epoch": 2.0318603515625e-05, "step": 13316, "training_step_time": 0.11017584800720215 }, { "epoch": 2.032012939453125e-05, "model_forward_time": 0.02529311180114746, "step": 13317 }, { "epoch": 2.032012939453125e-05, "step": 13317, "training_step_time": 0.10726737976074219 }, { "epoch": 2.03216552734375e-05, "model_forward_time": 0.025243282318115234, "step": 13318 }, { "epoch": 2.03216552734375e-05, "step": 13318, "training_step_time": 0.10825228691101074 }, { "epoch": 2.032318115234375e-05, "model_forward_time": 0.024976491928100586, "step": 13319 }, { "epoch": 2.032318115234375e-05, "step": 13319, "training_step_time": 0.1051335334777832 }, { "epoch": 2.032470703125e-05, "grad_norm": 0.6534555554389954, "learning_rate": 6.323351964932908e-05, "loss": 0.0234, "step": 13320 }, { "epoch": 2.032470703125e-05, "model_forward_time": 0.025284767150878906, "step": 13320 }, { "epoch": 2.032470703125e-05, "step": 13320, "training_step_time": 0.10705852508544922 }, { "epoch": 2.032623291015625e-05, "model_forward_time": 0.02513909339904785, "step": 13321 }, { "epoch": 2.032623291015625e-05, "step": 13321, "training_step_time": 0.1048727035522461 }, { "epoch": 2.03277587890625e-05, "model_forward_time": 0.02554774284362793, "step": 13322 }, { "epoch": 2.03277587890625e-05, "step": 13322, "training_step_time": 0.15282225608825684 }, { "epoch": 2.032928466796875e-05, "model_forward_time": 0.02503061294555664, "step": 13323 }, { "epoch": 2.032928466796875e-05, "step": 13323, "training_step_time": 0.1163182258605957 }, { "epoch": 2.0330810546875e-05, "model_forward_time": 0.024680376052856445, "step": 13324 }, { "epoch": 2.0330810546875e-05, "step": 13324, "training_step_time": 0.13112902641296387 }, { "epoch": 2.033233642578125e-05, "model_forward_time": 0.02414560317993164, "step": 13325 }, { "epoch": 2.033233642578125e-05, "step": 13325, "training_step_time": 0.12574434280395508 }, { "epoch": 2.03338623046875e-05, "model_forward_time": 0.0239865779876709, "step": 13326 }, { "epoch": 2.03338623046875e-05, "step": 13326, "training_step_time": 0.15393328666687012 }, { "epoch": 2.033538818359375e-05, "model_forward_time": 0.024796009063720703, "step": 13327 }, { "epoch": 2.033538818359375e-05, "step": 13327, "training_step_time": 0.1716611385345459 }, { "epoch": 2.03369140625e-05, "model_forward_time": 0.025322914123535156, "step": 13328 }, { "epoch": 2.03369140625e-05, "step": 13328, "training_step_time": 0.18828606605529785 }, { "epoch": 2.033843994140625e-05, "model_forward_time": 0.023903846740722656, "step": 13329 }, { "epoch": 2.033843994140625e-05, "step": 13329, "training_step_time": 0.14388418197631836 }, { "epoch": 2.03399658203125e-05, "grad_norm": 0.27252867817878723, "learning_rate": 6.31803614426278e-05, "loss": 0.0168, "step": 13330 }, { "epoch": 2.03399658203125e-05, "model_forward_time": 0.024565458297729492, "step": 13330 }, { "epoch": 2.03399658203125e-05, "step": 13330, "training_step_time": 0.17617368698120117 }, { "epoch": 2.034149169921875e-05, "model_forward_time": 0.024648666381835938, "step": 13331 }, { "epoch": 2.034149169921875e-05, "step": 13331, "training_step_time": 0.17773771286010742 }, { "epoch": 2.0343017578125e-05, "model_forward_time": 0.024277687072753906, "step": 13332 }, { "epoch": 2.0343017578125e-05, "step": 13332, "training_step_time": 0.1134943962097168 }, { "epoch": 2.034454345703125e-05, "model_forward_time": 0.024907588958740234, "step": 13333 }, { "epoch": 2.034454345703125e-05, "step": 13333, "training_step_time": 0.1138298511505127 }, { "epoch": 2.03460693359375e-05, "model_forward_time": 0.0251007080078125, "step": 13334 }, { "epoch": 2.03460693359375e-05, "step": 13334, "training_step_time": 0.1078031063079834 }, { "epoch": 2.034759521484375e-05, "model_forward_time": 0.025640010833740234, "step": 13335 }, { "epoch": 2.034759521484375e-05, "step": 13335, "training_step_time": 0.10801815986633301 }, { "epoch": 2.034912109375e-05, "model_forward_time": 0.025121212005615234, "step": 13336 }, { "epoch": 2.034912109375e-05, "step": 13336, "training_step_time": 0.10560989379882812 }, { "epoch": 2.035064697265625e-05, "model_forward_time": 0.025162220001220703, "step": 13337 }, { "epoch": 2.035064697265625e-05, "step": 13337, "training_step_time": 0.10491085052490234 }, { "epoch": 2.03521728515625e-05, "model_forward_time": 0.025075197219848633, "step": 13338 }, { "epoch": 2.03521728515625e-05, "step": 13338, "training_step_time": 0.16425228118896484 }, { "epoch": 2.035369873046875e-05, "model_forward_time": 0.02476048469543457, "step": 13339 }, { "epoch": 2.035369873046875e-05, "step": 13339, "training_step_time": 0.1192617416381836 }, { "epoch": 2.0355224609375e-05, "grad_norm": 0.26604318618774414, "learning_rate": 6.312718722054454e-05, "loss": 0.0185, "step": 13340 }, { "epoch": 2.0355224609375e-05, "model_forward_time": 0.02500319480895996, "step": 13340 }, { "epoch": 2.0355224609375e-05, "step": 13340, "training_step_time": 0.1104893684387207 }, { "epoch": 2.035675048828125e-05, "model_forward_time": 0.02562737464904785, "step": 13341 }, { "epoch": 2.035675048828125e-05, "step": 13341, "training_step_time": 0.1252579689025879 }, { "epoch": 2.03582763671875e-05, "model_forward_time": 0.025723695755004883, "step": 13342 }, { "epoch": 2.03582763671875e-05, "step": 13342, "training_step_time": 0.1060798168182373 }, { "epoch": 2.035980224609375e-05, "model_forward_time": 0.025710344314575195, "step": 13343 }, { "epoch": 2.035980224609375e-05, "step": 13343, "training_step_time": 0.11118292808532715 }, { "epoch": 2.0361328125e-05, "model_forward_time": 0.02558588981628418, "step": 13344 }, { "epoch": 2.0361328125e-05, "step": 13344, "training_step_time": 0.12491750717163086 }, { "epoch": 2.036285400390625e-05, "model_forward_time": 0.02542591094970703, "step": 13345 }, { "epoch": 2.036285400390625e-05, "step": 13345, "training_step_time": 0.1108391284942627 }, { "epoch": 2.03643798828125e-05, "model_forward_time": 0.0256960391998291, "step": 13346 }, { "epoch": 2.03643798828125e-05, "step": 13346, "training_step_time": 0.11232662200927734 }, { "epoch": 2.036590576171875e-05, "model_forward_time": 0.025386571884155273, "step": 13347 }, { "epoch": 2.036590576171875e-05, "step": 13347, "training_step_time": 0.11004471778869629 }, { "epoch": 2.0367431640625e-05, "model_forward_time": 0.025976181030273438, "step": 13348 }, { "epoch": 2.0367431640625e-05, "step": 13348, "training_step_time": 0.10616803169250488 }, { "epoch": 2.036895751953125e-05, "model_forward_time": 0.024954557418823242, "step": 13349 }, { "epoch": 2.036895751953125e-05, "step": 13349, "training_step_time": 0.10417342185974121 }, { "epoch": 2.03704833984375e-05, "grad_norm": 0.21818700432777405, "learning_rate": 6.307399704769099e-05, "loss": 0.0127, "step": 13350 }, { "epoch": 2.03704833984375e-05, "model_forward_time": 0.02532339096069336, "step": 13350 }, { "epoch": 2.03704833984375e-05, "step": 13350, "training_step_time": 0.10731673240661621 }, { "epoch": 2.037200927734375e-05, "model_forward_time": 0.025515079498291016, "step": 13351 }, { "epoch": 2.037200927734375e-05, "step": 13351, "training_step_time": 0.10441398620605469 }, { "epoch": 2.037353515625e-05, "model_forward_time": 0.025191307067871094, "step": 13352 }, { "epoch": 2.037353515625e-05, "step": 13352, "training_step_time": 0.10577249526977539 }, { "epoch": 2.037506103515625e-05, "model_forward_time": 0.024889469146728516, "step": 13353 }, { "epoch": 2.037506103515625e-05, "step": 13353, "training_step_time": 0.10596275329589844 }, { "epoch": 2.03765869140625e-05, "model_forward_time": 0.025519371032714844, "step": 13354 }, { "epoch": 2.03765869140625e-05, "step": 13354, "training_step_time": 0.10979866981506348 }, { "epoch": 2.037811279296875e-05, "model_forward_time": 0.026854753494262695, "step": 13355 }, { "epoch": 2.037811279296875e-05, "step": 13355, "training_step_time": 0.10616421699523926 }, { "epoch": 2.0379638671875e-05, "model_forward_time": 0.025388240814208984, "step": 13356 }, { "epoch": 2.0379638671875e-05, "step": 13356, "training_step_time": 0.10576081275939941 }, { "epoch": 2.038116455078125e-05, "model_forward_time": 0.025641202926635742, "step": 13357 }, { "epoch": 2.038116455078125e-05, "step": 13357, "training_step_time": 0.10996222496032715 }, { "epoch": 2.03826904296875e-05, "model_forward_time": 0.02522730827331543, "step": 13358 }, { "epoch": 2.03826904296875e-05, "step": 13358, "training_step_time": 0.1120295524597168 }, { "epoch": 2.038421630859375e-05, "model_forward_time": 0.025324106216430664, "step": 13359 }, { "epoch": 2.038421630859375e-05, "step": 13359, "training_step_time": 0.10677099227905273 }, { "epoch": 2.03857421875e-05, "grad_norm": 0.2759242057800293, "learning_rate": 6.302079098869824e-05, "loss": 0.0168, "step": 13360 }, { "epoch": 2.03857421875e-05, "model_forward_time": 0.02557086944580078, "step": 13360 }, { "epoch": 2.03857421875e-05, "step": 13360, "training_step_time": 0.1097874641418457 }, { "epoch": 2.038726806640625e-05, "model_forward_time": 0.02562856674194336, "step": 13361 }, { "epoch": 2.038726806640625e-05, "step": 13361, "training_step_time": 0.10847139358520508 }, { "epoch": 2.03887939453125e-05, "model_forward_time": 0.026401519775390625, "step": 13362 }, { "epoch": 2.03887939453125e-05, "step": 13362, "training_step_time": 0.10815906524658203 }, { "epoch": 2.039031982421875e-05, "model_forward_time": 0.025693178176879883, "step": 13363 }, { "epoch": 2.039031982421875e-05, "step": 13363, "training_step_time": 0.11211395263671875 }, { "epoch": 2.0391845703125e-05, "model_forward_time": 0.02521228790283203, "step": 13364 }, { "epoch": 2.0391845703125e-05, "step": 13364, "training_step_time": 0.10996460914611816 }, { "epoch": 2.039337158203125e-05, "model_forward_time": 0.025126934051513672, "step": 13365 }, { "epoch": 2.039337158203125e-05, "step": 13365, "training_step_time": 0.10844683647155762 }, { "epoch": 2.03948974609375e-05, "model_forward_time": 0.025361061096191406, "step": 13366 }, { "epoch": 2.03948974609375e-05, "step": 13366, "training_step_time": 0.10727047920227051 }, { "epoch": 2.039642333984375e-05, "model_forward_time": 0.02548980712890625, "step": 13367 }, { "epoch": 2.039642333984375e-05, "step": 13367, "training_step_time": 0.1099245548248291 }, { "epoch": 2.039794921875e-05, "model_forward_time": 0.02556443214416504, "step": 13368 }, { "epoch": 2.039794921875e-05, "step": 13368, "training_step_time": 0.10476875305175781 }, { "epoch": 2.039947509765625e-05, "model_forward_time": 0.025258541107177734, "step": 13369 }, { "epoch": 2.039947509765625e-05, "step": 13369, "training_step_time": 0.16568517684936523 }, { "epoch": 2.04010009765625e-05, "grad_norm": 0.3927362561225891, "learning_rate": 6.296756910821666e-05, "loss": 0.0258, "step": 13370 }, { "epoch": 2.04010009765625e-05, "model_forward_time": 0.02494049072265625, "step": 13370 }, { "epoch": 2.04010009765625e-05, "step": 13370, "training_step_time": 0.1055293083190918 }, { "epoch": 2.040252685546875e-05, "model_forward_time": 0.025125980377197266, "step": 13371 }, { "epoch": 2.040252685546875e-05, "step": 13371, "training_step_time": 0.1176290512084961 }, { "epoch": 2.0404052734375e-05, "model_forward_time": 0.025312423706054688, "step": 13372 }, { "epoch": 2.0404052734375e-05, "step": 13372, "training_step_time": 0.1184237003326416 }, { "epoch": 2.040557861328125e-05, "model_forward_time": 0.025186538696289062, "step": 13373 }, { "epoch": 2.040557861328125e-05, "step": 13373, "training_step_time": 0.17196273803710938 }, { "epoch": 2.04071044921875e-05, "model_forward_time": 0.02446603775024414, "step": 13374 }, { "epoch": 2.04071044921875e-05, "step": 13374, "training_step_time": 0.1827099323272705 }, { "epoch": 2.040863037109375e-05, "model_forward_time": 0.024699926376342773, "step": 13375 }, { "epoch": 2.040863037109375e-05, "step": 13375, "training_step_time": 0.1266465187072754 }, { "epoch": 2.041015625e-05, "model_forward_time": 0.023993730545043945, "step": 13376 }, { "epoch": 2.041015625e-05, "step": 13376, "training_step_time": 0.1243433952331543 }, { "epoch": 2.041168212890625e-05, "model_forward_time": 0.024440288543701172, "step": 13377 }, { "epoch": 2.041168212890625e-05, "step": 13377, "training_step_time": 0.11133742332458496 }, { "epoch": 2.04132080078125e-05, "model_forward_time": 0.02582073211669922, "step": 13378 }, { "epoch": 2.04132080078125e-05, "step": 13378, "training_step_time": 0.20570898056030273 }, { "epoch": 2.041473388671875e-05, "model_forward_time": 0.024333953857421875, "step": 13379 }, { "epoch": 2.041473388671875e-05, "step": 13379, "training_step_time": 0.13486790657043457 }, { "epoch": 2.0416259765625e-05, "grad_norm": 0.4889189302921295, "learning_rate": 6.291433147091583e-05, "loss": 0.0184, "step": 13380 }, { "epoch": 2.0416259765625e-05, "model_forward_time": 0.024821758270263672, "step": 13380 }, { "epoch": 2.0416259765625e-05, "step": 13380, "training_step_time": 0.19266343116760254 }, { "epoch": 2.041778564453125e-05, "model_forward_time": 0.02434992790222168, "step": 13381 }, { "epoch": 2.041778564453125e-05, "step": 13381, "training_step_time": 0.10489630699157715 }, { "epoch": 2.04193115234375e-05, "model_forward_time": 0.024602174758911133, "step": 13382 }, { "epoch": 2.04193115234375e-05, "step": 13382, "training_step_time": 0.10331845283508301 }, { "epoch": 2.042083740234375e-05, "model_forward_time": 0.025224924087524414, "step": 13383 }, { "epoch": 2.042083740234375e-05, "step": 13383, "training_step_time": 0.10495710372924805 }, { "epoch": 2.042236328125e-05, "model_forward_time": 0.02542734146118164, "step": 13384 }, { "epoch": 2.042236328125e-05, "step": 13384, "training_step_time": 0.1168813705444336 }, { "epoch": 2.042388916015625e-05, "model_forward_time": 0.025130748748779297, "step": 13385 }, { "epoch": 2.042388916015625e-05, "step": 13385, "training_step_time": 0.16591334342956543 }, { "epoch": 2.04254150390625e-05, "model_forward_time": 0.024766206741333008, "step": 13386 }, { "epoch": 2.04254150390625e-05, "step": 13386, "training_step_time": 0.11996960639953613 }, { "epoch": 2.042694091796875e-05, "model_forward_time": 0.024512052536010742, "step": 13387 }, { "epoch": 2.042694091796875e-05, "step": 13387, "training_step_time": 0.10339736938476562 }, { "epoch": 2.0428466796875e-05, "model_forward_time": 0.025208473205566406, "step": 13388 }, { "epoch": 2.0428466796875e-05, "step": 13388, "training_step_time": 0.12165307998657227 }, { "epoch": 2.042999267578125e-05, "model_forward_time": 0.025351762771606445, "step": 13389 }, { "epoch": 2.042999267578125e-05, "step": 13389, "training_step_time": 0.18850278854370117 }, { "epoch": 2.04315185546875e-05, "grad_norm": 0.5438939332962036, "learning_rate": 6.286107814148454e-05, "loss": 0.0228, "step": 13390 }, { "epoch": 2.04315185546875e-05, "model_forward_time": 0.02413654327392578, "step": 13390 }, { "epoch": 2.04315185546875e-05, "step": 13390, "training_step_time": 0.12462306022644043 }, { "epoch": 2.043304443359375e-05, "model_forward_time": 0.0245668888092041, "step": 13391 }, { "epoch": 2.043304443359375e-05, "step": 13391, "training_step_time": 0.10827827453613281 }, { "epoch": 2.04345703125e-05, "model_forward_time": 0.025310516357421875, "step": 13392 }, { "epoch": 2.04345703125e-05, "step": 13392, "training_step_time": 0.10808825492858887 }, { "epoch": 2.043609619140625e-05, "model_forward_time": 0.025266408920288086, "step": 13393 }, { "epoch": 2.043609619140625e-05, "step": 13393, "training_step_time": 0.1845111846923828 }, { "epoch": 2.04376220703125e-05, "model_forward_time": 0.02456808090209961, "step": 13394 }, { "epoch": 2.04376220703125e-05, "step": 13394, "training_step_time": 0.20853614807128906 }, { "epoch": 2.043914794921875e-05, "model_forward_time": 0.02411651611328125, "step": 13395 }, { "epoch": 2.043914794921875e-05, "step": 13395, "training_step_time": 0.20552682876586914 }, { "epoch": 2.0440673828125e-05, "model_forward_time": 0.02406454086303711, "step": 13396 }, { "epoch": 2.0440673828125e-05, "step": 13396, "training_step_time": 0.2001035213470459 }, { "epoch": 2.044219970703125e-05, "model_forward_time": 0.02544856071472168, "step": 13397 }, { "epoch": 2.044219970703125e-05, "step": 13397, "training_step_time": 0.18812942504882812 }, { "epoch": 2.04437255859375e-05, "model_forward_time": 0.024179458618164062, "step": 13398 }, { "epoch": 2.04437255859375e-05, "step": 13398, "training_step_time": 0.17641615867614746 }, { "epoch": 2.044525146484375e-05, "model_forward_time": 0.024378299713134766, "step": 13399 }, { "epoch": 2.044525146484375e-05, "step": 13399, "training_step_time": 0.17526006698608398 }, { "epoch": 2.044677734375e-05, "grad_norm": 0.537973165512085, "learning_rate": 6.280780918463057e-05, "loss": 0.027, "step": 13400 }, { "epoch": 2.044677734375e-05, "model_forward_time": 0.024411439895629883, "step": 13400 }, { "epoch": 2.044677734375e-05, "step": 13400, "training_step_time": 0.15742993354797363 }, { "epoch": 2.044830322265625e-05, "model_forward_time": 0.024432659149169922, "step": 13401 }, { "epoch": 2.044830322265625e-05, "step": 13401, "training_step_time": 0.11360335350036621 }, { "epoch": 2.04498291015625e-05, "model_forward_time": 0.02487039566040039, "step": 13402 }, { "epoch": 2.04498291015625e-05, "step": 13402, "training_step_time": 0.10068964958190918 }, { "epoch": 2.045135498046875e-05, "model_forward_time": 0.025463104248046875, "step": 13403 }, { "epoch": 2.045135498046875e-05, "step": 13403, "training_step_time": 0.10314798355102539 }, { "epoch": 2.0452880859375e-05, "model_forward_time": 0.02539849281311035, "step": 13404 }, { "epoch": 2.0452880859375e-05, "step": 13404, "training_step_time": 0.10997819900512695 }, { "epoch": 2.045440673828125e-05, "model_forward_time": 0.025297880172729492, "step": 13405 }, { "epoch": 2.045440673828125e-05, "step": 13405, "training_step_time": 0.10525131225585938 }, { "epoch": 2.04559326171875e-05, "model_forward_time": 0.025723934173583984, "step": 13406 }, { "epoch": 2.04559326171875e-05, "step": 13406, "training_step_time": 0.10515999794006348 }, { "epoch": 2.045745849609375e-05, "model_forward_time": 0.025177717208862305, "step": 13407 }, { "epoch": 2.045745849609375e-05, "step": 13407, "training_step_time": 0.10677170753479004 }, { "epoch": 2.0458984375e-05, "model_forward_time": 0.02572798728942871, "step": 13408 }, { "epoch": 2.0458984375e-05, "step": 13408, "training_step_time": 0.11076951026916504 }, { "epoch": 2.046051025390625e-05, "model_forward_time": 0.025222063064575195, "step": 13409 }, { "epoch": 2.046051025390625e-05, "step": 13409, "training_step_time": 0.10886454582214355 }, { "epoch": 2.04620361328125e-05, "grad_norm": 0.4679735600948334, "learning_rate": 6.275452466508077e-05, "loss": 0.0187, "step": 13410 }, { "epoch": 2.04620361328125e-05, "model_forward_time": 0.02583146095275879, "step": 13410 }, { "epoch": 2.04620361328125e-05, "step": 13410, "training_step_time": 0.1889970302581787 }, { "epoch": 2.046356201171875e-05, "model_forward_time": 0.026597023010253906, "step": 13411 }, { "epoch": 2.046356201171875e-05, "step": 13411, "training_step_time": 0.15633463859558105 }, { "epoch": 2.0465087890625e-05, "model_forward_time": 0.023148775100708008, "step": 13412 }, { "epoch": 2.0465087890625e-05, "step": 13412, "training_step_time": 0.1860370635986328 }, { "epoch": 2.046661376953125e-05, "model_forward_time": 0.024436473846435547, "step": 13413 }, { "epoch": 2.046661376953125e-05, "step": 13413, "training_step_time": 0.20177459716796875 }, { "epoch": 2.04681396484375e-05, "model_forward_time": 0.02530074119567871, "step": 13414 }, { "epoch": 2.04681396484375e-05, "step": 13414, "training_step_time": 0.1618971824645996 }, { "epoch": 2.046966552734375e-05, "model_forward_time": 0.025056838989257812, "step": 13415 }, { "epoch": 2.046966552734375e-05, "step": 13415, "training_step_time": 0.10505366325378418 }, { "epoch": 2.047119140625e-05, "model_forward_time": 0.024538755416870117, "step": 13416 }, { "epoch": 2.047119140625e-05, "step": 13416, "training_step_time": 0.14645838737487793 }, { "epoch": 2.047271728515625e-05, "model_forward_time": 0.024964094161987305, "step": 13417 }, { "epoch": 2.047271728515625e-05, "step": 13417, "training_step_time": 0.16351628303527832 }, { "epoch": 2.04742431640625e-05, "model_forward_time": 0.025046348571777344, "step": 13418 }, { "epoch": 2.04742431640625e-05, "step": 13418, "training_step_time": 0.12252449989318848 }, { "epoch": 2.047576904296875e-05, "model_forward_time": 0.02460479736328125, "step": 13419 }, { "epoch": 2.047576904296875e-05, "step": 13419, "training_step_time": 0.11268949508666992 }, { "epoch": 2.0477294921875e-05, "grad_norm": 0.2953059673309326, "learning_rate": 6.27012246475808e-05, "loss": 0.0222, "step": 13420 }, { "epoch": 2.0477294921875e-05, "model_forward_time": 0.025562047958374023, "step": 13420 }, { "epoch": 2.0477294921875e-05, "step": 13420, "training_step_time": 0.1158447265625 }, { "epoch": 2.047882080078125e-05, "model_forward_time": 0.0252077579498291, "step": 13421 }, { "epoch": 2.047882080078125e-05, "step": 13421, "training_step_time": 0.12416815757751465 }, { "epoch": 2.04803466796875e-05, "model_forward_time": 0.025690555572509766, "step": 13422 }, { "epoch": 2.04803466796875e-05, "step": 13422, "training_step_time": 0.10564088821411133 }, { "epoch": 2.048187255859375e-05, "model_forward_time": 0.02516031265258789, "step": 13423 }, { "epoch": 2.048187255859375e-05, "step": 13423, "training_step_time": 0.10915946960449219 }, { "epoch": 2.04833984375e-05, "model_forward_time": 0.02544689178466797, "step": 13424 }, { "epoch": 2.04833984375e-05, "step": 13424, "training_step_time": 0.10480928421020508 }, { "epoch": 2.048492431640625e-05, "model_forward_time": 0.025229692459106445, "step": 13425 }, { "epoch": 2.048492431640625e-05, "step": 13425, "training_step_time": 0.10796427726745605 }, { "epoch": 2.04864501953125e-05, "model_forward_time": 0.02448129653930664, "step": 13426 }, { "epoch": 2.04864501953125e-05, "step": 13426, "training_step_time": 0.1351613998413086 }, { "epoch": 2.048797607421875e-05, "model_forward_time": 0.0281524658203125, "step": 13427 }, { "epoch": 2.048797607421875e-05, "step": 13427, "training_step_time": 0.12517619132995605 }, { "epoch": 2.0489501953125e-05, "model_forward_time": 0.024603605270385742, "step": 13428 }, { "epoch": 2.0489501953125e-05, "step": 13428, "training_step_time": 0.11872172355651855 }, { "epoch": 2.049102783203125e-05, "model_forward_time": 0.02561640739440918, "step": 13429 }, { "epoch": 2.049102783203125e-05, "step": 13429, "training_step_time": 0.1268782615661621 }, { "epoch": 2.04925537109375e-05, "grad_norm": 0.5403966307640076, "learning_rate": 6.264790919689525e-05, "loss": 0.0161, "step": 13430 }, { "epoch": 2.04925537109375e-05, "model_forward_time": 0.025127410888671875, "step": 13430 }, { "epoch": 2.04925537109375e-05, "step": 13430, "training_step_time": 0.10532832145690918 }, { "epoch": 2.049407958984375e-05, "model_forward_time": 0.025257587432861328, "step": 13431 }, { "epoch": 2.049407958984375e-05, "step": 13431, "training_step_time": 0.11516499519348145 }, { "epoch": 2.049560546875e-05, "model_forward_time": 0.025308609008789062, "step": 13432 }, { "epoch": 2.049560546875e-05, "step": 13432, "training_step_time": 0.11084794998168945 }, { "epoch": 2.049713134765625e-05, "model_forward_time": 0.025204896926879883, "step": 13433 }, { "epoch": 2.049713134765625e-05, "step": 13433, "training_step_time": 0.10706782341003418 }, { "epoch": 2.04986572265625e-05, "model_forward_time": 0.025284290313720703, "step": 13434 }, { "epoch": 2.04986572265625e-05, "step": 13434, "training_step_time": 0.10545110702514648 }, { "epoch": 2.050018310546875e-05, "model_forward_time": 0.025746583938598633, "step": 13435 }, { "epoch": 2.050018310546875e-05, "step": 13435, "training_step_time": 0.10692477226257324 }, { "epoch": 2.0501708984375e-05, "model_forward_time": 0.02491021156311035, "step": 13436 }, { "epoch": 2.0501708984375e-05, "step": 13436, "training_step_time": 0.10530471801757812 }, { "epoch": 2.050323486328125e-05, "model_forward_time": 0.025292158126831055, "step": 13437 }, { "epoch": 2.050323486328125e-05, "step": 13437, "training_step_time": 0.10413026809692383 }, { "epoch": 2.05047607421875e-05, "model_forward_time": 0.02547907829284668, "step": 13438 }, { "epoch": 2.05047607421875e-05, "step": 13438, "training_step_time": 0.10844707489013672 }, { "epoch": 2.050628662109375e-05, "model_forward_time": 0.025534391403198242, "step": 13439 }, { "epoch": 2.050628662109375e-05, "step": 13439, "training_step_time": 0.10549044609069824 }, { "epoch": 2.05078125e-05, "grad_norm": 0.246059387922287, "learning_rate": 6.259457837780742e-05, "loss": 0.0175, "step": 13440 }, { "epoch": 2.05078125e-05, "model_forward_time": 0.02570176124572754, "step": 13440 }, { "epoch": 2.05078125e-05, "step": 13440, "training_step_time": 0.11027359962463379 }, { "epoch": 2.050933837890625e-05, "model_forward_time": 0.02532196044921875, "step": 13441 }, { "epoch": 2.050933837890625e-05, "step": 13441, "training_step_time": 0.10902976989746094 }, { "epoch": 2.05108642578125e-05, "model_forward_time": 0.025692224502563477, "step": 13442 }, { "epoch": 2.05108642578125e-05, "step": 13442, "training_step_time": 0.10401558876037598 }, { "epoch": 2.051239013671875e-05, "model_forward_time": 0.025447845458984375, "step": 13443 }, { "epoch": 2.051239013671875e-05, "step": 13443, "training_step_time": 0.10426831245422363 }, { "epoch": 2.0513916015625e-05, "model_forward_time": 0.025374650955200195, "step": 13444 }, { "epoch": 2.0513916015625e-05, "step": 13444, "training_step_time": 0.10567498207092285 }, { "epoch": 2.051544189453125e-05, "model_forward_time": 0.024979829788208008, "step": 13445 }, { "epoch": 2.051544189453125e-05, "step": 13445, "training_step_time": 0.10604453086853027 }, { "epoch": 2.05169677734375e-05, "model_forward_time": 0.025234699249267578, "step": 13446 }, { "epoch": 2.05169677734375e-05, "step": 13446, "training_step_time": 0.10599446296691895 }, { "epoch": 2.051849365234375e-05, "model_forward_time": 0.025767087936401367, "step": 13447 }, { "epoch": 2.051849365234375e-05, "step": 13447, "training_step_time": 0.17940235137939453 }, { "epoch": 2.052001953125e-05, "model_forward_time": 0.024871110916137695, "step": 13448 }, { "epoch": 2.052001953125e-05, "step": 13448, "training_step_time": 0.20208024978637695 }, { "epoch": 2.052154541015625e-05, "model_forward_time": 0.024614810943603516, "step": 13449 }, { "epoch": 2.052154541015625e-05, "step": 13449, "training_step_time": 0.21276640892028809 }, { "epoch": 2.05230712890625e-05, "grad_norm": 0.3965492248535156, "learning_rate": 6.254123225511923e-05, "loss": 0.0301, "step": 13450 }, { "epoch": 2.05230712890625e-05, "model_forward_time": 0.02409219741821289, "step": 13450 }, { "epoch": 2.05230712890625e-05, "step": 13450, "training_step_time": 0.20916199684143066 }, { "epoch": 2.052459716796875e-05, "model_forward_time": 0.024336576461791992, "step": 13451 }, { "epoch": 2.052459716796875e-05, "step": 13451, "training_step_time": 0.2063922882080078 }, { "epoch": 2.0526123046875e-05, "model_forward_time": 0.024483442306518555, "step": 13452 }, { "epoch": 2.0526123046875e-05, "step": 13452, "training_step_time": 0.19893336296081543 }, { "epoch": 2.052764892578125e-05, "model_forward_time": 0.024085283279418945, "step": 13453 }, { "epoch": 2.052764892578125e-05, "step": 13453, "training_step_time": 0.19656848907470703 }, { "epoch": 2.05291748046875e-05, "model_forward_time": 0.02414083480834961, "step": 13454 }, { "epoch": 2.05291748046875e-05, "step": 13454, "training_step_time": 0.1954667568206787 }, { "epoch": 2.053070068359375e-05, "model_forward_time": 0.02589702606201172, "step": 13455 }, { "epoch": 2.053070068359375e-05, "step": 13455, "training_step_time": 0.1307220458984375 }, { "epoch": 2.05322265625e-05, "model_forward_time": 0.025348424911499023, "step": 13456 }, { "epoch": 2.05322265625e-05, "step": 13456, "training_step_time": 0.12230920791625977 }, { "epoch": 2.053375244140625e-05, "model_forward_time": 0.026223421096801758, "step": 13457 }, { "epoch": 2.053375244140625e-05, "step": 13457, "training_step_time": 0.11296939849853516 }, { "epoch": 2.05352783203125e-05, "model_forward_time": 0.02593398094177246, "step": 13458 }, { "epoch": 2.05352783203125e-05, "step": 13458, "training_step_time": 0.13977622985839844 }, { "epoch": 2.053680419921875e-05, "model_forward_time": 0.02613973617553711, "step": 13459 }, { "epoch": 2.053680419921875e-05, "step": 13459, "training_step_time": 0.1622178554534912 }, { "epoch": 2.0538330078125e-05, "grad_norm": 0.2964446544647217, "learning_rate": 6.248787089365133e-05, "loss": 0.026, "step": 13460 }, { "epoch": 2.0538330078125e-05, "model_forward_time": 0.025403976440429688, "step": 13460 }, { "epoch": 2.0538330078125e-05, "step": 13460, "training_step_time": 0.1813497543334961 }, { "epoch": 2.053985595703125e-05, "model_forward_time": 0.024903297424316406, "step": 13461 }, { "epoch": 2.053985595703125e-05, "step": 13461, "training_step_time": 0.12934541702270508 }, { "epoch": 2.05413818359375e-05, "model_forward_time": 0.024985074996948242, "step": 13462 }, { "epoch": 2.05413818359375e-05, "step": 13462, "training_step_time": 0.10903286933898926 }, { "epoch": 2.054290771484375e-05, "model_forward_time": 0.025337696075439453, "step": 13463 }, { "epoch": 2.054290771484375e-05, "step": 13463, "training_step_time": 0.13035178184509277 }, { "epoch": 2.054443359375e-05, "model_forward_time": 0.025640487670898438, "step": 13464 }, { "epoch": 2.054443359375e-05, "step": 13464, "training_step_time": 0.1145782470703125 }, { "epoch": 2.054595947265625e-05, "model_forward_time": 0.025607824325561523, "step": 13465 }, { "epoch": 2.054595947265625e-05, "step": 13465, "training_step_time": 0.11302518844604492 }, { "epoch": 2.05474853515625e-05, "model_forward_time": 0.025816917419433594, "step": 13466 }, { "epoch": 2.05474853515625e-05, "step": 13466, "training_step_time": 0.11213016510009766 }, { "epoch": 2.054901123046875e-05, "model_forward_time": 0.025067806243896484, "step": 13467 }, { "epoch": 2.054901123046875e-05, "step": 13467, "training_step_time": 0.10860538482666016 }, { "epoch": 2.0550537109375e-05, "model_forward_time": 0.025256633758544922, "step": 13468 }, { "epoch": 2.0550537109375e-05, "step": 13468, "training_step_time": 0.10731077194213867 }, { "epoch": 2.055206298828125e-05, "model_forward_time": 0.025688886642456055, "step": 13469 }, { "epoch": 2.055206298828125e-05, "step": 13469, "training_step_time": 0.20747661590576172 }, { "epoch": 2.05535888671875e-05, "grad_norm": 0.23747020959854126, "learning_rate": 6.243449435824276e-05, "loss": 0.0334, "step": 13470 }, { "epoch": 2.05535888671875e-05, "model_forward_time": 0.02470111846923828, "step": 13470 }, { "epoch": 2.05535888671875e-05, "step": 13470, "training_step_time": 0.11760783195495605 }, { "epoch": 2.055511474609375e-05, "model_forward_time": 0.024959087371826172, "step": 13471 }, { "epoch": 2.055511474609375e-05, "step": 13471, "training_step_time": 0.13237762451171875 }, { "epoch": 2.0556640625e-05, "model_forward_time": 0.025312185287475586, "step": 13472 }, { "epoch": 2.0556640625e-05, "step": 13472, "training_step_time": 0.11254715919494629 }, { "epoch": 2.055816650390625e-05, "model_forward_time": 0.025689363479614258, "step": 13473 }, { "epoch": 2.055816650390625e-05, "step": 13473, "training_step_time": 0.1810760498046875 }, { "epoch": 2.05596923828125e-05, "model_forward_time": 0.025043725967407227, "step": 13474 }, { "epoch": 2.05596923828125e-05, "step": 13474, "training_step_time": 0.12662744522094727 }, { "epoch": 2.056121826171875e-05, "model_forward_time": 0.025513887405395508, "step": 13475 }, { "epoch": 2.056121826171875e-05, "step": 13475, "training_step_time": 0.11168694496154785 }, { "epoch": 2.0562744140625e-05, "model_forward_time": 0.025313377380371094, "step": 13476 }, { "epoch": 2.0562744140625e-05, "step": 13476, "training_step_time": 0.10495781898498535 }, { "epoch": 2.056427001953125e-05, "model_forward_time": 0.02562880516052246, "step": 13477 }, { "epoch": 2.056427001953125e-05, "step": 13477, "training_step_time": 0.10590624809265137 }, { "epoch": 2.05657958984375e-05, "model_forward_time": 0.02529597282409668, "step": 13478 }, { "epoch": 2.05657958984375e-05, "step": 13478, "training_step_time": 0.10621333122253418 }, { "epoch": 2.056732177734375e-05, "model_forward_time": 0.027664899826049805, "step": 13479 }, { "epoch": 2.056732177734375e-05, "step": 13479, "training_step_time": 0.10705780982971191 }, { "epoch": 2.056884765625e-05, "grad_norm": 0.390501469373703, "learning_rate": 6.238110271375102e-05, "loss": 0.0233, "step": 13480 }, { "epoch": 2.056884765625e-05, "model_forward_time": 0.02529764175415039, "step": 13480 }, { "epoch": 2.056884765625e-05, "step": 13480, "training_step_time": 0.11092662811279297 }, { "epoch": 2.057037353515625e-05, "model_forward_time": 0.02570819854736328, "step": 13481 }, { "epoch": 2.057037353515625e-05, "step": 13481, "training_step_time": 0.10501623153686523 }, { "epoch": 2.05718994140625e-05, "model_forward_time": 0.025580167770385742, "step": 13482 }, { "epoch": 2.05718994140625e-05, "step": 13482, "training_step_time": 0.10663294792175293 }, { "epoch": 2.057342529296875e-05, "model_forward_time": 0.025522947311401367, "step": 13483 }, { "epoch": 2.057342529296875e-05, "step": 13483, "training_step_time": 0.10570430755615234 }, { "epoch": 2.0574951171875e-05, "model_forward_time": 0.025350332260131836, "step": 13484 }, { "epoch": 2.0574951171875e-05, "step": 13484, "training_step_time": 0.10508418083190918 }, { "epoch": 2.057647705078125e-05, "model_forward_time": 0.025224685668945312, "step": 13485 }, { "epoch": 2.057647705078125e-05, "step": 13485, "training_step_time": 0.10484957695007324 }, { "epoch": 2.05780029296875e-05, "model_forward_time": 0.025508403778076172, "step": 13486 }, { "epoch": 2.05780029296875e-05, "step": 13486, "training_step_time": 0.10888338088989258 }, { "epoch": 2.057952880859375e-05, "model_forward_time": 0.025072813034057617, "step": 13487 }, { "epoch": 2.057952880859375e-05, "step": 13487, "training_step_time": 0.10498213768005371 }, { "epoch": 2.05810546875e-05, "model_forward_time": 0.025693178176879883, "step": 13488 }, { "epoch": 2.05810546875e-05, "step": 13488, "training_step_time": 0.10777568817138672 }, { "epoch": 2.058258056640625e-05, "model_forward_time": 0.026051759719848633, "step": 13489 }, { "epoch": 2.058258056640625e-05, "step": 13489, "training_step_time": 0.10966873168945312 }, { "epoch": 2.05841064453125e-05, "grad_norm": 0.31566035747528076, "learning_rate": 6.232769602505203e-05, "loss": 0.0241, "step": 13490 }, { "epoch": 2.05841064453125e-05, "model_forward_time": 0.025745868682861328, "step": 13490 }, { "epoch": 2.05841064453125e-05, "step": 13490, "training_step_time": 0.10670232772827148 }, { "epoch": 2.058563232421875e-05, "model_forward_time": 0.02549147605895996, "step": 13491 }, { "epoch": 2.058563232421875e-05, "step": 13491, "training_step_time": 0.10652661323547363 }, { "epoch": 2.0587158203125e-05, "model_forward_time": 0.02582526206970215, "step": 13492 }, { "epoch": 2.0587158203125e-05, "step": 13492, "training_step_time": 0.10699605941772461 }, { "epoch": 2.058868408203125e-05, "model_forward_time": 0.02557229995727539, "step": 13493 }, { "epoch": 2.058868408203125e-05, "step": 13493, "training_step_time": 0.10550284385681152 }, { "epoch": 2.05902099609375e-05, "model_forward_time": 0.02545166015625, "step": 13494 }, { "epoch": 2.05902099609375e-05, "step": 13494, "training_step_time": 0.10523653030395508 }, { "epoch": 2.059173583984375e-05, "model_forward_time": 0.025597810745239258, "step": 13495 }, { "epoch": 2.059173583984375e-05, "step": 13495, "training_step_time": 0.1056661605834961 }, { "epoch": 2.059326171875e-05, "model_forward_time": 0.02552962303161621, "step": 13496 }, { "epoch": 2.059326171875e-05, "step": 13496, "training_step_time": 0.1071176528930664 }, { "epoch": 2.059478759765625e-05, "model_forward_time": 0.025418996810913086, "step": 13497 }, { "epoch": 2.059478759765625e-05, "step": 13497, "training_step_time": 0.1056063175201416 }, { "epoch": 2.05963134765625e-05, "model_forward_time": 0.02577662467956543, "step": 13498 }, { "epoch": 2.05963134765625e-05, "step": 13498, "training_step_time": 0.10579228401184082 }, { "epoch": 2.059783935546875e-05, "model_forward_time": 0.025702476501464844, "step": 13499 }, { "epoch": 2.059783935546875e-05, "step": 13499, "training_step_time": 0.17978358268737793 }, { "epoch": 2.0599365234375e-05, "grad_norm": 0.2811414897441864, "learning_rate": 6.227427435703997e-05, "loss": 0.0124, "step": 13500 }, { "epoch": 2.0599365234375e-05, "model_forward_time": 0.02506732940673828, "step": 13500 }, { "epoch": 2.0599365234375e-05, "step": 13500, "training_step_time": 0.10538029670715332 }, { "epoch": 2.060089111328125e-05, "model_forward_time": 0.024884462356567383, "step": 13501 }, { "epoch": 2.060089111328125e-05, "step": 13501, "training_step_time": 0.1276853084564209 }, { "epoch": 2.06024169921875e-05, "model_forward_time": 0.025621652603149414, "step": 13502 }, { "epoch": 2.06024169921875e-05, "step": 13502, "training_step_time": 0.13167500495910645 }, { "epoch": 2.060394287109375e-05, "model_forward_time": 0.025303125381469727, "step": 13503 }, { "epoch": 2.060394287109375e-05, "step": 13503, "training_step_time": 0.13707923889160156 }, { "epoch": 2.060546875e-05, "model_forward_time": 0.02512383460998535, "step": 13504 }, { "epoch": 2.060546875e-05, "step": 13504, "training_step_time": 0.11810469627380371 }, { "epoch": 2.060699462890625e-05, "model_forward_time": 0.028889894485473633, "step": 13505 }, { "epoch": 2.060699462890625e-05, "step": 13505, "training_step_time": 0.11882901191711426 }, { "epoch": 2.06085205078125e-05, "model_forward_time": 0.024982452392578125, "step": 13506 }, { "epoch": 2.06085205078125e-05, "step": 13506, "training_step_time": 0.10388326644897461 }, { "epoch": 2.061004638671875e-05, "model_forward_time": 0.02473306655883789, "step": 13507 }, { "epoch": 2.061004638671875e-05, "step": 13507, "training_step_time": 0.1320035457611084 }, { "epoch": 2.0611572265625e-05, "model_forward_time": 0.025761127471923828, "step": 13508 }, { "epoch": 2.0611572265625e-05, "step": 13508, "training_step_time": 0.2026369571685791 }, { "epoch": 2.061309814453125e-05, "model_forward_time": 0.026338815689086914, "step": 13509 }, { "epoch": 2.061309814453125e-05, "step": 13509, "training_step_time": 0.1712629795074463 }, { "epoch": 2.06146240234375e-05, "grad_norm": 0.3576022684574127, "learning_rate": 6.222083777462715e-05, "loss": 0.0249, "step": 13510 }, { "epoch": 2.06146240234375e-05, "model_forward_time": 0.024466991424560547, "step": 13510 }, { "epoch": 2.06146240234375e-05, "step": 13510, "training_step_time": 0.1889963150024414 }, { "epoch": 2.061614990234375e-05, "model_forward_time": 0.025861740112304688, "step": 13511 }, { "epoch": 2.061614990234375e-05, "step": 13511, "training_step_time": 0.14404678344726562 }, { "epoch": 2.061767578125e-05, "model_forward_time": 0.02688741683959961, "step": 13512 }, { "epoch": 2.061767578125e-05, "step": 13512, "training_step_time": 0.14098334312438965 }, { "epoch": 2.061920166015625e-05, "model_forward_time": 0.024428367614746094, "step": 13513 }, { "epoch": 2.061920166015625e-05, "step": 13513, "training_step_time": 0.12908458709716797 }, { "epoch": 2.06207275390625e-05, "model_forward_time": 0.024909019470214844, "step": 13514 }, { "epoch": 2.06207275390625e-05, "step": 13514, "training_step_time": 0.12499356269836426 }, { "epoch": 2.062225341796875e-05, "model_forward_time": 0.025639057159423828, "step": 13515 }, { "epoch": 2.062225341796875e-05, "step": 13515, "training_step_time": 0.11852765083312988 }, { "epoch": 2.0623779296875e-05, "model_forward_time": 0.025999069213867188, "step": 13516 }, { "epoch": 2.0623779296875e-05, "step": 13516, "training_step_time": 0.11444544792175293 }, { "epoch": 2.062530517578125e-05, "model_forward_time": 0.025890350341796875, "step": 13517 }, { "epoch": 2.062530517578125e-05, "step": 13517, "training_step_time": 0.12332773208618164 }, { "epoch": 2.06268310546875e-05, "model_forward_time": 0.02600264549255371, "step": 13518 }, { "epoch": 2.06268310546875e-05, "step": 13518, "training_step_time": 0.16089296340942383 }, { "epoch": 2.062835693359375e-05, "model_forward_time": 0.02433919906616211, "step": 13519 }, { "epoch": 2.062835693359375e-05, "step": 13519, "training_step_time": 0.21832513809204102 }, { "epoch": 2.06298828125e-05, "grad_norm": 0.2942567765712738, "learning_rate": 6.216738634274411e-05, "loss": 0.0251, "step": 13520 }, { "epoch": 2.06298828125e-05, "model_forward_time": 0.025035381317138672, "step": 13520 }, { "epoch": 2.06298828125e-05, "step": 13520, "training_step_time": 0.1167595386505127 }, { "epoch": 2.063140869140625e-05, "model_forward_time": 0.02455306053161621, "step": 13521 }, { "epoch": 2.063140869140625e-05, "step": 13521, "training_step_time": 0.10366249084472656 }, { "epoch": 2.06329345703125e-05, "model_forward_time": 0.02541375160217285, "step": 13522 }, { "epoch": 2.06329345703125e-05, "step": 13522, "training_step_time": 0.10523533821105957 }, { "epoch": 2.063446044921875e-05, "model_forward_time": 0.025339365005493164, "step": 13523 }, { "epoch": 2.063446044921875e-05, "step": 13523, "training_step_time": 0.10650134086608887 }, { "epoch": 2.0635986328125e-05, "model_forward_time": 0.02537083625793457, "step": 13524 }, { "epoch": 2.0635986328125e-05, "step": 13524, "training_step_time": 0.1060328483581543 }, { "epoch": 2.063751220703125e-05, "model_forward_time": 0.02584385871887207, "step": 13525 }, { "epoch": 2.063751220703125e-05, "step": 13525, "training_step_time": 0.10665106773376465 }, { "epoch": 2.06390380859375e-05, "model_forward_time": 0.025703907012939453, "step": 13526 }, { "epoch": 2.06390380859375e-05, "step": 13526, "training_step_time": 0.1108710765838623 }, { "epoch": 2.064056396484375e-05, "model_forward_time": 0.02501535415649414, "step": 13527 }, { "epoch": 2.064056396484375e-05, "step": 13527, "training_step_time": 0.10784006118774414 }, { "epoch": 2.064208984375e-05, "model_forward_time": 0.025657176971435547, "step": 13528 }, { "epoch": 2.064208984375e-05, "step": 13528, "training_step_time": 0.10536408424377441 }, { "epoch": 2.064361572265625e-05, "model_forward_time": 0.025498390197753906, "step": 13529 }, { "epoch": 2.064361572265625e-05, "step": 13529, "training_step_time": 0.10569286346435547 }, { "epoch": 2.06451416015625e-05, "grad_norm": 0.36455395817756653, "learning_rate": 6.211392012633932e-05, "loss": 0.0254, "step": 13530 }, { "epoch": 2.06451416015625e-05, "model_forward_time": 0.02626323699951172, "step": 13530 }, { "epoch": 2.06451416015625e-05, "step": 13530, "training_step_time": 0.11243510246276855 }, { "epoch": 2.064666748046875e-05, "model_forward_time": 0.026011228561401367, "step": 13531 }, { "epoch": 2.064666748046875e-05, "step": 13531, "training_step_time": 0.10503387451171875 }, { "epoch": 2.0648193359375e-05, "model_forward_time": 0.025607585906982422, "step": 13532 }, { "epoch": 2.0648193359375e-05, "step": 13532, "training_step_time": 0.10560750961303711 }, { "epoch": 2.064971923828125e-05, "model_forward_time": 0.026081562042236328, "step": 13533 }, { "epoch": 2.064971923828125e-05, "step": 13533, "training_step_time": 0.11016416549682617 }, { "epoch": 2.06512451171875e-05, "model_forward_time": 0.02525639533996582, "step": 13534 }, { "epoch": 2.06512451171875e-05, "step": 13534, "training_step_time": 0.10515308380126953 }, { "epoch": 2.065277099609375e-05, "model_forward_time": 0.025351524353027344, "step": 13535 }, { "epoch": 2.065277099609375e-05, "step": 13535, "training_step_time": 0.1093449592590332 }, { "epoch": 2.0654296875e-05, "model_forward_time": 0.02552938461303711, "step": 13536 }, { "epoch": 2.0654296875e-05, "step": 13536, "training_step_time": 0.10496139526367188 }, { "epoch": 2.065582275390625e-05, "model_forward_time": 0.025290966033935547, "step": 13537 }, { "epoch": 2.065582275390625e-05, "step": 13537, "training_step_time": 0.1048898696899414 }, { "epoch": 2.06573486328125e-05, "model_forward_time": 0.0255739688873291, "step": 13538 }, { "epoch": 2.06573486328125e-05, "step": 13538, "training_step_time": 0.10610198974609375 }, { "epoch": 2.065887451171875e-05, "model_forward_time": 0.025422334671020508, "step": 13539 }, { "epoch": 2.065887451171875e-05, "step": 13539, "training_step_time": 0.10854792594909668 }, { "epoch": 2.0660400390625e-05, "grad_norm": 0.29043322801589966, "learning_rate": 6.206043919037933e-05, "loss": 0.0215, "step": 13540 }, { "epoch": 2.0660400390625e-05, "model_forward_time": 0.025322675704956055, "step": 13540 }, { "epoch": 2.0660400390625e-05, "step": 13540, "training_step_time": 0.10613369941711426 }, { "epoch": 2.066192626953125e-05, "model_forward_time": 0.025369644165039062, "step": 13541 }, { "epoch": 2.066192626953125e-05, "step": 13541, "training_step_time": 0.10640740394592285 }, { "epoch": 2.06634521484375e-05, "model_forward_time": 0.02557682991027832, "step": 13542 }, { "epoch": 2.06634521484375e-05, "step": 13542, "training_step_time": 0.10752701759338379 }, { "epoch": 2.066497802734375e-05, "model_forward_time": 0.026835203170776367, "step": 13543 }, { "epoch": 2.066497802734375e-05, "step": 13543, "training_step_time": 0.1137089729309082 }, { "epoch": 2.066650390625e-05, "model_forward_time": 0.026080608367919922, "step": 13544 }, { "epoch": 2.066650390625e-05, "step": 13544, "training_step_time": 0.10647463798522949 }, { "epoch": 2.066802978515625e-05, "model_forward_time": 0.02614140510559082, "step": 13545 }, { "epoch": 2.066802978515625e-05, "step": 13545, "training_step_time": 0.15695667266845703 }, { "epoch": 2.06695556640625e-05, "model_forward_time": 0.025475740432739258, "step": 13546 }, { "epoch": 2.06695556640625e-05, "step": 13546, "training_step_time": 0.22822093963623047 }, { "epoch": 2.067108154296875e-05, "model_forward_time": 0.025451183319091797, "step": 13547 }, { "epoch": 2.067108154296875e-05, "step": 13547, "training_step_time": 0.16279911994934082 }, { "epoch": 2.0672607421875e-05, "model_forward_time": 0.0246584415435791, "step": 13548 }, { "epoch": 2.0672607421875e-05, "step": 13548, "training_step_time": 0.19060587882995605 }, { "epoch": 2.067413330078125e-05, "model_forward_time": 0.02529287338256836, "step": 13549 }, { "epoch": 2.067413330078125e-05, "step": 13549, "training_step_time": 0.18202757835388184 }, { "epoch": 2.06756591796875e-05, "grad_norm": 0.27025356888771057, "learning_rate": 6.200694359984849e-05, "loss": 0.0145, "step": 13550 }, { "epoch": 2.06756591796875e-05, "model_forward_time": 0.02523517608642578, "step": 13550 }, { "epoch": 2.06756591796875e-05, "step": 13550, "training_step_time": 0.1037900447845459 }, { "epoch": 2.067718505859375e-05, "model_forward_time": 0.024685382843017578, "step": 13551 }, { "epoch": 2.067718505859375e-05, "step": 13551, "training_step_time": 0.17618966102600098 }, { "epoch": 2.06787109375e-05, "model_forward_time": 0.02469611167907715, "step": 13552 }, { "epoch": 2.06787109375e-05, "step": 13552, "training_step_time": 0.16537880897521973 }, { "epoch": 2.068023681640625e-05, "model_forward_time": 0.024758338928222656, "step": 13553 }, { "epoch": 2.068023681640625e-05, "step": 13553, "training_step_time": 0.10965704917907715 }, { "epoch": 2.06817626953125e-05, "model_forward_time": 0.02538466453552246, "step": 13554 }, { "epoch": 2.06817626953125e-05, "step": 13554, "training_step_time": 0.1163020133972168 }, { "epoch": 2.068328857421875e-05, "model_forward_time": 0.02577829360961914, "step": 13555 }, { "epoch": 2.068328857421875e-05, "step": 13555, "training_step_time": 0.1302950382232666 }, { "epoch": 2.0684814453125e-05, "model_forward_time": 0.0257413387298584, "step": 13556 }, { "epoch": 2.0684814453125e-05, "step": 13556, "training_step_time": 0.1236116886138916 }, { "epoch": 2.068634033203125e-05, "model_forward_time": 0.02552628517150879, "step": 13557 }, { "epoch": 2.068634033203125e-05, "step": 13557, "training_step_time": 0.12032341957092285 }, { "epoch": 2.06878662109375e-05, "model_forward_time": 0.025516748428344727, "step": 13558 }, { "epoch": 2.06878662109375e-05, "step": 13558, "training_step_time": 0.12189531326293945 }, { "epoch": 2.068939208984375e-05, "model_forward_time": 0.025724411010742188, "step": 13559 }, { "epoch": 2.068939208984375e-05, "step": 13559, "training_step_time": 0.12296533584594727 }, { "epoch": 2.069091796875e-05, "grad_norm": 0.16759471595287323, "learning_rate": 6.195343341974899e-05, "loss": 0.0232, "step": 13560 }, { "epoch": 2.069091796875e-05, "model_forward_time": 0.024808645248413086, "step": 13560 }, { "epoch": 2.069091796875e-05, "step": 13560, "training_step_time": 0.12245917320251465 }, { "epoch": 2.069244384765625e-05, "model_forward_time": 0.025620460510253906, "step": 13561 }, { "epoch": 2.069244384765625e-05, "step": 13561, "training_step_time": 0.12115120887756348 }, { "epoch": 2.06939697265625e-05, "model_forward_time": 0.026069164276123047, "step": 13562 }, { "epoch": 2.06939697265625e-05, "step": 13562, "training_step_time": 0.1276533603668213 }, { "epoch": 2.069549560546875e-05, "model_forward_time": 0.025541067123413086, "step": 13563 }, { "epoch": 2.069549560546875e-05, "step": 13563, "training_step_time": 0.15735912322998047 }, { "epoch": 2.0697021484375e-05, "model_forward_time": 0.024887800216674805, "step": 13564 }, { "epoch": 2.0697021484375e-05, "step": 13564, "training_step_time": 0.21988320350646973 }, { "epoch": 2.069854736328125e-05, "model_forward_time": 0.024647951126098633, "step": 13565 }, { "epoch": 2.069854736328125e-05, "step": 13565, "training_step_time": 0.11358976364135742 }, { "epoch": 2.07000732421875e-05, "model_forward_time": 0.025397300720214844, "step": 13566 }, { "epoch": 2.07000732421875e-05, "step": 13566, "training_step_time": 0.10919880867004395 }, { "epoch": 2.070159912109375e-05, "model_forward_time": 0.025625228881835938, "step": 13567 }, { "epoch": 2.070159912109375e-05, "step": 13567, "training_step_time": 0.11376523971557617 }, { "epoch": 2.0703125e-05, "model_forward_time": 0.02531886100769043, "step": 13568 }, { "epoch": 2.0703125e-05, "step": 13568, "training_step_time": 0.10929059982299805 }, { "epoch": 2.070465087890625e-05, "model_forward_time": 0.02492046356201172, "step": 13569 }, { "epoch": 2.070465087890625e-05, "step": 13569, "training_step_time": 0.10867166519165039 }, { "epoch": 2.07061767578125e-05, "grad_norm": 0.2568618357181549, "learning_rate": 6.189990871510078e-05, "loss": 0.0159, "step": 13570 }, { "epoch": 2.07061767578125e-05, "model_forward_time": 0.024616003036499023, "step": 13570 }, { "epoch": 2.07061767578125e-05, "step": 13570, "training_step_time": 0.10872244834899902 }, { "epoch": 2.070770263671875e-05, "model_forward_time": 0.02526998519897461, "step": 13571 }, { "epoch": 2.070770263671875e-05, "step": 13571, "training_step_time": 0.10507607460021973 }, { "epoch": 2.0709228515625e-05, "model_forward_time": 0.025464773178100586, "step": 13572 }, { "epoch": 2.0709228515625e-05, "step": 13572, "training_step_time": 0.1047205924987793 }, { "epoch": 2.071075439453125e-05, "model_forward_time": 0.02590203285217285, "step": 13573 }, { "epoch": 2.071075439453125e-05, "step": 13573, "training_step_time": 0.10735416412353516 }, { "epoch": 2.07122802734375e-05, "model_forward_time": 0.025557994842529297, "step": 13574 }, { "epoch": 2.07122802734375e-05, "step": 13574, "training_step_time": 0.10766267776489258 }, { "epoch": 2.071380615234375e-05, "model_forward_time": 0.025234460830688477, "step": 13575 }, { "epoch": 2.071380615234375e-05, "step": 13575, "training_step_time": 0.10898280143737793 }, { "epoch": 2.071533203125e-05, "model_forward_time": 0.025351762771606445, "step": 13576 }, { "epoch": 2.071533203125e-05, "step": 13576, "training_step_time": 0.10595297813415527 }, { "epoch": 2.071685791015625e-05, "model_forward_time": 0.025415897369384766, "step": 13577 }, { "epoch": 2.071685791015625e-05, "step": 13577, "training_step_time": 0.11336827278137207 }, { "epoch": 2.07183837890625e-05, "model_forward_time": 0.026666879653930664, "step": 13578 }, { "epoch": 2.07183837890625e-05, "step": 13578, "training_step_time": 0.10653972625732422 }, { "epoch": 2.071990966796875e-05, "model_forward_time": 0.025657176971435547, "step": 13579 }, { "epoch": 2.071990966796875e-05, "step": 13579, "training_step_time": 0.10499072074890137 }, { "epoch": 2.0721435546875e-05, "grad_norm": 0.3949906826019287, "learning_rate": 6.184636955094138e-05, "loss": 0.0161, "step": 13580 }, { "epoch": 2.0721435546875e-05, "model_forward_time": 0.025122880935668945, "step": 13580 }, { "epoch": 2.0721435546875e-05, "step": 13580, "training_step_time": 0.10523748397827148 }, { "epoch": 2.072296142578125e-05, "model_forward_time": 0.02554178237915039, "step": 13581 }, { "epoch": 2.072296142578125e-05, "step": 13581, "training_step_time": 0.10465693473815918 }, { "epoch": 2.07244873046875e-05, "model_forward_time": 0.025726795196533203, "step": 13582 }, { "epoch": 2.07244873046875e-05, "step": 13582, "training_step_time": 0.10459494590759277 }, { "epoch": 2.072601318359375e-05, "model_forward_time": 0.025447368621826172, "step": 13583 }, { "epoch": 2.072601318359375e-05, "step": 13583, "training_step_time": 0.10417723655700684 }, { "epoch": 2.07275390625e-05, "model_forward_time": 0.025584936141967773, "step": 13584 }, { "epoch": 2.07275390625e-05, "step": 13584, "training_step_time": 0.10549402236938477 }, { "epoch": 2.072906494140625e-05, "model_forward_time": 0.025400161743164062, "step": 13585 }, { "epoch": 2.072906494140625e-05, "step": 13585, "training_step_time": 0.10564112663269043 }, { "epoch": 2.07305908203125e-05, "model_forward_time": 0.02568960189819336, "step": 13586 }, { "epoch": 2.07305908203125e-05, "step": 13586, "training_step_time": 0.1101067066192627 }, { "epoch": 2.073211669921875e-05, "model_forward_time": 0.025120258331298828, "step": 13587 }, { "epoch": 2.073211669921875e-05, "step": 13587, "training_step_time": 0.10855317115783691 }, { "epoch": 2.0733642578125e-05, "model_forward_time": 0.025241613388061523, "step": 13588 }, { "epoch": 2.0733642578125e-05, "step": 13588, "training_step_time": 0.10760617256164551 }, { "epoch": 2.073516845703125e-05, "model_forward_time": 0.02600574493408203, "step": 13589 }, { "epoch": 2.073516845703125e-05, "step": 13589, "training_step_time": 0.10991024971008301 }, { "epoch": 2.07366943359375e-05, "grad_norm": 0.23415841162204742, "learning_rate": 6.179281599232591e-05, "loss": 0.0242, "step": 13590 }, { "epoch": 2.07366943359375e-05, "model_forward_time": 0.02530384063720703, "step": 13590 }, { "epoch": 2.07366943359375e-05, "step": 13590, "training_step_time": 0.19319605827331543 }, { "epoch": 2.073822021484375e-05, "model_forward_time": 0.02474832534790039, "step": 13591 }, { "epoch": 2.073822021484375e-05, "step": 13591, "training_step_time": 0.11527585983276367 }, { "epoch": 2.073974609375e-05, "model_forward_time": 0.024869918823242188, "step": 13592 }, { "epoch": 2.073974609375e-05, "step": 13592, "training_step_time": 0.11902046203613281 }, { "epoch": 2.074127197265625e-05, "model_forward_time": 0.025594234466552734, "step": 13593 }, { "epoch": 2.074127197265625e-05, "step": 13593, "training_step_time": 0.14376306533813477 }, { "epoch": 2.07427978515625e-05, "model_forward_time": 0.025374650955200195, "step": 13594 }, { "epoch": 2.07427978515625e-05, "step": 13594, "training_step_time": 0.2284080982208252 }, { "epoch": 2.074432373046875e-05, "model_forward_time": 0.025103092193603516, "step": 13595 }, { "epoch": 2.074432373046875e-05, "step": 13595, "training_step_time": 0.21947169303894043 }, { "epoch": 2.0745849609375e-05, "model_forward_time": 0.024671554565429688, "step": 13596 }, { "epoch": 2.0745849609375e-05, "step": 13596, "training_step_time": 0.2086644172668457 }, { "epoch": 2.074737548828125e-05, "model_forward_time": 0.024730920791625977, "step": 13597 }, { "epoch": 2.074737548828125e-05, "step": 13597, "training_step_time": 0.15371179580688477 }, { "epoch": 2.07489013671875e-05, "model_forward_time": 0.02458333969116211, "step": 13598 }, { "epoch": 2.07489013671875e-05, "step": 13598, "training_step_time": 0.1919417381286621 }, { "epoch": 2.075042724609375e-05, "model_forward_time": 0.025199174880981445, "step": 13599 }, { "epoch": 2.075042724609375e-05, "step": 13599, "training_step_time": 0.11182117462158203 }, { "epoch": 2.0751953125e-05, "grad_norm": 0.3446387052536011, "learning_rate": 6.173924810432705e-05, "loss": 0.0328, "step": 13600 }, { "epoch": 2.0751953125e-05, "model_forward_time": 0.024994850158691406, "step": 13600 }, { "epoch": 2.0751953125e-05, "step": 13600, "training_step_time": 0.11134648323059082 }, { "epoch": 2.075347900390625e-05, "model_forward_time": 0.02562117576599121, "step": 13601 }, { "epoch": 2.075347900390625e-05, "step": 13601, "training_step_time": 0.11544203758239746 }, { "epoch": 2.07550048828125e-05, "model_forward_time": 0.02560567855834961, "step": 13602 }, { "epoch": 2.07550048828125e-05, "step": 13602, "training_step_time": 0.10756945610046387 }, { "epoch": 2.075653076171875e-05, "model_forward_time": 0.025394439697265625, "step": 13603 }, { "epoch": 2.075653076171875e-05, "step": 13603, "training_step_time": 0.10807442665100098 }, { "epoch": 2.0758056640625e-05, "model_forward_time": 0.025594472885131836, "step": 13604 }, { "epoch": 2.0758056640625e-05, "step": 13604, "training_step_time": 0.10811233520507812 }, { "epoch": 2.075958251953125e-05, "model_forward_time": 0.02522730827331543, "step": 13605 }, { "epoch": 2.075958251953125e-05, "step": 13605, "training_step_time": 0.1431865692138672 }, { "epoch": 2.07611083984375e-05, "model_forward_time": 0.025676250457763672, "step": 13606 }, { "epoch": 2.07611083984375e-05, "step": 13606, "training_step_time": 0.13843846321105957 }, { "epoch": 2.076263427734375e-05, "model_forward_time": 0.02462148666381836, "step": 13607 }, { "epoch": 2.076263427734375e-05, "step": 13607, "training_step_time": 0.11219406127929688 }, { "epoch": 2.076416015625e-05, "model_forward_time": 0.025599002838134766, "step": 13608 }, { "epoch": 2.076416015625e-05, "step": 13608, "training_step_time": 0.1130824089050293 }, { "epoch": 2.076568603515625e-05, "model_forward_time": 0.02474355697631836, "step": 13609 }, { "epoch": 2.076568603515625e-05, "step": 13609, "training_step_time": 0.10604166984558105 }, { "epoch": 2.07672119140625e-05, "grad_norm": 0.22904595732688904, "learning_rate": 6.168566595203479e-05, "loss": 0.0141, "step": 13610 }, { "epoch": 2.07672119140625e-05, "model_forward_time": 0.024632692337036133, "step": 13610 }, { "epoch": 2.07672119140625e-05, "step": 13610, "training_step_time": 0.17171549797058105 }, { "epoch": 2.076873779296875e-05, "model_forward_time": 0.02483057975769043, "step": 13611 }, { "epoch": 2.076873779296875e-05, "step": 13611, "training_step_time": 0.16538381576538086 }, { "epoch": 2.0770263671875e-05, "model_forward_time": 0.024667739868164062, "step": 13612 }, { "epoch": 2.0770263671875e-05, "step": 13612, "training_step_time": 0.10505175590515137 }, { "epoch": 2.077178955078125e-05, "model_forward_time": 0.024743318557739258, "step": 13613 }, { "epoch": 2.077178955078125e-05, "step": 13613, "training_step_time": 0.10822892189025879 }, { "epoch": 2.07733154296875e-05, "model_forward_time": 0.024968862533569336, "step": 13614 }, { "epoch": 2.07733154296875e-05, "step": 13614, "training_step_time": 0.11079597473144531 }, { "epoch": 2.077484130859375e-05, "model_forward_time": 0.025288105010986328, "step": 13615 }, { "epoch": 2.077484130859375e-05, "step": 13615, "training_step_time": 0.14905571937561035 }, { "epoch": 2.07763671875e-05, "model_forward_time": 0.02492380142211914, "step": 13616 }, { "epoch": 2.07763671875e-05, "step": 13616, "training_step_time": 0.1673579216003418 }, { "epoch": 2.077789306640625e-05, "model_forward_time": 0.024719953536987305, "step": 13617 }, { "epoch": 2.077789306640625e-05, "step": 13617, "training_step_time": 0.15595412254333496 }, { "epoch": 2.07794189453125e-05, "model_forward_time": 0.024540424346923828, "step": 13618 }, { "epoch": 2.07794189453125e-05, "step": 13618, "training_step_time": 0.14380502700805664 }, { "epoch": 2.078094482421875e-05, "model_forward_time": 0.02460503578186035, "step": 13619 }, { "epoch": 2.078094482421875e-05, "step": 13619, "training_step_time": 0.1566150188446045 }, { "epoch": 2.0782470703125e-05, "grad_norm": 0.37864014506340027, "learning_rate": 6.163206960055651e-05, "loss": 0.0279, "step": 13620 }, { "epoch": 2.0782470703125e-05, "model_forward_time": 0.02496027946472168, "step": 13620 }, { "epoch": 2.0782470703125e-05, "step": 13620, "training_step_time": 0.1282200813293457 }, { "epoch": 2.078399658203125e-05, "model_forward_time": 0.024662494659423828, "step": 13621 }, { "epoch": 2.078399658203125e-05, "step": 13621, "training_step_time": 0.12955975532531738 }, { "epoch": 2.07855224609375e-05, "model_forward_time": 0.024855613708496094, "step": 13622 }, { "epoch": 2.07855224609375e-05, "step": 13622, "training_step_time": 0.12385082244873047 }, { "epoch": 2.078704833984375e-05, "model_forward_time": 0.02506089210510254, "step": 13623 }, { "epoch": 2.078704833984375e-05, "step": 13623, "training_step_time": 0.11514592170715332 }, { "epoch": 2.078857421875e-05, "model_forward_time": 0.02560567855834961, "step": 13624 }, { "epoch": 2.078857421875e-05, "step": 13624, "training_step_time": 0.11656022071838379 }, { "epoch": 2.079010009765625e-05, "model_forward_time": 0.0254366397857666, "step": 13625 }, { "epoch": 2.079010009765625e-05, "step": 13625, "training_step_time": 0.11447405815124512 }, { "epoch": 2.07916259765625e-05, "model_forward_time": 0.0255281925201416, "step": 13626 }, { "epoch": 2.07916259765625e-05, "step": 13626, "training_step_time": 0.11028385162353516 }, { "epoch": 2.079315185546875e-05, "model_forward_time": 0.025513887405395508, "step": 13627 }, { "epoch": 2.079315185546875e-05, "step": 13627, "training_step_time": 0.1080942153930664 }, { "epoch": 2.0794677734375e-05, "model_forward_time": 0.025769472122192383, "step": 13628 }, { "epoch": 2.0794677734375e-05, "step": 13628, "training_step_time": 0.10908222198486328 }, { "epoch": 2.079620361328125e-05, "model_forward_time": 0.025214672088623047, "step": 13629 }, { "epoch": 2.079620361328125e-05, "step": 13629, "training_step_time": 0.10624241828918457 }, { "epoch": 2.07977294921875e-05, "grad_norm": 0.4058496952056885, "learning_rate": 6.157845911501684e-05, "loss": 0.0164, "step": 13630 }, { "epoch": 2.07977294921875e-05, "model_forward_time": 0.02564406394958496, "step": 13630 }, { "epoch": 2.07977294921875e-05, "step": 13630, "training_step_time": 0.1081094741821289 }, { "epoch": 2.079925537109375e-05, "model_forward_time": 0.025511980056762695, "step": 13631 }, { "epoch": 2.079925537109375e-05, "step": 13631, "training_step_time": 0.10627388954162598 }, { "epoch": 2.080078125e-05, "model_forward_time": 0.02545785903930664, "step": 13632 }, { "epoch": 2.080078125e-05, "step": 13632, "training_step_time": 0.10564494132995605 }, { "epoch": 2.080230712890625e-05, "model_forward_time": 0.025623083114624023, "step": 13633 }, { "epoch": 2.080230712890625e-05, "step": 13633, "training_step_time": 0.10777878761291504 }, { "epoch": 2.08038330078125e-05, "model_forward_time": 0.025361061096191406, "step": 13634 }, { "epoch": 2.08038330078125e-05, "step": 13634, "training_step_time": 0.10615873336791992 }, { "epoch": 2.080535888671875e-05, "model_forward_time": 0.0254056453704834, "step": 13635 }, { "epoch": 2.080535888671875e-05, "step": 13635, "training_step_time": 0.1260547637939453 }, { "epoch": 2.0806884765625e-05, "model_forward_time": 0.02587604522705078, "step": 13636 }, { "epoch": 2.0806884765625e-05, "step": 13636, "training_step_time": 0.12718892097473145 }, { "epoch": 2.080841064453125e-05, "model_forward_time": 0.025219202041625977, "step": 13637 }, { "epoch": 2.080841064453125e-05, "step": 13637, "training_step_time": 0.21882390975952148 }, { "epoch": 2.08099365234375e-05, "model_forward_time": 0.025017738342285156, "step": 13638 }, { "epoch": 2.08099365234375e-05, "step": 13638, "training_step_time": 0.14789438247680664 }, { "epoch": 2.081146240234375e-05, "model_forward_time": 0.024890422821044922, "step": 13639 }, { "epoch": 2.081146240234375e-05, "step": 13639, "training_step_time": 0.1112062931060791 }, { "epoch": 2.081298828125e-05, "grad_norm": 0.31141793727874756, "learning_rate": 6.152483456055756e-05, "loss": 0.0179, "step": 13640 }, { "epoch": 2.081298828125e-05, "model_forward_time": 0.02823805809020996, "step": 13640 }, { "epoch": 2.081298828125e-05, "step": 13640, "training_step_time": 0.11250591278076172 }, { "epoch": 2.081451416015625e-05, "model_forward_time": 0.025721073150634766, "step": 13641 }, { "epoch": 2.081451416015625e-05, "step": 13641, "training_step_time": 0.1622319221496582 }, { "epoch": 2.08160400390625e-05, "model_forward_time": 0.024960994720458984, "step": 13642 }, { "epoch": 2.08160400390625e-05, "step": 13642, "training_step_time": 0.17319297790527344 }, { "epoch": 2.081756591796875e-05, "model_forward_time": 0.025726318359375, "step": 13643 }, { "epoch": 2.081756591796875e-05, "step": 13643, "training_step_time": 0.11777639389038086 }, { "epoch": 2.0819091796875e-05, "model_forward_time": 0.024801969528198242, "step": 13644 }, { "epoch": 2.0819091796875e-05, "step": 13644, "training_step_time": 0.12955713272094727 }, { "epoch": 2.082061767578125e-05, "model_forward_time": 0.02549290657043457, "step": 13645 }, { "epoch": 2.082061767578125e-05, "step": 13645, "training_step_time": 0.11130213737487793 }, { "epoch": 2.08221435546875e-05, "model_forward_time": 0.025822162628173828, "step": 13646 }, { "epoch": 2.08221435546875e-05, "step": 13646, "training_step_time": 0.11555647850036621 }, { "epoch": 2.082366943359375e-05, "model_forward_time": 0.026665687561035156, "step": 13647 }, { "epoch": 2.082366943359375e-05, "step": 13647, "training_step_time": 0.14503169059753418 }, { "epoch": 2.08251953125e-05, "model_forward_time": 0.025708675384521484, "step": 13648 }, { "epoch": 2.08251953125e-05, "step": 13648, "training_step_time": 0.13213109970092773 }, { "epoch": 2.082672119140625e-05, "model_forward_time": 0.024899721145629883, "step": 13649 }, { "epoch": 2.082672119140625e-05, "step": 13649, "training_step_time": 0.2188856601715088 }, { "epoch": 2.08282470703125e-05, "grad_norm": 0.41740694642066956, "learning_rate": 6.147119600233758e-05, "loss": 0.0214, "step": 13650 }, { "epoch": 2.08282470703125e-05, "model_forward_time": 0.02570819854736328, "step": 13650 }, { "epoch": 2.08282470703125e-05, "step": 13650, "training_step_time": 0.12549614906311035 }, { "epoch": 2.082977294921875e-05, "model_forward_time": 0.02491450309753418, "step": 13651 }, { "epoch": 2.082977294921875e-05, "step": 13651, "training_step_time": 0.13227272033691406 }, { "epoch": 2.0831298828125e-05, "model_forward_time": 0.02501392364501953, "step": 13652 }, { "epoch": 2.0831298828125e-05, "step": 13652, "training_step_time": 0.1143195629119873 }, { "epoch": 2.083282470703125e-05, "model_forward_time": 0.02572321891784668, "step": 13653 }, { "epoch": 2.083282470703125e-05, "step": 13653, "training_step_time": 0.16243863105773926 }, { "epoch": 2.08343505859375e-05, "model_forward_time": 0.024828433990478516, "step": 13654 }, { "epoch": 2.08343505859375e-05, "step": 13654, "training_step_time": 0.13164567947387695 }, { "epoch": 2.083587646484375e-05, "model_forward_time": 0.0242922306060791, "step": 13655 }, { "epoch": 2.083587646484375e-05, "step": 13655, "training_step_time": 0.11646199226379395 }, { "epoch": 2.083740234375e-05, "model_forward_time": 0.026276350021362305, "step": 13656 }, { "epoch": 2.083740234375e-05, "step": 13656, "training_step_time": 0.11182427406311035 }, { "epoch": 2.083892822265625e-05, "model_forward_time": 0.026369810104370117, "step": 13657 }, { "epoch": 2.083892822265625e-05, "step": 13657, "training_step_time": 0.11131548881530762 }, { "epoch": 2.08404541015625e-05, "model_forward_time": 0.025813579559326172, "step": 13658 }, { "epoch": 2.08404541015625e-05, "step": 13658, "training_step_time": 0.1101229190826416 }, { "epoch": 2.084197998046875e-05, "model_forward_time": 0.025196075439453125, "step": 13659 }, { "epoch": 2.084197998046875e-05, "step": 13659, "training_step_time": 0.10927867889404297 }, { "epoch": 2.0843505859375e-05, "grad_norm": 0.2596238851547241, "learning_rate": 6.141754350553279e-05, "loss": 0.0222, "step": 13660 }, { "epoch": 2.0843505859375e-05, "model_forward_time": 0.026456594467163086, "step": 13660 }, { "epoch": 2.0843505859375e-05, "step": 13660, "training_step_time": 0.11250495910644531 }, { "epoch": 2.084503173828125e-05, "model_forward_time": 0.025280475616455078, "step": 13661 }, { "epoch": 2.084503173828125e-05, "step": 13661, "training_step_time": 0.10811591148376465 }, { "epoch": 2.08465576171875e-05, "model_forward_time": 0.02529764175415039, "step": 13662 }, { "epoch": 2.08465576171875e-05, "step": 13662, "training_step_time": 0.10808014869689941 }, { "epoch": 2.084808349609375e-05, "model_forward_time": 0.025424718856811523, "step": 13663 }, { "epoch": 2.084808349609375e-05, "step": 13663, "training_step_time": 0.11020278930664062 }, { "epoch": 2.0849609375e-05, "model_forward_time": 0.025475740432739258, "step": 13664 }, { "epoch": 2.0849609375e-05, "step": 13664, "training_step_time": 0.11224818229675293 }, { "epoch": 2.085113525390625e-05, "model_forward_time": 0.025644779205322266, "step": 13665 }, { "epoch": 2.085113525390625e-05, "step": 13665, "training_step_time": 0.11086845397949219 }, { "epoch": 2.08526611328125e-05, "model_forward_time": 0.02604508399963379, "step": 13666 }, { "epoch": 2.08526611328125e-05, "step": 13666, "training_step_time": 0.10829734802246094 }, { "epoch": 2.085418701171875e-05, "model_forward_time": 0.025397539138793945, "step": 13667 }, { "epoch": 2.085418701171875e-05, "step": 13667, "training_step_time": 0.10617184638977051 }, { "epoch": 2.0855712890625e-05, "model_forward_time": 0.025548219680786133, "step": 13668 }, { "epoch": 2.0855712890625e-05, "step": 13668, "training_step_time": 0.10532283782958984 }, { "epoch": 2.085723876953125e-05, "model_forward_time": 0.025619983673095703, "step": 13669 }, { "epoch": 2.085723876953125e-05, "step": 13669, "training_step_time": 0.10579252243041992 }, { "epoch": 2.08587646484375e-05, "grad_norm": 0.2168739140033722, "learning_rate": 6.136387713533603e-05, "loss": 0.0162, "step": 13670 }, { "epoch": 2.08587646484375e-05, "model_forward_time": 0.025711774826049805, "step": 13670 }, { "epoch": 2.08587646484375e-05, "step": 13670, "training_step_time": 0.10611248016357422 }, { "epoch": 2.086029052734375e-05, "model_forward_time": 0.02528524398803711, "step": 13671 }, { "epoch": 2.086029052734375e-05, "step": 13671, "training_step_time": 0.10628008842468262 }, { "epoch": 2.086181640625e-05, "model_forward_time": 0.025571107864379883, "step": 13672 }, { "epoch": 2.086181640625e-05, "step": 13672, "training_step_time": 0.10644412040710449 }, { "epoch": 2.086334228515625e-05, "model_forward_time": 0.025787830352783203, "step": 13673 }, { "epoch": 2.086334228515625e-05, "step": 13673, "training_step_time": 0.10737013816833496 }, { "epoch": 2.08648681640625e-05, "model_forward_time": 0.02672886848449707, "step": 13674 }, { "epoch": 2.08648681640625e-05, "step": 13674, "training_step_time": 0.10660004615783691 }, { "epoch": 2.086639404296875e-05, "model_forward_time": 0.025548219680786133, "step": 13675 }, { "epoch": 2.086639404296875e-05, "step": 13675, "training_step_time": 0.10455894470214844 }, { "epoch": 2.0867919921875e-05, "model_forward_time": 0.0257112979888916, "step": 13676 }, { "epoch": 2.0867919921875e-05, "step": 13676, "training_step_time": 0.10475730895996094 }, { "epoch": 2.086944580078125e-05, "model_forward_time": 0.02555084228515625, "step": 13677 }, { "epoch": 2.086944580078125e-05, "step": 13677, "training_step_time": 0.10443806648254395 }, { "epoch": 2.08709716796875e-05, "model_forward_time": 0.025644540786743164, "step": 13678 }, { "epoch": 2.08709716796875e-05, "step": 13678, "training_step_time": 0.10723686218261719 }, { "epoch": 2.087249755859375e-05, "model_forward_time": 0.025046110153198242, "step": 13679 }, { "epoch": 2.087249755859375e-05, "step": 13679, "training_step_time": 0.10556697845458984 }, { "epoch": 2.08740234375e-05, "grad_norm": 0.18559281527996063, "learning_rate": 6.131019695695702e-05, "loss": 0.0142, "step": 13680 }, { "epoch": 2.08740234375e-05, "model_forward_time": 0.025385141372680664, "step": 13680 }, { "epoch": 2.08740234375e-05, "step": 13680, "training_step_time": 0.10452127456665039 }, { "epoch": 2.087554931640625e-05, "model_forward_time": 0.025847911834716797, "step": 13681 }, { "epoch": 2.087554931640625e-05, "step": 13681, "training_step_time": 0.18056845664978027 }, { "epoch": 2.08770751953125e-05, "model_forward_time": 0.025025367736816406, "step": 13682 }, { "epoch": 2.08770751953125e-05, "step": 13682, "training_step_time": 0.2406308650970459 }, { "epoch": 2.087860107421875e-05, "model_forward_time": 0.025998592376708984, "step": 13683 }, { "epoch": 2.087860107421875e-05, "step": 13683, "training_step_time": 0.19423246383666992 }, { "epoch": 2.0880126953125e-05, "model_forward_time": 0.023974180221557617, "step": 13684 }, { "epoch": 2.0880126953125e-05, "step": 13684, "training_step_time": 0.1882915496826172 }, { "epoch": 2.088165283203125e-05, "model_forward_time": 0.025397062301635742, "step": 13685 }, { "epoch": 2.088165283203125e-05, "step": 13685, "training_step_time": 0.1261589527130127 }, { "epoch": 2.08831787109375e-05, "model_forward_time": 0.02797985076904297, "step": 13686 }, { "epoch": 2.08831787109375e-05, "step": 13686, "training_step_time": 0.14324569702148438 }, { "epoch": 2.088470458984375e-05, "model_forward_time": 0.02514481544494629, "step": 13687 }, { "epoch": 2.088470458984375e-05, "step": 13687, "training_step_time": 0.1538372039794922 }, { "epoch": 2.088623046875e-05, "model_forward_time": 0.024793624877929688, "step": 13688 }, { "epoch": 2.088623046875e-05, "step": 13688, "training_step_time": 0.13050603866577148 }, { "epoch": 2.088775634765625e-05, "model_forward_time": 0.025187253952026367, "step": 13689 }, { "epoch": 2.088775634765625e-05, "step": 13689, "training_step_time": 0.12175703048706055 }, { "epoch": 2.08892822265625e-05, "grad_norm": 0.41579878330230713, "learning_rate": 6.125650303562221e-05, "loss": 0.0169, "step": 13690 }, { "epoch": 2.08892822265625e-05, "model_forward_time": 0.02540445327758789, "step": 13690 }, { "epoch": 2.08892822265625e-05, "step": 13690, "training_step_time": 0.19053196907043457 }, { "epoch": 2.089080810546875e-05, "model_forward_time": 0.02640819549560547, "step": 13691 }, { "epoch": 2.089080810546875e-05, "step": 13691, "training_step_time": 0.11151742935180664 }, { "epoch": 2.0892333984375e-05, "model_forward_time": 0.02582573890686035, "step": 13692 }, { "epoch": 2.0892333984375e-05, "step": 13692, "training_step_time": 0.1090705394744873 }, { "epoch": 2.089385986328125e-05, "model_forward_time": 0.0254971981048584, "step": 13693 }, { "epoch": 2.089385986328125e-05, "step": 13693, "training_step_time": 0.10692596435546875 }, { "epoch": 2.08953857421875e-05, "model_forward_time": 0.025211095809936523, "step": 13694 }, { "epoch": 2.08953857421875e-05, "step": 13694, "training_step_time": 0.15807819366455078 }, { "epoch": 2.089691162109375e-05, "model_forward_time": 0.025496482849121094, "step": 13695 }, { "epoch": 2.089691162109375e-05, "step": 13695, "training_step_time": 0.11967277526855469 }, { "epoch": 2.08984375e-05, "model_forward_time": 0.02579331398010254, "step": 13696 }, { "epoch": 2.08984375e-05, "step": 13696, "training_step_time": 0.11074185371398926 }, { "epoch": 2.089996337890625e-05, "model_forward_time": 0.026192665100097656, "step": 13697 }, { "epoch": 2.089996337890625e-05, "step": 13697, "training_step_time": 0.12152814865112305 }, { "epoch": 2.09014892578125e-05, "model_forward_time": 0.025872468948364258, "step": 13698 }, { "epoch": 2.09014892578125e-05, "step": 13698, "training_step_time": 0.10606932640075684 }, { "epoch": 2.090301513671875e-05, "model_forward_time": 0.025550365447998047, "step": 13699 }, { "epoch": 2.090301513671875e-05, "step": 13699, "training_step_time": 0.11208963394165039 }, { "epoch": 2.0904541015625e-05, "grad_norm": 0.3767525553703308, "learning_rate": 6.12027954365748e-05, "loss": 0.0168, "step": 13700 }, { "epoch": 2.0904541015625e-05, "model_forward_time": 0.025256872177124023, "step": 13700 }, { "epoch": 2.0904541015625e-05, "step": 13700, "training_step_time": 0.14169812202453613 }, { "epoch": 2.090606689453125e-05, "model_forward_time": 0.02752971649169922, "step": 13701 }, { "epoch": 2.090606689453125e-05, "step": 13701, "training_step_time": 0.10912275314331055 }, { "epoch": 2.09075927734375e-05, "model_forward_time": 0.026776790618896484, "step": 13702 }, { "epoch": 2.09075927734375e-05, "step": 13702, "training_step_time": 0.10444235801696777 }, { "epoch": 2.090911865234375e-05, "model_forward_time": 0.026980161666870117, "step": 13703 }, { "epoch": 2.090911865234375e-05, "step": 13703, "training_step_time": 0.10830116271972656 }, { "epoch": 2.091064453125e-05, "model_forward_time": 0.026279211044311523, "step": 13704 }, { "epoch": 2.091064453125e-05, "step": 13704, "training_step_time": 0.10456180572509766 }, { "epoch": 2.091217041015625e-05, "model_forward_time": 0.026111125946044922, "step": 13705 }, { "epoch": 2.091217041015625e-05, "step": 13705, "training_step_time": 0.1063385009765625 }, { "epoch": 2.09136962890625e-05, "model_forward_time": 0.025219202041625977, "step": 13706 }, { "epoch": 2.09136962890625e-05, "step": 13706, "training_step_time": 0.10744643211364746 }, { "epoch": 2.091522216796875e-05, "model_forward_time": 0.02541041374206543, "step": 13707 }, { "epoch": 2.091522216796875e-05, "step": 13707, "training_step_time": 0.10593771934509277 }, { "epoch": 2.0916748046875e-05, "model_forward_time": 0.025578022003173828, "step": 13708 }, { "epoch": 2.0916748046875e-05, "step": 13708, "training_step_time": 0.10746955871582031 }, { "epoch": 2.091827392578125e-05, "model_forward_time": 0.027563095092773438, "step": 13709 }, { "epoch": 2.091827392578125e-05, "step": 13709, "training_step_time": 0.10862302780151367 }, { "epoch": 2.09197998046875e-05, "grad_norm": 0.2269592583179474, "learning_rate": 6.11490742250746e-05, "loss": 0.0147, "step": 13710 }, { "epoch": 2.09197998046875e-05, "model_forward_time": 0.025574445724487305, "step": 13710 }, { "epoch": 2.09197998046875e-05, "step": 13710, "training_step_time": 0.11015939712524414 }, { "epoch": 2.092132568359375e-05, "model_forward_time": 0.02546525001525879, "step": 13711 }, { "epoch": 2.092132568359375e-05, "step": 13711, "training_step_time": 0.10714888572692871 }, { "epoch": 2.09228515625e-05, "model_forward_time": 0.02526402473449707, "step": 13712 }, { "epoch": 2.09228515625e-05, "step": 13712, "training_step_time": 0.10870909690856934 }, { "epoch": 2.092437744140625e-05, "model_forward_time": 0.025445938110351562, "step": 13713 }, { "epoch": 2.092437744140625e-05, "step": 13713, "training_step_time": 0.10788774490356445 }, { "epoch": 2.09259033203125e-05, "model_forward_time": 0.0260162353515625, "step": 13714 }, { "epoch": 2.09259033203125e-05, "step": 13714, "training_step_time": 0.10829949378967285 }, { "epoch": 2.092742919921875e-05, "model_forward_time": 0.026346683502197266, "step": 13715 }, { "epoch": 2.092742919921875e-05, "step": 13715, "training_step_time": 0.10771703720092773 }, { "epoch": 2.0928955078125e-05, "model_forward_time": 0.02622199058532715, "step": 13716 }, { "epoch": 2.0928955078125e-05, "step": 13716, "training_step_time": 0.10752248764038086 }, { "epoch": 2.093048095703125e-05, "model_forward_time": 0.02557206153869629, "step": 13717 }, { "epoch": 2.093048095703125e-05, "step": 13717, "training_step_time": 0.11059308052062988 }, { "epoch": 2.09320068359375e-05, "model_forward_time": 0.025725364685058594, "step": 13718 }, { "epoch": 2.09320068359375e-05, "step": 13718, "training_step_time": 0.10825324058532715 }, { "epoch": 2.093353271484375e-05, "model_forward_time": 0.025615692138671875, "step": 13719 }, { "epoch": 2.093353271484375e-05, "step": 13719, "training_step_time": 0.10699295997619629 }, { "epoch": 2.093505859375e-05, "grad_norm": 0.21005350351333618, "learning_rate": 6.10953394663979e-05, "loss": 0.0135, "step": 13720 }, { "epoch": 2.093505859375e-05, "model_forward_time": 0.025527238845825195, "step": 13720 }, { "epoch": 2.093505859375e-05, "step": 13720, "training_step_time": 0.10567855834960938 }, { "epoch": 2.093658447265625e-05, "model_forward_time": 0.025064945220947266, "step": 13721 }, { "epoch": 2.093658447265625e-05, "step": 13721, "training_step_time": 0.10699319839477539 }, { "epoch": 2.09381103515625e-05, "model_forward_time": 0.02465534210205078, "step": 13722 }, { "epoch": 2.09381103515625e-05, "step": 13722, "training_step_time": 0.10451865196228027 }, { "epoch": 2.093963623046875e-05, "model_forward_time": 0.025048494338989258, "step": 13723 }, { "epoch": 2.093963623046875e-05, "step": 13723, "training_step_time": 0.10701298713684082 }, { "epoch": 2.0941162109375e-05, "model_forward_time": 0.02509784698486328, "step": 13724 }, { "epoch": 2.0941162109375e-05, "step": 13724, "training_step_time": 0.10663914680480957 }, { "epoch": 2.094268798828125e-05, "model_forward_time": 0.025310993194580078, "step": 13725 }, { "epoch": 2.094268798828125e-05, "step": 13725, "training_step_time": 0.10582304000854492 }, { "epoch": 2.09442138671875e-05, "model_forward_time": 0.028603076934814453, "step": 13726 }, { "epoch": 2.09442138671875e-05, "step": 13726, "training_step_time": 0.10929107666015625 }, { "epoch": 2.094573974609375e-05, "model_forward_time": 0.025411367416381836, "step": 13727 }, { "epoch": 2.094573974609375e-05, "step": 13727, "training_step_time": 0.15403223037719727 }, { "epoch": 2.0947265625e-05, "model_forward_time": 0.02510523796081543, "step": 13728 }, { "epoch": 2.0947265625e-05, "step": 13728, "training_step_time": 0.20499300956726074 }, { "epoch": 2.094879150390625e-05, "model_forward_time": 0.02453923225402832, "step": 13729 }, { "epoch": 2.094879150390625e-05, "step": 13729, "training_step_time": 0.14041447639465332 }, { "epoch": 2.09503173828125e-05, "grad_norm": 0.3171594440937042, "learning_rate": 6.104159122583752e-05, "loss": 0.0143, "step": 13730 }, { "epoch": 2.09503173828125e-05, "model_forward_time": 0.024187326431274414, "step": 13730 }, { "epoch": 2.09503173828125e-05, "step": 13730, "training_step_time": 0.1953895092010498 }, { "epoch": 2.095184326171875e-05, "model_forward_time": 0.027753591537475586, "step": 13731 }, { "epoch": 2.095184326171875e-05, "step": 13731, "training_step_time": 0.10712957382202148 }, { "epoch": 2.0953369140625e-05, "model_forward_time": 0.02504134178161621, "step": 13732 }, { "epoch": 2.0953369140625e-05, "step": 13732, "training_step_time": 0.14459466934204102 }, { "epoch": 2.095489501953125e-05, "model_forward_time": 0.025074243545532227, "step": 13733 }, { "epoch": 2.095489501953125e-05, "step": 13733, "training_step_time": 0.16294550895690918 }, { "epoch": 2.09564208984375e-05, "model_forward_time": 0.02475261688232422, "step": 13734 }, { "epoch": 2.09564208984375e-05, "step": 13734, "training_step_time": 0.11420631408691406 }, { "epoch": 2.095794677734375e-05, "model_forward_time": 0.02444291114807129, "step": 13735 }, { "epoch": 2.095794677734375e-05, "step": 13735, "training_step_time": 0.12803339958190918 }, { "epoch": 2.095947265625e-05, "model_forward_time": 0.025377988815307617, "step": 13736 }, { "epoch": 2.095947265625e-05, "step": 13736, "training_step_time": 0.19315838813781738 }, { "epoch": 2.096099853515625e-05, "model_forward_time": 0.024515628814697266, "step": 13737 }, { "epoch": 2.096099853515625e-05, "step": 13737, "training_step_time": 0.10225319862365723 }, { "epoch": 2.09625244140625e-05, "model_forward_time": 0.02680492401123047, "step": 13738 }, { "epoch": 2.09625244140625e-05, "step": 13738, "training_step_time": 0.1062312126159668 }, { "epoch": 2.096405029296875e-05, "model_forward_time": 0.02536487579345703, "step": 13739 }, { "epoch": 2.096405029296875e-05, "step": 13739, "training_step_time": 0.10562801361083984 }, { "epoch": 2.0965576171875e-05, "grad_norm": 0.29130005836486816, "learning_rate": 6.0987829568702656e-05, "loss": 0.013, "step": 13740 }, { "epoch": 2.0965576171875e-05, "model_forward_time": 0.025903701782226562, "step": 13740 }, { "epoch": 2.0965576171875e-05, "step": 13740, "training_step_time": 0.10328483581542969 }, { "epoch": 2.096710205078125e-05, "model_forward_time": 0.02615499496459961, "step": 13741 }, { "epoch": 2.096710205078125e-05, "step": 13741, "training_step_time": 0.1491847038269043 }, { "epoch": 2.09686279296875e-05, "model_forward_time": 0.02608656883239746, "step": 13742 }, { "epoch": 2.09686279296875e-05, "step": 13742, "training_step_time": 0.1291196346282959 }, { "epoch": 2.097015380859375e-05, "model_forward_time": 0.024675607681274414, "step": 13743 }, { "epoch": 2.097015380859375e-05, "step": 13743, "training_step_time": 0.133544921875 }, { "epoch": 2.09716796875e-05, "model_forward_time": 0.025962352752685547, "step": 13744 }, { "epoch": 2.09716796875e-05, "step": 13744, "training_step_time": 0.16542816162109375 }, { "epoch": 2.097320556640625e-05, "model_forward_time": 0.02377915382385254, "step": 13745 }, { "epoch": 2.097320556640625e-05, "step": 13745, "training_step_time": 0.191239595413208 }, { "epoch": 2.09747314453125e-05, "model_forward_time": 0.024747371673583984, "step": 13746 }, { "epoch": 2.09747314453125e-05, "step": 13746, "training_step_time": 0.1715688705444336 }, { "epoch": 2.097625732421875e-05, "model_forward_time": 0.024481773376464844, "step": 13747 }, { "epoch": 2.097625732421875e-05, "step": 13747, "training_step_time": 0.17028427124023438 }, { "epoch": 2.0977783203125e-05, "model_forward_time": 0.025011062622070312, "step": 13748 }, { "epoch": 2.0977783203125e-05, "step": 13748, "training_step_time": 0.16060233116149902 }, { "epoch": 2.097930908203125e-05, "model_forward_time": 0.024953603744506836, "step": 13749 }, { "epoch": 2.097930908203125e-05, "step": 13749, "training_step_time": 0.14506292343139648 }, { "epoch": 2.09808349609375e-05, "grad_norm": 0.22500164806842804, "learning_rate": 6.09340545603188e-05, "loss": 0.0147, "step": 13750 }, { "epoch": 2.09808349609375e-05, "model_forward_time": 0.024704933166503906, "step": 13750 }, { "epoch": 2.09808349609375e-05, "step": 13750, "training_step_time": 0.1338956356048584 }, { "epoch": 2.098236083984375e-05, "model_forward_time": 0.026500701904296875, "step": 13751 }, { "epoch": 2.098236083984375e-05, "step": 13751, "training_step_time": 0.12761211395263672 }, { "epoch": 2.098388671875e-05, "model_forward_time": 0.024725675582885742, "step": 13752 }, { "epoch": 2.098388671875e-05, "step": 13752, "training_step_time": 0.12161564826965332 }, { "epoch": 2.098541259765625e-05, "model_forward_time": 0.024921417236328125, "step": 13753 }, { "epoch": 2.098541259765625e-05, "step": 13753, "training_step_time": 0.1141660213470459 }, { "epoch": 2.09869384765625e-05, "model_forward_time": 0.025361061096191406, "step": 13754 }, { "epoch": 2.09869384765625e-05, "step": 13754, "training_step_time": 0.11153364181518555 }, { "epoch": 2.098846435546875e-05, "model_forward_time": 0.025524139404296875, "step": 13755 }, { "epoch": 2.098846435546875e-05, "step": 13755, "training_step_time": 0.11288666725158691 }, { "epoch": 2.0989990234375e-05, "model_forward_time": 0.025410175323486328, "step": 13756 }, { "epoch": 2.0989990234375e-05, "step": 13756, "training_step_time": 0.11191940307617188 }, { "epoch": 2.099151611328125e-05, "model_forward_time": 0.024551868438720703, "step": 13757 }, { "epoch": 2.099151611328125e-05, "step": 13757, "training_step_time": 0.10490679740905762 }, { "epoch": 2.09930419921875e-05, "model_forward_time": 0.024232864379882812, "step": 13758 }, { "epoch": 2.09930419921875e-05, "step": 13758, "training_step_time": 0.1020815372467041 }, { "epoch": 2.099456787109375e-05, "model_forward_time": 0.02547001838684082, "step": 13759 }, { "epoch": 2.099456787109375e-05, "step": 13759, "training_step_time": 0.10875606536865234 }, { "epoch": 2.099609375e-05, "grad_norm": 0.3524976968765259, "learning_rate": 6.088026626602763e-05, "loss": 0.0333, "step": 13760 }, { "epoch": 2.099609375e-05, "model_forward_time": 0.02614450454711914, "step": 13760 }, { "epoch": 2.099609375e-05, "step": 13760, "training_step_time": 0.10822606086730957 }, { "epoch": 2.099761962890625e-05, "model_forward_time": 0.02534794807434082, "step": 13761 }, { "epoch": 2.099761962890625e-05, "step": 13761, "training_step_time": 0.10287213325500488 }, { "epoch": 2.09991455078125e-05, "model_forward_time": 0.025422334671020508, "step": 13762 }, { "epoch": 2.09991455078125e-05, "step": 13762, "training_step_time": 0.10394906997680664 }, { "epoch": 2.100067138671875e-05, "model_forward_time": 0.024719953536987305, "step": 13763 }, { "epoch": 2.100067138671875e-05, "step": 13763, "training_step_time": 0.1069633960723877 }, { "epoch": 2.1002197265625e-05, "model_forward_time": 0.025336265563964844, "step": 13764 }, { "epoch": 2.1002197265625e-05, "step": 13764, "training_step_time": 0.10631752014160156 }, { "epoch": 2.100372314453125e-05, "model_forward_time": 0.025374889373779297, "step": 13765 }, { "epoch": 2.100372314453125e-05, "step": 13765, "training_step_time": 0.10664129257202148 }, { "epoch": 2.10052490234375e-05, "model_forward_time": 0.02541351318359375, "step": 13766 }, { "epoch": 2.10052490234375e-05, "step": 13766, "training_step_time": 0.1121671199798584 }, { "epoch": 2.100677490234375e-05, "model_forward_time": 0.024404287338256836, "step": 13767 }, { "epoch": 2.100677490234375e-05, "step": 13767, "training_step_time": 0.11769914627075195 }, { "epoch": 2.100830078125e-05, "model_forward_time": 0.024979591369628906, "step": 13768 }, { "epoch": 2.100830078125e-05, "step": 13768, "training_step_time": 0.11680340766906738 }, { "epoch": 2.100982666015625e-05, "model_forward_time": 0.025100231170654297, "step": 13769 }, { "epoch": 2.100982666015625e-05, "step": 13769, "training_step_time": 0.11489129066467285 }, { "epoch": 2.10113525390625e-05, "grad_norm": 0.2127663493156433, "learning_rate": 6.0826464751186994e-05, "loss": 0.0137, "step": 13770 }, { "epoch": 2.10113525390625e-05, "model_forward_time": 0.024636268615722656, "step": 13770 }, { "epoch": 2.10113525390625e-05, "step": 13770, "training_step_time": 0.15667033195495605 }, { "epoch": 2.101287841796875e-05, "model_forward_time": 0.025126934051513672, "step": 13771 }, { "epoch": 2.101287841796875e-05, "step": 13771, "training_step_time": 0.13269853591918945 }, { "epoch": 2.1014404296875e-05, "model_forward_time": 0.02482748031616211, "step": 13772 }, { "epoch": 2.1014404296875e-05, "step": 13772, "training_step_time": 0.21873831748962402 }, { "epoch": 2.101593017578125e-05, "model_forward_time": 0.02473759651184082, "step": 13773 }, { "epoch": 2.101593017578125e-05, "step": 13773, "training_step_time": 0.12624096870422363 }, { "epoch": 2.10174560546875e-05, "model_forward_time": 0.024590492248535156, "step": 13774 }, { "epoch": 2.10174560546875e-05, "step": 13774, "training_step_time": 0.11187887191772461 }, { "epoch": 2.101898193359375e-05, "model_forward_time": 0.025938749313354492, "step": 13775 }, { "epoch": 2.101898193359375e-05, "step": 13775, "training_step_time": 0.13290882110595703 }, { "epoch": 2.10205078125e-05, "model_forward_time": 0.02457451820373535, "step": 13776 }, { "epoch": 2.10205078125e-05, "step": 13776, "training_step_time": 0.21213483810424805 }, { "epoch": 2.102203369140625e-05, "model_forward_time": 0.024930953979492188, "step": 13777 }, { "epoch": 2.102203369140625e-05, "step": 13777, "training_step_time": 0.11803960800170898 }, { "epoch": 2.10235595703125e-05, "model_forward_time": 0.024651765823364258, "step": 13778 }, { "epoch": 2.10235595703125e-05, "step": 13778, "training_step_time": 0.12125587463378906 }, { "epoch": 2.102508544921875e-05, "model_forward_time": 0.025071144104003906, "step": 13779 }, { "epoch": 2.102508544921875e-05, "step": 13779, "training_step_time": 0.11096739768981934 }, { "epoch": 2.1026611328125e-05, "grad_norm": 0.3180241584777832, "learning_rate": 6.077265008117081e-05, "loss": 0.0139, "step": 13780 }, { "epoch": 2.1026611328125e-05, "model_forward_time": 0.025513887405395508, "step": 13780 }, { "epoch": 2.1026611328125e-05, "step": 13780, "training_step_time": 0.11728549003601074 }, { "epoch": 2.102813720703125e-05, "model_forward_time": 0.02926468849182129, "step": 13781 }, { "epoch": 2.102813720703125e-05, "step": 13781, "training_step_time": 0.11231470108032227 }, { "epoch": 2.10296630859375e-05, "model_forward_time": 0.02541947364807129, "step": 13782 }, { "epoch": 2.10296630859375e-05, "step": 13782, "training_step_time": 0.10933542251586914 }, { "epoch": 2.103118896484375e-05, "model_forward_time": 0.0251615047454834, "step": 13783 }, { "epoch": 2.103118896484375e-05, "step": 13783, "training_step_time": 0.10403084754943848 }, { "epoch": 2.103271484375e-05, "model_forward_time": 0.024271726608276367, "step": 13784 }, { "epoch": 2.103271484375e-05, "step": 13784, "training_step_time": 0.1035923957824707 }, { "epoch": 2.103424072265625e-05, "model_forward_time": 0.024541378021240234, "step": 13785 }, { "epoch": 2.103424072265625e-05, "step": 13785, "training_step_time": 0.21591973304748535 }, { "epoch": 2.10357666015625e-05, "model_forward_time": 0.02500748634338379, "step": 13786 }, { "epoch": 2.10357666015625e-05, "step": 13786, "training_step_time": 0.12184858322143555 }, { "epoch": 2.103729248046875e-05, "model_forward_time": 0.024967193603515625, "step": 13787 }, { "epoch": 2.103729248046875e-05, "step": 13787, "training_step_time": 0.1257326602935791 }, { "epoch": 2.1038818359375e-05, "model_forward_time": 0.024944782257080078, "step": 13788 }, { "epoch": 2.1038818359375e-05, "step": 13788, "training_step_time": 0.1119387149810791 }, { "epoch": 2.104034423828125e-05, "model_forward_time": 0.02567291259765625, "step": 13789 }, { "epoch": 2.104034423828125e-05, "step": 13789, "training_step_time": 0.15781950950622559 }, { "epoch": 2.10418701171875e-05, "grad_norm": 0.17182987928390503, "learning_rate": 6.071882232136901e-05, "loss": 0.0163, "step": 13790 }, { "epoch": 2.10418701171875e-05, "model_forward_time": 0.024692535400390625, "step": 13790 }, { "epoch": 2.10418701171875e-05, "step": 13790, "training_step_time": 0.13045501708984375 }, { "epoch": 2.104339599609375e-05, "model_forward_time": 0.024561405181884766, "step": 13791 }, { "epoch": 2.104339599609375e-05, "step": 13791, "training_step_time": 0.11142945289611816 }, { "epoch": 2.1044921875e-05, "model_forward_time": 0.025524139404296875, "step": 13792 }, { "epoch": 2.1044921875e-05, "step": 13792, "training_step_time": 0.10689616203308105 }, { "epoch": 2.104644775390625e-05, "model_forward_time": 0.02545785903930664, "step": 13793 }, { "epoch": 2.104644775390625e-05, "step": 13793, "training_step_time": 0.10878443717956543 }, { "epoch": 2.10479736328125e-05, "model_forward_time": 0.025337934494018555, "step": 13794 }, { "epoch": 2.10479736328125e-05, "step": 13794, "training_step_time": 0.10604190826416016 }, { "epoch": 2.104949951171875e-05, "model_forward_time": 0.025556087493896484, "step": 13795 }, { "epoch": 2.104949951171875e-05, "step": 13795, "training_step_time": 0.10880732536315918 }, { "epoch": 2.1051025390625e-05, "model_forward_time": 0.025599241256713867, "step": 13796 }, { "epoch": 2.1051025390625e-05, "step": 13796, "training_step_time": 0.11069202423095703 }, { "epoch": 2.105255126953125e-05, "model_forward_time": 0.02544093132019043, "step": 13797 }, { "epoch": 2.105255126953125e-05, "step": 13797, "training_step_time": 0.10885739326477051 }, { "epoch": 2.10540771484375e-05, "model_forward_time": 0.02591562271118164, "step": 13798 }, { "epoch": 2.10540771484375e-05, "step": 13798, "training_step_time": 0.10829520225524902 }, { "epoch": 2.105560302734375e-05, "model_forward_time": 0.026581764221191406, "step": 13799 }, { "epoch": 2.105560302734375e-05, "step": 13799, "training_step_time": 0.1089475154876709 }, { "epoch": 2.105712890625e-05, "grad_norm": 0.3595592677593231, "learning_rate": 6.066498153718735e-05, "loss": 0.0162, "step": 13800 }, { "epoch": 2.105712890625e-05, "model_forward_time": 0.026041030883789062, "step": 13800 }, { "epoch": 2.105712890625e-05, "step": 13800, "training_step_time": 0.18308782577514648 }, { "epoch": 2.105865478515625e-05, "model_forward_time": 0.023525714874267578, "step": 13801 }, { "epoch": 2.105865478515625e-05, "step": 13801, "training_step_time": 0.1820087432861328 }, { "epoch": 2.10601806640625e-05, "model_forward_time": 0.023185253143310547, "step": 13802 }, { "epoch": 2.10601806640625e-05, "step": 13802, "training_step_time": 0.16408705711364746 }, { "epoch": 2.106170654296875e-05, "model_forward_time": 0.024877309799194336, "step": 13803 }, { "epoch": 2.106170654296875e-05, "step": 13803, "training_step_time": 0.15259003639221191 }, { "epoch": 2.1063232421875e-05, "model_forward_time": 0.023276090621948242, "step": 13804 }, { "epoch": 2.1063232421875e-05, "step": 13804, "training_step_time": 0.14666056632995605 }, { "epoch": 2.106475830078125e-05, "model_forward_time": 0.02346944808959961, "step": 13805 }, { "epoch": 2.106475830078125e-05, "step": 13805, "training_step_time": 0.12651538848876953 }, { "epoch": 2.10662841796875e-05, "model_forward_time": 0.02337026596069336, "step": 13806 }, { "epoch": 2.10662841796875e-05, "step": 13806, "training_step_time": 0.12755775451660156 }, { "epoch": 2.106781005859375e-05, "model_forward_time": 0.02415299415588379, "step": 13807 }, { "epoch": 2.106781005859375e-05, "step": 13807, "training_step_time": 0.12300801277160645 }, { "epoch": 2.10693359375e-05, "model_forward_time": 0.02429819107055664, "step": 13808 }, { "epoch": 2.10693359375e-05, "step": 13808, "training_step_time": 0.11923742294311523 }, { "epoch": 2.107086181640625e-05, "model_forward_time": 0.02471780776977539, "step": 13809 }, { "epoch": 2.107086181640625e-05, "step": 13809, "training_step_time": 0.11581587791442871 }, { "epoch": 2.10723876953125e-05, "grad_norm": 0.43297770619392395, "learning_rate": 6.0611127794047486e-05, "loss": 0.0241, "step": 13810 }, { "epoch": 2.10723876953125e-05, "model_forward_time": 0.02421736717224121, "step": 13810 }, { "epoch": 2.10723876953125e-05, "step": 13810, "training_step_time": 0.11188077926635742 }, { "epoch": 2.107391357421875e-05, "model_forward_time": 0.02520442008972168, "step": 13811 }, { "epoch": 2.107391357421875e-05, "step": 13811, "training_step_time": 0.10904240608215332 }, { "epoch": 2.1075439453125e-05, "model_forward_time": 0.025173187255859375, "step": 13812 }, { "epoch": 2.1075439453125e-05, "step": 13812, "training_step_time": 0.10860180854797363 }, { "epoch": 2.107696533203125e-05, "model_forward_time": 0.025487184524536133, "step": 13813 }, { "epoch": 2.107696533203125e-05, "step": 13813, "training_step_time": 0.10796499252319336 }, { "epoch": 2.10784912109375e-05, "model_forward_time": 0.02460026741027832, "step": 13814 }, { "epoch": 2.10784912109375e-05, "step": 13814, "training_step_time": 0.12011170387268066 }, { "epoch": 2.108001708984375e-05, "model_forward_time": 0.024837493896484375, "step": 13815 }, { "epoch": 2.108001708984375e-05, "step": 13815, "training_step_time": 0.12703585624694824 }, { "epoch": 2.108154296875e-05, "model_forward_time": 0.02537250518798828, "step": 13816 }, { "epoch": 2.108154296875e-05, "step": 13816, "training_step_time": 0.2201244831085205 }, { "epoch": 2.108306884765625e-05, "model_forward_time": 0.02442479133605957, "step": 13817 }, { "epoch": 2.108306884765625e-05, "step": 13817, "training_step_time": 0.13438653945922852 }, { "epoch": 2.10845947265625e-05, "model_forward_time": 0.024334192276000977, "step": 13818 }, { "epoch": 2.10845947265625e-05, "step": 13818, "training_step_time": 0.17963743209838867 }, { "epoch": 2.108612060546875e-05, "model_forward_time": 0.02477407455444336, "step": 13819 }, { "epoch": 2.108612060546875e-05, "step": 13819, "training_step_time": 0.10099434852600098 }, { "epoch": 2.1087646484375e-05, "grad_norm": 0.3133634626865387, "learning_rate": 6.055726115738678e-05, "loss": 0.0202, "step": 13820 }, { "epoch": 2.1087646484375e-05, "model_forward_time": 0.024273157119750977, "step": 13820 }, { "epoch": 2.1087646484375e-05, "step": 13820, "training_step_time": 0.14082694053649902 }, { "epoch": 2.108917236328125e-05, "model_forward_time": 0.024604082107543945, "step": 13821 }, { "epoch": 2.108917236328125e-05, "step": 13821, "training_step_time": 0.13423585891723633 }, { "epoch": 2.10906982421875e-05, "model_forward_time": 0.024552583694458008, "step": 13822 }, { "epoch": 2.10906982421875e-05, "step": 13822, "training_step_time": 0.10879659652709961 }, { "epoch": 2.109222412109375e-05, "model_forward_time": 0.025456905364990234, "step": 13823 }, { "epoch": 2.109222412109375e-05, "step": 13823, "training_step_time": 0.11156773567199707 }, { "epoch": 2.109375e-05, "model_forward_time": 0.025557518005371094, "step": 13824 }, { "epoch": 2.109375e-05, "step": 13824, "training_step_time": 0.10536003112792969 }, { "epoch": 2.109527587890625e-05, "model_forward_time": 0.025357484817504883, "step": 13825 }, { "epoch": 2.109527587890625e-05, "step": 13825, "training_step_time": 0.12174582481384277 }, { "epoch": 2.10968017578125e-05, "model_forward_time": 0.02515411376953125, "step": 13826 }, { "epoch": 2.10968017578125e-05, "step": 13826, "training_step_time": 0.1058952808380127 }, { "epoch": 2.109832763671875e-05, "model_forward_time": 0.02508687973022461, "step": 13827 }, { "epoch": 2.109832763671875e-05, "step": 13827, "training_step_time": 0.10395336151123047 }, { "epoch": 2.1099853515625e-05, "model_forward_time": 0.02506542205810547, "step": 13828 }, { "epoch": 2.1099853515625e-05, "step": 13828, "training_step_time": 0.10573840141296387 }, { "epoch": 2.110137939453125e-05, "model_forward_time": 0.025094032287597656, "step": 13829 }, { "epoch": 2.110137939453125e-05, "step": 13829, "training_step_time": 0.20791149139404297 }, { "epoch": 2.11029052734375e-05, "grad_norm": 0.31008756160736084, "learning_rate": 6.05033816926583e-05, "loss": 0.0185, "step": 13830 }, { "epoch": 2.11029052734375e-05, "model_forward_time": 0.02429509162902832, "step": 13830 }, { "epoch": 2.11029052734375e-05, "step": 13830, "training_step_time": 0.11154365539550781 }, { "epoch": 2.110443115234375e-05, "model_forward_time": 0.02499675750732422, "step": 13831 }, { "epoch": 2.110443115234375e-05, "step": 13831, "training_step_time": 0.13228678703308105 }, { "epoch": 2.110595703125e-05, "model_forward_time": 0.02540135383605957, "step": 13832 }, { "epoch": 2.110595703125e-05, "step": 13832, "training_step_time": 0.13718414306640625 }, { "epoch": 2.110748291015625e-05, "model_forward_time": 0.024593114852905273, "step": 13833 }, { "epoch": 2.110748291015625e-05, "step": 13833, "training_step_time": 0.11216187477111816 }, { "epoch": 2.11090087890625e-05, "model_forward_time": 0.02505326271057129, "step": 13834 }, { "epoch": 2.11090087890625e-05, "step": 13834, "training_step_time": 0.12896966934204102 }, { "epoch": 2.111053466796875e-05, "model_forward_time": 0.025183916091918945, "step": 13835 }, { "epoch": 2.111053466796875e-05, "step": 13835, "training_step_time": 0.11802148818969727 }, { "epoch": 2.1112060546875e-05, "model_forward_time": 0.02514934539794922, "step": 13836 }, { "epoch": 2.1112060546875e-05, "step": 13836, "training_step_time": 0.10564041137695312 }, { "epoch": 2.111358642578125e-05, "model_forward_time": 0.0253903865814209, "step": 13837 }, { "epoch": 2.111358642578125e-05, "step": 13837, "training_step_time": 0.10464072227478027 }, { "epoch": 2.11151123046875e-05, "model_forward_time": 0.024884462356567383, "step": 13838 }, { "epoch": 2.11151123046875e-05, "step": 13838, "training_step_time": 0.10327935218811035 }, { "epoch": 2.111663818359375e-05, "model_forward_time": 0.025180339813232422, "step": 13839 }, { "epoch": 2.111663818359375e-05, "step": 13839, "training_step_time": 0.10840821266174316 }, { "epoch": 2.11181640625e-05, "grad_norm": 0.43487513065338135, "learning_rate": 6.044948946533064e-05, "loss": 0.0197, "step": 13840 }, { "epoch": 2.11181640625e-05, "model_forward_time": 0.0250091552734375, "step": 13840 }, { "epoch": 2.11181640625e-05, "step": 13840, "training_step_time": 0.10513687133789062 }, { "epoch": 2.111968994140625e-05, "model_forward_time": 0.02486252784729004, "step": 13841 }, { "epoch": 2.111968994140625e-05, "step": 13841, "training_step_time": 0.18270015716552734 }, { "epoch": 2.11212158203125e-05, "model_forward_time": 0.02440786361694336, "step": 13842 }, { "epoch": 2.11212158203125e-05, "step": 13842, "training_step_time": 0.19562506675720215 }, { "epoch": 2.112274169921875e-05, "model_forward_time": 0.02408432960510254, "step": 13843 }, { "epoch": 2.112274169921875e-05, "step": 13843, "training_step_time": 0.18784570693969727 }, { "epoch": 2.1124267578125e-05, "model_forward_time": 0.024085283279418945, "step": 13844 }, { "epoch": 2.1124267578125e-05, "step": 13844, "training_step_time": 0.18026185035705566 }, { "epoch": 2.112579345703125e-05, "model_forward_time": 0.023852109909057617, "step": 13845 }, { "epoch": 2.112579345703125e-05, "step": 13845, "training_step_time": 0.16629433631896973 }, { "epoch": 2.11273193359375e-05, "model_forward_time": 0.02449321746826172, "step": 13846 }, { "epoch": 2.11273193359375e-05, "step": 13846, "training_step_time": 0.11739206314086914 }, { "epoch": 2.112884521484375e-05, "model_forward_time": 0.024660825729370117, "step": 13847 }, { "epoch": 2.112884521484375e-05, "step": 13847, "training_step_time": 0.10135364532470703 }, { "epoch": 2.113037109375e-05, "model_forward_time": 0.025350570678710938, "step": 13848 }, { "epoch": 2.113037109375e-05, "step": 13848, "training_step_time": 0.10333847999572754 }, { "epoch": 2.113189697265625e-05, "model_forward_time": 0.0251157283782959, "step": 13849 }, { "epoch": 2.113189697265625e-05, "step": 13849, "training_step_time": 0.1031041145324707 }, { "epoch": 2.11334228515625e-05, "grad_norm": 0.45656758546829224, "learning_rate": 6.0395584540887963e-05, "loss": 0.0139, "step": 13850 }, { "epoch": 2.11334228515625e-05, "model_forward_time": 0.025149106979370117, "step": 13850 }, { "epoch": 2.11334228515625e-05, "step": 13850, "training_step_time": 0.10684800148010254 }, { "epoch": 2.113494873046875e-05, "model_forward_time": 0.025339603424072266, "step": 13851 }, { "epoch": 2.113494873046875e-05, "step": 13851, "training_step_time": 0.10315346717834473 }, { "epoch": 2.1136474609375e-05, "model_forward_time": 0.025275230407714844, "step": 13852 }, { "epoch": 2.1136474609375e-05, "step": 13852, "training_step_time": 0.10297107696533203 }, { "epoch": 2.113800048828125e-05, "model_forward_time": 0.02508831024169922, "step": 13853 }, { "epoch": 2.113800048828125e-05, "step": 13853, "training_step_time": 0.10972261428833008 }, { "epoch": 2.11395263671875e-05, "model_forward_time": 0.025352001190185547, "step": 13854 }, { "epoch": 2.11395263671875e-05, "step": 13854, "training_step_time": 0.1049959659576416 }, { "epoch": 2.114105224609375e-05, "model_forward_time": 0.02535867691040039, "step": 13855 }, { "epoch": 2.114105224609375e-05, "step": 13855, "training_step_time": 0.1058509349822998 }, { "epoch": 2.1142578125e-05, "model_forward_time": 0.025278091430664062, "step": 13856 }, { "epoch": 2.1142578125e-05, "step": 13856, "training_step_time": 0.10474109649658203 }, { "epoch": 2.114410400390625e-05, "model_forward_time": 0.024910449981689453, "step": 13857 }, { "epoch": 2.114410400390625e-05, "step": 13857, "training_step_time": 0.10487532615661621 }, { "epoch": 2.11456298828125e-05, "model_forward_time": 0.02590179443359375, "step": 13858 }, { "epoch": 2.11456298828125e-05, "step": 13858, "training_step_time": 0.175065279006958 }, { "epoch": 2.114715576171875e-05, "model_forward_time": 0.024261474609375, "step": 13859 }, { "epoch": 2.114715576171875e-05, "step": 13859, "training_step_time": 0.14134836196899414 }, { "epoch": 2.1148681640625e-05, "grad_norm": 0.21963365375995636, "learning_rate": 6.034166698482984e-05, "loss": 0.0157, "step": 13860 }, { "epoch": 2.1148681640625e-05, "model_forward_time": 0.024405241012573242, "step": 13860 }, { "epoch": 2.1148681640625e-05, "step": 13860, "training_step_time": 0.19643735885620117 }, { "epoch": 2.115020751953125e-05, "model_forward_time": 0.025187253952026367, "step": 13861 }, { "epoch": 2.115020751953125e-05, "step": 13861, "training_step_time": 0.1666247844696045 }, { "epoch": 2.11517333984375e-05, "model_forward_time": 0.024095773696899414, "step": 13862 }, { "epoch": 2.11517333984375e-05, "step": 13862, "training_step_time": 0.14900612831115723 }, { "epoch": 2.115325927734375e-05, "model_forward_time": 0.02439713478088379, "step": 13863 }, { "epoch": 2.115325927734375e-05, "step": 13863, "training_step_time": 0.17798948287963867 }, { "epoch": 2.115478515625e-05, "model_forward_time": 0.025011539459228516, "step": 13864 }, { "epoch": 2.115478515625e-05, "step": 13864, "training_step_time": 0.18427515029907227 }, { "epoch": 2.115631103515625e-05, "model_forward_time": 0.02464437484741211, "step": 13865 }, { "epoch": 2.115631103515625e-05, "step": 13865, "training_step_time": 0.13962984085083008 }, { "epoch": 2.11578369140625e-05, "model_forward_time": 0.02464747428894043, "step": 13866 }, { "epoch": 2.11578369140625e-05, "step": 13866, "training_step_time": 0.20704889297485352 }, { "epoch": 2.115936279296875e-05, "model_forward_time": 0.02448725700378418, "step": 13867 }, { "epoch": 2.115936279296875e-05, "step": 13867, "training_step_time": 0.13471627235412598 }, { "epoch": 2.1160888671875e-05, "model_forward_time": 0.02469182014465332, "step": 13868 }, { "epoch": 2.1160888671875e-05, "step": 13868, "training_step_time": 0.1241450309753418 }, { "epoch": 2.116241455078125e-05, "model_forward_time": 0.025192737579345703, "step": 13869 }, { "epoch": 2.116241455078125e-05, "step": 13869, "training_step_time": 0.11481738090515137 }, { "epoch": 2.11639404296875e-05, "grad_norm": 0.2901814579963684, "learning_rate": 6.0287736862671175e-05, "loss": 0.0208, "step": 13870 }, { "epoch": 2.11639404296875e-05, "model_forward_time": 0.025123119354248047, "step": 13870 }, { "epoch": 2.11639404296875e-05, "step": 13870, "training_step_time": 0.11545515060424805 }, { "epoch": 2.116546630859375e-05, "model_forward_time": 0.025197505950927734, "step": 13871 }, { "epoch": 2.116546630859375e-05, "step": 13871, "training_step_time": 0.11384391784667969 }, { "epoch": 2.11669921875e-05, "model_forward_time": 0.025483131408691406, "step": 13872 }, { "epoch": 2.11669921875e-05, "step": 13872, "training_step_time": 0.11619091033935547 }, { "epoch": 2.116851806640625e-05, "model_forward_time": 0.024842023849487305, "step": 13873 }, { "epoch": 2.116851806640625e-05, "step": 13873, "training_step_time": 0.12018084526062012 }, { "epoch": 2.11700439453125e-05, "model_forward_time": 0.025530099868774414, "step": 13874 }, { "epoch": 2.11700439453125e-05, "step": 13874, "training_step_time": 0.13125896453857422 }, { "epoch": 2.117156982421875e-05, "model_forward_time": 0.025022506713867188, "step": 13875 }, { "epoch": 2.117156982421875e-05, "step": 13875, "training_step_time": 0.1059274673461914 }, { "epoch": 2.1173095703125e-05, "model_forward_time": 0.02528691291809082, "step": 13876 }, { "epoch": 2.1173095703125e-05, "step": 13876, "training_step_time": 0.2113492488861084 }, { "epoch": 2.117462158203125e-05, "model_forward_time": 0.024336814880371094, "step": 13877 }, { "epoch": 2.117462158203125e-05, "step": 13877, "training_step_time": 0.11668992042541504 }, { "epoch": 2.11761474609375e-05, "model_forward_time": 0.024745702743530273, "step": 13878 }, { "epoch": 2.11761474609375e-05, "step": 13878, "training_step_time": 0.11249494552612305 }, { "epoch": 2.117767333984375e-05, "model_forward_time": 0.024939775466918945, "step": 13879 }, { "epoch": 2.117767333984375e-05, "step": 13879, "training_step_time": 0.10766863822937012 }, { "epoch": 2.117919921875e-05, "grad_norm": 0.2624422013759613, "learning_rate": 6.023379423994214e-05, "loss": 0.0106, "step": 13880 }, { "epoch": 2.117919921875e-05, "model_forward_time": 0.02528214454650879, "step": 13880 }, { "epoch": 2.117919921875e-05, "step": 13880, "training_step_time": 0.11126470565795898 }, { "epoch": 2.118072509765625e-05, "model_forward_time": 0.025384902954101562, "step": 13881 }, { "epoch": 2.118072509765625e-05, "step": 13881, "training_step_time": 0.10678744316101074 }, { "epoch": 2.11822509765625e-05, "model_forward_time": 0.02501654624938965, "step": 13882 }, { "epoch": 2.11822509765625e-05, "step": 13882, "training_step_time": 0.10811400413513184 }, { "epoch": 2.118377685546875e-05, "model_forward_time": 0.02526569366455078, "step": 13883 }, { "epoch": 2.118377685546875e-05, "step": 13883, "training_step_time": 0.1064748764038086 }, { "epoch": 2.1185302734375e-05, "model_forward_time": 0.025110960006713867, "step": 13884 }, { "epoch": 2.1185302734375e-05, "step": 13884, "training_step_time": 0.10522222518920898 }, { "epoch": 2.118682861328125e-05, "model_forward_time": 0.02486133575439453, "step": 13885 }, { "epoch": 2.118682861328125e-05, "step": 13885, "training_step_time": 0.10434484481811523 }, { "epoch": 2.11883544921875e-05, "model_forward_time": 0.025339365005493164, "step": 13886 }, { "epoch": 2.11883544921875e-05, "step": 13886, "training_step_time": 0.10593748092651367 }, { "epoch": 2.118988037109375e-05, "model_forward_time": 0.025481700897216797, "step": 13887 }, { "epoch": 2.118988037109375e-05, "step": 13887, "training_step_time": 0.10702872276306152 }, { "epoch": 2.119140625e-05, "model_forward_time": 0.025051355361938477, "step": 13888 }, { "epoch": 2.119140625e-05, "step": 13888, "training_step_time": 0.10433650016784668 }, { "epoch": 2.119293212890625e-05, "model_forward_time": 0.025042057037353516, "step": 13889 }, { "epoch": 2.119293212890625e-05, "step": 13889, "training_step_time": 0.1077113151550293 }, { "epoch": 2.11944580078125e-05, "grad_norm": 0.4025578200817108, "learning_rate": 6.017983918218812e-05, "loss": 0.024, "step": 13890 }, { "epoch": 2.11944580078125e-05, "model_forward_time": 0.02539992332458496, "step": 13890 }, { "epoch": 2.11944580078125e-05, "step": 13890, "training_step_time": 0.10498523712158203 }, { "epoch": 2.119598388671875e-05, "model_forward_time": 0.02527165412902832, "step": 13891 }, { "epoch": 2.119598388671875e-05, "step": 13891, "training_step_time": 0.10509920120239258 }, { "epoch": 2.1197509765625e-05, "model_forward_time": 0.025394201278686523, "step": 13892 }, { "epoch": 2.1197509765625e-05, "step": 13892, "training_step_time": 0.1056370735168457 }, { "epoch": 2.119903564453125e-05, "model_forward_time": 0.025632143020629883, "step": 13893 }, { "epoch": 2.119903564453125e-05, "step": 13893, "training_step_time": 0.10530781745910645 }, { "epoch": 2.12005615234375e-05, "model_forward_time": 0.025727033615112305, "step": 13894 }, { "epoch": 2.12005615234375e-05, "step": 13894, "training_step_time": 0.1074361801147461 }, { "epoch": 2.120208740234375e-05, "model_forward_time": 0.02525806427001953, "step": 13895 }, { "epoch": 2.120208740234375e-05, "step": 13895, "training_step_time": 0.1079564094543457 }, { "epoch": 2.120361328125e-05, "model_forward_time": 0.02511286735534668, "step": 13896 }, { "epoch": 2.120361328125e-05, "step": 13896, "training_step_time": 0.10541105270385742 }, { "epoch": 2.120513916015625e-05, "model_forward_time": 0.02458810806274414, "step": 13897 }, { "epoch": 2.120513916015625e-05, "step": 13897, "training_step_time": 0.10821962356567383 }, { "epoch": 2.12066650390625e-05, "model_forward_time": 0.025307893753051758, "step": 13898 }, { "epoch": 2.12066650390625e-05, "step": 13898, "training_step_time": 0.11216425895690918 }, { "epoch": 2.120819091796875e-05, "model_forward_time": 0.02530193328857422, "step": 13899 }, { "epoch": 2.120819091796875e-05, "step": 13899, "training_step_time": 0.10505509376525879 }, { "epoch": 2.1209716796875e-05, "grad_norm": 0.2524588704109192, "learning_rate": 6.012587175496961e-05, "loss": 0.0098, "step": 13900 }, { "epoch": 2.1209716796875e-05, "model_forward_time": 0.025150060653686523, "step": 13900 }, { "epoch": 2.1209716796875e-05, "step": 13900, "training_step_time": 0.10435652732849121 }, { "epoch": 2.121124267578125e-05, "model_forward_time": 0.025235414505004883, "step": 13901 }, { "epoch": 2.121124267578125e-05, "step": 13901, "training_step_time": 0.10574722290039062 }, { "epoch": 2.12127685546875e-05, "model_forward_time": 0.025246143341064453, "step": 13902 }, { "epoch": 2.12127685546875e-05, "step": 13902, "training_step_time": 0.1064596176147461 }, { "epoch": 2.121429443359375e-05, "model_forward_time": 0.025489091873168945, "step": 13903 }, { "epoch": 2.121429443359375e-05, "step": 13903, "training_step_time": 0.18894600868225098 }, { "epoch": 2.12158203125e-05, "model_forward_time": 0.024791955947875977, "step": 13904 }, { "epoch": 2.12158203125e-05, "step": 13904, "training_step_time": 0.13489246368408203 }, { "epoch": 2.121734619140625e-05, "model_forward_time": 0.02440190315246582, "step": 13905 }, { "epoch": 2.121734619140625e-05, "step": 13905, "training_step_time": 0.18694472312927246 }, { "epoch": 2.12188720703125e-05, "model_forward_time": 0.0245974063873291, "step": 13906 }, { "epoch": 2.12188720703125e-05, "step": 13906, "training_step_time": 0.15798044204711914 }, { "epoch": 2.122039794921875e-05, "model_forward_time": 0.024889469146728516, "step": 13907 }, { "epoch": 2.122039794921875e-05, "step": 13907, "training_step_time": 0.10735630989074707 }, { "epoch": 2.1221923828125e-05, "model_forward_time": 0.024691343307495117, "step": 13908 }, { "epoch": 2.1221923828125e-05, "step": 13908, "training_step_time": 0.19231963157653809 }, { "epoch": 2.122344970703125e-05, "model_forward_time": 0.024800539016723633, "step": 13909 }, { "epoch": 2.122344970703125e-05, "step": 13909, "training_step_time": 0.10147786140441895 }, { "epoch": 2.12249755859375e-05, "grad_norm": 0.21475963294506073, "learning_rate": 6.0071892023862105e-05, "loss": 0.0143, "step": 13910 }, { "epoch": 2.12249755859375e-05, "model_forward_time": 0.02467823028564453, "step": 13910 }, { "epoch": 2.12249755859375e-05, "step": 13910, "training_step_time": 0.14706754684448242 }, { "epoch": 2.122650146484375e-05, "model_forward_time": 0.02446460723876953, "step": 13911 }, { "epoch": 2.122650146484375e-05, "step": 13911, "training_step_time": 0.10947513580322266 }, { "epoch": 2.122802734375e-05, "model_forward_time": 0.02607870101928711, "step": 13912 }, { "epoch": 2.122802734375e-05, "step": 13912, "training_step_time": 0.18893694877624512 }, { "epoch": 2.122955322265625e-05, "model_forward_time": 0.024167776107788086, "step": 13913 }, { "epoch": 2.122955322265625e-05, "step": 13913, "training_step_time": 0.12911105155944824 }, { "epoch": 2.12310791015625e-05, "model_forward_time": 0.024782657623291016, "step": 13914 }, { "epoch": 2.12310791015625e-05, "step": 13914, "training_step_time": 0.10303544998168945 }, { "epoch": 2.123260498046875e-05, "model_forward_time": 0.02649068832397461, "step": 13915 }, { "epoch": 2.123260498046875e-05, "step": 13915, "training_step_time": 0.11663508415222168 }, { "epoch": 2.1234130859375e-05, "model_forward_time": 0.02526712417602539, "step": 13916 }, { "epoch": 2.1234130859375e-05, "step": 13916, "training_step_time": 0.10574054718017578 }, { "epoch": 2.123565673828125e-05, "model_forward_time": 0.02545475959777832, "step": 13917 }, { "epoch": 2.123565673828125e-05, "step": 13917, "training_step_time": 0.11175751686096191 }, { "epoch": 2.12371826171875e-05, "model_forward_time": 0.02419257164001465, "step": 13918 }, { "epoch": 2.12371826171875e-05, "step": 13918, "training_step_time": 0.18088483810424805 }, { "epoch": 2.123870849609375e-05, "model_forward_time": 0.02466106414794922, "step": 13919 }, { "epoch": 2.123870849609375e-05, "step": 13919, "training_step_time": 0.12383532524108887 }, { "epoch": 2.1240234375e-05, "grad_norm": 0.4200749397277832, "learning_rate": 6.001790005445607e-05, "loss": 0.0192, "step": 13920 }, { "epoch": 2.1240234375e-05, "model_forward_time": 0.024456024169921875, "step": 13920 }, { "epoch": 2.1240234375e-05, "step": 13920, "training_step_time": 0.11646580696105957 }, { "epoch": 2.124176025390625e-05, "model_forward_time": 0.02443838119506836, "step": 13921 }, { "epoch": 2.124176025390625e-05, "step": 13921, "training_step_time": 0.11777877807617188 }, { "epoch": 2.12432861328125e-05, "model_forward_time": 0.025475263595581055, "step": 13922 }, { "epoch": 2.12432861328125e-05, "step": 13922, "training_step_time": 0.2063736915588379 }, { "epoch": 2.124481201171875e-05, "model_forward_time": 0.024596452713012695, "step": 13923 }, { "epoch": 2.124481201171875e-05, "step": 13923, "training_step_time": 0.12872982025146484 }, { "epoch": 2.1246337890625e-05, "model_forward_time": 0.02398991584777832, "step": 13924 }, { "epoch": 2.1246337890625e-05, "step": 13924, "training_step_time": 0.10710930824279785 }, { "epoch": 2.124786376953125e-05, "model_forward_time": 0.025101900100708008, "step": 13925 }, { "epoch": 2.124786376953125e-05, "step": 13925, "training_step_time": 0.11490678787231445 }, { "epoch": 2.12493896484375e-05, "model_forward_time": 0.025785207748413086, "step": 13926 }, { "epoch": 2.12493896484375e-05, "step": 13926, "training_step_time": 0.10704445838928223 }, { "epoch": 2.125091552734375e-05, "model_forward_time": 0.025490999221801758, "step": 13927 }, { "epoch": 2.125091552734375e-05, "step": 13927, "training_step_time": 0.10575079917907715 }, { "epoch": 2.125244140625e-05, "model_forward_time": 0.025529861450195312, "step": 13928 }, { "epoch": 2.125244140625e-05, "step": 13928, "training_step_time": 0.10776472091674805 }, { "epoch": 2.125396728515625e-05, "model_forward_time": 0.02514052391052246, "step": 13929 }, { "epoch": 2.125396728515625e-05, "step": 13929, "training_step_time": 0.10558795928955078 }, { "epoch": 2.12554931640625e-05, "grad_norm": 0.4564298391342163, "learning_rate": 5.9963895912356836e-05, "loss": 0.0172, "step": 13930 }, { "epoch": 2.12554931640625e-05, "model_forward_time": 0.025671005249023438, "step": 13930 }, { "epoch": 2.12554931640625e-05, "step": 13930, "training_step_time": 0.10561132431030273 }, { "epoch": 2.125701904296875e-05, "model_forward_time": 0.025171995162963867, "step": 13931 }, { "epoch": 2.125701904296875e-05, "step": 13931, "training_step_time": 0.10582613945007324 }, { "epoch": 2.1258544921875e-05, "model_forward_time": 0.025439023971557617, "step": 13932 }, { "epoch": 2.1258544921875e-05, "step": 13932, "training_step_time": 0.10581374168395996 }, { "epoch": 2.126007080078125e-05, "model_forward_time": 0.025280237197875977, "step": 13933 }, { "epoch": 2.126007080078125e-05, "step": 13933, "training_step_time": 0.10661935806274414 }, { "epoch": 2.12615966796875e-05, "model_forward_time": 0.025499343872070312, "step": 13934 }, { "epoch": 2.12615966796875e-05, "step": 13934, "training_step_time": 0.10638904571533203 }, { "epoch": 2.126312255859375e-05, "model_forward_time": 0.025398731231689453, "step": 13935 }, { "epoch": 2.126312255859375e-05, "step": 13935, "training_step_time": 0.10975217819213867 }, { "epoch": 2.12646484375e-05, "model_forward_time": 0.025362014770507812, "step": 13936 }, { "epoch": 2.12646484375e-05, "step": 13936, "training_step_time": 0.10542774200439453 }, { "epoch": 2.126617431640625e-05, "model_forward_time": 0.025346994400024414, "step": 13937 }, { "epoch": 2.126617431640625e-05, "step": 13937, "training_step_time": 0.10587024688720703 }, { "epoch": 2.12677001953125e-05, "model_forward_time": 0.025196075439453125, "step": 13938 }, { "epoch": 2.12677001953125e-05, "step": 13938, "training_step_time": 0.10580897331237793 }, { "epoch": 2.126922607421875e-05, "model_forward_time": 0.0251615047454834, "step": 13939 }, { "epoch": 2.126922607421875e-05, "step": 13939, "training_step_time": 0.10612249374389648 }, { "epoch": 2.1270751953125e-05, "grad_norm": 0.35134264826774597, "learning_rate": 5.9909879663184544e-05, "loss": 0.016, "step": 13940 }, { "epoch": 2.1270751953125e-05, "model_forward_time": 0.025101900100708008, "step": 13940 }, { "epoch": 2.1270751953125e-05, "step": 13940, "training_step_time": 0.10547161102294922 }, { "epoch": 2.127227783203125e-05, "model_forward_time": 0.025228261947631836, "step": 13941 }, { "epoch": 2.127227783203125e-05, "step": 13941, "training_step_time": 0.10650801658630371 }, { "epoch": 2.12738037109375e-05, "model_forward_time": 0.025037050247192383, "step": 13942 }, { "epoch": 2.12738037109375e-05, "step": 13942, "training_step_time": 0.10799837112426758 }, { "epoch": 2.127532958984375e-05, "model_forward_time": 0.02582526206970215, "step": 13943 }, { "epoch": 2.127532958984375e-05, "step": 13943, "training_step_time": 0.10747790336608887 }, { "epoch": 2.127685546875e-05, "model_forward_time": 0.02507638931274414, "step": 13944 }, { "epoch": 2.127685546875e-05, "step": 13944, "training_step_time": 0.10650944709777832 }, { "epoch": 2.127838134765625e-05, "model_forward_time": 0.02550339698791504, "step": 13945 }, { "epoch": 2.127838134765625e-05, "step": 13945, "training_step_time": 0.10601329803466797 }, { "epoch": 2.12799072265625e-05, "model_forward_time": 0.024989843368530273, "step": 13946 }, { "epoch": 2.12799072265625e-05, "step": 13946, "training_step_time": 0.10802578926086426 }, { "epoch": 2.128143310546875e-05, "model_forward_time": 0.02522563934326172, "step": 13947 }, { "epoch": 2.128143310546875e-05, "step": 13947, "training_step_time": 0.10716748237609863 }, { "epoch": 2.1282958984375e-05, "model_forward_time": 0.025110721588134766, "step": 13948 }, { "epoch": 2.1282958984375e-05, "step": 13948, "training_step_time": 0.19045114517211914 }, { "epoch": 2.128448486328125e-05, "model_forward_time": 0.024541616439819336, "step": 13949 }, { "epoch": 2.128448486328125e-05, "step": 13949, "training_step_time": 0.155198335647583 }, { "epoch": 2.12860107421875e-05, "grad_norm": 0.2921213209629059, "learning_rate": 5.985585137257401e-05, "loss": 0.0146, "step": 13950 }, { "epoch": 2.12860107421875e-05, "model_forward_time": 0.02502727508544922, "step": 13950 }, { "epoch": 2.12860107421875e-05, "step": 13950, "training_step_time": 0.15549445152282715 }, { "epoch": 2.128753662109375e-05, "model_forward_time": 0.024546384811401367, "step": 13951 }, { "epoch": 2.128753662109375e-05, "step": 13951, "training_step_time": 0.18978333473205566 }, { "epoch": 2.12890625e-05, "model_forward_time": 0.024152517318725586, "step": 13952 }, { "epoch": 2.12890625e-05, "step": 13952, "training_step_time": 0.18538928031921387 }, { "epoch": 2.129058837890625e-05, "model_forward_time": 0.02405261993408203, "step": 13953 }, { "epoch": 2.129058837890625e-05, "step": 13953, "training_step_time": 0.19746994972229004 }, { "epoch": 2.12921142578125e-05, "model_forward_time": 0.02359628677368164, "step": 13954 }, { "epoch": 2.12921142578125e-05, "step": 13954, "training_step_time": 0.19241929054260254 }, { "epoch": 2.129364013671875e-05, "model_forward_time": 0.025365114212036133, "step": 13955 }, { "epoch": 2.129364013671875e-05, "step": 13955, "training_step_time": 0.22680449485778809 }, { "epoch": 2.1295166015625e-05, "model_forward_time": 0.02462911605834961, "step": 13956 }, { "epoch": 2.1295166015625e-05, "step": 13956, "training_step_time": 0.1784207820892334 }, { "epoch": 2.129669189453125e-05, "model_forward_time": 0.024585723876953125, "step": 13957 }, { "epoch": 2.129669189453125e-05, "step": 13957, "training_step_time": 0.22498822212219238 }, { "epoch": 2.12982177734375e-05, "model_forward_time": 0.02415633201599121, "step": 13958 }, { "epoch": 2.12982177734375e-05, "step": 13958, "training_step_time": 0.20644831657409668 }, { "epoch": 2.129974365234375e-05, "model_forward_time": 0.024311542510986328, "step": 13959 }, { "epoch": 2.129974365234375e-05, "step": 13959, "training_step_time": 0.12637972831726074 }, { "epoch": 2.130126953125e-05, "grad_norm": 0.35321858525276184, "learning_rate": 5.980181110617473e-05, "loss": 0.0182, "step": 13960 }, { "epoch": 2.130126953125e-05, "model_forward_time": 0.02434086799621582, "step": 13960 }, { "epoch": 2.130126953125e-05, "step": 13960, "training_step_time": 0.10785126686096191 }, { "epoch": 2.130279541015625e-05, "model_forward_time": 0.02537703514099121, "step": 13961 }, { "epoch": 2.130279541015625e-05, "step": 13961, "training_step_time": 0.10762739181518555 }, { "epoch": 2.13043212890625e-05, "model_forward_time": 0.02480483055114746, "step": 13962 }, { "epoch": 2.13043212890625e-05, "step": 13962, "training_step_time": 0.10862135887145996 }, { "epoch": 2.130584716796875e-05, "model_forward_time": 0.024865150451660156, "step": 13963 }, { "epoch": 2.130584716796875e-05, "step": 13963, "training_step_time": 0.1378934383392334 }, { "epoch": 2.1307373046875e-05, "model_forward_time": 0.025484561920166016, "step": 13964 }, { "epoch": 2.1307373046875e-05, "step": 13964, "training_step_time": 0.11118578910827637 }, { "epoch": 2.130889892578125e-05, "model_forward_time": 0.025599002838134766, "step": 13965 }, { "epoch": 2.130889892578125e-05, "step": 13965, "training_step_time": 0.18898534774780273 }, { "epoch": 2.13104248046875e-05, "model_forward_time": 0.024372100830078125, "step": 13966 }, { "epoch": 2.13104248046875e-05, "step": 13966, "training_step_time": 0.15226292610168457 }, { "epoch": 2.131195068359375e-05, "model_forward_time": 0.02411961555480957, "step": 13967 }, { "epoch": 2.131195068359375e-05, "step": 13967, "training_step_time": 0.1278977394104004 }, { "epoch": 2.13134765625e-05, "model_forward_time": 0.024448871612548828, "step": 13968 }, { "epoch": 2.13134765625e-05, "step": 13968, "training_step_time": 0.1273479461669922 }, { "epoch": 2.131500244140625e-05, "model_forward_time": 0.025207996368408203, "step": 13969 }, { "epoch": 2.131500244140625e-05, "step": 13969, "training_step_time": 0.12486648559570312 }, { "epoch": 2.13165283203125e-05, "grad_norm": 0.3368975520133972, "learning_rate": 5.974775892965071e-05, "loss": 0.0408, "step": 13970 }, { "epoch": 2.13165283203125e-05, "model_forward_time": 0.024851560592651367, "step": 13970 }, { "epoch": 2.13165283203125e-05, "step": 13970, "training_step_time": 0.11732292175292969 }, { "epoch": 2.131805419921875e-05, "model_forward_time": 0.025589466094970703, "step": 13971 }, { "epoch": 2.131805419921875e-05, "step": 13971, "training_step_time": 0.11449480056762695 }, { "epoch": 2.1319580078125e-05, "model_forward_time": 0.025035381317138672, "step": 13972 }, { "epoch": 2.1319580078125e-05, "step": 13972, "training_step_time": 0.11477947235107422 }, { "epoch": 2.132110595703125e-05, "model_forward_time": 0.025274276733398438, "step": 13973 }, { "epoch": 2.132110595703125e-05, "step": 13973, "training_step_time": 0.11434817314147949 }, { "epoch": 2.13226318359375e-05, "model_forward_time": 0.025328874588012695, "step": 13974 }, { "epoch": 2.13226318359375e-05, "step": 13974, "training_step_time": 0.10937976837158203 }, { "epoch": 2.132415771484375e-05, "model_forward_time": 0.025514602661132812, "step": 13975 }, { "epoch": 2.132415771484375e-05, "step": 13975, "training_step_time": 0.11252498626708984 }, { "epoch": 2.132568359375e-05, "model_forward_time": 0.025487661361694336, "step": 13976 }, { "epoch": 2.132568359375e-05, "step": 13976, "training_step_time": 0.10770106315612793 }, { "epoch": 2.132720947265625e-05, "model_forward_time": 0.025414705276489258, "step": 13977 }, { "epoch": 2.132720947265625e-05, "step": 13977, "training_step_time": 0.10860419273376465 }, { "epoch": 2.13287353515625e-05, "model_forward_time": 0.025402545928955078, "step": 13978 }, { "epoch": 2.13287353515625e-05, "step": 13978, "training_step_time": 0.10608220100402832 }, { "epoch": 2.133026123046875e-05, "model_forward_time": 0.025238752365112305, "step": 13979 }, { "epoch": 2.133026123046875e-05, "step": 13979, "training_step_time": 0.10479283332824707 }, { "epoch": 2.1331787109375e-05, "grad_norm": 0.3981991112232208, "learning_rate": 5.969369490868042e-05, "loss": 0.0221, "step": 13980 }, { "epoch": 2.1331787109375e-05, "model_forward_time": 0.025357961654663086, "step": 13980 }, { "epoch": 2.1331787109375e-05, "step": 13980, "training_step_time": 0.10549592971801758 }, { "epoch": 2.133331298828125e-05, "model_forward_time": 0.025704622268676758, "step": 13981 }, { "epoch": 2.133331298828125e-05, "step": 13981, "training_step_time": 0.10637784004211426 }, { "epoch": 2.13348388671875e-05, "model_forward_time": 0.025435924530029297, "step": 13982 }, { "epoch": 2.13348388671875e-05, "step": 13982, "training_step_time": 0.10661768913269043 }, { "epoch": 2.133636474609375e-05, "model_forward_time": 0.025319814682006836, "step": 13983 }, { "epoch": 2.133636474609375e-05, "step": 13983, "training_step_time": 0.10835480690002441 }, { "epoch": 2.1337890625e-05, "model_forward_time": 0.025866985321044922, "step": 13984 }, { "epoch": 2.1337890625e-05, "step": 13984, "training_step_time": 0.10635924339294434 }, { "epoch": 2.133941650390625e-05, "model_forward_time": 0.025388479232788086, "step": 13985 }, { "epoch": 2.133941650390625e-05, "step": 13985, "training_step_time": 0.10951805114746094 }, { "epoch": 2.13409423828125e-05, "model_forward_time": 0.024925708770751953, "step": 13986 }, { "epoch": 2.13409423828125e-05, "step": 13986, "training_step_time": 0.10550117492675781 }, { "epoch": 2.134246826171875e-05, "model_forward_time": 0.025908231735229492, "step": 13987 }, { "epoch": 2.134246826171875e-05, "step": 13987, "training_step_time": 0.10879755020141602 }, { "epoch": 2.1343994140625e-05, "model_forward_time": 0.025860309600830078, "step": 13988 }, { "epoch": 2.1343994140625e-05, "step": 13988, "training_step_time": 0.10840415954589844 }, { "epoch": 2.134552001953125e-05, "model_forward_time": 0.02550220489501953, "step": 13989 }, { "epoch": 2.134552001953125e-05, "step": 13989, "training_step_time": 0.11141848564147949 }, { "epoch": 2.13470458984375e-05, "grad_norm": 0.1812504678964615, "learning_rate": 5.963961910895676e-05, "loss": 0.0205, "step": 13990 }, { "epoch": 2.13470458984375e-05, "model_forward_time": 0.025580406188964844, "step": 13990 }, { "epoch": 2.13470458984375e-05, "step": 13990, "training_step_time": 0.10611224174499512 }, { "epoch": 2.134857177734375e-05, "model_forward_time": 0.025664806365966797, "step": 13991 }, { "epoch": 2.134857177734375e-05, "step": 13991, "training_step_time": 0.11786174774169922 }, { "epoch": 2.135009765625e-05, "model_forward_time": 0.025136232376098633, "step": 13992 }, { "epoch": 2.135009765625e-05, "step": 13992, "training_step_time": 0.15346431732177734 }, { "epoch": 2.135162353515625e-05, "model_forward_time": 0.024534225463867188, "step": 13993 }, { "epoch": 2.135162353515625e-05, "step": 13993, "training_step_time": 0.11351656913757324 }, { "epoch": 2.13531494140625e-05, "model_forward_time": 0.02462315559387207, "step": 13994 }, { "epoch": 2.13531494140625e-05, "step": 13994, "training_step_time": 0.1067650318145752 }, { "epoch": 2.135467529296875e-05, "model_forward_time": 0.025611400604248047, "step": 13995 }, { "epoch": 2.135467529296875e-05, "step": 13995, "training_step_time": 0.1327519416809082 }, { "epoch": 2.1356201171875e-05, "model_forward_time": 0.02557206153869629, "step": 13996 }, { "epoch": 2.1356201171875e-05, "step": 13996, "training_step_time": 0.16394448280334473 }, { "epoch": 2.135772705078125e-05, "model_forward_time": 0.0248870849609375, "step": 13997 }, { "epoch": 2.135772705078125e-05, "step": 13997, "training_step_time": 0.11015200614929199 }, { "epoch": 2.13592529296875e-05, "model_forward_time": 0.025127887725830078, "step": 13998 }, { "epoch": 2.13592529296875e-05, "step": 13998, "training_step_time": 0.1094968318939209 }, { "epoch": 2.136077880859375e-05, "model_forward_time": 0.026792526245117188, "step": 13999 }, { "epoch": 2.136077880859375e-05, "step": 13999, "training_step_time": 0.10842680931091309 }, { "epoch": 2.13623046875e-05, "grad_norm": 0.32337090373039246, "learning_rate": 5.958553159618693e-05, "loss": 0.0171, "step": 14000 }, { "epoch": 2.13623046875e-05, "model_forward_time": 0.026173830032348633, "step": 14000 }, { "epoch": 2.13623046875e-05, "step": 14000, "training_step_time": 0.09994864463806152 }, { "epoch": 2.136383056640625e-05, "model_forward_time": 0.023351430892944336, "step": 14001 }, { "epoch": 2.136383056640625e-05, "step": 14001, "training_step_time": 0.16586518287658691 }, { "epoch": 2.13653564453125e-05, "model_forward_time": 0.024451732635498047, "step": 14002 }, { "epoch": 2.13653564453125e-05, "step": 14002, "training_step_time": 0.12363719940185547 }, { "epoch": 2.136688232421875e-05, "model_forward_time": 0.024913311004638672, "step": 14003 }, { "epoch": 2.136688232421875e-05, "step": 14003, "training_step_time": 0.16837596893310547 }, { "epoch": 2.1368408203125e-05, "model_forward_time": 0.024346113204956055, "step": 14004 }, { "epoch": 2.1368408203125e-05, "step": 14004, "training_step_time": 0.1948544979095459 }, { "epoch": 2.136993408203125e-05, "model_forward_time": 0.024114608764648438, "step": 14005 }, { "epoch": 2.136993408203125e-05, "step": 14005, "training_step_time": 0.10498929023742676 }, { "epoch": 2.13714599609375e-05, "model_forward_time": 0.02505660057067871, "step": 14006 }, { "epoch": 2.13714599609375e-05, "step": 14006, "training_step_time": 0.11477088928222656 }, { "epoch": 2.137298583984375e-05, "model_forward_time": 0.02535557746887207, "step": 14007 }, { "epoch": 2.137298583984375e-05, "step": 14007, "training_step_time": 0.10522031784057617 }, { "epoch": 2.137451171875e-05, "model_forward_time": 0.024880170822143555, "step": 14008 }, { "epoch": 2.137451171875e-05, "step": 14008, "training_step_time": 0.13418245315551758 }, { "epoch": 2.137603759765625e-05, "model_forward_time": 0.02491307258605957, "step": 14009 }, { "epoch": 2.137603759765625e-05, "step": 14009, "training_step_time": 0.11905550956726074 }, { "epoch": 2.13775634765625e-05, "grad_norm": 0.3173942565917969, "learning_rate": 5.953143243609235e-05, "loss": 0.0143, "step": 14010 }, { "epoch": 2.13775634765625e-05, "model_forward_time": 0.024995803833007812, "step": 14010 }, { "epoch": 2.13775634765625e-05, "step": 14010, "training_step_time": 0.11174392700195312 }, { "epoch": 2.137908935546875e-05, "model_forward_time": 0.025606155395507812, "step": 14011 }, { "epoch": 2.137908935546875e-05, "step": 14011, "training_step_time": 0.10557889938354492 }, { "epoch": 2.1380615234375e-05, "model_forward_time": 0.02539968490600586, "step": 14012 }, { "epoch": 2.1380615234375e-05, "step": 14012, "training_step_time": 0.10534000396728516 }, { "epoch": 2.138214111328125e-05, "model_forward_time": 0.025237321853637695, "step": 14013 }, { "epoch": 2.138214111328125e-05, "step": 14013, "training_step_time": 0.11093735694885254 }, { "epoch": 2.13836669921875e-05, "model_forward_time": 0.02521204948425293, "step": 14014 }, { "epoch": 2.13836669921875e-05, "step": 14014, "training_step_time": 0.11744427680969238 }, { "epoch": 2.138519287109375e-05, "model_forward_time": 0.02526998519897461, "step": 14015 }, { "epoch": 2.138519287109375e-05, "step": 14015, "training_step_time": 0.10761380195617676 }, { "epoch": 2.138671875e-05, "model_forward_time": 0.025046110153198242, "step": 14016 }, { "epoch": 2.138671875e-05, "step": 14016, "training_step_time": 0.128798246383667 }, { "epoch": 2.138824462890625e-05, "model_forward_time": 0.024949312210083008, "step": 14017 }, { "epoch": 2.138824462890625e-05, "step": 14017, "training_step_time": 0.12284350395202637 }, { "epoch": 2.13897705078125e-05, "model_forward_time": 0.025310993194580078, "step": 14018 }, { "epoch": 2.13897705078125e-05, "step": 14018, "training_step_time": 0.13483190536499023 }, { "epoch": 2.139129638671875e-05, "model_forward_time": 0.02504277229309082, "step": 14019 }, { "epoch": 2.139129638671875e-05, "step": 14019, "training_step_time": 0.10747265815734863 }, { "epoch": 2.1392822265625e-05, "grad_norm": 0.566034197807312, "learning_rate": 5.9477321694408606e-05, "loss": 0.0253, "step": 14020 }, { "epoch": 2.1392822265625e-05, "model_forward_time": 0.025091171264648438, "step": 14020 }, { "epoch": 2.1392822265625e-05, "step": 14020, "training_step_time": 0.1680295467376709 }, { "epoch": 2.139434814453125e-05, "model_forward_time": 0.024509191513061523, "step": 14021 }, { "epoch": 2.139434814453125e-05, "step": 14021, "training_step_time": 0.13176274299621582 }, { "epoch": 2.13958740234375e-05, "model_forward_time": 0.024382829666137695, "step": 14022 }, { "epoch": 2.13958740234375e-05, "step": 14022, "training_step_time": 0.11061596870422363 }, { "epoch": 2.139739990234375e-05, "model_forward_time": 0.0254058837890625, "step": 14023 }, { "epoch": 2.139739990234375e-05, "step": 14023, "training_step_time": 0.10433769226074219 }, { "epoch": 2.139892578125e-05, "model_forward_time": 0.025636911392211914, "step": 14024 }, { "epoch": 2.139892578125e-05, "step": 14024, "training_step_time": 0.10586047172546387 }, { "epoch": 2.140045166015625e-05, "model_forward_time": 0.025478363037109375, "step": 14025 }, { "epoch": 2.140045166015625e-05, "step": 14025, "training_step_time": 0.10463809967041016 }, { "epoch": 2.14019775390625e-05, "model_forward_time": 0.025357961654663086, "step": 14026 }, { "epoch": 2.14019775390625e-05, "step": 14026, "training_step_time": 0.10375833511352539 }, { "epoch": 2.140350341796875e-05, "model_forward_time": 0.02506399154663086, "step": 14027 }, { "epoch": 2.140350341796875e-05, "step": 14027, "training_step_time": 0.10563278198242188 }, { "epoch": 2.1405029296875e-05, "model_forward_time": 0.025147676467895508, "step": 14028 }, { "epoch": 2.1405029296875e-05, "step": 14028, "training_step_time": 0.1040036678314209 }, { "epoch": 2.140655517578125e-05, "model_forward_time": 0.0249788761138916, "step": 14029 }, { "epoch": 2.140655517578125e-05, "step": 14029, "training_step_time": 0.10353374481201172 }, { "epoch": 2.14080810546875e-05, "grad_norm": 0.4070761501789093, "learning_rate": 5.9423199436885345e-05, "loss": 0.0216, "step": 14030 }, { "epoch": 2.14080810546875e-05, "model_forward_time": 0.024532556533813477, "step": 14030 }, { "epoch": 2.14080810546875e-05, "step": 14030, "training_step_time": 0.10291314125061035 }, { "epoch": 2.140960693359375e-05, "model_forward_time": 0.02493429183959961, "step": 14031 }, { "epoch": 2.140960693359375e-05, "step": 14031, "training_step_time": 0.10916590690612793 }, { "epoch": 2.14111328125e-05, "model_forward_time": 0.025258302688598633, "step": 14032 }, { "epoch": 2.14111328125e-05, "step": 14032, "training_step_time": 0.10734677314758301 }, { "epoch": 2.141265869140625e-05, "model_forward_time": 0.024966001510620117, "step": 14033 }, { "epoch": 2.141265869140625e-05, "step": 14033, "training_step_time": 0.10363364219665527 }, { "epoch": 2.14141845703125e-05, "model_forward_time": 0.02516961097717285, "step": 14034 }, { "epoch": 2.14141845703125e-05, "step": 14034, "training_step_time": 0.10454416275024414 }, { "epoch": 2.141571044921875e-05, "model_forward_time": 0.025236129760742188, "step": 14035 }, { "epoch": 2.141571044921875e-05, "step": 14035, "training_step_time": 0.10728049278259277 }, { "epoch": 2.1417236328125e-05, "model_forward_time": 0.024898290634155273, "step": 14036 }, { "epoch": 2.1417236328125e-05, "step": 14036, "training_step_time": 0.10718703269958496 }, { "epoch": 2.141876220703125e-05, "model_forward_time": 0.02476644515991211, "step": 14037 }, { "epoch": 2.141876220703125e-05, "step": 14037, "training_step_time": 0.10628485679626465 }, { "epoch": 2.14202880859375e-05, "model_forward_time": 0.025035619735717773, "step": 14038 }, { "epoch": 2.14202880859375e-05, "step": 14038, "training_step_time": 0.10842156410217285 }, { "epoch": 2.142181396484375e-05, "model_forward_time": 0.025457143783569336, "step": 14039 }, { "epoch": 2.142181396484375e-05, "step": 14039, "training_step_time": 0.10665583610534668 }, { "epoch": 2.142333984375e-05, "grad_norm": 0.5166757702827454, "learning_rate": 5.9369065729286245e-05, "loss": 0.0149, "step": 14040 }, { "epoch": 2.142333984375e-05, "model_forward_time": 0.025465726852416992, "step": 14040 }, { "epoch": 2.142333984375e-05, "step": 14040, "training_step_time": 0.10839462280273438 }, { "epoch": 2.142486572265625e-05, "model_forward_time": 0.02562737464904785, "step": 14041 }, { "epoch": 2.142486572265625e-05, "step": 14041, "training_step_time": 0.10825490951538086 }, { "epoch": 2.14263916015625e-05, "model_forward_time": 0.0253446102142334, "step": 14042 }, { "epoch": 2.14263916015625e-05, "step": 14042, "training_step_time": 0.10557222366333008 }, { "epoch": 2.142791748046875e-05, "model_forward_time": 0.025227069854736328, "step": 14043 }, { "epoch": 2.142791748046875e-05, "step": 14043, "training_step_time": 0.10369324684143066 }, { "epoch": 2.1429443359375e-05, "model_forward_time": 0.02515697479248047, "step": 14044 }, { "epoch": 2.1429443359375e-05, "step": 14044, "training_step_time": 0.10808372497558594 }, { "epoch": 2.143096923828125e-05, "model_forward_time": 0.025079011917114258, "step": 14045 }, { "epoch": 2.143096923828125e-05, "step": 14045, "training_step_time": 0.10954904556274414 }, { "epoch": 2.14324951171875e-05, "model_forward_time": 0.025165796279907227, "step": 14046 }, { "epoch": 2.14324951171875e-05, "step": 14046, "training_step_time": 0.11055994033813477 }, { "epoch": 2.143402099609375e-05, "model_forward_time": 0.026336193084716797, "step": 14047 }, { "epoch": 2.143402099609375e-05, "step": 14047, "training_step_time": 0.15317058563232422 }, { "epoch": 2.1435546875e-05, "model_forward_time": 0.024962425231933594, "step": 14048 }, { "epoch": 2.1435546875e-05, "step": 14048, "training_step_time": 0.14794301986694336 }, { "epoch": 2.143707275390625e-05, "model_forward_time": 0.024369239807128906, "step": 14049 }, { "epoch": 2.143707275390625e-05, "step": 14049, "training_step_time": 0.1387336254119873 }, { "epoch": 2.14385986328125e-05, "grad_norm": 0.39149346947669983, "learning_rate": 5.9314920637388815e-05, "loss": 0.0145, "step": 14050 }, { "epoch": 2.14385986328125e-05, "model_forward_time": 0.024322986602783203, "step": 14050 }, { "epoch": 2.14385986328125e-05, "step": 14050, "training_step_time": 0.19942164421081543 }, { "epoch": 2.144012451171875e-05, "model_forward_time": 0.024318456649780273, "step": 14051 }, { "epoch": 2.144012451171875e-05, "step": 14051, "training_step_time": 0.15562963485717773 }, { "epoch": 2.1441650390625e-05, "model_forward_time": 0.0246737003326416, "step": 14052 }, { "epoch": 2.1441650390625e-05, "step": 14052, "training_step_time": 0.1476426124572754 }, { "epoch": 2.144317626953125e-05, "model_forward_time": 0.024805307388305664, "step": 14053 }, { "epoch": 2.144317626953125e-05, "step": 14053, "training_step_time": 0.19887042045593262 }, { "epoch": 2.14447021484375e-05, "model_forward_time": 0.024436235427856445, "step": 14054 }, { "epoch": 2.14447021484375e-05, "step": 14054, "training_step_time": 0.10278058052062988 }, { "epoch": 2.144622802734375e-05, "model_forward_time": 0.024085283279418945, "step": 14055 }, { "epoch": 2.144622802734375e-05, "step": 14055, "training_step_time": 0.14394831657409668 }, { "epoch": 2.144775390625e-05, "model_forward_time": 0.024950265884399414, "step": 14056 }, { "epoch": 2.144775390625e-05, "step": 14056, "training_step_time": 0.10691189765930176 }, { "epoch": 2.144927978515625e-05, "model_forward_time": 0.02534198760986328, "step": 14057 }, { "epoch": 2.144927978515625e-05, "step": 14057, "training_step_time": 0.20055532455444336 }, { "epoch": 2.14508056640625e-05, "model_forward_time": 0.024526357650756836, "step": 14058 }, { "epoch": 2.14508056640625e-05, "step": 14058, "training_step_time": 0.14471793174743652 }, { "epoch": 2.145233154296875e-05, "model_forward_time": 0.02510547637939453, "step": 14059 }, { "epoch": 2.145233154296875e-05, "step": 14059, "training_step_time": 0.10533833503723145 }, { "epoch": 2.1453857421875e-05, "grad_norm": 0.41174158453941345, "learning_rate": 5.9260764226984476e-05, "loss": 0.0208, "step": 14060 }, { "epoch": 2.1453857421875e-05, "model_forward_time": 0.025342702865600586, "step": 14060 }, { "epoch": 2.1453857421875e-05, "step": 14060, "training_step_time": 0.1174924373626709 }, { "epoch": 2.145538330078125e-05, "model_forward_time": 0.024910449981689453, "step": 14061 }, { "epoch": 2.145538330078125e-05, "step": 14061, "training_step_time": 0.2035226821899414 }, { "epoch": 2.14569091796875e-05, "model_forward_time": 0.024322509765625, "step": 14062 }, { "epoch": 2.14569091796875e-05, "step": 14062, "training_step_time": 0.1265122890472412 }, { "epoch": 2.145843505859375e-05, "model_forward_time": 0.024437904357910156, "step": 14063 }, { "epoch": 2.145843505859375e-05, "step": 14063, "training_step_time": 0.10659003257751465 }, { "epoch": 2.14599609375e-05, "model_forward_time": 0.02566218376159668, "step": 14064 }, { "epoch": 2.14599609375e-05, "step": 14064, "training_step_time": 0.11966443061828613 }, { "epoch": 2.146148681640625e-05, "model_forward_time": 0.02475261688232422, "step": 14065 }, { "epoch": 2.146148681640625e-05, "step": 14065, "training_step_time": 0.1100001335144043 }, { "epoch": 2.14630126953125e-05, "model_forward_time": 0.025791645050048828, "step": 14066 }, { "epoch": 2.14630126953125e-05, "step": 14066, "training_step_time": 0.11473727226257324 }, { "epoch": 2.146453857421875e-05, "model_forward_time": 0.025249242782592773, "step": 14067 }, { "epoch": 2.146453857421875e-05, "step": 14067, "training_step_time": 0.13289499282836914 }, { "epoch": 2.1466064453125e-05, "model_forward_time": 0.025318145751953125, "step": 14068 }, { "epoch": 2.1466064453125e-05, "step": 14068, "training_step_time": 0.11708378791809082 }, { "epoch": 2.146759033203125e-05, "model_forward_time": 0.02529764175415039, "step": 14069 }, { "epoch": 2.146759033203125e-05, "step": 14069, "training_step_time": 0.10348391532897949 }, { "epoch": 2.14691162109375e-05, "grad_norm": 0.40277501940727234, "learning_rate": 5.9206596563878357e-05, "loss": 0.0169, "step": 14070 }, { "epoch": 2.14691162109375e-05, "model_forward_time": 0.02511310577392578, "step": 14070 }, { "epoch": 2.14691162109375e-05, "step": 14070, "training_step_time": 0.10499048233032227 }, { "epoch": 2.147064208984375e-05, "model_forward_time": 0.025663375854492188, "step": 14071 }, { "epoch": 2.147064208984375e-05, "step": 14071, "training_step_time": 0.10557961463928223 }, { "epoch": 2.147216796875e-05, "model_forward_time": 0.025542259216308594, "step": 14072 }, { "epoch": 2.147216796875e-05, "step": 14072, "training_step_time": 0.10620927810668945 }, { "epoch": 2.147369384765625e-05, "model_forward_time": 0.025388002395629883, "step": 14073 }, { "epoch": 2.147369384765625e-05, "step": 14073, "training_step_time": 0.10884952545166016 }, { "epoch": 2.14752197265625e-05, "model_forward_time": 0.025684833526611328, "step": 14074 }, { "epoch": 2.14752197265625e-05, "step": 14074, "training_step_time": 0.11088728904724121 }, { "epoch": 2.147674560546875e-05, "model_forward_time": 0.025590181350708008, "step": 14075 }, { "epoch": 2.147674560546875e-05, "step": 14075, "training_step_time": 0.11618828773498535 }, { "epoch": 2.1478271484375e-05, "model_forward_time": 0.025785446166992188, "step": 14076 }, { "epoch": 2.1478271484375e-05, "step": 14076, "training_step_time": 0.11698675155639648 }, { "epoch": 2.147979736328125e-05, "model_forward_time": 0.025569438934326172, "step": 14077 }, { "epoch": 2.147979736328125e-05, "step": 14077, "training_step_time": 0.11405444145202637 }, { "epoch": 2.14813232421875e-05, "model_forward_time": 0.025281429290771484, "step": 14078 }, { "epoch": 2.14813232421875e-05, "step": 14078, "training_step_time": 0.11471676826477051 }, { "epoch": 2.148284912109375e-05, "model_forward_time": 0.025444746017456055, "step": 14079 }, { "epoch": 2.148284912109375e-05, "step": 14079, "training_step_time": 0.11733126640319824 }, { "epoch": 2.1484375e-05, "grad_norm": 0.17169028520584106, "learning_rate": 5.915241771388931e-05, "loss": 0.017, "step": 14080 }, { "epoch": 2.1484375e-05, "model_forward_time": 0.025615692138671875, "step": 14080 }, { "epoch": 2.1484375e-05, "step": 14080, "training_step_time": 0.11424565315246582 }, { "epoch": 2.148590087890625e-05, "model_forward_time": 0.025841236114501953, "step": 14081 }, { "epoch": 2.148590087890625e-05, "step": 14081, "training_step_time": 0.11140823364257812 }, { "epoch": 2.14874267578125e-05, "model_forward_time": 0.027458667755126953, "step": 14082 }, { "epoch": 2.14874267578125e-05, "step": 14082, "training_step_time": 0.10958194732666016 }, { "epoch": 2.148895263671875e-05, "model_forward_time": 0.026074886322021484, "step": 14083 }, { "epoch": 2.148895263671875e-05, "step": 14083, "training_step_time": 0.11341619491577148 }, { "epoch": 2.1490478515625e-05, "model_forward_time": 0.025475502014160156, "step": 14084 }, { "epoch": 2.1490478515625e-05, "step": 14084, "training_step_time": 0.10708928108215332 }, { "epoch": 2.149200439453125e-05, "model_forward_time": 0.024705171585083008, "step": 14085 }, { "epoch": 2.149200439453125e-05, "step": 14085, "training_step_time": 0.10597777366638184 }, { "epoch": 2.14935302734375e-05, "model_forward_time": 0.025627851486206055, "step": 14086 }, { "epoch": 2.14935302734375e-05, "step": 14086, "training_step_time": 0.10751771926879883 }, { "epoch": 2.149505615234375e-05, "model_forward_time": 0.025578975677490234, "step": 14087 }, { "epoch": 2.149505615234375e-05, "step": 14087, "training_step_time": 0.1107940673828125 }, { "epoch": 2.149658203125e-05, "model_forward_time": 0.025463104248046875, "step": 14088 }, { "epoch": 2.149658203125e-05, "step": 14088, "training_step_time": 0.10952115058898926 }, { "epoch": 2.149810791015625e-05, "model_forward_time": 0.025266647338867188, "step": 14089 }, { "epoch": 2.149810791015625e-05, "step": 14089, "training_step_time": 0.10568642616271973 }, { "epoch": 2.14996337890625e-05, "grad_norm": 0.18737411499023438, "learning_rate": 5.909822774284971e-05, "loss": 0.0114, "step": 14090 }, { "epoch": 2.14996337890625e-05, "model_forward_time": 0.02570343017578125, "step": 14090 }, { "epoch": 2.14996337890625e-05, "step": 14090, "training_step_time": 0.10887432098388672 }, { "epoch": 2.150115966796875e-05, "model_forward_time": 0.025948286056518555, "step": 14091 }, { "epoch": 2.150115966796875e-05, "step": 14091, "training_step_time": 0.18009281158447266 }, { "epoch": 2.1502685546875e-05, "model_forward_time": 0.024937868118286133, "step": 14092 }, { "epoch": 2.1502685546875e-05, "step": 14092, "training_step_time": 0.1372060775756836 }, { "epoch": 2.150421142578125e-05, "model_forward_time": 0.02493143081665039, "step": 14093 }, { "epoch": 2.150421142578125e-05, "step": 14093, "training_step_time": 0.10674643516540527 }, { "epoch": 2.15057373046875e-05, "model_forward_time": 0.02538323402404785, "step": 14094 }, { "epoch": 2.15057373046875e-05, "step": 14094, "training_step_time": 0.11157369613647461 }, { "epoch": 2.150726318359375e-05, "model_forward_time": 0.02501702308654785, "step": 14095 }, { "epoch": 2.150726318359375e-05, "step": 14095, "training_step_time": 0.1174626350402832 }, { "epoch": 2.15087890625e-05, "model_forward_time": 0.025490283966064453, "step": 14096 }, { "epoch": 2.15087890625e-05, "step": 14096, "training_step_time": 0.22123122215270996 }, { "epoch": 2.151031494140625e-05, "model_forward_time": 0.026398897171020508, "step": 14097 }, { "epoch": 2.151031494140625e-05, "step": 14097, "training_step_time": 0.1315312385559082 }, { "epoch": 2.15118408203125e-05, "model_forward_time": 0.02446150779724121, "step": 14098 }, { "epoch": 2.15118408203125e-05, "step": 14098, "training_step_time": 0.11582279205322266 }, { "epoch": 2.151336669921875e-05, "model_forward_time": 0.025528430938720703, "step": 14099 }, { "epoch": 2.151336669921875e-05, "step": 14099, "training_step_time": 0.11322712898254395 }, { "epoch": 2.1514892578125e-05, "grad_norm": 0.16362541913986206, "learning_rate": 5.90440267166055e-05, "loss": 0.0182, "step": 14100 }, { "epoch": 2.1514892578125e-05, "model_forward_time": 0.025630950927734375, "step": 14100 }, { "epoch": 2.1514892578125e-05, "step": 14100, "training_step_time": 0.10351681709289551 }, { "epoch": 2.151641845703125e-05, "model_forward_time": 0.024866104125976562, "step": 14101 }, { "epoch": 2.151641845703125e-05, "step": 14101, "training_step_time": 0.11760997772216797 }, { "epoch": 2.15179443359375e-05, "model_forward_time": 0.024771928787231445, "step": 14102 }, { "epoch": 2.15179443359375e-05, "step": 14102, "training_step_time": 0.11686849594116211 }, { "epoch": 2.151947021484375e-05, "model_forward_time": 0.025556564331054688, "step": 14103 }, { "epoch": 2.151947021484375e-05, "step": 14103, "training_step_time": 0.11809396743774414 }, { "epoch": 2.152099609375e-05, "model_forward_time": 0.02827143669128418, "step": 14104 }, { "epoch": 2.152099609375e-05, "step": 14104, "training_step_time": 0.11651730537414551 }, { "epoch": 2.152252197265625e-05, "model_forward_time": 0.025686264038085938, "step": 14105 }, { "epoch": 2.152252197265625e-05, "step": 14105, "training_step_time": 0.11438918113708496 }, { "epoch": 2.15240478515625e-05, "model_forward_time": 0.02637457847595215, "step": 14106 }, { "epoch": 2.15240478515625e-05, "step": 14106, "training_step_time": 0.12194037437438965 }, { "epoch": 2.152557373046875e-05, "model_forward_time": 0.02580714225769043, "step": 14107 }, { "epoch": 2.152557373046875e-05, "step": 14107, "training_step_time": 0.11693167686462402 }, { "epoch": 2.1527099609375e-05, "model_forward_time": 0.0257415771484375, "step": 14108 }, { "epoch": 2.1527099609375e-05, "step": 14108, "training_step_time": 0.10375785827636719 }, { "epoch": 2.152862548828125e-05, "model_forward_time": 0.02541518211364746, "step": 14109 }, { "epoch": 2.152862548828125e-05, "step": 14109, "training_step_time": 0.13335847854614258 }, { "epoch": 2.15301513671875e-05, "grad_norm": 0.3614904582500458, "learning_rate": 5.8989814701016035e-05, "loss": 0.0166, "step": 14110 }, { "epoch": 2.15301513671875e-05, "model_forward_time": 0.025261640548706055, "step": 14110 }, { "epoch": 2.15301513671875e-05, "step": 14110, "training_step_time": 0.10974979400634766 }, { "epoch": 2.153167724609375e-05, "model_forward_time": 0.0260317325592041, "step": 14111 }, { "epoch": 2.153167724609375e-05, "step": 14111, "training_step_time": 0.1247396469116211 }, { "epoch": 2.1533203125e-05, "model_forward_time": 0.025685787200927734, "step": 14112 }, { "epoch": 2.1533203125e-05, "step": 14112, "training_step_time": 0.1055452823638916 }, { "epoch": 2.153472900390625e-05, "model_forward_time": 0.025370121002197266, "step": 14113 }, { "epoch": 2.153472900390625e-05, "step": 14113, "training_step_time": 0.1899101734161377 }, { "epoch": 2.15362548828125e-05, "model_forward_time": 0.025854825973510742, "step": 14114 }, { "epoch": 2.15362548828125e-05, "step": 14114, "training_step_time": 0.15713882446289062 }, { "epoch": 2.153778076171875e-05, "model_forward_time": 0.024793624877929688, "step": 14115 }, { "epoch": 2.153778076171875e-05, "step": 14115, "training_step_time": 0.12921881675720215 }, { "epoch": 2.1539306640625e-05, "model_forward_time": 0.024428606033325195, "step": 14116 }, { "epoch": 2.1539306640625e-05, "step": 14116, "training_step_time": 0.14049077033996582 }, { "epoch": 2.154083251953125e-05, "model_forward_time": 0.029732704162597656, "step": 14117 }, { "epoch": 2.154083251953125e-05, "step": 14117, "training_step_time": 0.13073372840881348 }, { "epoch": 2.15423583984375e-05, "model_forward_time": 0.024417638778686523, "step": 14118 }, { "epoch": 2.15423583984375e-05, "step": 14118, "training_step_time": 0.12877964973449707 }, { "epoch": 2.154388427734375e-05, "model_forward_time": 0.025296926498413086, "step": 14119 }, { "epoch": 2.154388427734375e-05, "step": 14119, "training_step_time": 0.13159656524658203 }, { "epoch": 2.154541015625e-05, "grad_norm": 0.17757172882556915, "learning_rate": 5.8935591761954025e-05, "loss": 0.0124, "step": 14120 }, { "epoch": 2.154541015625e-05, "model_forward_time": 0.02501225471496582, "step": 14120 }, { "epoch": 2.154541015625e-05, "step": 14120, "training_step_time": 0.12037229537963867 }, { "epoch": 2.154693603515625e-05, "model_forward_time": 0.024975061416625977, "step": 14121 }, { "epoch": 2.154693603515625e-05, "step": 14121, "training_step_time": 0.12013745307922363 }, { "epoch": 2.15484619140625e-05, "model_forward_time": 0.02554464340209961, "step": 14122 }, { "epoch": 2.15484619140625e-05, "step": 14122, "training_step_time": 0.11409902572631836 }, { "epoch": 2.154998779296875e-05, "model_forward_time": 0.025319814682006836, "step": 14123 }, { "epoch": 2.154998779296875e-05, "step": 14123, "training_step_time": 0.11845993995666504 }, { "epoch": 2.1551513671875e-05, "model_forward_time": 0.024976730346679688, "step": 14124 }, { "epoch": 2.1551513671875e-05, "step": 14124, "training_step_time": 0.11191344261169434 }, { "epoch": 2.155303955078125e-05, "model_forward_time": 0.024422645568847656, "step": 14125 }, { "epoch": 2.155303955078125e-05, "step": 14125, "training_step_time": 0.10864067077636719 }, { "epoch": 2.15545654296875e-05, "model_forward_time": 0.02524280548095703, "step": 14126 }, { "epoch": 2.15545654296875e-05, "step": 14126, "training_step_time": 0.11043024063110352 }, { "epoch": 2.155609130859375e-05, "model_forward_time": 0.025084972381591797, "step": 14127 }, { "epoch": 2.155609130859375e-05, "step": 14127, "training_step_time": 0.10823798179626465 }, { "epoch": 2.15576171875e-05, "model_forward_time": 0.02534651756286621, "step": 14128 }, { "epoch": 2.15576171875e-05, "step": 14128, "training_step_time": 0.1072242259979248 }, { "epoch": 2.155914306640625e-05, "model_forward_time": 0.025495290756225586, "step": 14129 }, { "epoch": 2.155914306640625e-05, "step": 14129, "training_step_time": 0.10645627975463867 }, { "epoch": 2.15606689453125e-05, "grad_norm": 0.4064501225948334, "learning_rate": 5.888135796530544e-05, "loss": 0.0228, "step": 14130 }, { "epoch": 2.15606689453125e-05, "model_forward_time": 0.024997711181640625, "step": 14130 }, { "epoch": 2.15606689453125e-05, "step": 14130, "training_step_time": 0.10503482818603516 }, { "epoch": 2.156219482421875e-05, "model_forward_time": 0.025748252868652344, "step": 14131 }, { "epoch": 2.156219482421875e-05, "step": 14131, "training_step_time": 0.10864973068237305 }, { "epoch": 2.1563720703125e-05, "model_forward_time": 0.025141000747680664, "step": 14132 }, { "epoch": 2.1563720703125e-05, "step": 14132, "training_step_time": 0.10582256317138672 }, { "epoch": 2.156524658203125e-05, "model_forward_time": 0.025005578994750977, "step": 14133 }, { "epoch": 2.156524658203125e-05, "step": 14133, "training_step_time": 0.10696983337402344 }, { "epoch": 2.15667724609375e-05, "model_forward_time": 0.025117874145507812, "step": 14134 }, { "epoch": 2.15667724609375e-05, "step": 14134, "training_step_time": 0.10648870468139648 }, { "epoch": 2.156829833984375e-05, "model_forward_time": 0.025725841522216797, "step": 14135 }, { "epoch": 2.156829833984375e-05, "step": 14135, "training_step_time": 0.10704541206359863 }, { "epoch": 2.156982421875e-05, "model_forward_time": 0.025290489196777344, "step": 14136 }, { "epoch": 2.156982421875e-05, "step": 14136, "training_step_time": 0.10841941833496094 }, { "epoch": 2.157135009765625e-05, "model_forward_time": 0.025128602981567383, "step": 14137 }, { "epoch": 2.157135009765625e-05, "step": 14137, "training_step_time": 0.10879731178283691 }, { "epoch": 2.15728759765625e-05, "model_forward_time": 0.0250704288482666, "step": 14138 }, { "epoch": 2.15728759765625e-05, "step": 14138, "training_step_time": 0.13630914688110352 }, { "epoch": 2.157440185546875e-05, "model_forward_time": 0.02517867088317871, "step": 14139 }, { "epoch": 2.157440185546875e-05, "step": 14139, "training_step_time": 0.10772919654846191 }, { "epoch": 2.1575927734375e-05, "grad_norm": 0.36874639987945557, "learning_rate": 5.8827113376969465e-05, "loss": 0.0189, "step": 14140 }, { "epoch": 2.1575927734375e-05, "model_forward_time": 0.025316238403320312, "step": 14140 }, { "epoch": 2.1575927734375e-05, "step": 14140, "training_step_time": 0.12947535514831543 }, { "epoch": 2.157745361328125e-05, "model_forward_time": 0.025620698928833008, "step": 14141 }, { "epoch": 2.157745361328125e-05, "step": 14141, "training_step_time": 0.11558699607849121 }, { "epoch": 2.15789794921875e-05, "model_forward_time": 0.025434255599975586, "step": 14142 }, { "epoch": 2.15789794921875e-05, "step": 14142, "training_step_time": 0.18879175186157227 }, { "epoch": 2.158050537109375e-05, "model_forward_time": 0.024472951889038086, "step": 14143 }, { "epoch": 2.158050537109375e-05, "step": 14143, "training_step_time": 0.16038203239440918 }, { "epoch": 2.158203125e-05, "model_forward_time": 0.024168014526367188, "step": 14144 }, { "epoch": 2.158203125e-05, "step": 14144, "training_step_time": 0.12031984329223633 }, { "epoch": 2.158355712890625e-05, "model_forward_time": 0.02750706672668457, "step": 14145 }, { "epoch": 2.158355712890625e-05, "step": 14145, "training_step_time": 0.11200141906738281 }, { "epoch": 2.15850830078125e-05, "model_forward_time": 0.025278806686401367, "step": 14146 }, { "epoch": 2.15850830078125e-05, "step": 14146, "training_step_time": 0.14926481246948242 }, { "epoch": 2.158660888671875e-05, "model_forward_time": 0.02560567855834961, "step": 14147 }, { "epoch": 2.158660888671875e-05, "step": 14147, "training_step_time": 0.10370731353759766 }, { "epoch": 2.1588134765625e-05, "model_forward_time": 0.024811983108520508, "step": 14148 }, { "epoch": 2.1588134765625e-05, "step": 14148, "training_step_time": 0.14701366424560547 }, { "epoch": 2.158966064453125e-05, "model_forward_time": 0.025099754333496094, "step": 14149 }, { "epoch": 2.158966064453125e-05, "step": 14149, "training_step_time": 0.12157464027404785 }, { "epoch": 2.15911865234375e-05, "grad_norm": 0.2832779586315155, "learning_rate": 5.8772858062858416e-05, "loss": 0.0127, "step": 14150 }, { "epoch": 2.15911865234375e-05, "model_forward_time": 0.025452375411987305, "step": 14150 }, { "epoch": 2.15911865234375e-05, "step": 14150, "training_step_time": 0.10580182075500488 }, { "epoch": 2.159271240234375e-05, "model_forward_time": 0.025459766387939453, "step": 14151 }, { "epoch": 2.159271240234375e-05, "step": 14151, "training_step_time": 0.11262893676757812 }, { "epoch": 2.159423828125e-05, "model_forward_time": 0.025344133377075195, "step": 14152 }, { "epoch": 2.159423828125e-05, "step": 14152, "training_step_time": 0.10612893104553223 }, { "epoch": 2.159576416015625e-05, "model_forward_time": 0.025527238845825195, "step": 14153 }, { "epoch": 2.159576416015625e-05, "step": 14153, "training_step_time": 0.11699652671813965 }, { "epoch": 2.15972900390625e-05, "model_forward_time": 0.02534961700439453, "step": 14154 }, { "epoch": 2.15972900390625e-05, "step": 14154, "training_step_time": 0.1320171356201172 }, { "epoch": 2.159881591796875e-05, "model_forward_time": 0.025344371795654297, "step": 14155 }, { "epoch": 2.159881591796875e-05, "step": 14155, "training_step_time": 0.16156935691833496 }, { "epoch": 2.1600341796875e-05, "model_forward_time": 0.024556636810302734, "step": 14156 }, { "epoch": 2.1600341796875e-05, "step": 14156, "training_step_time": 0.1331944465637207 }, { "epoch": 2.160186767578125e-05, "model_forward_time": 0.02551412582397461, "step": 14157 }, { "epoch": 2.160186767578125e-05, "step": 14157, "training_step_time": 0.12624788284301758 }, { "epoch": 2.16033935546875e-05, "model_forward_time": 0.025478839874267578, "step": 14158 }, { "epoch": 2.16033935546875e-05, "step": 14158, "training_step_time": 0.156083345413208 }, { "epoch": 2.160491943359375e-05, "model_forward_time": 0.024598360061645508, "step": 14159 }, { "epoch": 2.160491943359375e-05, "step": 14159, "training_step_time": 0.17516732215881348 }, { "epoch": 2.16064453125e-05, "grad_norm": 0.40027958154678345, "learning_rate": 5.871859208889759e-05, "loss": 0.0274, "step": 14160 }, { "epoch": 2.16064453125e-05, "model_forward_time": 0.025252580642700195, "step": 14160 }, { "epoch": 2.16064453125e-05, "step": 14160, "training_step_time": 0.16366815567016602 }, { "epoch": 2.160797119140625e-05, "model_forward_time": 0.02539539337158203, "step": 14161 }, { "epoch": 2.160797119140625e-05, "step": 14161, "training_step_time": 0.10684776306152344 }, { "epoch": 2.16094970703125e-05, "model_forward_time": 0.025539875030517578, "step": 14162 }, { "epoch": 2.16094970703125e-05, "step": 14162, "training_step_time": 0.10494709014892578 }, { "epoch": 2.161102294921875e-05, "model_forward_time": 0.025307178497314453, "step": 14163 }, { "epoch": 2.161102294921875e-05, "step": 14163, "training_step_time": 0.10545825958251953 }, { "epoch": 2.1612548828125e-05, "model_forward_time": 0.025830745697021484, "step": 14164 }, { "epoch": 2.1612548828125e-05, "step": 14164, "training_step_time": 0.1083676815032959 }, { "epoch": 2.161407470703125e-05, "model_forward_time": 0.026053667068481445, "step": 14165 }, { "epoch": 2.161407470703125e-05, "step": 14165, "training_step_time": 0.10612344741821289 }, { "epoch": 2.16156005859375e-05, "model_forward_time": 0.02515697479248047, "step": 14166 }, { "epoch": 2.16156005859375e-05, "step": 14166, "training_step_time": 0.10560202598571777 }, { "epoch": 2.161712646484375e-05, "model_forward_time": 0.02541184425354004, "step": 14167 }, { "epoch": 2.161712646484375e-05, "step": 14167, "training_step_time": 0.10646700859069824 }, { "epoch": 2.161865234375e-05, "model_forward_time": 0.02543163299560547, "step": 14168 }, { "epoch": 2.161865234375e-05, "step": 14168, "training_step_time": 0.10625100135803223 }, { "epoch": 2.162017822265625e-05, "model_forward_time": 0.02502274513244629, "step": 14169 }, { "epoch": 2.162017822265625e-05, "step": 14169, "training_step_time": 0.10677647590637207 }, { "epoch": 2.16217041015625e-05, "grad_norm": 0.36665669083595276, "learning_rate": 5.866431552102526e-05, "loss": 0.0162, "step": 14170 }, { "epoch": 2.16217041015625e-05, "model_forward_time": 0.025372982025146484, "step": 14170 }, { "epoch": 2.16217041015625e-05, "step": 14170, "training_step_time": 0.10375285148620605 }, { "epoch": 2.162322998046875e-05, "model_forward_time": 0.025723695755004883, "step": 14171 }, { "epoch": 2.162322998046875e-05, "step": 14171, "training_step_time": 0.10509753227233887 }, { "epoch": 2.1624755859375e-05, "model_forward_time": 0.025551795959472656, "step": 14172 }, { "epoch": 2.1624755859375e-05, "step": 14172, "training_step_time": 0.10844302177429199 }, { "epoch": 2.162628173828125e-05, "model_forward_time": 0.02575540542602539, "step": 14173 }, { "epoch": 2.162628173828125e-05, "step": 14173, "training_step_time": 0.10551309585571289 }, { "epoch": 2.16278076171875e-05, "model_forward_time": 0.02541351318359375, "step": 14174 }, { "epoch": 2.16278076171875e-05, "step": 14174, "training_step_time": 0.10884428024291992 }, { "epoch": 2.162933349609375e-05, "model_forward_time": 0.0254366397857666, "step": 14175 }, { "epoch": 2.162933349609375e-05, "step": 14175, "training_step_time": 0.10845756530761719 }, { "epoch": 2.1630859375e-05, "model_forward_time": 0.025246381759643555, "step": 14176 }, { "epoch": 2.1630859375e-05, "step": 14176, "training_step_time": 0.10660719871520996 }, { "epoch": 2.163238525390625e-05, "model_forward_time": 0.025371074676513672, "step": 14177 }, { "epoch": 2.163238525390625e-05, "step": 14177, "training_step_time": 0.1057732105255127 }, { "epoch": 2.16339111328125e-05, "model_forward_time": 0.02570319175720215, "step": 14178 }, { "epoch": 2.16339111328125e-05, "step": 14178, "training_step_time": 0.10518550872802734 }, { "epoch": 2.163543701171875e-05, "model_forward_time": 0.025697946548461914, "step": 14179 }, { "epoch": 2.163543701171875e-05, "step": 14179, "training_step_time": 0.11314558982849121 }, { "epoch": 2.1636962890625e-05, "grad_norm": 0.27468445897102356, "learning_rate": 5.861002842519259e-05, "loss": 0.0144, "step": 14180 }, { "epoch": 2.1636962890625e-05, "model_forward_time": 0.02560281753540039, "step": 14180 }, { "epoch": 2.1636962890625e-05, "step": 14180, "training_step_time": 0.12124824523925781 }, { "epoch": 2.163848876953125e-05, "model_forward_time": 0.0256345272064209, "step": 14181 }, { "epoch": 2.163848876953125e-05, "step": 14181, "training_step_time": 0.12052679061889648 }, { "epoch": 2.16400146484375e-05, "model_forward_time": 0.025391340255737305, "step": 14182 }, { "epoch": 2.16400146484375e-05, "step": 14182, "training_step_time": 0.11876845359802246 }, { "epoch": 2.164154052734375e-05, "model_forward_time": 0.02524399757385254, "step": 14183 }, { "epoch": 2.164154052734375e-05, "step": 14183, "training_step_time": 0.13223910331726074 }, { "epoch": 2.164306640625e-05, "model_forward_time": 0.025165319442749023, "step": 14184 }, { "epoch": 2.164306640625e-05, "step": 14184, "training_step_time": 0.13660001754760742 }, { "epoch": 2.164459228515625e-05, "model_forward_time": 0.0256807804107666, "step": 14185 }, { "epoch": 2.164459228515625e-05, "step": 14185, "training_step_time": 0.11591839790344238 }, { "epoch": 2.16461181640625e-05, "model_forward_time": 0.026081085205078125, "step": 14186 }, { "epoch": 2.16461181640625e-05, "step": 14186, "training_step_time": 0.11262989044189453 }, { "epoch": 2.164764404296875e-05, "model_forward_time": 0.026117801666259766, "step": 14187 }, { "epoch": 2.164764404296875e-05, "step": 14187, "training_step_time": 0.1134040355682373 }, { "epoch": 2.1649169921875e-05, "model_forward_time": 0.025333642959594727, "step": 14188 }, { "epoch": 2.1649169921875e-05, "step": 14188, "training_step_time": 0.10905623435974121 }, { "epoch": 2.165069580078125e-05, "model_forward_time": 0.0254361629486084, "step": 14189 }, { "epoch": 2.165069580078125e-05, "step": 14189, "training_step_time": 0.22716045379638672 }, { "epoch": 2.16522216796875e-05, "grad_norm": 0.6543409824371338, "learning_rate": 5.85557308673635e-05, "loss": 0.0187, "step": 14190 }, { "epoch": 2.16522216796875e-05, "model_forward_time": 0.024791955947875977, "step": 14190 }, { "epoch": 2.16522216796875e-05, "step": 14190, "training_step_time": 0.12995529174804688 }, { "epoch": 2.165374755859375e-05, "model_forward_time": 0.024709463119506836, "step": 14191 }, { "epoch": 2.165374755859375e-05, "step": 14191, "training_step_time": 0.12517738342285156 }, { "epoch": 2.16552734375e-05, "model_forward_time": 0.02505636215209961, "step": 14192 }, { "epoch": 2.16552734375e-05, "step": 14192, "training_step_time": 0.1282491683959961 }, { "epoch": 2.165679931640625e-05, "model_forward_time": 0.025296926498413086, "step": 14193 }, { "epoch": 2.165679931640625e-05, "step": 14193, "training_step_time": 0.11142683029174805 }, { "epoch": 2.16583251953125e-05, "model_forward_time": 0.026098251342773438, "step": 14194 }, { "epoch": 2.16583251953125e-05, "step": 14194, "training_step_time": 0.1708202362060547 }, { "epoch": 2.165985107421875e-05, "model_forward_time": 0.027638673782348633, "step": 14195 }, { "epoch": 2.165985107421875e-05, "step": 14195, "training_step_time": 0.17023277282714844 }, { "epoch": 2.1661376953125e-05, "model_forward_time": 0.024567842483520508, "step": 14196 }, { "epoch": 2.1661376953125e-05, "step": 14196, "training_step_time": 0.10980224609375 }, { "epoch": 2.166290283203125e-05, "model_forward_time": 0.025326967239379883, "step": 14197 }, { "epoch": 2.166290283203125e-05, "step": 14197, "training_step_time": 0.11282753944396973 }, { "epoch": 2.16644287109375e-05, "model_forward_time": 0.02573561668395996, "step": 14198 }, { "epoch": 2.16644287109375e-05, "step": 14198, "training_step_time": 0.11056089401245117 }, { "epoch": 2.166595458984375e-05, "model_forward_time": 0.025129079818725586, "step": 14199 }, { "epoch": 2.166595458984375e-05, "step": 14199, "training_step_time": 0.1680583953857422 }, { "epoch": 2.166748046875e-05, "grad_norm": 0.3905617892742157, "learning_rate": 5.850142291351466e-05, "loss": 0.0181, "step": 14200 }, { "epoch": 2.166748046875e-05, "model_forward_time": 0.026114225387573242, "step": 14200 }, { "epoch": 2.166748046875e-05, "step": 14200, "training_step_time": 0.18952727317810059 }, { "epoch": 2.166900634765625e-05, "model_forward_time": 0.02424931526184082, "step": 14201 }, { "epoch": 2.166900634765625e-05, "step": 14201, "training_step_time": 0.19691848754882812 }, { "epoch": 2.16705322265625e-05, "model_forward_time": 0.023989200592041016, "step": 14202 }, { "epoch": 2.16705322265625e-05, "step": 14202, "training_step_time": 0.17379045486450195 }, { "epoch": 2.167205810546875e-05, "model_forward_time": 0.024037837982177734, "step": 14203 }, { "epoch": 2.167205810546875e-05, "step": 14203, "training_step_time": 0.16141510009765625 }, { "epoch": 2.1673583984375e-05, "model_forward_time": 0.023444175720214844, "step": 14204 }, { "epoch": 2.1673583984375e-05, "step": 14204, "training_step_time": 0.15222668647766113 }, { "epoch": 2.167510986328125e-05, "model_forward_time": 0.024636030197143555, "step": 14205 }, { "epoch": 2.167510986328125e-05, "step": 14205, "training_step_time": 0.11674141883850098 }, { "epoch": 2.16766357421875e-05, "model_forward_time": 0.025009632110595703, "step": 14206 }, { "epoch": 2.16766357421875e-05, "step": 14206, "training_step_time": 0.1020820140838623 }, { "epoch": 2.167816162109375e-05, "model_forward_time": 0.0256040096282959, "step": 14207 }, { "epoch": 2.167816162109375e-05, "step": 14207, "training_step_time": 0.10316848754882812 }, { "epoch": 2.16796875e-05, "model_forward_time": 0.02793288230895996, "step": 14208 }, { "epoch": 2.16796875e-05, "step": 14208, "training_step_time": 0.10660958290100098 }, { "epoch": 2.168121337890625e-05, "model_forward_time": 0.02549910545349121, "step": 14209 }, { "epoch": 2.168121337890625e-05, "step": 14209, "training_step_time": 0.1085367202758789 }, { "epoch": 2.16827392578125e-05, "grad_norm": 0.2370947003364563, "learning_rate": 5.8447104629635344e-05, "loss": 0.0162, "step": 14210 }, { "epoch": 2.16827392578125e-05, "model_forward_time": 0.024593353271484375, "step": 14210 }, { "epoch": 2.16827392578125e-05, "step": 14210, "training_step_time": 0.10286641120910645 }, { "epoch": 2.168426513671875e-05, "model_forward_time": 0.025765657424926758, "step": 14211 }, { "epoch": 2.168426513671875e-05, "step": 14211, "training_step_time": 0.10534024238586426 }, { "epoch": 2.1685791015625e-05, "model_forward_time": 0.024977445602416992, "step": 14212 }, { "epoch": 2.1685791015625e-05, "step": 14212, "training_step_time": 0.1051020622253418 }, { "epoch": 2.168731689453125e-05, "model_forward_time": 0.02566838264465332, "step": 14213 }, { "epoch": 2.168731689453125e-05, "step": 14213, "training_step_time": 0.11739373207092285 }, { "epoch": 2.16888427734375e-05, "model_forward_time": 0.024464845657348633, "step": 14214 }, { "epoch": 2.16888427734375e-05, "step": 14214, "training_step_time": 0.1219627857208252 }, { "epoch": 2.169036865234375e-05, "model_forward_time": 0.024508953094482422, "step": 14215 }, { "epoch": 2.169036865234375e-05, "step": 14215, "training_step_time": 0.1144554615020752 }, { "epoch": 2.169189453125e-05, "model_forward_time": 0.025665760040283203, "step": 14216 }, { "epoch": 2.169189453125e-05, "step": 14216, "training_step_time": 0.1211385726928711 }, { "epoch": 2.169342041015625e-05, "model_forward_time": 0.023913145065307617, "step": 14217 }, { "epoch": 2.169342041015625e-05, "step": 14217, "training_step_time": 0.11816740036010742 }, { "epoch": 2.16949462890625e-05, "model_forward_time": 0.024271488189697266, "step": 14218 }, { "epoch": 2.16949462890625e-05, "step": 14218, "training_step_time": 0.11749529838562012 }, { "epoch": 2.169647216796875e-05, "model_forward_time": 0.024726390838623047, "step": 14219 }, { "epoch": 2.169647216796875e-05, "step": 14219, "training_step_time": 0.11262989044189453 }, { "epoch": 2.1697998046875e-05, "grad_norm": 0.3323439061641693, "learning_rate": 5.8392776081727385e-05, "loss": 0.0253, "step": 14220 }, { "epoch": 2.1697998046875e-05, "model_forward_time": 0.02440810203552246, "step": 14220 }, { "epoch": 2.1697998046875e-05, "step": 14220, "training_step_time": 0.1128697395324707 }, { "epoch": 2.169952392578125e-05, "model_forward_time": 0.02443099021911621, "step": 14221 }, { "epoch": 2.169952392578125e-05, "step": 14221, "training_step_time": 0.11056923866271973 }, { "epoch": 2.17010498046875e-05, "model_forward_time": 0.024686813354492188, "step": 14222 }, { "epoch": 2.17010498046875e-05, "step": 14222, "training_step_time": 0.1112356185913086 }, { "epoch": 2.170257568359375e-05, "model_forward_time": 0.025220632553100586, "step": 14223 }, { "epoch": 2.170257568359375e-05, "step": 14223, "training_step_time": 0.10770916938781738 }, { "epoch": 2.17041015625e-05, "model_forward_time": 0.025386810302734375, "step": 14224 }, { "epoch": 2.17041015625e-05, "step": 14224, "training_step_time": 0.11013650894165039 }, { "epoch": 2.170562744140625e-05, "model_forward_time": 0.02533435821533203, "step": 14225 }, { "epoch": 2.170562744140625e-05, "step": 14225, "training_step_time": 0.10602068901062012 }, { "epoch": 2.17071533203125e-05, "model_forward_time": 0.025584936141967773, "step": 14226 }, { "epoch": 2.17071533203125e-05, "step": 14226, "training_step_time": 0.10624957084655762 }, { "epoch": 2.170867919921875e-05, "model_forward_time": 0.02587437629699707, "step": 14227 }, { "epoch": 2.170867919921875e-05, "step": 14227, "training_step_time": 0.10594892501831055 }, { "epoch": 2.1710205078125e-05, "model_forward_time": 0.025670766830444336, "step": 14228 }, { "epoch": 2.1710205078125e-05, "step": 14228, "training_step_time": 0.1812424659729004 }, { "epoch": 2.171173095703125e-05, "model_forward_time": 0.02523946762084961, "step": 14229 }, { "epoch": 2.171173095703125e-05, "step": 14229, "training_step_time": 0.14331388473510742 }, { "epoch": 2.17132568359375e-05, "grad_norm": 0.44740182161331177, "learning_rate": 5.833843733580512e-05, "loss": 0.0164, "step": 14230 }, { "epoch": 2.17132568359375e-05, "model_forward_time": 0.024950742721557617, "step": 14230 }, { "epoch": 2.17132568359375e-05, "step": 14230, "training_step_time": 0.11131453514099121 }, { "epoch": 2.171478271484375e-05, "model_forward_time": 0.025397300720214844, "step": 14231 }, { "epoch": 2.171478271484375e-05, "step": 14231, "training_step_time": 0.10571742057800293 }, { "epoch": 2.171630859375e-05, "model_forward_time": 0.027855634689331055, "step": 14232 }, { "epoch": 2.171630859375e-05, "step": 14232, "training_step_time": 0.12242555618286133 }, { "epoch": 2.171783447265625e-05, "model_forward_time": 0.025373458862304688, "step": 14233 }, { "epoch": 2.171783447265625e-05, "step": 14233, "training_step_time": 0.1830766201019287 }, { "epoch": 2.17193603515625e-05, "model_forward_time": 0.024764299392700195, "step": 14234 }, { "epoch": 2.17193603515625e-05, "step": 14234, "training_step_time": 0.16950225830078125 }, { "epoch": 2.172088623046875e-05, "model_forward_time": 0.024750947952270508, "step": 14235 }, { "epoch": 2.172088623046875e-05, "step": 14235, "training_step_time": 0.12900304794311523 }, { "epoch": 2.1722412109375e-05, "model_forward_time": 0.024866819381713867, "step": 14236 }, { "epoch": 2.1722412109375e-05, "step": 14236, "training_step_time": 0.1181650161743164 }, { "epoch": 2.172393798828125e-05, "model_forward_time": 0.025434494018554688, "step": 14237 }, { "epoch": 2.172393798828125e-05, "step": 14237, "training_step_time": 0.10656118392944336 }, { "epoch": 2.17254638671875e-05, "model_forward_time": 0.0257108211517334, "step": 14238 }, { "epoch": 2.17254638671875e-05, "step": 14238, "training_step_time": 0.10633683204650879 }, { "epoch": 2.172698974609375e-05, "model_forward_time": 0.025119781494140625, "step": 14239 }, { "epoch": 2.172698974609375e-05, "step": 14239, "training_step_time": 0.10781002044677734 }, { "epoch": 2.1728515625e-05, "grad_norm": 0.30819186568260193, "learning_rate": 5.828408845789522e-05, "loss": 0.0165, "step": 14240 }, { "epoch": 2.1728515625e-05, "model_forward_time": 0.025023698806762695, "step": 14240 }, { "epoch": 2.1728515625e-05, "step": 14240, "training_step_time": 0.10374188423156738 }, { "epoch": 2.173004150390625e-05, "model_forward_time": 0.02571702003479004, "step": 14241 }, { "epoch": 2.173004150390625e-05, "step": 14241, "training_step_time": 0.1978161334991455 }, { "epoch": 2.17315673828125e-05, "model_forward_time": 0.025000810623168945, "step": 14242 }, { "epoch": 2.17315673828125e-05, "step": 14242, "training_step_time": 0.13852572441101074 }, { "epoch": 2.173309326171875e-05, "model_forward_time": 0.025410890579223633, "step": 14243 }, { "epoch": 2.173309326171875e-05, "step": 14243, "training_step_time": 0.11875605583190918 }, { "epoch": 2.1734619140625e-05, "model_forward_time": 0.025330305099487305, "step": 14244 }, { "epoch": 2.1734619140625e-05, "step": 14244, "training_step_time": 0.12769365310668945 }, { "epoch": 2.173614501953125e-05, "model_forward_time": 0.02568650245666504, "step": 14245 }, { "epoch": 2.173614501953125e-05, "step": 14245, "training_step_time": 0.19179177284240723 }, { "epoch": 2.17376708984375e-05, "model_forward_time": 0.024847030639648438, "step": 14246 }, { "epoch": 2.17376708984375e-05, "step": 14246, "training_step_time": 0.12841367721557617 }, { "epoch": 2.173919677734375e-05, "model_forward_time": 0.024584531784057617, "step": 14247 }, { "epoch": 2.173919677734375e-05, "step": 14247, "training_step_time": 0.10552549362182617 }, { "epoch": 2.174072265625e-05, "model_forward_time": 0.026430606842041016, "step": 14248 }, { "epoch": 2.174072265625e-05, "step": 14248, "training_step_time": 0.11323213577270508 }, { "epoch": 2.174224853515625e-05, "model_forward_time": 0.025250911712646484, "step": 14249 }, { "epoch": 2.174224853515625e-05, "step": 14249, "training_step_time": 0.10681486129760742 }, { "epoch": 2.17437744140625e-05, "grad_norm": 0.35048770904541016, "learning_rate": 5.8229729514036705e-05, "loss": 0.0223, "step": 14250 }, { "epoch": 2.17437744140625e-05, "model_forward_time": 0.025784969329833984, "step": 14250 }, { "epoch": 2.17437744140625e-05, "step": 14250, "training_step_time": 0.10779976844787598 }, { "epoch": 2.174530029296875e-05, "model_forward_time": 0.025392770767211914, "step": 14251 }, { "epoch": 2.174530029296875e-05, "step": 14251, "training_step_time": 0.11716365814208984 }, { "epoch": 2.1746826171875e-05, "model_forward_time": 0.02602982521057129, "step": 14252 }, { "epoch": 2.1746826171875e-05, "step": 14252, "training_step_time": 0.1142120361328125 }, { "epoch": 2.174835205078125e-05, "model_forward_time": 0.02585625648498535, "step": 14253 }, { "epoch": 2.174835205078125e-05, "step": 14253, "training_step_time": 0.10896039009094238 }, { "epoch": 2.17498779296875e-05, "model_forward_time": 0.0254361629486084, "step": 14254 }, { "epoch": 2.17498779296875e-05, "step": 14254, "training_step_time": 0.10609269142150879 }, { "epoch": 2.175140380859375e-05, "model_forward_time": 0.025432348251342773, "step": 14255 }, { "epoch": 2.175140380859375e-05, "step": 14255, "training_step_time": 0.10748791694641113 }, { "epoch": 2.17529296875e-05, "model_forward_time": 0.026241302490234375, "step": 14256 }, { "epoch": 2.17529296875e-05, "step": 14256, "training_step_time": 0.10540390014648438 }, { "epoch": 2.175445556640625e-05, "model_forward_time": 0.0257875919342041, "step": 14257 }, { "epoch": 2.175445556640625e-05, "step": 14257, "training_step_time": 0.10501241683959961 }, { "epoch": 2.17559814453125e-05, "model_forward_time": 0.025878190994262695, "step": 14258 }, { "epoch": 2.17559814453125e-05, "step": 14258, "training_step_time": 0.10561823844909668 }, { "epoch": 2.175750732421875e-05, "model_forward_time": 0.02545619010925293, "step": 14259 }, { "epoch": 2.175750732421875e-05, "step": 14259, "training_step_time": 0.10495257377624512 }, { "epoch": 2.1759033203125e-05, "grad_norm": 0.25206512212753296, "learning_rate": 5.817536057028081e-05, "loss": 0.0158, "step": 14260 }, { "epoch": 2.1759033203125e-05, "model_forward_time": 0.025283336639404297, "step": 14260 }, { "epoch": 2.1759033203125e-05, "step": 14260, "training_step_time": 0.10444951057434082 }, { "epoch": 2.176055908203125e-05, "model_forward_time": 0.02515578269958496, "step": 14261 }, { "epoch": 2.176055908203125e-05, "step": 14261, "training_step_time": 0.10490679740905762 }, { "epoch": 2.17620849609375e-05, "model_forward_time": 0.02617931365966797, "step": 14262 }, { "epoch": 2.17620849609375e-05, "step": 14262, "training_step_time": 0.1102452278137207 }, { "epoch": 2.176361083984375e-05, "model_forward_time": 0.025544166564941406, "step": 14263 }, { "epoch": 2.176361083984375e-05, "step": 14263, "training_step_time": 0.10574054718017578 }, { "epoch": 2.176513671875e-05, "model_forward_time": 0.0252535343170166, "step": 14264 }, { "epoch": 2.176513671875e-05, "step": 14264, "training_step_time": 0.10870695114135742 }, { "epoch": 2.176666259765625e-05, "model_forward_time": 0.025237321853637695, "step": 14265 }, { "epoch": 2.176666259765625e-05, "step": 14265, "training_step_time": 0.10875511169433594 }, { "epoch": 2.17681884765625e-05, "model_forward_time": 0.0253908634185791, "step": 14266 }, { "epoch": 2.17681884765625e-05, "step": 14266, "training_step_time": 0.10854411125183105 }, { "epoch": 2.176971435546875e-05, "model_forward_time": 0.025395870208740234, "step": 14267 }, { "epoch": 2.176971435546875e-05, "step": 14267, "training_step_time": 0.10894036293029785 }, { "epoch": 2.1771240234375e-05, "model_forward_time": 0.025535106658935547, "step": 14268 }, { "epoch": 2.1771240234375e-05, "step": 14268, "training_step_time": 0.11008191108703613 }, { "epoch": 2.177276611328125e-05, "model_forward_time": 0.025491714477539062, "step": 14269 }, { "epoch": 2.177276611328125e-05, "step": 14269, "training_step_time": 0.10747694969177246 }, { "epoch": 2.17742919921875e-05, "grad_norm": 0.37802183628082275, "learning_rate": 5.812098169269094e-05, "loss": 0.0215, "step": 14270 }, { "epoch": 2.17742919921875e-05, "model_forward_time": 0.0255584716796875, "step": 14270 }, { "epoch": 2.17742919921875e-05, "step": 14270, "training_step_time": 0.11397314071655273 }, { "epoch": 2.177581787109375e-05, "model_forward_time": 0.02548360824584961, "step": 14271 }, { "epoch": 2.177581787109375e-05, "step": 14271, "training_step_time": 0.10861039161682129 }, { "epoch": 2.177734375e-05, "model_forward_time": 0.02543950080871582, "step": 14272 }, { "epoch": 2.177734375e-05, "step": 14272, "training_step_time": 0.10734724998474121 }, { "epoch": 2.177886962890625e-05, "model_forward_time": 0.024877548217773438, "step": 14273 }, { "epoch": 2.177886962890625e-05, "step": 14273, "training_step_time": 0.10563921928405762 }, { "epoch": 2.17803955078125e-05, "model_forward_time": 0.02554464340209961, "step": 14274 }, { "epoch": 2.17803955078125e-05, "step": 14274, "training_step_time": 0.10756421089172363 }, { "epoch": 2.178192138671875e-05, "model_forward_time": 0.025127887725830078, "step": 14275 }, { "epoch": 2.178192138671875e-05, "step": 14275, "training_step_time": 0.1523747444152832 }, { "epoch": 2.1783447265625e-05, "model_forward_time": 0.02514362335205078, "step": 14276 }, { "epoch": 2.1783447265625e-05, "step": 14276, "training_step_time": 0.16162395477294922 }, { "epoch": 2.178497314453125e-05, "model_forward_time": 0.024582862854003906, "step": 14277 }, { "epoch": 2.178497314453125e-05, "step": 14277, "training_step_time": 0.24405694007873535 }, { "epoch": 2.17864990234375e-05, "model_forward_time": 0.024241209030151367, "step": 14278 }, { "epoch": 2.17864990234375e-05, "step": 14278, "training_step_time": 0.219435453414917 }, { "epoch": 2.178802490234375e-05, "model_forward_time": 0.024483680725097656, "step": 14279 }, { "epoch": 2.178802490234375e-05, "step": 14279, "training_step_time": 0.20451593399047852 }, { "epoch": 2.178955078125e-05, "grad_norm": 0.2612272799015045, "learning_rate": 5.8066592947342555e-05, "loss": 0.0153, "step": 14280 }, { "epoch": 2.178955078125e-05, "model_forward_time": 0.024660348892211914, "step": 14280 }, { "epoch": 2.178955078125e-05, "step": 14280, "training_step_time": 0.2109088897705078 }, { "epoch": 2.179107666015625e-05, "model_forward_time": 0.024587154388427734, "step": 14281 }, { "epoch": 2.179107666015625e-05, "step": 14281, "training_step_time": 0.15859603881835938 }, { "epoch": 2.17926025390625e-05, "model_forward_time": 0.025776386260986328, "step": 14282 }, { "epoch": 2.17926025390625e-05, "step": 14282, "training_step_time": 0.11010575294494629 }, { "epoch": 2.179412841796875e-05, "model_forward_time": 0.024687528610229492, "step": 14283 }, { "epoch": 2.179412841796875e-05, "step": 14283, "training_step_time": 0.14109063148498535 }, { "epoch": 2.1795654296875e-05, "model_forward_time": 0.02494025230407715, "step": 14284 }, { "epoch": 2.1795654296875e-05, "step": 14284, "training_step_time": 0.1917719841003418 }, { "epoch": 2.179718017578125e-05, "model_forward_time": 0.02471637725830078, "step": 14285 }, { "epoch": 2.179718017578125e-05, "step": 14285, "training_step_time": 0.12595129013061523 }, { "epoch": 2.17987060546875e-05, "model_forward_time": 0.024541378021240234, "step": 14286 }, { "epoch": 2.17987060546875e-05, "step": 14286, "training_step_time": 0.10734724998474121 }, { "epoch": 2.180023193359375e-05, "model_forward_time": 0.025301456451416016, "step": 14287 }, { "epoch": 2.180023193359375e-05, "step": 14287, "training_step_time": 0.13266682624816895 }, { "epoch": 2.18017578125e-05, "model_forward_time": 0.025513172149658203, "step": 14288 }, { "epoch": 2.18017578125e-05, "step": 14288, "training_step_time": 0.12051582336425781 }, { "epoch": 2.180328369140625e-05, "model_forward_time": 0.025593280792236328, "step": 14289 }, { "epoch": 2.180328369140625e-05, "step": 14289, "training_step_time": 0.24174928665161133 }, { "epoch": 2.18048095703125e-05, "grad_norm": 0.1933142989873886, "learning_rate": 5.8012194400323116e-05, "loss": 0.0115, "step": 14290 }, { "epoch": 2.18048095703125e-05, "model_forward_time": 0.026621103286743164, "step": 14290 }, { "epoch": 2.18048095703125e-05, "step": 14290, "training_step_time": 0.16365623474121094 }, { "epoch": 2.180633544921875e-05, "model_forward_time": 0.024871110916137695, "step": 14291 }, { "epoch": 2.180633544921875e-05, "step": 14291, "training_step_time": 0.17380857467651367 }, { "epoch": 2.1807861328125e-05, "model_forward_time": 0.024326801300048828, "step": 14292 }, { "epoch": 2.1807861328125e-05, "step": 14292, "training_step_time": 0.12635183334350586 }, { "epoch": 2.180938720703125e-05, "model_forward_time": 0.02431511878967285, "step": 14293 }, { "epoch": 2.180938720703125e-05, "step": 14293, "training_step_time": 0.20455551147460938 }, { "epoch": 2.18109130859375e-05, "model_forward_time": 0.02473306655883789, "step": 14294 }, { "epoch": 2.18109130859375e-05, "step": 14294, "training_step_time": 0.11234903335571289 }, { "epoch": 2.181243896484375e-05, "model_forward_time": 0.024663448333740234, "step": 14295 }, { "epoch": 2.181243896484375e-05, "step": 14295, "training_step_time": 0.10554838180541992 }, { "epoch": 2.181396484375e-05, "model_forward_time": 0.02562236785888672, "step": 14296 }, { "epoch": 2.181396484375e-05, "step": 14296, "training_step_time": 0.11009764671325684 }, { "epoch": 2.181549072265625e-05, "model_forward_time": 0.025576114654541016, "step": 14297 }, { "epoch": 2.181549072265625e-05, "step": 14297, "training_step_time": 0.1046440601348877 }, { "epoch": 2.18170166015625e-05, "model_forward_time": 0.025992631912231445, "step": 14298 }, { "epoch": 2.18170166015625e-05, "step": 14298, "training_step_time": 0.1050271987915039 }, { "epoch": 2.181854248046875e-05, "model_forward_time": 0.025273799896240234, "step": 14299 }, { "epoch": 2.181854248046875e-05, "step": 14299, "training_step_time": 0.10572218894958496 }, { "epoch": 2.1820068359375e-05, "grad_norm": 0.2315218150615692, "learning_rate": 5.795778611773197e-05, "loss": 0.0184, "step": 14300 }, { "epoch": 2.1820068359375e-05, "model_forward_time": 0.02531576156616211, "step": 14300 }, { "epoch": 2.1820068359375e-05, "step": 14300, "training_step_time": 0.10671496391296387 }, { "epoch": 2.182159423828125e-05, "model_forward_time": 0.02517557144165039, "step": 14301 }, { "epoch": 2.182159423828125e-05, "step": 14301, "training_step_time": 0.10485076904296875 }, { "epoch": 2.18231201171875e-05, "model_forward_time": 0.025173425674438477, "step": 14302 }, { "epoch": 2.18231201171875e-05, "step": 14302, "training_step_time": 0.10883092880249023 }, { "epoch": 2.182464599609375e-05, "model_forward_time": 0.025431394577026367, "step": 14303 }, { "epoch": 2.182464599609375e-05, "step": 14303, "training_step_time": 0.10502123832702637 }, { "epoch": 2.1826171875e-05, "model_forward_time": 0.025810718536376953, "step": 14304 }, { "epoch": 2.1826171875e-05, "step": 14304, "training_step_time": 0.10597395896911621 }, { "epoch": 2.182769775390625e-05, "model_forward_time": 0.0251920223236084, "step": 14305 }, { "epoch": 2.182769775390625e-05, "step": 14305, "training_step_time": 0.10492849349975586 }, { "epoch": 2.18292236328125e-05, "model_forward_time": 0.025177717208862305, "step": 14306 }, { "epoch": 2.18292236328125e-05, "step": 14306, "training_step_time": 0.10402321815490723 }, { "epoch": 2.183074951171875e-05, "model_forward_time": 0.02519512176513672, "step": 14307 }, { "epoch": 2.183074951171875e-05, "step": 14307, "training_step_time": 0.10391068458557129 }, { "epoch": 2.1832275390625e-05, "model_forward_time": 0.025062084197998047, "step": 14308 }, { "epoch": 2.1832275390625e-05, "step": 14308, "training_step_time": 0.1046140193939209 }, { "epoch": 2.183380126953125e-05, "model_forward_time": 0.024808645248413086, "step": 14309 }, { "epoch": 2.183380126953125e-05, "step": 14309, "training_step_time": 0.10811448097229004 }, { "epoch": 2.18353271484375e-05, "grad_norm": 0.40620726346969604, "learning_rate": 5.7903368165680327e-05, "loss": 0.0151, "step": 14310 }, { "epoch": 2.18353271484375e-05, "model_forward_time": 0.025340795516967773, "step": 14310 }, { "epoch": 2.18353271484375e-05, "step": 14310, "training_step_time": 0.11503791809082031 }, { "epoch": 2.183685302734375e-05, "model_forward_time": 0.02547907829284668, "step": 14311 }, { "epoch": 2.183685302734375e-05, "step": 14311, "training_step_time": 0.11793303489685059 }, { "epoch": 2.183837890625e-05, "model_forward_time": 0.026047229766845703, "step": 14312 }, { "epoch": 2.183837890625e-05, "step": 14312, "training_step_time": 0.11669039726257324 }, { "epoch": 2.183990478515625e-05, "model_forward_time": 0.02695488929748535, "step": 14313 }, { "epoch": 2.183990478515625e-05, "step": 14313, "training_step_time": 0.11562633514404297 }, { "epoch": 2.18414306640625e-05, "model_forward_time": 0.025776386260986328, "step": 14314 }, { "epoch": 2.18414306640625e-05, "step": 14314, "training_step_time": 0.11435222625732422 }, { "epoch": 2.184295654296875e-05, "model_forward_time": 0.025081396102905273, "step": 14315 }, { "epoch": 2.184295654296875e-05, "step": 14315, "training_step_time": 0.11242008209228516 }, { "epoch": 2.1844482421875e-05, "model_forward_time": 0.025320053100585938, "step": 14316 }, { "epoch": 2.1844482421875e-05, "step": 14316, "training_step_time": 0.16251707077026367 }, { "epoch": 2.184600830078125e-05, "model_forward_time": 0.02516627311706543, "step": 14317 }, { "epoch": 2.184600830078125e-05, "step": 14317, "training_step_time": 0.14265227317810059 }, { "epoch": 2.18475341796875e-05, "model_forward_time": 0.02472686767578125, "step": 14318 }, { "epoch": 2.18475341796875e-05, "step": 14318, "training_step_time": 0.11267542839050293 }, { "epoch": 2.184906005859375e-05, "model_forward_time": 0.0250852108001709, "step": 14319 }, { "epoch": 2.184906005859375e-05, "step": 14319, "training_step_time": 0.11116647720336914 }, { "epoch": 2.18505859375e-05, "grad_norm": 0.3635031282901764, "learning_rate": 5.7848940610291125e-05, "loss": 0.0301, "step": 14320 }, { "epoch": 2.18505859375e-05, "model_forward_time": 0.025061368942260742, "step": 14320 }, { "epoch": 2.18505859375e-05, "step": 14320, "training_step_time": 0.10956621170043945 }, { "epoch": 2.185211181640625e-05, "model_forward_time": 0.02559638023376465, "step": 14321 }, { "epoch": 2.185211181640625e-05, "step": 14321, "training_step_time": 0.18656277656555176 }, { "epoch": 2.18536376953125e-05, "model_forward_time": 0.024847030639648438, "step": 14322 }, { "epoch": 2.18536376953125e-05, "step": 14322, "training_step_time": 0.11123037338256836 }, { "epoch": 2.185516357421875e-05, "model_forward_time": 0.02480316162109375, "step": 14323 }, { "epoch": 2.185516357421875e-05, "step": 14323, "training_step_time": 0.11948442459106445 }, { "epoch": 2.1856689453125e-05, "model_forward_time": 0.02577352523803711, "step": 14324 }, { "epoch": 2.1856689453125e-05, "step": 14324, "training_step_time": 0.12388753890991211 }, { "epoch": 2.185821533203125e-05, "model_forward_time": 0.025455713272094727, "step": 14325 }, { "epoch": 2.185821533203125e-05, "step": 14325, "training_step_time": 0.11904740333557129 }, { "epoch": 2.18597412109375e-05, "model_forward_time": 0.025624990463256836, "step": 14326 }, { "epoch": 2.18597412109375e-05, "step": 14326, "training_step_time": 0.12269425392150879 }, { "epoch": 2.186126708984375e-05, "model_forward_time": 0.025709152221679688, "step": 14327 }, { "epoch": 2.186126708984375e-05, "step": 14327, "training_step_time": 0.10557889938354492 }, { "epoch": 2.186279296875e-05, "model_forward_time": 0.02441692352294922, "step": 14328 }, { "epoch": 2.186279296875e-05, "step": 14328, "training_step_time": 0.10581493377685547 }, { "epoch": 2.186431884765625e-05, "model_forward_time": 0.025171756744384766, "step": 14329 }, { "epoch": 2.186431884765625e-05, "step": 14329, "training_step_time": 0.17633891105651855 }, { "epoch": 2.18658447265625e-05, "grad_norm": 0.28926295042037964, "learning_rate": 5.7794503517698974e-05, "loss": 0.0138, "step": 14330 }, { "epoch": 2.18658447265625e-05, "model_forward_time": 0.024778127670288086, "step": 14330 }, { "epoch": 2.18658447265625e-05, "step": 14330, "training_step_time": 0.1089165210723877 }, { "epoch": 2.186737060546875e-05, "model_forward_time": 0.0248720645904541, "step": 14331 }, { "epoch": 2.186737060546875e-05, "step": 14331, "training_step_time": 0.11307716369628906 }, { "epoch": 2.1868896484375e-05, "model_forward_time": 0.025746822357177734, "step": 14332 }, { "epoch": 2.1868896484375e-05, "step": 14332, "training_step_time": 0.10546302795410156 }, { "epoch": 2.187042236328125e-05, "model_forward_time": 0.025802135467529297, "step": 14333 }, { "epoch": 2.187042236328125e-05, "step": 14333, "training_step_time": 0.11277556419372559 }, { "epoch": 2.18719482421875e-05, "model_forward_time": 0.025612831115722656, "step": 14334 }, { "epoch": 2.18719482421875e-05, "step": 14334, "training_step_time": 0.12517523765563965 }, { "epoch": 2.187347412109375e-05, "model_forward_time": 0.0253753662109375, "step": 14335 }, { "epoch": 2.187347412109375e-05, "step": 14335, "training_step_time": 0.1884758472442627 }, { "epoch": 2.1875e-05, "model_forward_time": 0.024528980255126953, "step": 14336 }, { "epoch": 2.1875e-05, "step": 14336, "training_step_time": 0.12221574783325195 }, { "epoch": 2.187652587890625e-05, "model_forward_time": 0.024448394775390625, "step": 14337 }, { "epoch": 2.187652587890625e-05, "step": 14337, "training_step_time": 0.13874363899230957 }, { "epoch": 2.18780517578125e-05, "model_forward_time": 0.024963855743408203, "step": 14338 }, { "epoch": 2.18780517578125e-05, "step": 14338, "training_step_time": 0.15686941146850586 }, { "epoch": 2.187957763671875e-05, "model_forward_time": 0.02448725700378418, "step": 14339 }, { "epoch": 2.187957763671875e-05, "step": 14339, "training_step_time": 0.11893534660339355 }, { "epoch": 2.1881103515625e-05, "grad_norm": 0.23410135507583618, "learning_rate": 5.7740056954050084e-05, "loss": 0.0163, "step": 14340 }, { "epoch": 2.1881103515625e-05, "model_forward_time": 0.02789139747619629, "step": 14340 }, { "epoch": 2.1881103515625e-05, "step": 14340, "training_step_time": 0.11692929267883301 }, { "epoch": 2.188262939453125e-05, "model_forward_time": 0.02580571174621582, "step": 14341 }, { "epoch": 2.188262939453125e-05, "step": 14341, "training_step_time": 0.1131293773651123 }, { "epoch": 2.18841552734375e-05, "model_forward_time": 0.025181055068969727, "step": 14342 }, { "epoch": 2.18841552734375e-05, "step": 14342, "training_step_time": 0.10871624946594238 }, { "epoch": 2.188568115234375e-05, "model_forward_time": 0.025333404541015625, "step": 14343 }, { "epoch": 2.188568115234375e-05, "step": 14343, "training_step_time": 0.10631489753723145 }, { "epoch": 2.188720703125e-05, "model_forward_time": 0.02524280548095703, "step": 14344 }, { "epoch": 2.188720703125e-05, "step": 14344, "training_step_time": 0.10632920265197754 }, { "epoch": 2.188873291015625e-05, "model_forward_time": 0.025287628173828125, "step": 14345 }, { "epoch": 2.188873291015625e-05, "step": 14345, "training_step_time": 0.1079554557800293 }, { "epoch": 2.18902587890625e-05, "model_forward_time": 0.025325298309326172, "step": 14346 }, { "epoch": 2.18902587890625e-05, "step": 14346, "training_step_time": 0.10594439506530762 }, { "epoch": 2.189178466796875e-05, "model_forward_time": 0.026143789291381836, "step": 14347 }, { "epoch": 2.189178466796875e-05, "step": 14347, "training_step_time": 0.10685396194458008 }, { "epoch": 2.1893310546875e-05, "model_forward_time": 0.025084733963012695, "step": 14348 }, { "epoch": 2.1893310546875e-05, "step": 14348, "training_step_time": 0.11030387878417969 }, { "epoch": 2.189483642578125e-05, "model_forward_time": 0.025169849395751953, "step": 14349 }, { "epoch": 2.189483642578125e-05, "step": 14349, "training_step_time": 0.10748672485351562 }, { "epoch": 2.18963623046875e-05, "grad_norm": 0.4476127028465271, "learning_rate": 5.768560098550213e-05, "loss": 0.0159, "step": 14350 }, { "epoch": 2.18963623046875e-05, "model_forward_time": 0.025304555892944336, "step": 14350 }, { "epoch": 2.18963623046875e-05, "step": 14350, "training_step_time": 0.10526037216186523 }, { "epoch": 2.189788818359375e-05, "model_forward_time": 0.02551102638244629, "step": 14351 }, { "epoch": 2.189788818359375e-05, "step": 14351, "training_step_time": 0.10544323921203613 }, { "epoch": 2.18994140625e-05, "model_forward_time": 0.025460481643676758, "step": 14352 }, { "epoch": 2.18994140625e-05, "step": 14352, "training_step_time": 0.1069488525390625 }, { "epoch": 2.190093994140625e-05, "model_forward_time": 0.025208473205566406, "step": 14353 }, { "epoch": 2.190093994140625e-05, "step": 14353, "training_step_time": 0.10629510879516602 }, { "epoch": 2.19024658203125e-05, "model_forward_time": 0.025322437286376953, "step": 14354 }, { "epoch": 2.19024658203125e-05, "step": 14354, "training_step_time": 0.10442280769348145 }, { "epoch": 2.190399169921875e-05, "model_forward_time": 0.025219202041625977, "step": 14355 }, { "epoch": 2.190399169921875e-05, "step": 14355, "training_step_time": 0.10483694076538086 }, { "epoch": 2.1905517578125e-05, "model_forward_time": 0.025483131408691406, "step": 14356 }, { "epoch": 2.1905517578125e-05, "step": 14356, "training_step_time": 0.1047523021697998 }, { "epoch": 2.190704345703125e-05, "model_forward_time": 0.027733325958251953, "step": 14357 }, { "epoch": 2.190704345703125e-05, "step": 14357, "training_step_time": 0.10677051544189453 }, { "epoch": 2.19085693359375e-05, "model_forward_time": 0.025628089904785156, "step": 14358 }, { "epoch": 2.19085693359375e-05, "step": 14358, "training_step_time": 0.11213159561157227 }, { "epoch": 2.191009521484375e-05, "model_forward_time": 0.0253298282623291, "step": 14359 }, { "epoch": 2.191009521484375e-05, "step": 14359, "training_step_time": 0.10497879981994629 }, { "epoch": 2.191162109375e-05, "grad_norm": 0.2515600621700287, "learning_rate": 5.763113567822429e-05, "loss": 0.0113, "step": 14360 }, { "epoch": 2.191162109375e-05, "model_forward_time": 0.025327444076538086, "step": 14360 }, { "epoch": 2.191162109375e-05, "step": 14360, "training_step_time": 0.1047203540802002 }, { "epoch": 2.191314697265625e-05, "model_forward_time": 0.02570939064025879, "step": 14361 }, { "epoch": 2.191314697265625e-05, "step": 14361, "training_step_time": 0.10623335838317871 }, { "epoch": 2.19146728515625e-05, "model_forward_time": 0.025732755661010742, "step": 14362 }, { "epoch": 2.19146728515625e-05, "step": 14362, "training_step_time": 0.10715031623840332 }, { "epoch": 2.191619873046875e-05, "model_forward_time": 0.025213003158569336, "step": 14363 }, { "epoch": 2.191619873046875e-05, "step": 14363, "training_step_time": 0.10350799560546875 }, { "epoch": 2.1917724609375e-05, "model_forward_time": 0.025738239288330078, "step": 14364 }, { "epoch": 2.1917724609375e-05, "step": 14364, "training_step_time": 0.18706274032592773 }, { "epoch": 2.191925048828125e-05, "model_forward_time": 0.026445388793945312, "step": 14365 }, { "epoch": 2.191925048828125e-05, "step": 14365, "training_step_time": 0.1490159034729004 }, { "epoch": 2.19207763671875e-05, "model_forward_time": 0.024654388427734375, "step": 14366 }, { "epoch": 2.19207763671875e-05, "step": 14366, "training_step_time": 0.10743904113769531 }, { "epoch": 2.192230224609375e-05, "model_forward_time": 0.02534770965576172, "step": 14367 }, { "epoch": 2.192230224609375e-05, "step": 14367, "training_step_time": 0.1786022186279297 }, { "epoch": 2.1923828125e-05, "model_forward_time": 0.024702072143554688, "step": 14368 }, { "epoch": 2.1923828125e-05, "step": 14368, "training_step_time": 0.15403366088867188 }, { "epoch": 2.192535400390625e-05, "model_forward_time": 0.024469614028930664, "step": 14369 }, { "epoch": 2.192535400390625e-05, "step": 14369, "training_step_time": 0.16647958755493164 }, { "epoch": 2.19268798828125e-05, "grad_norm": 0.21295957267284393, "learning_rate": 5.757666109839702e-05, "loss": 0.0159, "step": 14370 }, { "epoch": 2.19268798828125e-05, "model_forward_time": 0.024891138076782227, "step": 14370 }, { "epoch": 2.19268798828125e-05, "step": 14370, "training_step_time": 0.19384121894836426 }, { "epoch": 2.192840576171875e-05, "model_forward_time": 0.02559804916381836, "step": 14371 }, { "epoch": 2.192840576171875e-05, "step": 14371, "training_step_time": 0.12395977973937988 }, { "epoch": 2.1929931640625e-05, "model_forward_time": 0.02475285530090332, "step": 14372 }, { "epoch": 2.1929931640625e-05, "step": 14372, "training_step_time": 0.11702227592468262 }, { "epoch": 2.193145751953125e-05, "model_forward_time": 0.02573680877685547, "step": 14373 }, { "epoch": 2.193145751953125e-05, "step": 14373, "training_step_time": 0.11294937133789062 }, { "epoch": 2.19329833984375e-05, "model_forward_time": 0.025687694549560547, "step": 14374 }, { "epoch": 2.19329833984375e-05, "step": 14374, "training_step_time": 0.10483384132385254 }, { "epoch": 2.193450927734375e-05, "model_forward_time": 0.025590896606445312, "step": 14375 }, { "epoch": 2.193450927734375e-05, "step": 14375, "training_step_time": 0.21607661247253418 }, { "epoch": 2.193603515625e-05, "model_forward_time": 0.026109933853149414, "step": 14376 }, { "epoch": 2.193603515625e-05, "step": 14376, "training_step_time": 0.14435935020446777 }, { "epoch": 2.193756103515625e-05, "model_forward_time": 0.02444601058959961, "step": 14377 }, { "epoch": 2.193756103515625e-05, "step": 14377, "training_step_time": 0.2158801555633545 }, { "epoch": 2.19390869140625e-05, "model_forward_time": 0.025578975677490234, "step": 14378 }, { "epoch": 2.19390869140625e-05, "step": 14378, "training_step_time": 0.19213509559631348 }, { "epoch": 2.194061279296875e-05, "model_forward_time": 0.024686574935913086, "step": 14379 }, { "epoch": 2.194061279296875e-05, "step": 14379, "training_step_time": 0.17601871490478516 }, { "epoch": 2.1942138671875e-05, "grad_norm": 0.34961214661598206, "learning_rate": 5.75221773122121e-05, "loss": 0.0146, "step": 14380 }, { "epoch": 2.1942138671875e-05, "model_forward_time": 0.024730920791625977, "step": 14380 }, { "epoch": 2.1942138671875e-05, "step": 14380, "training_step_time": 0.18546605110168457 }, { "epoch": 2.194366455078125e-05, "model_forward_time": 0.02497553825378418, "step": 14381 }, { "epoch": 2.194366455078125e-05, "step": 14381, "training_step_time": 0.13523101806640625 }, { "epoch": 2.19451904296875e-05, "model_forward_time": 0.02430272102355957, "step": 14382 }, { "epoch": 2.19451904296875e-05, "step": 14382, "training_step_time": 0.1349170207977295 }, { "epoch": 2.194671630859375e-05, "model_forward_time": 0.02502274513244629, "step": 14383 }, { "epoch": 2.194671630859375e-05, "step": 14383, "training_step_time": 0.18671965599060059 }, { "epoch": 2.19482421875e-05, "model_forward_time": 0.02443218231201172, "step": 14384 }, { "epoch": 2.19482421875e-05, "step": 14384, "training_step_time": 0.12279272079467773 }, { "epoch": 2.194976806640625e-05, "model_forward_time": 0.0236661434173584, "step": 14385 }, { "epoch": 2.194976806640625e-05, "step": 14385, "training_step_time": 0.11693382263183594 }, { "epoch": 2.19512939453125e-05, "model_forward_time": 0.02473163604736328, "step": 14386 }, { "epoch": 2.19512939453125e-05, "step": 14386, "training_step_time": 0.11201095581054688 }, { "epoch": 2.195281982421875e-05, "model_forward_time": 0.02422785758972168, "step": 14387 }, { "epoch": 2.195281982421875e-05, "step": 14387, "training_step_time": 0.11353468894958496 }, { "epoch": 2.1954345703125e-05, "model_forward_time": 0.02459406852722168, "step": 14388 }, { "epoch": 2.1954345703125e-05, "step": 14388, "training_step_time": 0.10876941680908203 }, { "epoch": 2.195587158203125e-05, "model_forward_time": 0.02595233917236328, "step": 14389 }, { "epoch": 2.195587158203125e-05, "step": 14389, "training_step_time": 0.11036038398742676 }, { "epoch": 2.19573974609375e-05, "grad_norm": 0.31873825192451477, "learning_rate": 5.746768438587245e-05, "loss": 0.0258, "step": 14390 }, { "epoch": 2.19573974609375e-05, "model_forward_time": 0.025621414184570312, "step": 14390 }, { "epoch": 2.19573974609375e-05, "step": 14390, "training_step_time": 0.10696029663085938 }, { "epoch": 2.195892333984375e-05, "model_forward_time": 0.025578022003173828, "step": 14391 }, { "epoch": 2.195892333984375e-05, "step": 14391, "training_step_time": 0.10752630233764648 }, { "epoch": 2.196044921875e-05, "model_forward_time": 0.025530576705932617, "step": 14392 }, { "epoch": 2.196044921875e-05, "step": 14392, "training_step_time": 0.10671806335449219 }, { "epoch": 2.196197509765625e-05, "model_forward_time": 0.025248050689697266, "step": 14393 }, { "epoch": 2.196197509765625e-05, "step": 14393, "training_step_time": 0.1077568531036377 }, { "epoch": 2.19635009765625e-05, "model_forward_time": 0.02564096450805664, "step": 14394 }, { "epoch": 2.19635009765625e-05, "step": 14394, "training_step_time": 0.11052298545837402 }, { "epoch": 2.196502685546875e-05, "model_forward_time": 0.025376081466674805, "step": 14395 }, { "epoch": 2.196502685546875e-05, "step": 14395, "training_step_time": 0.11121463775634766 }, { "epoch": 2.1966552734375e-05, "model_forward_time": 0.025179386138916016, "step": 14396 }, { "epoch": 2.1966552734375e-05, "step": 14396, "training_step_time": 0.10913324356079102 }, { "epoch": 2.196807861328125e-05, "model_forward_time": 0.025421857833862305, "step": 14397 }, { "epoch": 2.196807861328125e-05, "step": 14397, "training_step_time": 0.10636591911315918 }, { "epoch": 2.19696044921875e-05, "model_forward_time": 0.025507688522338867, "step": 14398 }, { "epoch": 2.19696044921875e-05, "step": 14398, "training_step_time": 0.10549569129943848 }, { "epoch": 2.197113037109375e-05, "model_forward_time": 0.02544713020324707, "step": 14399 }, { "epoch": 2.197113037109375e-05, "step": 14399, "training_step_time": 0.11061692237854004 }, { "epoch": 2.197265625e-05, "grad_norm": 0.28007107973098755, "learning_rate": 5.74131823855921e-05, "loss": 0.0169, "step": 14400 }, { "epoch": 2.197265625e-05, "model_forward_time": 0.024945974349975586, "step": 14400 }, { "epoch": 2.197265625e-05, "step": 14400, "training_step_time": 0.11136388778686523 }, { "epoch": 2.197418212890625e-05, "model_forward_time": 0.02496790885925293, "step": 14401 }, { "epoch": 2.197418212890625e-05, "step": 14401, "training_step_time": 0.1042335033416748 }, { "epoch": 2.19757080078125e-05, "model_forward_time": 0.025563716888427734, "step": 14402 }, { "epoch": 2.19757080078125e-05, "step": 14402, "training_step_time": 0.10403132438659668 }, { "epoch": 2.197723388671875e-05, "model_forward_time": 0.02575230598449707, "step": 14403 }, { "epoch": 2.197723388671875e-05, "step": 14403, "training_step_time": 0.1058199405670166 }, { "epoch": 2.1978759765625e-05, "model_forward_time": 0.025254011154174805, "step": 14404 }, { "epoch": 2.1978759765625e-05, "step": 14404, "training_step_time": 0.10463547706604004 }, { "epoch": 2.198028564453125e-05, "model_forward_time": 0.025304555892944336, "step": 14405 }, { "epoch": 2.198028564453125e-05, "step": 14405, "training_step_time": 0.1062014102935791 }, { "epoch": 2.19818115234375e-05, "model_forward_time": 0.02547478675842285, "step": 14406 }, { "epoch": 2.19818115234375e-05, "step": 14406, "training_step_time": 0.10538983345031738 }, { "epoch": 2.198333740234375e-05, "model_forward_time": 0.02555251121520996, "step": 14407 }, { "epoch": 2.198333740234375e-05, "step": 14407, "training_step_time": 0.10567712783813477 }, { "epoch": 2.198486328125e-05, "model_forward_time": 0.025917768478393555, "step": 14408 }, { "epoch": 2.198486328125e-05, "step": 14408, "training_step_time": 0.12190699577331543 }, { "epoch": 2.198638916015625e-05, "model_forward_time": 0.025279998779296875, "step": 14409 }, { "epoch": 2.198638916015625e-05, "step": 14409, "training_step_time": 0.13457822799682617 }, { "epoch": 2.19879150390625e-05, "grad_norm": 0.32994768023490906, "learning_rate": 5.735867137759615e-05, "loss": 0.0147, "step": 14410 }, { "epoch": 2.19879150390625e-05, "model_forward_time": 0.025104045867919922, "step": 14410 }, { "epoch": 2.19879150390625e-05, "step": 14410, "training_step_time": 0.1033792495727539 }, { "epoch": 2.198944091796875e-05, "model_forward_time": 0.025578022003173828, "step": 14411 }, { "epoch": 2.198944091796875e-05, "step": 14411, "training_step_time": 0.12581372261047363 }, { "epoch": 2.1990966796875e-05, "model_forward_time": 0.026106595993041992, "step": 14412 }, { "epoch": 2.1990966796875e-05, "step": 14412, "training_step_time": 0.11005711555480957 }, { "epoch": 2.199249267578125e-05, "model_forward_time": 0.025022268295288086, "step": 14413 }, { "epoch": 2.199249267578125e-05, "step": 14413, "training_step_time": 0.20831060409545898 }, { "epoch": 2.19940185546875e-05, "model_forward_time": 0.023916006088256836, "step": 14414 }, { "epoch": 2.19940185546875e-05, "step": 14414, "training_step_time": 0.11057806015014648 }, { "epoch": 2.199554443359375e-05, "model_forward_time": 0.02718067169189453, "step": 14415 }, { "epoch": 2.199554443359375e-05, "step": 14415, "training_step_time": 0.10711979866027832 }, { "epoch": 2.19970703125e-05, "model_forward_time": 0.025145292282104492, "step": 14416 }, { "epoch": 2.19970703125e-05, "step": 14416, "training_step_time": 0.12464451789855957 }, { "epoch": 2.199859619140625e-05, "model_forward_time": 0.025227069854736328, "step": 14417 }, { "epoch": 2.199859619140625e-05, "step": 14417, "training_step_time": 0.13682222366333008 }, { "epoch": 2.20001220703125e-05, "model_forward_time": 0.025318384170532227, "step": 14418 }, { "epoch": 2.20001220703125e-05, "step": 14418, "training_step_time": 0.10161757469177246 }, { "epoch": 2.200164794921875e-05, "model_forward_time": 0.02474236488342285, "step": 14419 }, { "epoch": 2.200164794921875e-05, "step": 14419, "training_step_time": 0.1598207950592041 }, { "epoch": 2.2003173828125e-05, "grad_norm": 0.12650427222251892, "learning_rate": 5.730415142812059e-05, "loss": 0.0125, "step": 14420 }, { "epoch": 2.2003173828125e-05, "model_forward_time": 0.025153636932373047, "step": 14420 }, { "epoch": 2.2003173828125e-05, "step": 14420, "training_step_time": 0.1709437370300293 }, { "epoch": 2.200469970703125e-05, "model_forward_time": 0.02468729019165039, "step": 14421 }, { "epoch": 2.200469970703125e-05, "step": 14421, "training_step_time": 0.11967992782592773 }, { "epoch": 2.20062255859375e-05, "model_forward_time": 0.024868249893188477, "step": 14422 }, { "epoch": 2.20062255859375e-05, "step": 14422, "training_step_time": 0.16941547393798828 }, { "epoch": 2.200775146484375e-05, "model_forward_time": 0.0249025821685791, "step": 14423 }, { "epoch": 2.200775146484375e-05, "step": 14423, "training_step_time": 0.16129016876220703 }, { "epoch": 2.200927734375e-05, "model_forward_time": 0.024200916290283203, "step": 14424 }, { "epoch": 2.200927734375e-05, "step": 14424, "training_step_time": 0.10393476486206055 }, { "epoch": 2.201080322265625e-05, "model_forward_time": 0.02512335777282715, "step": 14425 }, { "epoch": 2.201080322265625e-05, "step": 14425, "training_step_time": 0.15414047241210938 }, { "epoch": 2.20123291015625e-05, "model_forward_time": 0.024946928024291992, "step": 14426 }, { "epoch": 2.20123291015625e-05, "step": 14426, "training_step_time": 0.11195707321166992 }, { "epoch": 2.201385498046875e-05, "model_forward_time": 0.025065183639526367, "step": 14427 }, { "epoch": 2.201385498046875e-05, "step": 14427, "training_step_time": 0.19071578979492188 }, { "epoch": 2.2015380859375e-05, "model_forward_time": 0.024942874908447266, "step": 14428 }, { "epoch": 2.2015380859375e-05, "step": 14428, "training_step_time": 0.19861268997192383 }, { "epoch": 2.201690673828125e-05, "model_forward_time": 0.024544239044189453, "step": 14429 }, { "epoch": 2.201690673828125e-05, "step": 14429, "training_step_time": 0.12487411499023438 }, { "epoch": 2.20184326171875e-05, "grad_norm": 0.35339513421058655, "learning_rate": 5.72496226034123e-05, "loss": 0.0149, "step": 14430 }, { "epoch": 2.20184326171875e-05, "model_forward_time": 0.025171518325805664, "step": 14430 }, { "epoch": 2.20184326171875e-05, "step": 14430, "training_step_time": 0.11470937728881836 }, { "epoch": 2.201995849609375e-05, "model_forward_time": 0.025522470474243164, "step": 14431 }, { "epoch": 2.201995849609375e-05, "step": 14431, "training_step_time": 0.10373687744140625 }, { "epoch": 2.2021484375e-05, "model_forward_time": 0.025337934494018555, "step": 14432 }, { "epoch": 2.2021484375e-05, "step": 14432, "training_step_time": 0.10462212562561035 }, { "epoch": 2.202301025390625e-05, "model_forward_time": 0.02532219886779785, "step": 14433 }, { "epoch": 2.202301025390625e-05, "step": 14433, "training_step_time": 0.10544323921203613 }, { "epoch": 2.20245361328125e-05, "model_forward_time": 0.025471210479736328, "step": 14434 }, { "epoch": 2.20245361328125e-05, "step": 14434, "training_step_time": 0.1040809154510498 }, { "epoch": 2.202606201171875e-05, "model_forward_time": 0.02524423599243164, "step": 14435 }, { "epoch": 2.202606201171875e-05, "step": 14435, "training_step_time": 0.1081094741821289 }, { "epoch": 2.2027587890625e-05, "model_forward_time": 0.02560710906982422, "step": 14436 }, { "epoch": 2.2027587890625e-05, "step": 14436, "training_step_time": 0.1131441593170166 }, { "epoch": 2.202911376953125e-05, "model_forward_time": 0.02537822723388672, "step": 14437 }, { "epoch": 2.202911376953125e-05, "step": 14437, "training_step_time": 0.16233253479003906 }, { "epoch": 2.20306396484375e-05, "model_forward_time": 0.024821996688842773, "step": 14438 }, { "epoch": 2.20306396484375e-05, "step": 14438, "training_step_time": 0.19222259521484375 }, { "epoch": 2.203216552734375e-05, "model_forward_time": 0.024534940719604492, "step": 14439 }, { "epoch": 2.203216552734375e-05, "step": 14439, "training_step_time": 0.18992280960083008 }, { "epoch": 2.203369140625e-05, "grad_norm": 0.3374268710613251, "learning_rate": 5.719508496972896e-05, "loss": 0.0126, "step": 14440 }, { "epoch": 2.203369140625e-05, "model_forward_time": 0.024710655212402344, "step": 14440 }, { "epoch": 2.203369140625e-05, "step": 14440, "training_step_time": 0.1766221523284912 }, { "epoch": 2.203521728515625e-05, "model_forward_time": 0.024690628051757812, "step": 14441 }, { "epoch": 2.203521728515625e-05, "step": 14441, "training_step_time": 0.1760387420654297 }, { "epoch": 2.20367431640625e-05, "model_forward_time": 0.024201631546020508, "step": 14442 }, { "epoch": 2.20367431640625e-05, "step": 14442, "training_step_time": 0.1626882553100586 }, { "epoch": 2.203826904296875e-05, "model_forward_time": 0.024474620819091797, "step": 14443 }, { "epoch": 2.203826904296875e-05, "step": 14443, "training_step_time": 0.14006662368774414 }, { "epoch": 2.2039794921875e-05, "model_forward_time": 0.02454209327697754, "step": 14444 }, { "epoch": 2.2039794921875e-05, "step": 14444, "training_step_time": 0.12909150123596191 }, { "epoch": 2.204132080078125e-05, "model_forward_time": 0.02589726448059082, "step": 14445 }, { "epoch": 2.204132080078125e-05, "step": 14445, "training_step_time": 0.12542128562927246 }, { "epoch": 2.20428466796875e-05, "model_forward_time": 0.02580404281616211, "step": 14446 }, { "epoch": 2.20428466796875e-05, "step": 14446, "training_step_time": 0.12280154228210449 }, { "epoch": 2.204437255859375e-05, "model_forward_time": 0.02495098114013672, "step": 14447 }, { "epoch": 2.204437255859375e-05, "step": 14447, "training_step_time": 0.1176149845123291 }, { "epoch": 2.20458984375e-05, "model_forward_time": 0.026503562927246094, "step": 14448 }, { "epoch": 2.20458984375e-05, "step": 14448, "training_step_time": 0.1156005859375 }, { "epoch": 2.204742431640625e-05, "model_forward_time": 0.02577829360961914, "step": 14449 }, { "epoch": 2.204742431640625e-05, "step": 14449, "training_step_time": 0.15075325965881348 }, { "epoch": 2.20489501953125e-05, "grad_norm": 0.3030347228050232, "learning_rate": 5.714053859333893e-05, "loss": 0.0149, "step": 14450 }, { "epoch": 2.20489501953125e-05, "model_forward_time": 0.02609705924987793, "step": 14450 }, { "epoch": 2.20489501953125e-05, "step": 14450, "training_step_time": 0.11271262168884277 }, { "epoch": 2.205047607421875e-05, "model_forward_time": 0.024673938751220703, "step": 14451 }, { "epoch": 2.205047607421875e-05, "step": 14451, "training_step_time": 0.1802356243133545 }, { "epoch": 2.2052001953125e-05, "model_forward_time": 0.024587392807006836, "step": 14452 }, { "epoch": 2.2052001953125e-05, "step": 14452, "training_step_time": 0.15288209915161133 }, { "epoch": 2.205352783203125e-05, "model_forward_time": 0.024260520935058594, "step": 14453 }, { "epoch": 2.205352783203125e-05, "step": 14453, "training_step_time": 0.11077260971069336 }, { "epoch": 2.20550537109375e-05, "model_forward_time": 0.025082111358642578, "step": 14454 }, { "epoch": 2.20550537109375e-05, "step": 14454, "training_step_time": 0.1906116008758545 }, { "epoch": 2.205657958984375e-05, "model_forward_time": 0.02485799789428711, "step": 14455 }, { "epoch": 2.205657958984375e-05, "step": 14455, "training_step_time": 0.10830879211425781 }, { "epoch": 2.205810546875e-05, "model_forward_time": 0.024625778198242188, "step": 14456 }, { "epoch": 2.205810546875e-05, "step": 14456, "training_step_time": 0.11713552474975586 }, { "epoch": 2.205963134765625e-05, "model_forward_time": 0.025302886962890625, "step": 14457 }, { "epoch": 2.205963134765625e-05, "step": 14457, "training_step_time": 0.12384819984436035 }, { "epoch": 2.20611572265625e-05, "model_forward_time": 0.025283098220825195, "step": 14458 }, { "epoch": 2.20611572265625e-05, "step": 14458, "training_step_time": 0.12203145027160645 }, { "epoch": 2.206268310546875e-05, "model_forward_time": 0.024929523468017578, "step": 14459 }, { "epoch": 2.206268310546875e-05, "step": 14459, "training_step_time": 0.1062629222869873 }, { "epoch": 2.2064208984375e-05, "grad_norm": 0.23954321444034576, "learning_rate": 5.7085983540521216e-05, "loss": 0.0164, "step": 14460 }, { "epoch": 2.2064208984375e-05, "model_forward_time": 0.024916648864746094, "step": 14460 }, { "epoch": 2.2064208984375e-05, "step": 14460, "training_step_time": 0.14684629440307617 }, { "epoch": 2.206573486328125e-05, "model_forward_time": 0.025134801864624023, "step": 14461 }, { "epoch": 2.206573486328125e-05, "step": 14461, "training_step_time": 0.1693110466003418 }, { "epoch": 2.20672607421875e-05, "model_forward_time": 0.024710893630981445, "step": 14462 }, { "epoch": 2.20672607421875e-05, "step": 14462, "training_step_time": 0.15579795837402344 }, { "epoch": 2.206878662109375e-05, "model_forward_time": 0.02496933937072754, "step": 14463 }, { "epoch": 2.206878662109375e-05, "step": 14463, "training_step_time": 0.12489008903503418 }, { "epoch": 2.20703125e-05, "model_forward_time": 0.024796724319458008, "step": 14464 }, { "epoch": 2.20703125e-05, "step": 14464, "training_step_time": 0.11968135833740234 }, { "epoch": 2.207183837890625e-05, "model_forward_time": 0.02559494972229004, "step": 14465 }, { "epoch": 2.207183837890625e-05, "step": 14465, "training_step_time": 0.11547517776489258 }, { "epoch": 2.20733642578125e-05, "model_forward_time": 0.025423765182495117, "step": 14466 }, { "epoch": 2.20733642578125e-05, "step": 14466, "training_step_time": 0.1246178150177002 }, { "epoch": 2.207489013671875e-05, "model_forward_time": 0.025397777557373047, "step": 14467 }, { "epoch": 2.207489013671875e-05, "step": 14467, "training_step_time": 0.1491076946258545 }, { "epoch": 2.2076416015625e-05, "model_forward_time": 0.02490520477294922, "step": 14468 }, { "epoch": 2.2076416015625e-05, "step": 14468, "training_step_time": 0.1610124111175537 }, { "epoch": 2.207794189453125e-05, "model_forward_time": 0.024445295333862305, "step": 14469 }, { "epoch": 2.207794189453125e-05, "step": 14469, "training_step_time": 0.143110990524292 }, { "epoch": 2.20794677734375e-05, "grad_norm": 0.27757343649864197, "learning_rate": 5.7031419877565317e-05, "loss": 0.0192, "step": 14470 }, { "epoch": 2.20794677734375e-05, "model_forward_time": 0.02394390106201172, "step": 14470 }, { "epoch": 2.20794677734375e-05, "step": 14470, "training_step_time": 0.20659661293029785 }, { "epoch": 2.208099365234375e-05, "model_forward_time": 0.02458024024963379, "step": 14471 }, { "epoch": 2.208099365234375e-05, "step": 14471, "training_step_time": 0.1291954517364502 }, { "epoch": 2.208251953125e-05, "model_forward_time": 0.024632930755615234, "step": 14472 }, { "epoch": 2.208251953125e-05, "step": 14472, "training_step_time": 0.12108206748962402 }, { "epoch": 2.208404541015625e-05, "model_forward_time": 0.02512335777282715, "step": 14473 }, { "epoch": 2.208404541015625e-05, "step": 14473, "training_step_time": 0.10269308090209961 }, { "epoch": 2.20855712890625e-05, "model_forward_time": 0.02545475959777832, "step": 14474 }, { "epoch": 2.20855712890625e-05, "step": 14474, "training_step_time": 0.10280704498291016 }, { "epoch": 2.208709716796875e-05, "model_forward_time": 0.02537846565246582, "step": 14475 }, { "epoch": 2.208709716796875e-05, "step": 14475, "training_step_time": 0.10709953308105469 }, { "epoch": 2.2088623046875e-05, "model_forward_time": 0.02534651756286621, "step": 14476 }, { "epoch": 2.2088623046875e-05, "step": 14476, "training_step_time": 0.10887026786804199 }, { "epoch": 2.209014892578125e-05, "model_forward_time": 0.025393009185791016, "step": 14477 }, { "epoch": 2.209014892578125e-05, "step": 14477, "training_step_time": 0.10451221466064453 }, { "epoch": 2.20916748046875e-05, "model_forward_time": 0.025368928909301758, "step": 14478 }, { "epoch": 2.20916748046875e-05, "step": 14478, "training_step_time": 0.10456395149230957 }, { "epoch": 2.209320068359375e-05, "model_forward_time": 0.024740219116210938, "step": 14479 }, { "epoch": 2.209320068359375e-05, "step": 14479, "training_step_time": 0.10389828681945801 }, { "epoch": 2.20947265625e-05, "grad_norm": 0.3883318305015564, "learning_rate": 5.697684767077125e-05, "loss": 0.0123, "step": 14480 }, { "epoch": 2.20947265625e-05, "model_forward_time": 0.024800777435302734, "step": 14480 }, { "epoch": 2.20947265625e-05, "step": 14480, "training_step_time": 0.10541391372680664 }, { "epoch": 2.209625244140625e-05, "model_forward_time": 0.024829387664794922, "step": 14481 }, { "epoch": 2.209625244140625e-05, "step": 14481, "training_step_time": 0.10607028007507324 }, { "epoch": 2.20977783203125e-05, "model_forward_time": 0.025574922561645508, "step": 14482 }, { "epoch": 2.20977783203125e-05, "step": 14482, "training_step_time": 0.10839486122131348 }, { "epoch": 2.209930419921875e-05, "model_forward_time": 0.02539992332458496, "step": 14483 }, { "epoch": 2.209930419921875e-05, "step": 14483, "training_step_time": 0.10468316078186035 }, { "epoch": 2.2100830078125e-05, "model_forward_time": 0.025543928146362305, "step": 14484 }, { "epoch": 2.2100830078125e-05, "step": 14484, "training_step_time": 0.1052558422088623 }, { "epoch": 2.210235595703125e-05, "model_forward_time": 0.025189876556396484, "step": 14485 }, { "epoch": 2.210235595703125e-05, "step": 14485, "training_step_time": 0.10957646369934082 }, { "epoch": 2.21038818359375e-05, "model_forward_time": 0.025109291076660156, "step": 14486 }, { "epoch": 2.21038818359375e-05, "step": 14486, "training_step_time": 0.10662221908569336 }, { "epoch": 2.210540771484375e-05, "model_forward_time": 0.0253598690032959, "step": 14487 }, { "epoch": 2.210540771484375e-05, "step": 14487, "training_step_time": 0.1046895980834961 }, { "epoch": 2.210693359375e-05, "model_forward_time": 0.025243043899536133, "step": 14488 }, { "epoch": 2.210693359375e-05, "step": 14488, "training_step_time": 0.10539865493774414 }, { "epoch": 2.210845947265625e-05, "model_forward_time": 0.025421857833862305, "step": 14489 }, { "epoch": 2.210845947265625e-05, "step": 14489, "training_step_time": 0.10492134094238281 }, { "epoch": 2.21099853515625e-05, "grad_norm": 0.6013140082359314, "learning_rate": 5.692226698644938e-05, "loss": 0.0142, "step": 14490 }, { "epoch": 2.21099853515625e-05, "model_forward_time": 0.02513575553894043, "step": 14490 }, { "epoch": 2.21099853515625e-05, "step": 14490, "training_step_time": 0.10460662841796875 }, { "epoch": 2.211151123046875e-05, "model_forward_time": 0.027991771697998047, "step": 14491 }, { "epoch": 2.211151123046875e-05, "step": 14491, "training_step_time": 0.10808277130126953 }, { "epoch": 2.2113037109375e-05, "model_forward_time": 0.025477886199951172, "step": 14492 }, { "epoch": 2.2113037109375e-05, "step": 14492, "training_step_time": 0.10950827598571777 }, { "epoch": 2.211456298828125e-05, "model_forward_time": 0.026910066604614258, "step": 14493 }, { "epoch": 2.211456298828125e-05, "step": 14493, "training_step_time": 0.13556432723999023 }, { "epoch": 2.21160888671875e-05, "model_forward_time": 0.025238990783691406, "step": 14494 }, { "epoch": 2.21160888671875e-05, "step": 14494, "training_step_time": 0.1399383544921875 }, { "epoch": 2.211761474609375e-05, "model_forward_time": 0.02437138557434082, "step": 14495 }, { "epoch": 2.211761474609375e-05, "step": 14495, "training_step_time": 0.10627007484436035 }, { "epoch": 2.2119140625e-05, "model_forward_time": 0.025246381759643555, "step": 14496 }, { "epoch": 2.2119140625e-05, "step": 14496, "training_step_time": 0.11527514457702637 }, { "epoch": 2.212066650390625e-05, "model_forward_time": 0.02501535415649414, "step": 14497 }, { "epoch": 2.212066650390625e-05, "step": 14497, "training_step_time": 0.11256074905395508 }, { "epoch": 2.21221923828125e-05, "model_forward_time": 0.025443315505981445, "step": 14498 }, { "epoch": 2.21221923828125e-05, "step": 14498, "training_step_time": 0.1087343692779541 }, { "epoch": 2.212371826171875e-05, "model_forward_time": 0.025124311447143555, "step": 14499 }, { "epoch": 2.212371826171875e-05, "step": 14499, "training_step_time": 0.1915757656097412 }, { "epoch": 2.2125244140625e-05, "grad_norm": 0.1677589863538742, "learning_rate": 5.686767789092041e-05, "loss": 0.0222, "step": 14500 }, { "epoch": 2.2125244140625e-05, "model_forward_time": 0.025127172470092773, "step": 14500 }, { "epoch": 2.2125244140625e-05, "step": 14500, "training_step_time": 0.1542203426361084 }, { "epoch": 2.212677001953125e-05, "model_forward_time": 0.024472713470458984, "step": 14501 }, { "epoch": 2.212677001953125e-05, "step": 14501, "training_step_time": 0.10672783851623535 }, { "epoch": 2.21282958984375e-05, "model_forward_time": 0.02491450309753418, "step": 14502 }, { "epoch": 2.21282958984375e-05, "step": 14502, "training_step_time": 0.1116936206817627 }, { "epoch": 2.212982177734375e-05, "model_forward_time": 0.025066614151000977, "step": 14503 }, { "epoch": 2.212982177734375e-05, "step": 14503, "training_step_time": 0.12648248672485352 }, { "epoch": 2.213134765625e-05, "model_forward_time": 0.025091886520385742, "step": 14504 }, { "epoch": 2.213134765625e-05, "step": 14504, "training_step_time": 0.12327194213867188 }, { "epoch": 2.213287353515625e-05, "model_forward_time": 0.02522730827331543, "step": 14505 }, { "epoch": 2.213287353515625e-05, "step": 14505, "training_step_time": 0.12079477310180664 }, { "epoch": 2.21343994140625e-05, "model_forward_time": 0.025873422622680664, "step": 14506 }, { "epoch": 2.21343994140625e-05, "step": 14506, "training_step_time": 0.10528755187988281 }, { "epoch": 2.213592529296875e-05, "model_forward_time": 0.025313854217529297, "step": 14507 }, { "epoch": 2.213592529296875e-05, "step": 14507, "training_step_time": 0.10801434516906738 }, { "epoch": 2.2137451171875e-05, "model_forward_time": 0.02560734748840332, "step": 14508 }, { "epoch": 2.2137451171875e-05, "step": 14508, "training_step_time": 0.11578941345214844 }, { "epoch": 2.213897705078125e-05, "model_forward_time": 0.025099754333496094, "step": 14509 }, { "epoch": 2.213897705078125e-05, "step": 14509, "training_step_time": 0.11017775535583496 }, { "epoch": 2.21405029296875e-05, "grad_norm": 0.23868466913700104, "learning_rate": 5.681308045051522e-05, "loss": 0.0172, "step": 14510 }, { "epoch": 2.21405029296875e-05, "model_forward_time": 0.025927066802978516, "step": 14510 }, { "epoch": 2.21405029296875e-05, "step": 14510, "training_step_time": 0.12167644500732422 }, { "epoch": 2.214202880859375e-05, "model_forward_time": 0.025357723236083984, "step": 14511 }, { "epoch": 2.214202880859375e-05, "step": 14511, "training_step_time": 0.18501615524291992 }, { "epoch": 2.21435546875e-05, "model_forward_time": 0.024504899978637695, "step": 14512 }, { "epoch": 2.21435546875e-05, "step": 14512, "training_step_time": 0.16508865356445312 }, { "epoch": 2.214508056640625e-05, "model_forward_time": 0.025059938430786133, "step": 14513 }, { "epoch": 2.214508056640625e-05, "step": 14513, "training_step_time": 0.20446133613586426 }, { "epoch": 2.21466064453125e-05, "model_forward_time": 0.02722954750061035, "step": 14514 }, { "epoch": 2.21466064453125e-05, "step": 14514, "training_step_time": 0.14736366271972656 }, { "epoch": 2.214813232421875e-05, "model_forward_time": 0.0245511531829834, "step": 14515 }, { "epoch": 2.214813232421875e-05, "step": 14515, "training_step_time": 0.14623451232910156 }, { "epoch": 2.2149658203125e-05, "model_forward_time": 0.024187088012695312, "step": 14516 }, { "epoch": 2.2149658203125e-05, "step": 14516, "training_step_time": 0.2178959846496582 }, { "epoch": 2.215118408203125e-05, "model_forward_time": 0.024257183074951172, "step": 14517 }, { "epoch": 2.215118408203125e-05, "step": 14517, "training_step_time": 0.12434029579162598 }, { "epoch": 2.21527099609375e-05, "model_forward_time": 0.024207592010498047, "step": 14518 }, { "epoch": 2.21527099609375e-05, "step": 14518, "training_step_time": 0.11719679832458496 }, { "epoch": 2.215423583984375e-05, "model_forward_time": 0.025354385375976562, "step": 14519 }, { "epoch": 2.215423583984375e-05, "step": 14519, "training_step_time": 0.11101865768432617 }, { "epoch": 2.215576171875e-05, "grad_norm": 0.24587038159370422, "learning_rate": 5.675847473157485e-05, "loss": 0.0113, "step": 14520 }, { "epoch": 2.215576171875e-05, "model_forward_time": 0.02551412582397461, "step": 14520 }, { "epoch": 2.215576171875e-05, "step": 14520, "training_step_time": 0.1083371639251709 }, { "epoch": 2.215728759765625e-05, "model_forward_time": 0.025506019592285156, "step": 14521 }, { "epoch": 2.215728759765625e-05, "step": 14521, "training_step_time": 0.10920929908752441 }, { "epoch": 2.21588134765625e-05, "model_forward_time": 0.025127410888671875, "step": 14522 }, { "epoch": 2.21588134765625e-05, "step": 14522, "training_step_time": 0.11072731018066406 }, { "epoch": 2.216033935546875e-05, "model_forward_time": 0.025318384170532227, "step": 14523 }, { "epoch": 2.216033935546875e-05, "step": 14523, "training_step_time": 0.10721158981323242 }, { "epoch": 2.2161865234375e-05, "model_forward_time": 0.02513265609741211, "step": 14524 }, { "epoch": 2.2161865234375e-05, "step": 14524, "training_step_time": 0.10646462440490723 }, { "epoch": 2.216339111328125e-05, "model_forward_time": 0.025191307067871094, "step": 14525 }, { "epoch": 2.216339111328125e-05, "step": 14525, "training_step_time": 0.10880589485168457 }, { "epoch": 2.21649169921875e-05, "model_forward_time": 0.024858474731445312, "step": 14526 }, { "epoch": 2.21649169921875e-05, "step": 14526, "training_step_time": 0.10600948333740234 }, { "epoch": 2.216644287109375e-05, "model_forward_time": 0.025404691696166992, "step": 14527 }, { "epoch": 2.216644287109375e-05, "step": 14527, "training_step_time": 0.10690474510192871 }, { "epoch": 2.216796875e-05, "model_forward_time": 0.025562763214111328, "step": 14528 }, { "epoch": 2.216796875e-05, "step": 14528, "training_step_time": 0.10588240623474121 }, { "epoch": 2.216949462890625e-05, "model_forward_time": 0.024974584579467773, "step": 14529 }, { "epoch": 2.216949462890625e-05, "step": 14529, "training_step_time": 0.10827827453613281 }, { "epoch": 2.21710205078125e-05, "grad_norm": 0.4669856131076813, "learning_rate": 5.670386080045039e-05, "loss": 0.0338, "step": 14530 }, { "epoch": 2.21710205078125e-05, "model_forward_time": 0.024988174438476562, "step": 14530 }, { "epoch": 2.21710205078125e-05, "step": 14530, "training_step_time": 0.10833001136779785 }, { "epoch": 2.217254638671875e-05, "model_forward_time": 0.025222301483154297, "step": 14531 }, { "epoch": 2.217254638671875e-05, "step": 14531, "training_step_time": 0.10470008850097656 }, { "epoch": 2.2174072265625e-05, "model_forward_time": 0.025252580642700195, "step": 14532 }, { "epoch": 2.2174072265625e-05, "step": 14532, "training_step_time": 0.10411214828491211 }, { "epoch": 2.217559814453125e-05, "model_forward_time": 0.025224924087524414, "step": 14533 }, { "epoch": 2.217559814453125e-05, "step": 14533, "training_step_time": 0.10711002349853516 }, { "epoch": 2.21771240234375e-05, "model_forward_time": 0.025225162506103516, "step": 14534 }, { "epoch": 2.21771240234375e-05, "step": 14534, "training_step_time": 0.10876584053039551 }, { "epoch": 2.217864990234375e-05, "model_forward_time": 0.025290250778198242, "step": 14535 }, { "epoch": 2.217864990234375e-05, "step": 14535, "training_step_time": 0.10790181159973145 }, { "epoch": 2.218017578125e-05, "model_forward_time": 0.025249481201171875, "step": 14536 }, { "epoch": 2.218017578125e-05, "step": 14536, "training_step_time": 0.10514235496520996 }, { "epoch": 2.218170166015625e-05, "model_forward_time": 0.025214672088623047, "step": 14537 }, { "epoch": 2.218170166015625e-05, "step": 14537, "training_step_time": 0.10736584663391113 }, { "epoch": 2.21832275390625e-05, "model_forward_time": 0.025692462921142578, "step": 14538 }, { "epoch": 2.21832275390625e-05, "step": 14538, "training_step_time": 0.10925865173339844 }, { "epoch": 2.218475341796875e-05, "model_forward_time": 0.025007963180541992, "step": 14539 }, { "epoch": 2.218475341796875e-05, "step": 14539, "training_step_time": 0.17052817344665527 }, { "epoch": 2.2186279296875e-05, "grad_norm": 0.31042104959487915, "learning_rate": 5.664923872350294e-05, "loss": 0.0157, "step": 14540 }, { "epoch": 2.2186279296875e-05, "model_forward_time": 0.024857282638549805, "step": 14540 }, { "epoch": 2.2186279296875e-05, "step": 14540, "training_step_time": 0.13193798065185547 }, { "epoch": 2.218780517578125e-05, "model_forward_time": 0.024276018142700195, "step": 14541 }, { "epoch": 2.218780517578125e-05, "step": 14541, "training_step_time": 0.10498857498168945 }, { "epoch": 2.21893310546875e-05, "model_forward_time": 0.02554798126220703, "step": 14542 }, { "epoch": 2.21893310546875e-05, "step": 14542, "training_step_time": 0.11996936798095703 }, { "epoch": 2.219085693359375e-05, "model_forward_time": 0.025265932083129883, "step": 14543 }, { "epoch": 2.219085693359375e-05, "step": 14543, "training_step_time": 0.11675453186035156 }, { "epoch": 2.21923828125e-05, "model_forward_time": 0.02521514892578125, "step": 14544 }, { "epoch": 2.21923828125e-05, "step": 14544, "training_step_time": 0.10510468482971191 }, { "epoch": 2.219390869140625e-05, "model_forward_time": 0.025184154510498047, "step": 14545 }, { "epoch": 2.219390869140625e-05, "step": 14545, "training_step_time": 0.19674015045166016 }, { "epoch": 2.21954345703125e-05, "model_forward_time": 0.024651288986206055, "step": 14546 }, { "epoch": 2.21954345703125e-05, "step": 14546, "training_step_time": 0.10736656188964844 }, { "epoch": 2.219696044921875e-05, "model_forward_time": 0.02479720115661621, "step": 14547 }, { "epoch": 2.219696044921875e-05, "step": 14547, "training_step_time": 0.11674380302429199 }, { "epoch": 2.2198486328125e-05, "model_forward_time": 0.02531743049621582, "step": 14548 }, { "epoch": 2.2198486328125e-05, "step": 14548, "training_step_time": 0.11290359497070312 }, { "epoch": 2.220001220703125e-05, "model_forward_time": 0.025397300720214844, "step": 14549 }, { "epoch": 2.220001220703125e-05, "step": 14549, "training_step_time": 0.11157083511352539 }, { "epoch": 2.22015380859375e-05, "grad_norm": 0.5008291006088257, "learning_rate": 5.6594608567103456e-05, "loss": 0.0132, "step": 14550 }, { "epoch": 2.22015380859375e-05, "model_forward_time": 0.0252227783203125, "step": 14550 }, { "epoch": 2.22015380859375e-05, "step": 14550, "training_step_time": 0.11755633354187012 }, { "epoch": 2.220306396484375e-05, "model_forward_time": 0.02504420280456543, "step": 14551 }, { "epoch": 2.220306396484375e-05, "step": 14551, "training_step_time": 0.11762428283691406 }, { "epoch": 2.220458984375e-05, "model_forward_time": 0.0259397029876709, "step": 14552 }, { "epoch": 2.220458984375e-05, "step": 14552, "training_step_time": 0.11055564880371094 }, { "epoch": 2.220611572265625e-05, "model_forward_time": 0.024779319763183594, "step": 14553 }, { "epoch": 2.220611572265625e-05, "step": 14553, "training_step_time": 0.16904354095458984 }, { "epoch": 2.22076416015625e-05, "model_forward_time": 0.025450706481933594, "step": 14554 }, { "epoch": 2.22076416015625e-05, "step": 14554, "training_step_time": 0.16411471366882324 }, { "epoch": 2.220916748046875e-05, "model_forward_time": 0.024248600006103516, "step": 14555 }, { "epoch": 2.220916748046875e-05, "step": 14555, "training_step_time": 0.11116313934326172 }, { "epoch": 2.2210693359375e-05, "model_forward_time": 0.025157451629638672, "step": 14556 }, { "epoch": 2.2210693359375e-05, "step": 14556, "training_step_time": 0.10460329055786133 }, { "epoch": 2.221221923828125e-05, "model_forward_time": 0.025165319442749023, "step": 14557 }, { "epoch": 2.221221923828125e-05, "step": 14557, "training_step_time": 0.11836457252502441 }, { "epoch": 2.22137451171875e-05, "model_forward_time": 0.02537250518798828, "step": 14558 }, { "epoch": 2.22137451171875e-05, "step": 14558, "training_step_time": 0.14841389656066895 }, { "epoch": 2.221527099609375e-05, "model_forward_time": 0.02514028549194336, "step": 14559 }, { "epoch": 2.221527099609375e-05, "step": 14559, "training_step_time": 0.11809015274047852 }, { "epoch": 2.2216796875e-05, "grad_norm": 0.3204619884490967, "learning_rate": 5.653997039763273e-05, "loss": 0.0164, "step": 14560 }, { "epoch": 2.2216796875e-05, "model_forward_time": 0.025328636169433594, "step": 14560 }, { "epoch": 2.2216796875e-05, "step": 14560, "training_step_time": 0.1295909881591797 }, { "epoch": 2.221832275390625e-05, "model_forward_time": 0.024994850158691406, "step": 14561 }, { "epoch": 2.221832275390625e-05, "step": 14561, "training_step_time": 0.13772082328796387 }, { "epoch": 2.22198486328125e-05, "model_forward_time": 0.024618864059448242, "step": 14562 }, { "epoch": 2.22198486328125e-05, "step": 14562, "training_step_time": 0.11031842231750488 }, { "epoch": 2.222137451171875e-05, "model_forward_time": 0.02543163299560547, "step": 14563 }, { "epoch": 2.222137451171875e-05, "step": 14563, "training_step_time": 0.1372203826904297 }, { "epoch": 2.2222900390625e-05, "model_forward_time": 0.025385618209838867, "step": 14564 }, { "epoch": 2.2222900390625e-05, "step": 14564, "training_step_time": 0.1085052490234375 }, { "epoch": 2.222442626953125e-05, "model_forward_time": 0.025194883346557617, "step": 14565 }, { "epoch": 2.222442626953125e-05, "step": 14565, "training_step_time": 0.10372090339660645 }, { "epoch": 2.22259521484375e-05, "model_forward_time": 0.025232553482055664, "step": 14566 }, { "epoch": 2.22259521484375e-05, "step": 14566, "training_step_time": 0.10683012008666992 }, { "epoch": 2.222747802734375e-05, "model_forward_time": 0.028279542922973633, "step": 14567 }, { "epoch": 2.222747802734375e-05, "step": 14567, "training_step_time": 0.10767650604248047 }, { "epoch": 2.222900390625e-05, "model_forward_time": 0.025249242782592773, "step": 14568 }, { "epoch": 2.222900390625e-05, "step": 14568, "training_step_time": 0.10807371139526367 }, { "epoch": 2.223052978515625e-05, "model_forward_time": 0.026694774627685547, "step": 14569 }, { "epoch": 2.223052978515625e-05, "step": 14569, "training_step_time": 0.10769462585449219 }, { "epoch": 2.22320556640625e-05, "grad_norm": 0.5669165253639221, "learning_rate": 5.648532428148128e-05, "loss": 0.0103, "step": 14570 }, { "epoch": 2.22320556640625e-05, "model_forward_time": 0.025313615798950195, "step": 14570 }, { "epoch": 2.22320556640625e-05, "step": 14570, "training_step_time": 0.1088409423828125 }, { "epoch": 2.223358154296875e-05, "model_forward_time": 0.02517247200012207, "step": 14571 }, { "epoch": 2.223358154296875e-05, "step": 14571, "training_step_time": 0.10522890090942383 }, { "epoch": 2.2235107421875e-05, "model_forward_time": 0.02527761459350586, "step": 14572 }, { "epoch": 2.2235107421875e-05, "step": 14572, "training_step_time": 0.10531353950500488 }, { "epoch": 2.223663330078125e-05, "model_forward_time": 0.02524876594543457, "step": 14573 }, { "epoch": 2.223663330078125e-05, "step": 14573, "training_step_time": 0.10536575317382812 }, { "epoch": 2.22381591796875e-05, "model_forward_time": 0.025576353073120117, "step": 14574 }, { "epoch": 2.22381591796875e-05, "step": 14574, "training_step_time": 0.1059722900390625 }, { "epoch": 2.223968505859375e-05, "model_forward_time": 0.025562524795532227, "step": 14575 }, { "epoch": 2.223968505859375e-05, "step": 14575, "training_step_time": 0.10818719863891602 }, { "epoch": 2.22412109375e-05, "model_forward_time": 0.025771617889404297, "step": 14576 }, { "epoch": 2.22412109375e-05, "step": 14576, "training_step_time": 0.10642242431640625 }, { "epoch": 2.224273681640625e-05, "model_forward_time": 0.025417327880859375, "step": 14577 }, { "epoch": 2.224273681640625e-05, "step": 14577, "training_step_time": 0.1053764820098877 }, { "epoch": 2.22442626953125e-05, "model_forward_time": 0.025953292846679688, "step": 14578 }, { "epoch": 2.22442626953125e-05, "step": 14578, "training_step_time": 0.1078343391418457 }, { "epoch": 2.224578857421875e-05, "model_forward_time": 0.025502443313598633, "step": 14579 }, { "epoch": 2.224578857421875e-05, "step": 14579, "training_step_time": 0.10656881332397461 }, { "epoch": 2.2247314453125e-05, "grad_norm": 0.28606441617012024, "learning_rate": 5.6430670285049314e-05, "loss": 0.0208, "step": 14580 }, { "epoch": 2.2247314453125e-05, "model_forward_time": 0.025617122650146484, "step": 14580 }, { "epoch": 2.2247314453125e-05, "step": 14580, "training_step_time": 0.10574054718017578 }, { "epoch": 2.224884033203125e-05, "model_forward_time": 0.026617765426635742, "step": 14581 }, { "epoch": 2.224884033203125e-05, "step": 14581, "training_step_time": 0.11324524879455566 }, { "epoch": 2.22503662109375e-05, "model_forward_time": 0.025490760803222656, "step": 14582 }, { "epoch": 2.22503662109375e-05, "step": 14582, "training_step_time": 0.10758352279663086 }, { "epoch": 2.225189208984375e-05, "model_forward_time": 0.025390148162841797, "step": 14583 }, { "epoch": 2.225189208984375e-05, "step": 14583, "training_step_time": 0.10762429237365723 }, { "epoch": 2.225341796875e-05, "model_forward_time": 0.028012514114379883, "step": 14584 }, { "epoch": 2.225341796875e-05, "step": 14584, "training_step_time": 0.10878801345825195 }, { "epoch": 2.225494384765625e-05, "model_forward_time": 0.02500605583190918, "step": 14585 }, { "epoch": 2.225494384765625e-05, "step": 14585, "training_step_time": 0.10852932929992676 }, { "epoch": 2.22564697265625e-05, "model_forward_time": 0.025295257568359375, "step": 14586 }, { "epoch": 2.22564697265625e-05, "step": 14586, "training_step_time": 0.19635224342346191 }, { "epoch": 2.225799560546875e-05, "model_forward_time": 0.024827241897583008, "step": 14587 }, { "epoch": 2.225799560546875e-05, "step": 14587, "training_step_time": 0.13767170906066895 }, { "epoch": 2.2259521484375e-05, "model_forward_time": 0.024292945861816406, "step": 14588 }, { "epoch": 2.2259521484375e-05, "step": 14588, "training_step_time": 0.10584115982055664 }, { "epoch": 2.226104736328125e-05, "model_forward_time": 0.025574445724487305, "step": 14589 }, { "epoch": 2.226104736328125e-05, "step": 14589, "training_step_time": 0.12027215957641602 }, { "epoch": 2.22625732421875e-05, "grad_norm": 0.2916117012500763, "learning_rate": 5.637600847474656e-05, "loss": 0.0259, "step": 14590 }, { "epoch": 2.22625732421875e-05, "model_forward_time": 0.025522708892822266, "step": 14590 }, { "epoch": 2.22625732421875e-05, "step": 14590, "training_step_time": 0.14209604263305664 }, { "epoch": 2.226409912109375e-05, "model_forward_time": 0.024690628051757812, "step": 14591 }, { "epoch": 2.226409912109375e-05, "step": 14591, "training_step_time": 0.1726839542388916 }, { "epoch": 2.2265625e-05, "model_forward_time": 0.024991273880004883, "step": 14592 }, { "epoch": 2.2265625e-05, "step": 14592, "training_step_time": 0.1297597885131836 }, { "epoch": 2.226715087890625e-05, "model_forward_time": 0.024867534637451172, "step": 14593 }, { "epoch": 2.226715087890625e-05, "step": 14593, "training_step_time": 0.12020111083984375 }, { "epoch": 2.22686767578125e-05, "model_forward_time": 0.02412581443786621, "step": 14594 }, { "epoch": 2.22686767578125e-05, "step": 14594, "training_step_time": 0.13795804977416992 }, { "epoch": 2.227020263671875e-05, "model_forward_time": 0.02532672882080078, "step": 14595 }, { "epoch": 2.227020263671875e-05, "step": 14595, "training_step_time": 0.11986804008483887 }, { "epoch": 2.2271728515625e-05, "model_forward_time": 0.025309324264526367, "step": 14596 }, { "epoch": 2.2271728515625e-05, "step": 14596, "training_step_time": 0.21877098083496094 }, { "epoch": 2.227325439453125e-05, "model_forward_time": 0.024305343627929688, "step": 14597 }, { "epoch": 2.227325439453125e-05, "step": 14597, "training_step_time": 0.13095593452453613 }, { "epoch": 2.22747802734375e-05, "model_forward_time": 0.025554656982421875, "step": 14598 }, { "epoch": 2.22747802734375e-05, "step": 14598, "training_step_time": 0.1100459098815918 }, { "epoch": 2.227630615234375e-05, "model_forward_time": 0.025024890899658203, "step": 14599 }, { "epoch": 2.227630615234375e-05, "step": 14599, "training_step_time": 0.16653084754943848 }, { "epoch": 2.227783203125e-05, "grad_norm": 0.26374122500419617, "learning_rate": 5.6321338916992315e-05, "loss": 0.0183, "step": 14600 }, { "epoch": 2.227783203125e-05, "model_forward_time": 0.025025367736816406, "step": 14600 }, { "epoch": 2.227783203125e-05, "step": 14600, "training_step_time": 0.1618502140045166 }, { "epoch": 2.227935791015625e-05, "model_forward_time": 0.024567127227783203, "step": 14601 }, { "epoch": 2.227935791015625e-05, "step": 14601, "training_step_time": 0.10683107376098633 }, { "epoch": 2.22808837890625e-05, "model_forward_time": 0.025127172470092773, "step": 14602 }, { "epoch": 2.22808837890625e-05, "step": 14602, "training_step_time": 0.13989615440368652 }, { "epoch": 2.228240966796875e-05, "model_forward_time": 0.025621652603149414, "step": 14603 }, { "epoch": 2.228240966796875e-05, "step": 14603, "training_step_time": 0.10675883293151855 }, { "epoch": 2.2283935546875e-05, "model_forward_time": 0.02579474449157715, "step": 14604 }, { "epoch": 2.2283935546875e-05, "step": 14604, "training_step_time": 0.1613454818725586 }, { "epoch": 2.228546142578125e-05, "model_forward_time": 0.02497124671936035, "step": 14605 }, { "epoch": 2.228546142578125e-05, "step": 14605, "training_step_time": 0.12167668342590332 }, { "epoch": 2.22869873046875e-05, "model_forward_time": 0.024556636810302734, "step": 14606 }, { "epoch": 2.22869873046875e-05, "step": 14606, "training_step_time": 0.12325596809387207 }, { "epoch": 2.228851318359375e-05, "model_forward_time": 0.025227069854736328, "step": 14607 }, { "epoch": 2.228851318359375e-05, "step": 14607, "training_step_time": 0.10447835922241211 }, { "epoch": 2.22900390625e-05, "model_forward_time": 0.025501012802124023, "step": 14608 }, { "epoch": 2.22900390625e-05, "step": 14608, "training_step_time": 0.15616369247436523 }, { "epoch": 2.229156494140625e-05, "model_forward_time": 0.02691817283630371, "step": 14609 }, { "epoch": 2.229156494140625e-05, "step": 14609, "training_step_time": 0.13019704818725586 }, { "epoch": 2.22930908203125e-05, "grad_norm": 0.22930274903774261, "learning_rate": 5.6266661678215216e-05, "loss": 0.0131, "step": 14610 }, { "epoch": 2.22930908203125e-05, "model_forward_time": 0.02466869354248047, "step": 14610 }, { "epoch": 2.22930908203125e-05, "step": 14610, "training_step_time": 0.10407185554504395 }, { "epoch": 2.229461669921875e-05, "model_forward_time": 0.02530384063720703, "step": 14611 }, { "epoch": 2.229461669921875e-05, "step": 14611, "training_step_time": 0.10439634323120117 }, { "epoch": 2.2296142578125e-05, "model_forward_time": 0.0257565975189209, "step": 14612 }, { "epoch": 2.2296142578125e-05, "step": 14612, "training_step_time": 0.1071312427520752 }, { "epoch": 2.229766845703125e-05, "model_forward_time": 0.025597810745239258, "step": 14613 }, { "epoch": 2.229766845703125e-05, "step": 14613, "training_step_time": 0.1091303825378418 }, { "epoch": 2.22991943359375e-05, "model_forward_time": 0.025457382202148438, "step": 14614 }, { "epoch": 2.22991943359375e-05, "step": 14614, "training_step_time": 0.10671138763427734 }, { "epoch": 2.230072021484375e-05, "model_forward_time": 0.025119543075561523, "step": 14615 }, { "epoch": 2.230072021484375e-05, "step": 14615, "training_step_time": 0.10379862785339355 }, { "epoch": 2.230224609375e-05, "model_forward_time": 0.02516794204711914, "step": 14616 }, { "epoch": 2.230224609375e-05, "step": 14616, "training_step_time": 0.10806655883789062 }, { "epoch": 2.230377197265625e-05, "model_forward_time": 0.025177955627441406, "step": 14617 }, { "epoch": 2.230377197265625e-05, "step": 14617, "training_step_time": 0.10591745376586914 }, { "epoch": 2.23052978515625e-05, "model_forward_time": 0.025246620178222656, "step": 14618 }, { "epoch": 2.23052978515625e-05, "step": 14618, "training_step_time": 0.10493636131286621 }, { "epoch": 2.230682373046875e-05, "model_forward_time": 0.025217771530151367, "step": 14619 }, { "epoch": 2.230682373046875e-05, "step": 14619, "training_step_time": 0.1037290096282959 }, { "epoch": 2.2308349609375e-05, "grad_norm": 0.27034735679626465, "learning_rate": 5.621197682485327e-05, "loss": 0.0147, "step": 14620 }, { "epoch": 2.2308349609375e-05, "model_forward_time": 0.025237321853637695, "step": 14620 }, { "epoch": 2.2308349609375e-05, "step": 14620, "training_step_time": 0.10933399200439453 }, { "epoch": 2.230987548828125e-05, "model_forward_time": 0.025169849395751953, "step": 14621 }, { "epoch": 2.230987548828125e-05, "step": 14621, "training_step_time": 0.10695219039916992 }, { "epoch": 2.23114013671875e-05, "model_forward_time": 0.02432990074157715, "step": 14622 }, { "epoch": 2.23114013671875e-05, "step": 14622, "training_step_time": 0.10440874099731445 }, { "epoch": 2.231292724609375e-05, "model_forward_time": 0.024834871292114258, "step": 14623 }, { "epoch": 2.231292724609375e-05, "step": 14623, "training_step_time": 0.10554957389831543 }, { "epoch": 2.2314453125e-05, "model_forward_time": 0.02553868293762207, "step": 14624 }, { "epoch": 2.2314453125e-05, "step": 14624, "training_step_time": 0.1040349006652832 }, { "epoch": 2.231597900390625e-05, "model_forward_time": 0.0252225399017334, "step": 14625 }, { "epoch": 2.231597900390625e-05, "step": 14625, "training_step_time": 0.10519552230834961 }, { "epoch": 2.23175048828125e-05, "model_forward_time": 0.02524709701538086, "step": 14626 }, { "epoch": 2.23175048828125e-05, "step": 14626, "training_step_time": 0.10590577125549316 }, { "epoch": 2.231903076171875e-05, "model_forward_time": 0.028827428817749023, "step": 14627 }, { "epoch": 2.231903076171875e-05, "step": 14627, "training_step_time": 0.17014288902282715 }, { "epoch": 2.2320556640625e-05, "model_forward_time": 0.02440166473388672, "step": 14628 }, { "epoch": 2.2320556640625e-05, "step": 14628, "training_step_time": 0.19220995903015137 }, { "epoch": 2.232208251953125e-05, "model_forward_time": 0.02406001091003418, "step": 14629 }, { "epoch": 2.232208251953125e-05, "step": 14629, "training_step_time": 0.18671131134033203 }, { "epoch": 2.23236083984375e-05, "grad_norm": 0.20472361147403717, "learning_rate": 5.615728442335373e-05, "loss": 0.0141, "step": 14630 }, { "epoch": 2.23236083984375e-05, "model_forward_time": 0.023957252502441406, "step": 14630 }, { "epoch": 2.23236083984375e-05, "step": 14630, "training_step_time": 0.21802139282226562 }, { "epoch": 2.232513427734375e-05, "model_forward_time": 0.02455902099609375, "step": 14631 }, { "epoch": 2.232513427734375e-05, "step": 14631, "training_step_time": 0.16136908531188965 }, { "epoch": 2.232666015625e-05, "model_forward_time": 0.025538206100463867, "step": 14632 }, { "epoch": 2.232666015625e-05, "step": 14632, "training_step_time": 0.17586064338684082 }, { "epoch": 2.232818603515625e-05, "model_forward_time": 0.02447223663330078, "step": 14633 }, { "epoch": 2.232818603515625e-05, "step": 14633, "training_step_time": 0.1186366081237793 }, { "epoch": 2.23297119140625e-05, "model_forward_time": 0.030089855194091797, "step": 14634 }, { "epoch": 2.23297119140625e-05, "step": 14634, "training_step_time": 0.1081693172454834 }, { "epoch": 2.233123779296875e-05, "model_forward_time": 0.025067567825317383, "step": 14635 }, { "epoch": 2.233123779296875e-05, "step": 14635, "training_step_time": 0.19274067878723145 }, { "epoch": 2.2332763671875e-05, "model_forward_time": 0.02409505844116211, "step": 14636 }, { "epoch": 2.2332763671875e-05, "step": 14636, "training_step_time": 0.10451650619506836 }, { "epoch": 2.233428955078125e-05, "model_forward_time": 0.02441096305847168, "step": 14637 }, { "epoch": 2.233428955078125e-05, "step": 14637, "training_step_time": 0.10161113739013672 }, { "epoch": 2.23358154296875e-05, "model_forward_time": 0.025036334991455078, "step": 14638 }, { "epoch": 2.23358154296875e-05, "step": 14638, "training_step_time": 0.11412167549133301 }, { "epoch": 2.233734130859375e-05, "model_forward_time": 0.025182247161865234, "step": 14639 }, { "epoch": 2.233734130859375e-05, "step": 14639, "training_step_time": 0.10583949089050293 }, { "epoch": 2.23388671875e-05, "grad_norm": 0.513239324092865, "learning_rate": 5.6102584540173006e-05, "loss": 0.0139, "step": 14640 }, { "epoch": 2.23388671875e-05, "model_forward_time": 0.025196075439453125, "step": 14640 }, { "epoch": 2.23388671875e-05, "step": 14640, "training_step_time": 0.10657310485839844 }, { "epoch": 2.234039306640625e-05, "model_forward_time": 0.025290727615356445, "step": 14641 }, { "epoch": 2.234039306640625e-05, "step": 14641, "training_step_time": 0.13177871704101562 }, { "epoch": 2.23419189453125e-05, "model_forward_time": 0.025245189666748047, "step": 14642 }, { "epoch": 2.23419189453125e-05, "step": 14642, "training_step_time": 0.12120795249938965 }, { "epoch": 2.234344482421875e-05, "model_forward_time": 0.0250394344329834, "step": 14643 }, { "epoch": 2.234344482421875e-05, "step": 14643, "training_step_time": 0.10208606719970703 }, { "epoch": 2.2344970703125e-05, "model_forward_time": 0.024508953094482422, "step": 14644 }, { "epoch": 2.2344970703125e-05, "step": 14644, "training_step_time": 0.1603856086730957 }, { "epoch": 2.234649658203125e-05, "model_forward_time": 0.024504423141479492, "step": 14645 }, { "epoch": 2.234649658203125e-05, "step": 14645, "training_step_time": 0.17503833770751953 }, { "epoch": 2.23480224609375e-05, "model_forward_time": 0.024655818939208984, "step": 14646 }, { "epoch": 2.23480224609375e-05, "step": 14646, "training_step_time": 0.12986087799072266 }, { "epoch": 2.234954833984375e-05, "model_forward_time": 0.024106740951538086, "step": 14647 }, { "epoch": 2.234954833984375e-05, "step": 14647, "training_step_time": 0.13033533096313477 }, { "epoch": 2.235107421875e-05, "model_forward_time": 0.026260852813720703, "step": 14648 }, { "epoch": 2.235107421875e-05, "step": 14648, "training_step_time": 0.20872092247009277 }, { "epoch": 2.235260009765625e-05, "model_forward_time": 0.024337291717529297, "step": 14649 }, { "epoch": 2.235260009765625e-05, "step": 14649, "training_step_time": 0.11283254623413086 }, { "epoch": 2.23541259765625e-05, "grad_norm": 0.22841264307498932, "learning_rate": 5.604787724177666e-05, "loss": 0.0152, "step": 14650 }, { "epoch": 2.23541259765625e-05, "model_forward_time": 0.02446269989013672, "step": 14650 }, { "epoch": 2.23541259765625e-05, "step": 14650, "training_step_time": 0.12491631507873535 }, { "epoch": 2.235565185546875e-05, "model_forward_time": 0.02512836456298828, "step": 14651 }, { "epoch": 2.235565185546875e-05, "step": 14651, "training_step_time": 0.14040637016296387 }, { "epoch": 2.2357177734375e-05, "model_forward_time": 0.02445054054260254, "step": 14652 }, { "epoch": 2.2357177734375e-05, "step": 14652, "training_step_time": 0.11809396743774414 }, { "epoch": 2.235870361328125e-05, "model_forward_time": 0.024946928024291992, "step": 14653 }, { "epoch": 2.235870361328125e-05, "step": 14653, "training_step_time": 0.17241668701171875 }, { "epoch": 2.23602294921875e-05, "model_forward_time": 0.023530244827270508, "step": 14654 }, { "epoch": 2.23602294921875e-05, "step": 14654, "training_step_time": 0.17937898635864258 }, { "epoch": 2.236175537109375e-05, "model_forward_time": 0.023753881454467773, "step": 14655 }, { "epoch": 2.236175537109375e-05, "step": 14655, "training_step_time": 0.16491937637329102 }, { "epoch": 2.236328125e-05, "model_forward_time": 0.023511171340942383, "step": 14656 }, { "epoch": 2.236328125e-05, "step": 14656, "training_step_time": 0.1552143096923828 }, { "epoch": 2.236480712890625e-05, "model_forward_time": 0.023363351821899414, "step": 14657 }, { "epoch": 2.236480712890625e-05, "step": 14657, "training_step_time": 0.14308810234069824 }, { "epoch": 2.23663330078125e-05, "model_forward_time": 0.023589611053466797, "step": 14658 }, { "epoch": 2.23663330078125e-05, "step": 14658, "training_step_time": 0.12970399856567383 }, { "epoch": 2.236785888671875e-05, "model_forward_time": 0.023662328720092773, "step": 14659 }, { "epoch": 2.236785888671875e-05, "step": 14659, "training_step_time": 0.12605071067810059 }, { "epoch": 2.2369384765625e-05, "grad_norm": 0.15974995493888855, "learning_rate": 5.599316259463916e-05, "loss": 0.0136, "step": 14660 }, { "epoch": 2.2369384765625e-05, "model_forward_time": 0.024079084396362305, "step": 14660 }, { "epoch": 2.2369384765625e-05, "step": 14660, "training_step_time": 0.12470412254333496 }, { "epoch": 2.237091064453125e-05, "model_forward_time": 0.024296998977661133, "step": 14661 }, { "epoch": 2.237091064453125e-05, "step": 14661, "training_step_time": 0.11998844146728516 }, { "epoch": 2.23724365234375e-05, "model_forward_time": 0.023944616317749023, "step": 14662 }, { "epoch": 2.23724365234375e-05, "step": 14662, "training_step_time": 0.11277937889099121 }, { "epoch": 2.237396240234375e-05, "model_forward_time": 0.02459716796875, "step": 14663 }, { "epoch": 2.237396240234375e-05, "step": 14663, "training_step_time": 0.10959362983703613 }, { "epoch": 2.237548828125e-05, "model_forward_time": 0.02400064468383789, "step": 14664 }, { "epoch": 2.237548828125e-05, "step": 14664, "training_step_time": 0.1081092357635498 }, { "epoch": 2.237701416015625e-05, "model_forward_time": 0.02496814727783203, "step": 14665 }, { "epoch": 2.237701416015625e-05, "step": 14665, "training_step_time": 0.11006307601928711 }, { "epoch": 2.23785400390625e-05, "model_forward_time": 0.024898767471313477, "step": 14666 }, { "epoch": 2.23785400390625e-05, "step": 14666, "training_step_time": 0.10990118980407715 }, { "epoch": 2.238006591796875e-05, "model_forward_time": 0.025090932846069336, "step": 14667 }, { "epoch": 2.238006591796875e-05, "step": 14667, "training_step_time": 0.10628461837768555 }, { "epoch": 2.2381591796875e-05, "model_forward_time": 0.025376558303833008, "step": 14668 }, { "epoch": 2.2381591796875e-05, "step": 14668, "training_step_time": 0.10677194595336914 }, { "epoch": 2.238311767578125e-05, "model_forward_time": 0.027096271514892578, "step": 14669 }, { "epoch": 2.238311767578125e-05, "step": 14669, "training_step_time": 0.11064720153808594 }, { "epoch": 2.23846435546875e-05, "grad_norm": 0.32507702708244324, "learning_rate": 5.5938440665244006e-05, "loss": 0.0198, "step": 14670 }, { "epoch": 2.23846435546875e-05, "model_forward_time": 0.025203466415405273, "step": 14670 }, { "epoch": 2.23846435546875e-05, "step": 14670, "training_step_time": 0.11006307601928711 }, { "epoch": 2.238616943359375e-05, "model_forward_time": 0.025109529495239258, "step": 14671 }, { "epoch": 2.238616943359375e-05, "step": 14671, "training_step_time": 0.10503411293029785 }, { "epoch": 2.23876953125e-05, "model_forward_time": 0.025461912155151367, "step": 14672 }, { "epoch": 2.23876953125e-05, "step": 14672, "training_step_time": 0.10463738441467285 }, { "epoch": 2.238922119140625e-05, "model_forward_time": 0.025249719619750977, "step": 14673 }, { "epoch": 2.238922119140625e-05, "step": 14673, "training_step_time": 0.10505008697509766 }, { "epoch": 2.23907470703125e-05, "model_forward_time": 0.024866819381713867, "step": 14674 }, { "epoch": 2.23907470703125e-05, "step": 14674, "training_step_time": 0.11972832679748535 }, { "epoch": 2.239227294921875e-05, "model_forward_time": 0.02445387840270996, "step": 14675 }, { "epoch": 2.239227294921875e-05, "step": 14675, "training_step_time": 0.14861822128295898 }, { "epoch": 2.2393798828125e-05, "model_forward_time": 0.024769067764282227, "step": 14676 }, { "epoch": 2.2393798828125e-05, "step": 14676, "training_step_time": 0.10760045051574707 }, { "epoch": 2.239532470703125e-05, "model_forward_time": 0.024705171585083008, "step": 14677 }, { "epoch": 2.239532470703125e-05, "step": 14677, "training_step_time": 0.11992812156677246 }, { "epoch": 2.23968505859375e-05, "model_forward_time": 0.0251309871673584, "step": 14678 }, { "epoch": 2.23968505859375e-05, "step": 14678, "training_step_time": 0.11035561561584473 }, { "epoch": 2.239837646484375e-05, "model_forward_time": 0.02546548843383789, "step": 14679 }, { "epoch": 2.239837646484375e-05, "step": 14679, "training_step_time": 0.15317606925964355 }, { "epoch": 2.239990234375e-05, "grad_norm": 0.26223987340927124, "learning_rate": 5.588371152008349e-05, "loss": 0.0133, "step": 14680 }, { "epoch": 2.239990234375e-05, "model_forward_time": 0.024795055389404297, "step": 14680 }, { "epoch": 2.239990234375e-05, "step": 14680, "training_step_time": 0.1347506046295166 }, { "epoch": 2.240142822265625e-05, "model_forward_time": 0.02449512481689453, "step": 14681 }, { "epoch": 2.240142822265625e-05, "step": 14681, "training_step_time": 0.10308504104614258 }, { "epoch": 2.24029541015625e-05, "model_forward_time": 0.02513909339904785, "step": 14682 }, { "epoch": 2.24029541015625e-05, "step": 14682, "training_step_time": 0.11237502098083496 }, { "epoch": 2.240447998046875e-05, "model_forward_time": 0.02517223358154297, "step": 14683 }, { "epoch": 2.240447998046875e-05, "step": 14683, "training_step_time": 0.11715102195739746 }, { "epoch": 2.2406005859375e-05, "model_forward_time": 0.025468111038208008, "step": 14684 }, { "epoch": 2.2406005859375e-05, "step": 14684, "training_step_time": 0.10817742347717285 }, { "epoch": 2.240753173828125e-05, "model_forward_time": 0.026155471801757812, "step": 14685 }, { "epoch": 2.240753173828125e-05, "step": 14685, "training_step_time": 0.10857129096984863 }, { "epoch": 2.24090576171875e-05, "model_forward_time": 0.02526569366455078, "step": 14686 }, { "epoch": 2.24090576171875e-05, "step": 14686, "training_step_time": 0.17660069465637207 }, { "epoch": 2.241058349609375e-05, "model_forward_time": 0.024334430694580078, "step": 14687 }, { "epoch": 2.241058349609375e-05, "step": 14687, "training_step_time": 0.10007476806640625 }, { "epoch": 2.2412109375e-05, "model_forward_time": 0.024341344833374023, "step": 14688 }, { "epoch": 2.2412109375e-05, "step": 14688, "training_step_time": 0.15075278282165527 }, { "epoch": 2.241363525390625e-05, "model_forward_time": 0.02454066276550293, "step": 14689 }, { "epoch": 2.241363525390625e-05, "step": 14689, "training_step_time": 0.16780376434326172 }, { "epoch": 2.24151611328125e-05, "grad_norm": 0.2609595060348511, "learning_rate": 5.5828975225658666e-05, "loss": 0.0179, "step": 14690 }, { "epoch": 2.24151611328125e-05, "model_forward_time": 0.024750947952270508, "step": 14690 }, { "epoch": 2.24151611328125e-05, "step": 14690, "training_step_time": 0.10785245895385742 }, { "epoch": 2.241668701171875e-05, "model_forward_time": 0.02433323860168457, "step": 14691 }, { "epoch": 2.241668701171875e-05, "step": 14691, "training_step_time": 0.13776803016662598 }, { "epoch": 2.2418212890625e-05, "model_forward_time": 0.025391101837158203, "step": 14692 }, { "epoch": 2.2418212890625e-05, "step": 14692, "training_step_time": 0.19810724258422852 }, { "epoch": 2.241973876953125e-05, "model_forward_time": 0.024683713912963867, "step": 14693 }, { "epoch": 2.241973876953125e-05, "step": 14693, "training_step_time": 0.1101081371307373 }, { "epoch": 2.24212646484375e-05, "model_forward_time": 0.02480912208557129, "step": 14694 }, { "epoch": 2.24212646484375e-05, "step": 14694, "training_step_time": 0.13306260108947754 }, { "epoch": 2.242279052734375e-05, "model_forward_time": 0.025397777557373047, "step": 14695 }, { "epoch": 2.242279052734375e-05, "step": 14695, "training_step_time": 0.1423022747039795 }, { "epoch": 2.242431640625e-05, "model_forward_time": 0.024962663650512695, "step": 14696 }, { "epoch": 2.242431640625e-05, "step": 14696, "training_step_time": 0.11060333251953125 }, { "epoch": 2.242584228515625e-05, "model_forward_time": 0.02542400360107422, "step": 14697 }, { "epoch": 2.242584228515625e-05, "step": 14697, "training_step_time": 0.13126492500305176 }, { "epoch": 2.24273681640625e-05, "model_forward_time": 0.02873969078063965, "step": 14698 }, { "epoch": 2.24273681640625e-05, "step": 14698, "training_step_time": 0.10872316360473633 }, { "epoch": 2.242889404296875e-05, "model_forward_time": 0.02545619010925293, "step": 14699 }, { "epoch": 2.242889404296875e-05, "step": 14699, "training_step_time": 0.10780119895935059 }, { "epoch": 2.2430419921875e-05, "grad_norm": 0.3035556375980377, "learning_rate": 5.577423184847932e-05, "loss": 0.017, "step": 14700 }, { "epoch": 2.2430419921875e-05, "model_forward_time": 0.02522873878479004, "step": 14700 }, { "epoch": 2.2430419921875e-05, "step": 14700, "training_step_time": 0.10416412353515625 }, { "epoch": 2.243194580078125e-05, "model_forward_time": 0.024952173233032227, "step": 14701 }, { "epoch": 2.243194580078125e-05, "step": 14701, "training_step_time": 0.12407088279724121 }, { "epoch": 2.24334716796875e-05, "model_forward_time": 0.024108409881591797, "step": 14702 }, { "epoch": 2.24334716796875e-05, "step": 14702, "training_step_time": 0.13840937614440918 }, { "epoch": 2.243499755859375e-05, "model_forward_time": 0.02394247055053711, "step": 14703 }, { "epoch": 2.243499755859375e-05, "step": 14703, "training_step_time": 0.13051557540893555 }, { "epoch": 2.24365234375e-05, "model_forward_time": 0.023429393768310547, "step": 14704 }, { "epoch": 2.24365234375e-05, "step": 14704, "training_step_time": 0.12807250022888184 }, { "epoch": 2.243804931640625e-05, "model_forward_time": 0.023743152618408203, "step": 14705 }, { "epoch": 2.243804931640625e-05, "step": 14705, "training_step_time": 0.12237143516540527 }, { "epoch": 2.24395751953125e-05, "model_forward_time": 0.024407386779785156, "step": 14706 }, { "epoch": 2.24395751953125e-05, "step": 14706, "training_step_time": 0.1209573745727539 }, { "epoch": 2.244110107421875e-05, "model_forward_time": 0.023970842361450195, "step": 14707 }, { "epoch": 2.244110107421875e-05, "step": 14707, "training_step_time": 0.11579775810241699 }, { "epoch": 2.2442626953125e-05, "model_forward_time": 0.025153636932373047, "step": 14708 }, { "epoch": 2.2442626953125e-05, "step": 14708, "training_step_time": 0.11315512657165527 }, { "epoch": 2.244415283203125e-05, "model_forward_time": 0.025513648986816406, "step": 14709 }, { "epoch": 2.244415283203125e-05, "step": 14709, "training_step_time": 0.11139297485351562 }, { "epoch": 2.24456787109375e-05, "grad_norm": 0.29737013578414917, "learning_rate": 5.5719481455063784e-05, "loss": 0.0198, "step": 14710 }, { "epoch": 2.24456787109375e-05, "model_forward_time": 0.02487945556640625, "step": 14710 }, { "epoch": 2.24456787109375e-05, "step": 14710, "training_step_time": 0.10619688034057617 }, { "epoch": 2.244720458984375e-05, "model_forward_time": 0.0250244140625, "step": 14711 }, { "epoch": 2.244720458984375e-05, "step": 14711, "training_step_time": 0.10650157928466797 }, { "epoch": 2.244873046875e-05, "model_forward_time": 0.025164365768432617, "step": 14712 }, { "epoch": 2.244873046875e-05, "step": 14712, "training_step_time": 0.10634756088256836 }, { "epoch": 2.245025634765625e-05, "model_forward_time": 0.025141000747680664, "step": 14713 }, { "epoch": 2.245025634765625e-05, "step": 14713, "training_step_time": 0.10593199729919434 }, { "epoch": 2.24517822265625e-05, "model_forward_time": 0.026380538940429688, "step": 14714 }, { "epoch": 2.24517822265625e-05, "step": 14714, "training_step_time": 0.10787773132324219 }, { "epoch": 2.245330810546875e-05, "model_forward_time": 0.02494955062866211, "step": 14715 }, { "epoch": 2.245330810546875e-05, "step": 14715, "training_step_time": 0.10883569717407227 }, { "epoch": 2.2454833984375e-05, "model_forward_time": 0.0250546932220459, "step": 14716 }, { "epoch": 2.2454833984375e-05, "step": 14716, "training_step_time": 0.10652041435241699 }, { "epoch": 2.245635986328125e-05, "model_forward_time": 0.02526068687438965, "step": 14717 }, { "epoch": 2.245635986328125e-05, "step": 14717, "training_step_time": 0.10683727264404297 }, { "epoch": 2.24578857421875e-05, "model_forward_time": 0.025353193283081055, "step": 14718 }, { "epoch": 2.24578857421875e-05, "step": 14718, "training_step_time": 0.10577178001403809 }, { "epoch": 2.245941162109375e-05, "model_forward_time": 0.025292634963989258, "step": 14719 }, { "epoch": 2.245941162109375e-05, "step": 14719, "training_step_time": 0.18582653999328613 }, { "epoch": 2.24609375e-05, "grad_norm": 0.3045312762260437, "learning_rate": 5.566472411193897e-05, "loss": 0.0192, "step": 14720 }, { "epoch": 2.24609375e-05, "model_forward_time": 0.024792194366455078, "step": 14720 }, { "epoch": 2.24609375e-05, "step": 14720, "training_step_time": 0.13614845275878906 }, { "epoch": 2.246246337890625e-05, "model_forward_time": 0.024472713470458984, "step": 14721 }, { "epoch": 2.246246337890625e-05, "step": 14721, "training_step_time": 0.10914134979248047 }, { "epoch": 2.24639892578125e-05, "model_forward_time": 0.02533555030822754, "step": 14722 }, { "epoch": 2.24639892578125e-05, "step": 14722, "training_step_time": 0.10874700546264648 }, { "epoch": 2.246551513671875e-05, "model_forward_time": 0.0251467227935791, "step": 14723 }, { "epoch": 2.246551513671875e-05, "step": 14723, "training_step_time": 0.16874957084655762 }, { "epoch": 2.2467041015625e-05, "model_forward_time": 0.02476358413696289, "step": 14724 }, { "epoch": 2.2467041015625e-05, "step": 14724, "training_step_time": 0.10590100288391113 }, { "epoch": 2.246856689453125e-05, "model_forward_time": 0.02458810806274414, "step": 14725 }, { "epoch": 2.246856689453125e-05, "step": 14725, "training_step_time": 0.1580822467803955 }, { "epoch": 2.24700927734375e-05, "model_forward_time": 0.02401137351989746, "step": 14726 }, { "epoch": 2.24700927734375e-05, "step": 14726, "training_step_time": 0.10606908798217773 }, { "epoch": 2.247161865234375e-05, "model_forward_time": 0.02607560157775879, "step": 14727 }, { "epoch": 2.247161865234375e-05, "step": 14727, "training_step_time": 0.10567808151245117 }, { "epoch": 2.247314453125e-05, "model_forward_time": 0.025455713272094727, "step": 14728 }, { "epoch": 2.247314453125e-05, "step": 14728, "training_step_time": 0.10681891441345215 }, { "epoch": 2.247467041015625e-05, "model_forward_time": 0.025736093521118164, "step": 14729 }, { "epoch": 2.247467041015625e-05, "step": 14729, "training_step_time": 0.1573047637939453 }, { "epoch": 2.24761962890625e-05, "grad_norm": 0.2626732587814331, "learning_rate": 5.560995988564023e-05, "loss": 0.0196, "step": 14730 }, { "epoch": 2.24761962890625e-05, "model_forward_time": 0.024536609649658203, "step": 14730 }, { "epoch": 2.24761962890625e-05, "step": 14730, "training_step_time": 0.10517406463623047 }, { "epoch": 2.247772216796875e-05, "model_forward_time": 0.025447368621826172, "step": 14731 }, { "epoch": 2.247772216796875e-05, "step": 14731, "training_step_time": 0.10872721672058105 }, { "epoch": 2.2479248046875e-05, "model_forward_time": 0.02533745765686035, "step": 14732 }, { "epoch": 2.2479248046875e-05, "step": 14732, "training_step_time": 0.12754130363464355 }, { "epoch": 2.248077392578125e-05, "model_forward_time": 0.025287389755249023, "step": 14733 }, { "epoch": 2.248077392578125e-05, "step": 14733, "training_step_time": 0.1068577766418457 }, { "epoch": 2.24822998046875e-05, "model_forward_time": 0.02437877655029297, "step": 14734 }, { "epoch": 2.24822998046875e-05, "step": 14734, "training_step_time": 0.17467927932739258 }, { "epoch": 2.248382568359375e-05, "model_forward_time": 0.02448582649230957, "step": 14735 }, { "epoch": 2.248382568359375e-05, "step": 14735, "training_step_time": 0.1784205436706543 }, { "epoch": 2.24853515625e-05, "model_forward_time": 0.027005672454833984, "step": 14736 }, { "epoch": 2.24853515625e-05, "step": 14736, "training_step_time": 0.1210944652557373 }, { "epoch": 2.248687744140625e-05, "model_forward_time": 0.024851322174072266, "step": 14737 }, { "epoch": 2.248687744140625e-05, "step": 14737, "training_step_time": 0.2062077522277832 }, { "epoch": 2.24884033203125e-05, "model_forward_time": 0.024229764938354492, "step": 14738 }, { "epoch": 2.24884033203125e-05, "step": 14738, "training_step_time": 0.1498866081237793 }, { "epoch": 2.248992919921875e-05, "model_forward_time": 0.024547100067138672, "step": 14739 }, { "epoch": 2.248992919921875e-05, "step": 14739, "training_step_time": 0.10862874984741211 }, { "epoch": 2.2491455078125e-05, "grad_norm": 0.22782595455646515, "learning_rate": 5.555518884271122e-05, "loss": 0.0222, "step": 14740 }, { "epoch": 2.2491455078125e-05, "model_forward_time": 0.024859905242919922, "step": 14740 }, { "epoch": 2.2491455078125e-05, "step": 14740, "training_step_time": 0.15157628059387207 }, { "epoch": 2.249298095703125e-05, "model_forward_time": 0.02439594268798828, "step": 14741 }, { "epoch": 2.249298095703125e-05, "step": 14741, "training_step_time": 0.2140343189239502 }, { "epoch": 2.24945068359375e-05, "model_forward_time": 0.024020910263061523, "step": 14742 }, { "epoch": 2.24945068359375e-05, "step": 14742, "training_step_time": 0.12701630592346191 }, { "epoch": 2.249603271484375e-05, "model_forward_time": 0.024662494659423828, "step": 14743 }, { "epoch": 2.249603271484375e-05, "step": 14743, "training_step_time": 0.10414314270019531 }, { "epoch": 2.249755859375e-05, "model_forward_time": 0.024993181228637695, "step": 14744 }, { "epoch": 2.249755859375e-05, "step": 14744, "training_step_time": 0.1050117015838623 }, { "epoch": 2.249908447265625e-05, "model_forward_time": 0.02480626106262207, "step": 14745 }, { "epoch": 2.249908447265625e-05, "step": 14745, "training_step_time": 0.1041867733001709 }, { "epoch": 2.25006103515625e-05, "model_forward_time": 0.025339841842651367, "step": 14746 }, { "epoch": 2.25006103515625e-05, "step": 14746, "training_step_time": 0.10600638389587402 }, { "epoch": 2.250213623046875e-05, "model_forward_time": 0.027340412139892578, "step": 14747 }, { "epoch": 2.250213623046875e-05, "step": 14747, "training_step_time": 0.10669994354248047 }, { "epoch": 2.2503662109375e-05, "model_forward_time": 0.025320768356323242, "step": 14748 }, { "epoch": 2.2503662109375e-05, "step": 14748, "training_step_time": 0.1030266284942627 }, { "epoch": 2.250518798828125e-05, "model_forward_time": 0.0251922607421875, "step": 14749 }, { "epoch": 2.250518798828125e-05, "step": 14749, "training_step_time": 0.10486316680908203 }, { "epoch": 2.25067138671875e-05, "grad_norm": 0.1958162486553192, "learning_rate": 5.550041104970397e-05, "loss": 0.0158, "step": 14750 }, { "epoch": 2.25067138671875e-05, "model_forward_time": 0.025376081466674805, "step": 14750 }, { "epoch": 2.25067138671875e-05, "step": 14750, "training_step_time": 0.10457491874694824 }, { "epoch": 2.250823974609375e-05, "model_forward_time": 0.02504873275756836, "step": 14751 }, { "epoch": 2.250823974609375e-05, "step": 14751, "training_step_time": 0.10509443283081055 }, { "epoch": 2.2509765625e-05, "model_forward_time": 0.02506089210510254, "step": 14752 }, { "epoch": 2.2509765625e-05, "step": 14752, "training_step_time": 0.1042473316192627 }, { "epoch": 2.251129150390625e-05, "model_forward_time": 0.02558755874633789, "step": 14753 }, { "epoch": 2.251129150390625e-05, "step": 14753, "training_step_time": 0.1072998046875 }, { "epoch": 2.25128173828125e-05, "model_forward_time": 0.025144577026367188, "step": 14754 }, { "epoch": 2.25128173828125e-05, "step": 14754, "training_step_time": 0.10531210899353027 }, { "epoch": 2.251434326171875e-05, "model_forward_time": 0.025300025939941406, "step": 14755 }, { "epoch": 2.251434326171875e-05, "step": 14755, "training_step_time": 0.10615301132202148 }, { "epoch": 2.2515869140625e-05, "model_forward_time": 0.02512192726135254, "step": 14756 }, { "epoch": 2.2515869140625e-05, "step": 14756, "training_step_time": 0.10626769065856934 }, { "epoch": 2.251739501953125e-05, "model_forward_time": 0.02468705177307129, "step": 14757 }, { "epoch": 2.251739501953125e-05, "step": 14757, "training_step_time": 0.10579586029052734 }, { "epoch": 2.25189208984375e-05, "model_forward_time": 0.025091171264648438, "step": 14758 }, { "epoch": 2.25189208984375e-05, "step": 14758, "training_step_time": 0.18588662147521973 }, { "epoch": 2.252044677734375e-05, "model_forward_time": 0.024283885955810547, "step": 14759 }, { "epoch": 2.252044677734375e-05, "step": 14759, "training_step_time": 0.2110607624053955 }, { "epoch": 2.252197265625e-05, "grad_norm": 0.21639585494995117, "learning_rate": 5.544562657317863e-05, "loss": 0.0191, "step": 14760 }, { "epoch": 2.252197265625e-05, "model_forward_time": 0.024493694305419922, "step": 14760 }, { "epoch": 2.252197265625e-05, "step": 14760, "training_step_time": 0.2008209228515625 }, { "epoch": 2.252349853515625e-05, "model_forward_time": 0.025089025497436523, "step": 14761 }, { "epoch": 2.252349853515625e-05, "step": 14761, "training_step_time": 0.20092225074768066 }, { "epoch": 2.25250244140625e-05, "model_forward_time": 0.024183988571166992, "step": 14762 }, { "epoch": 2.25250244140625e-05, "step": 14762, "training_step_time": 0.20627498626708984 }, { "epoch": 2.252655029296875e-05, "model_forward_time": 0.027688980102539062, "step": 14763 }, { "epoch": 2.252655029296875e-05, "step": 14763, "training_step_time": 0.16882729530334473 }, { "epoch": 2.2528076171875e-05, "model_forward_time": 0.02386188507080078, "step": 14764 }, { "epoch": 2.2528076171875e-05, "step": 14764, "training_step_time": 0.18313860893249512 }, { "epoch": 2.252960205078125e-05, "model_forward_time": 0.02484273910522461, "step": 14765 }, { "epoch": 2.252960205078125e-05, "step": 14765, "training_step_time": 0.1572110652923584 }, { "epoch": 2.25311279296875e-05, "model_forward_time": 0.023606538772583008, "step": 14766 }, { "epoch": 2.25311279296875e-05, "step": 14766, "training_step_time": 0.15790176391601562 }, { "epoch": 2.253265380859375e-05, "model_forward_time": 0.023847103118896484, "step": 14767 }, { "epoch": 2.253265380859375e-05, "step": 14767, "training_step_time": 0.12670350074768066 }, { "epoch": 2.25341796875e-05, "model_forward_time": 0.02452850341796875, "step": 14768 }, { "epoch": 2.25341796875e-05, "step": 14768, "training_step_time": 0.12925505638122559 }, { "epoch": 2.253570556640625e-05, "model_forward_time": 0.024738788604736328, "step": 14769 }, { "epoch": 2.253570556640625e-05, "step": 14769, "training_step_time": 0.10661530494689941 }, { "epoch": 2.25372314453125e-05, "grad_norm": 0.3235095143318176, "learning_rate": 5.539083547970352e-05, "loss": 0.0166, "step": 14770 }, { "epoch": 2.25372314453125e-05, "model_forward_time": 0.025157928466796875, "step": 14770 }, { "epoch": 2.25372314453125e-05, "step": 14770, "training_step_time": 0.1070404052734375 }, { "epoch": 2.253875732421875e-05, "model_forward_time": 0.024747610092163086, "step": 14771 }, { "epoch": 2.253875732421875e-05, "step": 14771, "training_step_time": 0.10478854179382324 }, { "epoch": 2.2540283203125e-05, "model_forward_time": 0.024283885955810547, "step": 14772 }, { "epoch": 2.2540283203125e-05, "step": 14772, "training_step_time": 0.10873150825500488 }, { "epoch": 2.254180908203125e-05, "model_forward_time": 0.024384737014770508, "step": 14773 }, { "epoch": 2.254180908203125e-05, "step": 14773, "training_step_time": 0.11200404167175293 }, { "epoch": 2.25433349609375e-05, "model_forward_time": 0.023879289627075195, "step": 14774 }, { "epoch": 2.25433349609375e-05, "step": 14774, "training_step_time": 0.12871336936950684 }, { "epoch": 2.254486083984375e-05, "model_forward_time": 0.024820327758789062, "step": 14775 }, { "epoch": 2.254486083984375e-05, "step": 14775, "training_step_time": 0.10478377342224121 }, { "epoch": 2.254638671875e-05, "model_forward_time": 0.02547454833984375, "step": 14776 }, { "epoch": 2.254638671875e-05, "step": 14776, "training_step_time": 0.15362191200256348 }, { "epoch": 2.254791259765625e-05, "model_forward_time": 0.025838136672973633, "step": 14777 }, { "epoch": 2.254791259765625e-05, "step": 14777, "training_step_time": 0.18442440032958984 }, { "epoch": 2.25494384765625e-05, "model_forward_time": 0.024869441986083984, "step": 14778 }, { "epoch": 2.25494384765625e-05, "step": 14778, "training_step_time": 0.10862445831298828 }, { "epoch": 2.255096435546875e-05, "model_forward_time": 0.02480030059814453, "step": 14779 }, { "epoch": 2.255096435546875e-05, "step": 14779, "training_step_time": 0.17042946815490723 }, { "epoch": 2.2552490234375e-05, "grad_norm": 0.37016212940216064, "learning_rate": 5.533603783585496e-05, "loss": 0.0143, "step": 14780 }, { "epoch": 2.2552490234375e-05, "model_forward_time": 0.024486780166625977, "step": 14780 }, { "epoch": 2.2552490234375e-05, "step": 14780, "training_step_time": 0.22100114822387695 }, { "epoch": 2.255401611328125e-05, "model_forward_time": 0.024599790573120117, "step": 14781 }, { "epoch": 2.255401611328125e-05, "step": 14781, "training_step_time": 0.13791441917419434 }, { "epoch": 2.25555419921875e-05, "model_forward_time": 0.025160551071166992, "step": 14782 }, { "epoch": 2.25555419921875e-05, "step": 14782, "training_step_time": 0.11895251274108887 }, { "epoch": 2.255706787109375e-05, "model_forward_time": 0.025170326232910156, "step": 14783 }, { "epoch": 2.255706787109375e-05, "step": 14783, "training_step_time": 0.12685704231262207 }, { "epoch": 2.255859375e-05, "model_forward_time": 0.025570392608642578, "step": 14784 }, { "epoch": 2.255859375e-05, "step": 14784, "training_step_time": 0.10845470428466797 }, { "epoch": 2.256011962890625e-05, "model_forward_time": 0.025302410125732422, "step": 14785 }, { "epoch": 2.256011962890625e-05, "step": 14785, "training_step_time": 0.10517096519470215 }, { "epoch": 2.25616455078125e-05, "model_forward_time": 0.02496933937072754, "step": 14786 }, { "epoch": 2.25616455078125e-05, "step": 14786, "training_step_time": 0.10517144203186035 }, { "epoch": 2.256317138671875e-05, "model_forward_time": 0.024996519088745117, "step": 14787 }, { "epoch": 2.256317138671875e-05, "step": 14787, "training_step_time": 0.10824942588806152 }, { "epoch": 2.2564697265625e-05, "model_forward_time": 0.02556014060974121, "step": 14788 }, { "epoch": 2.2564697265625e-05, "step": 14788, "training_step_time": 0.10934758186340332 }, { "epoch": 2.256622314453125e-05, "model_forward_time": 0.025942564010620117, "step": 14789 }, { "epoch": 2.256622314453125e-05, "step": 14789, "training_step_time": 0.10755753517150879 }, { "epoch": 2.25677490234375e-05, "grad_norm": 0.2053672969341278, "learning_rate": 5.52812337082173e-05, "loss": 0.009, "step": 14790 }, { "epoch": 2.25677490234375e-05, "model_forward_time": 0.0259702205657959, "step": 14790 }, { "epoch": 2.25677490234375e-05, "step": 14790, "training_step_time": 0.10682296752929688 }, { "epoch": 2.256927490234375e-05, "model_forward_time": 0.02599930763244629, "step": 14791 }, { "epoch": 2.256927490234375e-05, "step": 14791, "training_step_time": 0.10543608665466309 }, { "epoch": 2.257080078125e-05, "model_forward_time": 0.02496790885925293, "step": 14792 }, { "epoch": 2.257080078125e-05, "step": 14792, "training_step_time": 0.10713768005371094 }, { "epoch": 2.257232666015625e-05, "model_forward_time": 0.025805950164794922, "step": 14793 }, { "epoch": 2.257232666015625e-05, "step": 14793, "training_step_time": 0.10598206520080566 }, { "epoch": 2.25738525390625e-05, "model_forward_time": 0.02528858184814453, "step": 14794 }, { "epoch": 2.25738525390625e-05, "step": 14794, "training_step_time": 0.11873292922973633 }, { "epoch": 2.257537841796875e-05, "model_forward_time": 0.024210453033447266, "step": 14795 }, { "epoch": 2.257537841796875e-05, "step": 14795, "training_step_time": 0.10839581489562988 }, { "epoch": 2.2576904296875e-05, "model_forward_time": 0.025251388549804688, "step": 14796 }, { "epoch": 2.2576904296875e-05, "step": 14796, "training_step_time": 0.10885143280029297 }, { "epoch": 2.257843017578125e-05, "model_forward_time": 0.026015043258666992, "step": 14797 }, { "epoch": 2.257843017578125e-05, "step": 14797, "training_step_time": 0.10935354232788086 }, { "epoch": 2.25799560546875e-05, "model_forward_time": 0.025206565856933594, "step": 14798 }, { "epoch": 2.25799560546875e-05, "step": 14798, "training_step_time": 0.10524392127990723 }, { "epoch": 2.258148193359375e-05, "model_forward_time": 0.02510666847229004, "step": 14799 }, { "epoch": 2.258148193359375e-05, "step": 14799, "training_step_time": 0.11235666275024414 }, { "epoch": 2.25830078125e-05, "grad_norm": 0.4688062071800232, "learning_rate": 5.522642316338268e-05, "loss": 0.0181, "step": 14800 }, { "epoch": 2.25830078125e-05, "model_forward_time": 0.02641916275024414, "step": 14800 }, { "epoch": 2.25830078125e-05, "step": 14800, "training_step_time": 0.11036992073059082 }, { "epoch": 2.258453369140625e-05, "model_forward_time": 0.02584528923034668, "step": 14801 }, { "epoch": 2.258453369140625e-05, "step": 14801, "training_step_time": 0.10625886917114258 }, { "epoch": 2.25860595703125e-05, "model_forward_time": 0.025241374969482422, "step": 14802 }, { "epoch": 2.25860595703125e-05, "step": 14802, "training_step_time": 0.10579252243041992 }, { "epoch": 2.258758544921875e-05, "model_forward_time": 0.025835514068603516, "step": 14803 }, { "epoch": 2.258758544921875e-05, "step": 14803, "training_step_time": 0.10532093048095703 }, { "epoch": 2.2589111328125e-05, "model_forward_time": 0.02572798728942871, "step": 14804 }, { "epoch": 2.2589111328125e-05, "step": 14804, "training_step_time": 0.10525321960449219 }, { "epoch": 2.259063720703125e-05, "model_forward_time": 0.02539825439453125, "step": 14805 }, { "epoch": 2.259063720703125e-05, "step": 14805, "training_step_time": 0.10481762886047363 }, { "epoch": 2.25921630859375e-05, "model_forward_time": 0.025761842727661133, "step": 14806 }, { "epoch": 2.25921630859375e-05, "step": 14806, "training_step_time": 0.10483098030090332 }, { "epoch": 2.259368896484375e-05, "model_forward_time": 0.025554418563842773, "step": 14807 }, { "epoch": 2.259368896484375e-05, "step": 14807, "training_step_time": 0.150787353515625 }, { "epoch": 2.259521484375e-05, "model_forward_time": 0.02520132064819336, "step": 14808 }, { "epoch": 2.259521484375e-05, "step": 14808, "training_step_time": 0.139662504196167 }, { "epoch": 2.259674072265625e-05, "model_forward_time": 0.02579951286315918, "step": 14809 }, { "epoch": 2.259674072265625e-05, "step": 14809, "training_step_time": 0.11650347709655762 }, { "epoch": 2.25982666015625e-05, "grad_norm": 0.2553534507751465, "learning_rate": 5.51716062679511e-05, "loss": 0.0103, "step": 14810 }, { "epoch": 2.25982666015625e-05, "model_forward_time": 0.024725675582885742, "step": 14810 }, { "epoch": 2.25982666015625e-05, "step": 14810, "training_step_time": 0.11183643341064453 }, { "epoch": 2.259979248046875e-05, "model_forward_time": 0.02544260025024414, "step": 14811 }, { "epoch": 2.259979248046875e-05, "step": 14811, "training_step_time": 0.1734323501586914 }, { "epoch": 2.2601318359375e-05, "model_forward_time": 0.025006532669067383, "step": 14812 }, { "epoch": 2.2601318359375e-05, "step": 14812, "training_step_time": 0.13037443161010742 }, { "epoch": 2.260284423828125e-05, "model_forward_time": 0.024850845336914062, "step": 14813 }, { "epoch": 2.260284423828125e-05, "step": 14813, "training_step_time": 0.11062312126159668 }, { "epoch": 2.26043701171875e-05, "model_forward_time": 0.024623632431030273, "step": 14814 }, { "epoch": 2.26043701171875e-05, "step": 14814, "training_step_time": 0.10833477973937988 }, { "epoch": 2.260589599609375e-05, "model_forward_time": 0.025421857833862305, "step": 14815 }, { "epoch": 2.260589599609375e-05, "step": 14815, "training_step_time": 0.10810446739196777 }, { "epoch": 2.2607421875e-05, "model_forward_time": 0.02528548240661621, "step": 14816 }, { "epoch": 2.2607421875e-05, "step": 14816, "training_step_time": 0.10900306701660156 }, { "epoch": 2.260894775390625e-05, "model_forward_time": 0.02529764175415039, "step": 14817 }, { "epoch": 2.260894775390625e-05, "step": 14817, "training_step_time": 0.1571967601776123 }, { "epoch": 2.26104736328125e-05, "model_forward_time": 0.024762868881225586, "step": 14818 }, { "epoch": 2.26104736328125e-05, "step": 14818, "training_step_time": 0.1141808032989502 }, { "epoch": 2.261199951171875e-05, "model_forward_time": 0.02491140365600586, "step": 14819 }, { "epoch": 2.261199951171875e-05, "step": 14819, "training_step_time": 0.11102509498596191 }, { "epoch": 2.2613525390625e-05, "grad_norm": 0.3513282239437103, "learning_rate": 5.511678308853026e-05, "loss": 0.0176, "step": 14820 }, { "epoch": 2.2613525390625e-05, "model_forward_time": 0.025198936462402344, "step": 14820 }, { "epoch": 2.2613525390625e-05, "step": 14820, "training_step_time": 0.11933422088623047 }, { "epoch": 2.261505126953125e-05, "model_forward_time": 0.025443077087402344, "step": 14821 }, { "epoch": 2.261505126953125e-05, "step": 14821, "training_step_time": 0.10541892051696777 }, { "epoch": 2.26165771484375e-05, "model_forward_time": 0.028615713119506836, "step": 14822 }, { "epoch": 2.26165771484375e-05, "step": 14822, "training_step_time": 0.17497992515563965 }, { "epoch": 2.261810302734375e-05, "model_forward_time": 0.0250246524810791, "step": 14823 }, { "epoch": 2.261810302734375e-05, "step": 14823, "training_step_time": 0.1685009002685547 }, { "epoch": 2.261962890625e-05, "model_forward_time": 0.024708986282348633, "step": 14824 }, { "epoch": 2.261962890625e-05, "step": 14824, "training_step_time": 0.10331392288208008 }, { "epoch": 2.262115478515625e-05, "model_forward_time": 0.02632904052734375, "step": 14825 }, { "epoch": 2.262115478515625e-05, "step": 14825, "training_step_time": 0.18813610076904297 }, { "epoch": 2.26226806640625e-05, "model_forward_time": 0.024967193603515625, "step": 14826 }, { "epoch": 2.26226806640625e-05, "step": 14826, "training_step_time": 0.15134549140930176 }, { "epoch": 2.262420654296875e-05, "model_forward_time": 0.02448892593383789, "step": 14827 }, { "epoch": 2.262420654296875e-05, "step": 14827, "training_step_time": 0.10665702819824219 }, { "epoch": 2.2625732421875e-05, "model_forward_time": 0.025063753128051758, "step": 14828 }, { "epoch": 2.2625732421875e-05, "step": 14828, "training_step_time": 0.1487720012664795 }, { "epoch": 2.262725830078125e-05, "model_forward_time": 0.025043010711669922, "step": 14829 }, { "epoch": 2.262725830078125e-05, "step": 14829, "training_step_time": 0.20940709114074707 }, { "epoch": 2.26287841796875e-05, "grad_norm": 0.3493926525115967, "learning_rate": 5.506195369173548e-05, "loss": 0.0246, "step": 14830 }, { "epoch": 2.26287841796875e-05, "model_forward_time": 0.024427175521850586, "step": 14830 }, { "epoch": 2.26287841796875e-05, "step": 14830, "training_step_time": 0.11499714851379395 }, { "epoch": 2.263031005859375e-05, "model_forward_time": 0.025658369064331055, "step": 14831 }, { "epoch": 2.263031005859375e-05, "step": 14831, "training_step_time": 0.1037442684173584 }, { "epoch": 2.26318359375e-05, "model_forward_time": 0.0267179012298584, "step": 14832 }, { "epoch": 2.26318359375e-05, "step": 14832, "training_step_time": 0.10761761665344238 }, { "epoch": 2.263336181640625e-05, "model_forward_time": 0.02652144432067871, "step": 14833 }, { "epoch": 2.263336181640625e-05, "step": 14833, "training_step_time": 0.1071329116821289 }, { "epoch": 2.26348876953125e-05, "model_forward_time": 0.0265045166015625, "step": 14834 }, { "epoch": 2.26348876953125e-05, "step": 14834, "training_step_time": 0.10627508163452148 }, { "epoch": 2.263641357421875e-05, "model_forward_time": 0.026355743408203125, "step": 14835 }, { "epoch": 2.263641357421875e-05, "step": 14835, "training_step_time": 0.10593533515930176 }, { "epoch": 2.2637939453125e-05, "model_forward_time": 0.027586936950683594, "step": 14836 }, { "epoch": 2.2637939453125e-05, "step": 14836, "training_step_time": 0.10930085182189941 }, { "epoch": 2.263946533203125e-05, "model_forward_time": 0.02616262435913086, "step": 14837 }, { "epoch": 2.263946533203125e-05, "step": 14837, "training_step_time": 0.10576725006103516 }, { "epoch": 2.26409912109375e-05, "model_forward_time": 0.025001049041748047, "step": 14838 }, { "epoch": 2.26409912109375e-05, "step": 14838, "training_step_time": 0.10335540771484375 }, { "epoch": 2.264251708984375e-05, "model_forward_time": 0.02640986442565918, "step": 14839 }, { "epoch": 2.264251708984375e-05, "step": 14839, "training_step_time": 0.10802435874938965 }, { "epoch": 2.264404296875e-05, "grad_norm": 0.22913873195648193, "learning_rate": 5.500711814418966e-05, "loss": 0.032, "step": 14840 }, { "epoch": 2.264404296875e-05, "model_forward_time": 0.025347232818603516, "step": 14840 }, { "epoch": 2.264404296875e-05, "step": 14840, "training_step_time": 0.10446739196777344 }, { "epoch": 2.264556884765625e-05, "model_forward_time": 0.024825096130371094, "step": 14841 }, { "epoch": 2.264556884765625e-05, "step": 14841, "training_step_time": 0.1092381477355957 }, { "epoch": 2.26470947265625e-05, "model_forward_time": 0.024314403533935547, "step": 14842 }, { "epoch": 2.26470947265625e-05, "step": 14842, "training_step_time": 0.10326933860778809 }, { "epoch": 2.264862060546875e-05, "model_forward_time": 0.02485179901123047, "step": 14843 }, { "epoch": 2.264862060546875e-05, "step": 14843, "training_step_time": 0.10414695739746094 }, { "epoch": 2.2650146484375e-05, "model_forward_time": 0.024906635284423828, "step": 14844 }, { "epoch": 2.2650146484375e-05, "step": 14844, "training_step_time": 0.10831308364868164 }, { "epoch": 2.265167236328125e-05, "model_forward_time": 0.02557849884033203, "step": 14845 }, { "epoch": 2.265167236328125e-05, "step": 14845, "training_step_time": 0.10463929176330566 }, { "epoch": 2.26531982421875e-05, "model_forward_time": 0.02520895004272461, "step": 14846 }, { "epoch": 2.26531982421875e-05, "step": 14846, "training_step_time": 0.10445737838745117 }, { "epoch": 2.265472412109375e-05, "model_forward_time": 0.025127887725830078, "step": 14847 }, { "epoch": 2.265472412109375e-05, "step": 14847, "training_step_time": 0.10575389862060547 }, { "epoch": 2.265625e-05, "model_forward_time": 0.02509784698486328, "step": 14848 }, { "epoch": 2.265625e-05, "step": 14848, "training_step_time": 0.10387110710144043 }, { "epoch": 2.265777587890625e-05, "model_forward_time": 0.024988412857055664, "step": 14849 }, { "epoch": 2.265777587890625e-05, "step": 14849, "training_step_time": 0.10509228706359863 }, { "epoch": 2.26593017578125e-05, "grad_norm": 0.2163519263267517, "learning_rate": 5.495227651252315e-05, "loss": 0.026, "step": 14850 }, { "epoch": 2.26593017578125e-05, "model_forward_time": 0.026961803436279297, "step": 14850 }, { "epoch": 2.26593017578125e-05, "step": 14850, "training_step_time": 0.10823345184326172 }, { "epoch": 2.266082763671875e-05, "model_forward_time": 0.02663135528564453, "step": 14851 }, { "epoch": 2.266082763671875e-05, "step": 14851, "training_step_time": 0.10627174377441406 }, { "epoch": 2.2662353515625e-05, "model_forward_time": 0.025142431259155273, "step": 14852 }, { "epoch": 2.2662353515625e-05, "step": 14852, "training_step_time": 0.10399889945983887 }, { "epoch": 2.266387939453125e-05, "model_forward_time": 0.026178836822509766, "step": 14853 }, { "epoch": 2.266387939453125e-05, "step": 14853, "training_step_time": 0.15939760208129883 }, { "epoch": 2.26654052734375e-05, "model_forward_time": 0.02554154396057129, "step": 14854 }, { "epoch": 2.26654052734375e-05, "step": 14854, "training_step_time": 0.1461200714111328 }, { "epoch": 2.266693115234375e-05, "model_forward_time": 0.024308204650878906, "step": 14855 }, { "epoch": 2.266693115234375e-05, "step": 14855, "training_step_time": 0.10918974876403809 }, { "epoch": 2.266845703125e-05, "model_forward_time": 0.02513861656188965, "step": 14856 }, { "epoch": 2.266845703125e-05, "step": 14856, "training_step_time": 0.11739492416381836 }, { "epoch": 2.266998291015625e-05, "model_forward_time": 0.0248568058013916, "step": 14857 }, { "epoch": 2.266998291015625e-05, "step": 14857, "training_step_time": 0.18959736824035645 }, { "epoch": 2.26715087890625e-05, "model_forward_time": 0.024457693099975586, "step": 14858 }, { "epoch": 2.26715087890625e-05, "step": 14858, "training_step_time": 0.20301127433776855 }, { "epoch": 2.267303466796875e-05, "model_forward_time": 0.024655580520629883, "step": 14859 }, { "epoch": 2.267303466796875e-05, "step": 14859, "training_step_time": 0.2040097713470459 }, { "epoch": 2.2674560546875e-05, "grad_norm": 0.2992362082004547, "learning_rate": 5.48974288633737e-05, "loss": 0.0121, "step": 14860 }, { "epoch": 2.2674560546875e-05, "model_forward_time": 0.024125337600708008, "step": 14860 }, { "epoch": 2.2674560546875e-05, "step": 14860, "training_step_time": 0.18449807167053223 }, { "epoch": 2.267608642578125e-05, "model_forward_time": 0.024123430252075195, "step": 14861 }, { "epoch": 2.267608642578125e-05, "step": 14861, "training_step_time": 0.2059779167175293 }, { "epoch": 2.26776123046875e-05, "model_forward_time": 0.024475574493408203, "step": 14862 }, { "epoch": 2.26776123046875e-05, "step": 14862, "training_step_time": 0.21816611289978027 }, { "epoch": 2.267913818359375e-05, "model_forward_time": 0.024101734161376953, "step": 14863 }, { "epoch": 2.267913818359375e-05, "step": 14863, "training_step_time": 0.15960311889648438 }, { "epoch": 2.26806640625e-05, "model_forward_time": 0.02553558349609375, "step": 14864 }, { "epoch": 2.26806640625e-05, "step": 14864, "training_step_time": 0.13377737998962402 }, { "epoch": 2.268218994140625e-05, "model_forward_time": 0.024745702743530273, "step": 14865 }, { "epoch": 2.268218994140625e-05, "step": 14865, "training_step_time": 0.1947021484375 }, { "epoch": 2.26837158203125e-05, "model_forward_time": 0.02460479736328125, "step": 14866 }, { "epoch": 2.26837158203125e-05, "step": 14866, "training_step_time": 0.10844182968139648 }, { "epoch": 2.268524169921875e-05, "model_forward_time": 0.024878740310668945, "step": 14867 }, { "epoch": 2.268524169921875e-05, "step": 14867, "training_step_time": 0.1087791919708252 }, { "epoch": 2.2686767578125e-05, "model_forward_time": 0.025892019271850586, "step": 14868 }, { "epoch": 2.2686767578125e-05, "step": 14868, "training_step_time": 0.1576218605041504 }, { "epoch": 2.268829345703125e-05, "model_forward_time": 0.025026798248291016, "step": 14869 }, { "epoch": 2.268829345703125e-05, "step": 14869, "training_step_time": 0.18625140190124512 }, { "epoch": 2.26898193359375e-05, "grad_norm": 0.26298847794532776, "learning_rate": 5.4842575263386386e-05, "loss": 0.0111, "step": 14870 }, { "epoch": 2.26898193359375e-05, "model_forward_time": 0.02471137046813965, "step": 14870 }, { "epoch": 2.26898193359375e-05, "step": 14870, "training_step_time": 0.134918212890625 }, { "epoch": 2.269134521484375e-05, "model_forward_time": 0.024823427200317383, "step": 14871 }, { "epoch": 2.269134521484375e-05, "step": 14871, "training_step_time": 0.15197420120239258 }, { "epoch": 2.269287109375e-05, "model_forward_time": 0.02453160285949707, "step": 14872 }, { "epoch": 2.269287109375e-05, "step": 14872, "training_step_time": 0.1039285659790039 }, { "epoch": 2.269439697265625e-05, "model_forward_time": 0.024767398834228516, "step": 14873 }, { "epoch": 2.269439697265625e-05, "step": 14873, "training_step_time": 0.1219797134399414 }, { "epoch": 2.26959228515625e-05, "model_forward_time": 0.025044918060302734, "step": 14874 }, { "epoch": 2.26959228515625e-05, "step": 14874, "training_step_time": 0.11231827735900879 }, { "epoch": 2.269744873046875e-05, "model_forward_time": 0.02501058578491211, "step": 14875 }, { "epoch": 2.269744873046875e-05, "step": 14875, "training_step_time": 0.10689258575439453 }, { "epoch": 2.2698974609375e-05, "model_forward_time": 0.024886131286621094, "step": 14876 }, { "epoch": 2.2698974609375e-05, "step": 14876, "training_step_time": 0.15097260475158691 }, { "epoch": 2.270050048828125e-05, "model_forward_time": 0.02499866485595703, "step": 14877 }, { "epoch": 2.270050048828125e-05, "step": 14877, "training_step_time": 0.17195558547973633 }, { "epoch": 2.27020263671875e-05, "model_forward_time": 0.02435469627380371, "step": 14878 }, { "epoch": 2.27020263671875e-05, "step": 14878, "training_step_time": 0.17908620834350586 }, { "epoch": 2.270355224609375e-05, "model_forward_time": 0.024644851684570312, "step": 14879 }, { "epoch": 2.270355224609375e-05, "step": 14879, "training_step_time": 0.1671741008758545 }, { "epoch": 2.2705078125e-05, "grad_norm": 0.3613302707672119, "learning_rate": 5.478771577921351e-05, "loss": 0.0165, "step": 14880 }, { "epoch": 2.2705078125e-05, "model_forward_time": 0.02400803565979004, "step": 14880 }, { "epoch": 2.2705078125e-05, "step": 14880, "training_step_time": 0.1529064178466797 }, { "epoch": 2.270660400390625e-05, "model_forward_time": 0.023691177368164062, "step": 14881 }, { "epoch": 2.270660400390625e-05, "step": 14881, "training_step_time": 0.13833260536193848 }, { "epoch": 2.27081298828125e-05, "model_forward_time": 0.02431488037109375, "step": 14882 }, { "epoch": 2.27081298828125e-05, "step": 14882, "training_step_time": 0.13045287132263184 }, { "epoch": 2.270965576171875e-05, "model_forward_time": 0.024327993392944336, "step": 14883 }, { "epoch": 2.270965576171875e-05, "step": 14883, "training_step_time": 0.12671160697937012 }, { "epoch": 2.2711181640625e-05, "model_forward_time": 0.024814128875732422, "step": 14884 }, { "epoch": 2.2711181640625e-05, "step": 14884, "training_step_time": 0.12410783767700195 }, { "epoch": 2.271270751953125e-05, "model_forward_time": 0.027817964553833008, "step": 14885 }, { "epoch": 2.271270751953125e-05, "step": 14885, "training_step_time": 0.1152656078338623 }, { "epoch": 2.27142333984375e-05, "model_forward_time": 0.024953126907348633, "step": 14886 }, { "epoch": 2.27142333984375e-05, "step": 14886, "training_step_time": 0.11551499366760254 }, { "epoch": 2.271575927734375e-05, "model_forward_time": 0.025027036666870117, "step": 14887 }, { "epoch": 2.271575927734375e-05, "step": 14887, "training_step_time": 0.1104745864868164 }, { "epoch": 2.271728515625e-05, "model_forward_time": 0.024927616119384766, "step": 14888 }, { "epoch": 2.271728515625e-05, "step": 14888, "training_step_time": 0.10978388786315918 }, { "epoch": 2.271881103515625e-05, "model_forward_time": 0.026190996170043945, "step": 14889 }, { "epoch": 2.271881103515625e-05, "step": 14889, "training_step_time": 0.10906171798706055 }, { "epoch": 2.27203369140625e-05, "grad_norm": 0.4646231532096863, "learning_rate": 5.473285047751451e-05, "loss": 0.0132, "step": 14890 }, { "epoch": 2.27203369140625e-05, "model_forward_time": 0.024974584579467773, "step": 14890 }, { "epoch": 2.27203369140625e-05, "step": 14890, "training_step_time": 0.10864377021789551 }, { "epoch": 2.272186279296875e-05, "model_forward_time": 0.024869441986083984, "step": 14891 }, { "epoch": 2.272186279296875e-05, "step": 14891, "training_step_time": 0.1066129207611084 }, { "epoch": 2.2723388671875e-05, "model_forward_time": 0.02564406394958496, "step": 14892 }, { "epoch": 2.2723388671875e-05, "step": 14892, "training_step_time": 0.10619688034057617 }, { "epoch": 2.272491455078125e-05, "model_forward_time": 0.02511763572692871, "step": 14893 }, { "epoch": 2.272491455078125e-05, "step": 14893, "training_step_time": 0.10897254943847656 }, { "epoch": 2.27264404296875e-05, "model_forward_time": 0.025453805923461914, "step": 14894 }, { "epoch": 2.27264404296875e-05, "step": 14894, "training_step_time": 0.12895774841308594 }, { "epoch": 2.272796630859375e-05, "model_forward_time": 0.025157451629638672, "step": 14895 }, { "epoch": 2.272796630859375e-05, "step": 14895, "training_step_time": 0.11252832412719727 }, { "epoch": 2.27294921875e-05, "model_forward_time": 0.02543330192565918, "step": 14896 }, { "epoch": 2.27294921875e-05, "step": 14896, "training_step_time": 0.1196587085723877 }, { "epoch": 2.273101806640625e-05, "model_forward_time": 0.025203227996826172, "step": 14897 }, { "epoch": 2.273101806640625e-05, "step": 14897, "training_step_time": 0.11507296562194824 }, { "epoch": 2.27325439453125e-05, "model_forward_time": 0.024980545043945312, "step": 14898 }, { "epoch": 2.27325439453125e-05, "step": 14898, "training_step_time": 0.18722319602966309 }, { "epoch": 2.273406982421875e-05, "model_forward_time": 0.02448105812072754, "step": 14899 }, { "epoch": 2.273406982421875e-05, "step": 14899, "training_step_time": 0.11398124694824219 }, { "epoch": 2.2735595703125e-05, "grad_norm": 0.26571959257125854, "learning_rate": 5.467797942495589e-05, "loss": 0.0107, "step": 14900 }, { "epoch": 2.2735595703125e-05, "model_forward_time": 0.024373531341552734, "step": 14900 }, { "epoch": 2.2735595703125e-05, "step": 14900, "training_step_time": 0.10998344421386719 }, { "epoch": 2.273712158203125e-05, "model_forward_time": 0.0252382755279541, "step": 14901 }, { "epoch": 2.273712158203125e-05, "step": 14901, "training_step_time": 0.11007142066955566 }, { "epoch": 2.27386474609375e-05, "model_forward_time": 0.025246858596801758, "step": 14902 }, { "epoch": 2.27386474609375e-05, "step": 14902, "training_step_time": 0.10644125938415527 }, { "epoch": 2.274017333984375e-05, "model_forward_time": 0.025379180908203125, "step": 14903 }, { "epoch": 2.274017333984375e-05, "step": 14903, "training_step_time": 0.10727334022521973 }, { "epoch": 2.274169921875e-05, "model_forward_time": 0.025216341018676758, "step": 14904 }, { "epoch": 2.274169921875e-05, "step": 14904, "training_step_time": 0.15033793449401855 }, { "epoch": 2.274322509765625e-05, "model_forward_time": 0.02520012855529785, "step": 14905 }, { "epoch": 2.274322509765625e-05, "step": 14905, "training_step_time": 0.10689067840576172 }, { "epoch": 2.27447509765625e-05, "model_forward_time": 0.02510547637939453, "step": 14906 }, { "epoch": 2.27447509765625e-05, "step": 14906, "training_step_time": 0.11208510398864746 }, { "epoch": 2.274627685546875e-05, "model_forward_time": 0.024983882904052734, "step": 14907 }, { "epoch": 2.274627685546875e-05, "step": 14907, "training_step_time": 0.12486839294433594 }, { "epoch": 2.2747802734375e-05, "model_forward_time": 0.024580717086791992, "step": 14908 }, { "epoch": 2.2747802734375e-05, "step": 14908, "training_step_time": 0.10437130928039551 }, { "epoch": 2.274932861328125e-05, "model_forward_time": 0.024425983428955078, "step": 14909 }, { "epoch": 2.274932861328125e-05, "step": 14909, "training_step_time": 0.16484642028808594 }, { "epoch": 2.27508544921875e-05, "grad_norm": 0.42848464846611023, "learning_rate": 5.462310268821118e-05, "loss": 0.0156, "step": 14910 }, { "epoch": 2.27508544921875e-05, "model_forward_time": 0.024337291717529297, "step": 14910 }, { "epoch": 2.27508544921875e-05, "step": 14910, "training_step_time": 0.14425039291381836 }, { "epoch": 2.275238037109375e-05, "model_forward_time": 0.025024890899658203, "step": 14911 }, { "epoch": 2.275238037109375e-05, "step": 14911, "training_step_time": 0.11900901794433594 }, { "epoch": 2.275390625e-05, "model_forward_time": 0.02473616600036621, "step": 14912 }, { "epoch": 2.275390625e-05, "step": 14912, "training_step_time": 0.12766051292419434 }, { "epoch": 2.275543212890625e-05, "model_forward_time": 0.0250396728515625, "step": 14913 }, { "epoch": 2.275543212890625e-05, "step": 14913, "training_step_time": 0.23004651069641113 }, { "epoch": 2.27569580078125e-05, "model_forward_time": 0.024539470672607422, "step": 14914 }, { "epoch": 2.27569580078125e-05, "step": 14914, "training_step_time": 0.11830687522888184 }, { "epoch": 2.275848388671875e-05, "model_forward_time": 0.02417278289794922, "step": 14915 }, { "epoch": 2.275848388671875e-05, "step": 14915, "training_step_time": 0.13068675994873047 }, { "epoch": 2.2760009765625e-05, "model_forward_time": 0.024727582931518555, "step": 14916 }, { "epoch": 2.2760009765625e-05, "step": 14916, "training_step_time": 0.16162729263305664 }, { "epoch": 2.276153564453125e-05, "model_forward_time": 0.024370908737182617, "step": 14917 }, { "epoch": 2.276153564453125e-05, "step": 14917, "training_step_time": 0.10608196258544922 }, { "epoch": 2.27630615234375e-05, "model_forward_time": 0.024675607681274414, "step": 14918 }, { "epoch": 2.27630615234375e-05, "step": 14918, "training_step_time": 0.12104129791259766 }, { "epoch": 2.276458740234375e-05, "model_forward_time": 0.02509474754333496, "step": 14919 }, { "epoch": 2.276458740234375e-05, "step": 14919, "training_step_time": 0.11359143257141113 }, { "epoch": 2.276611328125e-05, "grad_norm": 0.26866063475608826, "learning_rate": 5.456822033396076e-05, "loss": 0.0106, "step": 14920 }, { "epoch": 2.276611328125e-05, "model_forward_time": 0.025063753128051758, "step": 14920 }, { "epoch": 2.276611328125e-05, "step": 14920, "training_step_time": 0.11717700958251953 }, { "epoch": 2.276763916015625e-05, "model_forward_time": 0.025421857833862305, "step": 14921 }, { "epoch": 2.276763916015625e-05, "step": 14921, "training_step_time": 0.11031651496887207 }, { "epoch": 2.27691650390625e-05, "model_forward_time": 0.02510666847229004, "step": 14922 }, { "epoch": 2.27691650390625e-05, "step": 14922, "training_step_time": 0.11158514022827148 }, { "epoch": 2.277069091796875e-05, "model_forward_time": 0.02490091323852539, "step": 14923 }, { "epoch": 2.277069091796875e-05, "step": 14923, "training_step_time": 0.10818672180175781 }, { "epoch": 2.2772216796875e-05, "model_forward_time": 0.025289535522460938, "step": 14924 }, { "epoch": 2.2772216796875e-05, "step": 14924, "training_step_time": 0.10561609268188477 }, { "epoch": 2.277374267578125e-05, "model_forward_time": 0.024762392044067383, "step": 14925 }, { "epoch": 2.277374267578125e-05, "step": 14925, "training_step_time": 0.10829544067382812 }, { "epoch": 2.27752685546875e-05, "model_forward_time": 0.025540828704833984, "step": 14926 }, { "epoch": 2.27752685546875e-05, "step": 14926, "training_step_time": 0.11206722259521484 }, { "epoch": 2.277679443359375e-05, "model_forward_time": 0.025088787078857422, "step": 14927 }, { "epoch": 2.277679443359375e-05, "step": 14927, "training_step_time": 0.1061398983001709 }, { "epoch": 2.27783203125e-05, "model_forward_time": 0.02525162696838379, "step": 14928 }, { "epoch": 2.27783203125e-05, "step": 14928, "training_step_time": 0.10618829727172852 }, { "epoch": 2.277984619140625e-05, "model_forward_time": 0.027097463607788086, "step": 14929 }, { "epoch": 2.277984619140625e-05, "step": 14929, "training_step_time": 0.10720419883728027 }, { "epoch": 2.27813720703125e-05, "grad_norm": 0.5203060507774353, "learning_rate": 5.4513332428891887e-05, "loss": 0.0129, "step": 14930 }, { "epoch": 2.27813720703125e-05, "model_forward_time": 0.02560257911682129, "step": 14930 }, { "epoch": 2.27813720703125e-05, "step": 14930, "training_step_time": 0.10634350776672363 }, { "epoch": 2.278289794921875e-05, "model_forward_time": 0.026325702667236328, "step": 14931 }, { "epoch": 2.278289794921875e-05, "step": 14931, "training_step_time": 0.10548520088195801 }, { "epoch": 2.2784423828125e-05, "model_forward_time": 0.024842262268066406, "step": 14932 }, { "epoch": 2.2784423828125e-05, "step": 14932, "training_step_time": 0.10508370399475098 }, { "epoch": 2.278594970703125e-05, "model_forward_time": 0.023933887481689453, "step": 14933 }, { "epoch": 2.278594970703125e-05, "step": 14933, "training_step_time": 0.10372781753540039 }, { "epoch": 2.27874755859375e-05, "model_forward_time": 0.025464534759521484, "step": 14934 }, { "epoch": 2.27874755859375e-05, "step": 14934, "training_step_time": 0.10503840446472168 }, { "epoch": 2.278900146484375e-05, "model_forward_time": 0.024759292602539062, "step": 14935 }, { "epoch": 2.278900146484375e-05, "step": 14935, "training_step_time": 0.11078548431396484 }, { "epoch": 2.279052734375e-05, "model_forward_time": 0.02487921714782715, "step": 14936 }, { "epoch": 2.279052734375e-05, "step": 14936, "training_step_time": 0.10695219039916992 }, { "epoch": 2.279205322265625e-05, "model_forward_time": 0.024962425231933594, "step": 14937 }, { "epoch": 2.279205322265625e-05, "step": 14937, "training_step_time": 0.11014485359191895 }, { "epoch": 2.27935791015625e-05, "model_forward_time": 0.025368928909301758, "step": 14938 }, { "epoch": 2.27935791015625e-05, "step": 14938, "training_step_time": 0.10602331161499023 }, { "epoch": 2.279510498046875e-05, "model_forward_time": 0.024961233139038086, "step": 14939 }, { "epoch": 2.279510498046875e-05, "step": 14939, "training_step_time": 0.17903470993041992 }, { "epoch": 2.2796630859375e-05, "grad_norm": 0.21775636076927185, "learning_rate": 5.445843903969854e-05, "loss": 0.0228, "step": 14940 }, { "epoch": 2.2796630859375e-05, "model_forward_time": 0.02483344078063965, "step": 14940 }, { "epoch": 2.2796630859375e-05, "step": 14940, "training_step_time": 0.13159823417663574 }, { "epoch": 2.279815673828125e-05, "model_forward_time": 0.02428603172302246, "step": 14941 }, { "epoch": 2.279815673828125e-05, "step": 14941, "training_step_time": 0.11121535301208496 }, { "epoch": 2.27996826171875e-05, "model_forward_time": 0.026320457458496094, "step": 14942 }, { "epoch": 2.27996826171875e-05, "step": 14942, "training_step_time": 0.12621140480041504 }, { "epoch": 2.280120849609375e-05, "model_forward_time": 0.025321006774902344, "step": 14943 }, { "epoch": 2.280120849609375e-05, "step": 14943, "training_step_time": 0.10862612724304199 }, { "epoch": 2.2802734375e-05, "model_forward_time": 0.02555251121520996, "step": 14944 }, { "epoch": 2.2802734375e-05, "step": 14944, "training_step_time": 0.10481429100036621 }, { "epoch": 2.280426025390625e-05, "model_forward_time": 0.025687456130981445, "step": 14945 }, { "epoch": 2.280426025390625e-05, "step": 14945, "training_step_time": 0.1972184181213379 }, { "epoch": 2.28057861328125e-05, "model_forward_time": 0.024505138397216797, "step": 14946 }, { "epoch": 2.28057861328125e-05, "step": 14946, "training_step_time": 0.1042940616607666 }, { "epoch": 2.280731201171875e-05, "model_forward_time": 0.024341583251953125, "step": 14947 }, { "epoch": 2.280731201171875e-05, "step": 14947, "training_step_time": 0.10468935966491699 }, { "epoch": 2.2808837890625e-05, "model_forward_time": 0.024756669998168945, "step": 14948 }, { "epoch": 2.2808837890625e-05, "step": 14948, "training_step_time": 0.10578727722167969 }, { "epoch": 2.281036376953125e-05, "model_forward_time": 0.024662494659423828, "step": 14949 }, { "epoch": 2.281036376953125e-05, "step": 14949, "training_step_time": 0.10571455955505371 }, { "epoch": 2.28118896484375e-05, "grad_norm": 0.2750996947288513, "learning_rate": 5.440354023308134e-05, "loss": 0.03, "step": 14950 }, { "epoch": 2.28118896484375e-05, "model_forward_time": 0.024669170379638672, "step": 14950 }, { "epoch": 2.28118896484375e-05, "step": 14950, "training_step_time": 0.10721278190612793 }, { "epoch": 2.281341552734375e-05, "model_forward_time": 0.02471017837524414, "step": 14951 }, { "epoch": 2.281341552734375e-05, "step": 14951, "training_step_time": 0.11367106437683105 }, { "epoch": 2.281494140625e-05, "model_forward_time": 0.025657176971435547, "step": 14952 }, { "epoch": 2.281494140625e-05, "step": 14952, "training_step_time": 0.10823988914489746 }, { "epoch": 2.281646728515625e-05, "model_forward_time": 0.02899646759033203, "step": 14953 }, { "epoch": 2.281646728515625e-05, "step": 14953, "training_step_time": 0.10874676704406738 }, { "epoch": 2.28179931640625e-05, "model_forward_time": 0.025506973266601562, "step": 14954 }, { "epoch": 2.28179931640625e-05, "step": 14954, "training_step_time": 0.1342613697052002 }, { "epoch": 2.281951904296875e-05, "model_forward_time": 0.024376392364501953, "step": 14955 }, { "epoch": 2.281951904296875e-05, "step": 14955, "training_step_time": 0.10312962532043457 }, { "epoch": 2.2821044921875e-05, "model_forward_time": 0.023671627044677734, "step": 14956 }, { "epoch": 2.2821044921875e-05, "step": 14956, "training_step_time": 0.1657695770263672 }, { "epoch": 2.282257080078125e-05, "model_forward_time": 0.024108171463012695, "step": 14957 }, { "epoch": 2.282257080078125e-05, "step": 14957, "training_step_time": 0.14923524856567383 }, { "epoch": 2.28240966796875e-05, "model_forward_time": 0.023862361907958984, "step": 14958 }, { "epoch": 2.28240966796875e-05, "step": 14958, "training_step_time": 0.10638093948364258 }, { "epoch": 2.282562255859375e-05, "model_forward_time": 0.024613380432128906, "step": 14959 }, { "epoch": 2.282562255859375e-05, "step": 14959, "training_step_time": 0.14725017547607422 }, { "epoch": 2.28271484375e-05, "grad_norm": 0.22984150052070618, "learning_rate": 5.4348636075747536e-05, "loss": 0.0185, "step": 14960 }, { "epoch": 2.28271484375e-05, "model_forward_time": 0.025384902954101562, "step": 14960 }, { "epoch": 2.28271484375e-05, "step": 14960, "training_step_time": 0.1861410140991211 }, { "epoch": 2.282867431640625e-05, "model_forward_time": 0.023892641067504883, "step": 14961 }, { "epoch": 2.282867431640625e-05, "step": 14961, "training_step_time": 0.21457839012145996 }, { "epoch": 2.28302001953125e-05, "model_forward_time": 0.02549147605895996, "step": 14962 }, { "epoch": 2.28302001953125e-05, "step": 14962, "training_step_time": 0.13365602493286133 }, { "epoch": 2.283172607421875e-05, "model_forward_time": 0.0237424373626709, "step": 14963 }, { "epoch": 2.283172607421875e-05, "step": 14963, "training_step_time": 0.1128232479095459 }, { "epoch": 2.2833251953125e-05, "model_forward_time": 0.024410486221313477, "step": 14964 }, { "epoch": 2.2833251953125e-05, "step": 14964, "training_step_time": 0.13319945335388184 }, { "epoch": 2.283477783203125e-05, "model_forward_time": 0.024748563766479492, "step": 14965 }, { "epoch": 2.283477783203125e-05, "step": 14965, "training_step_time": 0.10736966133117676 }, { "epoch": 2.28363037109375e-05, "model_forward_time": 0.028514862060546875, "step": 14966 }, { "epoch": 2.28363037109375e-05, "step": 14966, "training_step_time": 0.1073768138885498 }, { "epoch": 2.283782958984375e-05, "model_forward_time": 0.024472951889038086, "step": 14967 }, { "epoch": 2.283782958984375e-05, "step": 14967, "training_step_time": 0.10858154296875 }, { "epoch": 2.283935546875e-05, "model_forward_time": 0.02510976791381836, "step": 14968 }, { "epoch": 2.283935546875e-05, "step": 14968, "training_step_time": 0.1047811508178711 }, { "epoch": 2.284088134765625e-05, "model_forward_time": 0.025711774826049805, "step": 14969 }, { "epoch": 2.284088134765625e-05, "step": 14969, "training_step_time": 0.10716390609741211 }, { "epoch": 2.28424072265625e-05, "grad_norm": 0.33416223526000977, "learning_rate": 5.4293726634410855e-05, "loss": 0.0151, "step": 14970 }, { "epoch": 2.28424072265625e-05, "model_forward_time": 0.025225162506103516, "step": 14970 }, { "epoch": 2.28424072265625e-05, "step": 14970, "training_step_time": 0.10739588737487793 }, { "epoch": 2.284393310546875e-05, "model_forward_time": 0.024729013442993164, "step": 14971 }, { "epoch": 2.284393310546875e-05, "step": 14971, "training_step_time": 0.12632513046264648 }, { "epoch": 2.2845458984375e-05, "model_forward_time": 0.02450394630432129, "step": 14972 }, { "epoch": 2.2845458984375e-05, "step": 14972, "training_step_time": 0.10403299331665039 }, { "epoch": 2.284698486328125e-05, "model_forward_time": 0.02456045150756836, "step": 14973 }, { "epoch": 2.284698486328125e-05, "step": 14973, "training_step_time": 0.10335516929626465 }, { "epoch": 2.28485107421875e-05, "model_forward_time": 0.024209022521972656, "step": 14974 }, { "epoch": 2.28485107421875e-05, "step": 14974, "training_step_time": 0.10730338096618652 }, { "epoch": 2.285003662109375e-05, "model_forward_time": 0.02454400062561035, "step": 14975 }, { "epoch": 2.285003662109375e-05, "step": 14975, "training_step_time": 0.10568857192993164 }, { "epoch": 2.28515625e-05, "model_forward_time": 0.024779558181762695, "step": 14976 }, { "epoch": 2.28515625e-05, "step": 14976, "training_step_time": 0.10506343841552734 }, { "epoch": 2.285308837890625e-05, "model_forward_time": 0.02538752555847168, "step": 14977 }, { "epoch": 2.285308837890625e-05, "step": 14977, "training_step_time": 0.10524964332580566 }, { "epoch": 2.28546142578125e-05, "model_forward_time": 0.024718046188354492, "step": 14978 }, { "epoch": 2.28546142578125e-05, "step": 14978, "training_step_time": 0.10432314872741699 }, { "epoch": 2.285614013671875e-05, "model_forward_time": 0.02632594108581543, "step": 14979 }, { "epoch": 2.285614013671875e-05, "step": 14979, "training_step_time": 0.10746026039123535 }, { "epoch": 2.2857666015625e-05, "grad_norm": 0.29110127687454224, "learning_rate": 5.423881197579144e-05, "loss": 0.016, "step": 14980 }, { "epoch": 2.2857666015625e-05, "model_forward_time": 0.024974346160888672, "step": 14980 }, { "epoch": 2.2857666015625e-05, "step": 14980, "training_step_time": 0.10523700714111328 }, { "epoch": 2.285919189453125e-05, "model_forward_time": 0.024628400802612305, "step": 14981 }, { "epoch": 2.285919189453125e-05, "step": 14981, "training_step_time": 0.10376644134521484 }, { "epoch": 2.28607177734375e-05, "model_forward_time": 0.0260012149810791, "step": 14982 }, { "epoch": 2.28607177734375e-05, "step": 14982, "training_step_time": 0.10646677017211914 }, { "epoch": 2.286224365234375e-05, "model_forward_time": 0.02599644660949707, "step": 14983 }, { "epoch": 2.286224365234375e-05, "step": 14983, "training_step_time": 0.10651421546936035 }, { "epoch": 2.286376953125e-05, "model_forward_time": 0.024258136749267578, "step": 14984 }, { "epoch": 2.286376953125e-05, "step": 14984, "training_step_time": 0.1071784496307373 }, { "epoch": 2.286529541015625e-05, "model_forward_time": 0.02463388442993164, "step": 14985 }, { "epoch": 2.286529541015625e-05, "step": 14985, "training_step_time": 0.10445713996887207 }, { "epoch": 2.28668212890625e-05, "model_forward_time": 0.02465200424194336, "step": 14986 }, { "epoch": 2.28668212890625e-05, "step": 14986, "training_step_time": 0.13952064514160156 }, { "epoch": 2.286834716796875e-05, "model_forward_time": 0.024690628051757812, "step": 14987 }, { "epoch": 2.286834716796875e-05, "step": 14987, "training_step_time": 0.130631685256958 }, { "epoch": 2.2869873046875e-05, "model_forward_time": 0.023593664169311523, "step": 14988 }, { "epoch": 2.2869873046875e-05, "step": 14988, "training_step_time": 0.1164402961730957 }, { "epoch": 2.287139892578125e-05, "model_forward_time": 0.025287866592407227, "step": 14989 }, { "epoch": 2.287139892578125e-05, "step": 14989, "training_step_time": 0.12649106979370117 }, { "epoch": 2.28729248046875e-05, "grad_norm": 0.222764253616333, "learning_rate": 5.418389216661579e-05, "loss": 0.0149, "step": 14990 }, { "epoch": 2.28729248046875e-05, "model_forward_time": 0.027182579040527344, "step": 14990 }, { "epoch": 2.28729248046875e-05, "step": 14990, "training_step_time": 0.16152191162109375 }, { "epoch": 2.287445068359375e-05, "model_forward_time": 0.028750896453857422, "step": 14991 }, { "epoch": 2.287445068359375e-05, "step": 14991, "training_step_time": 0.24138259887695312 }, { "epoch": 2.28759765625e-05, "model_forward_time": 0.02733898162841797, "step": 14992 }, { "epoch": 2.28759765625e-05, "step": 14992, "training_step_time": 0.21708059310913086 }, { "epoch": 2.287750244140625e-05, "model_forward_time": 0.027028322219848633, "step": 14993 }, { "epoch": 2.287750244140625e-05, "step": 14993, "training_step_time": 0.3045024871826172 }, { "epoch": 2.28790283203125e-05, "model_forward_time": 0.029262304306030273, "step": 14994 }, { "epoch": 2.28790283203125e-05, "step": 14994, "training_step_time": 0.36133384704589844 }, { "epoch": 2.288055419921875e-05, "model_forward_time": 0.030585765838623047, "step": 14995 }, { "epoch": 2.288055419921875e-05, "step": 14995, "training_step_time": 0.30129098892211914 }, { "epoch": 2.2882080078125e-05, "model_forward_time": 0.03171396255493164, "step": 14996 }, { "epoch": 2.2882080078125e-05, "step": 14996, "training_step_time": 0.3172571659088135 }, { "epoch": 2.288360595703125e-05, "model_forward_time": 0.02950263023376465, "step": 14997 }, { "epoch": 2.288360595703125e-05, "step": 14997, "training_step_time": 0.33362317085266113 }, { "epoch": 2.28851318359375e-05, "model_forward_time": 0.028829574584960938, "step": 14998 }, { "epoch": 2.28851318359375e-05, "step": 14998, "training_step_time": 0.4050710201263428 }, { "epoch": 2.288665771484375e-05, "model_forward_time": 0.03067159652709961, "step": 14999 }, { "epoch": 2.288665771484375e-05, "step": 14999, "training_step_time": 0.36992526054382324 }, { "epoch": 2.288818359375e-05, "grad_norm": 0.26424986124038696, "learning_rate": 5.4128967273616625e-05, "loss": 0.019, "step": 15000 }, { "epoch": 2.288818359375e-05, "model_forward_time": 0.029282093048095703, "step": 15000 }, { "epoch": 2.288818359375e-05, "step": 15000, "training_step_time": 0.11265778541564941 }, { "epoch": 2.288970947265625e-05, "model_forward_time": 0.023313045501708984, "step": 15001 }, { "epoch": 2.288970947265625e-05, "step": 15001, "training_step_time": 0.1719212532043457 }, { "epoch": 2.28912353515625e-05, "model_forward_time": 0.02394247055053711, "step": 15002 }, { "epoch": 2.28912353515625e-05, "step": 15002, "training_step_time": 0.19634008407592773 }, { "epoch": 2.289276123046875e-05, "model_forward_time": 0.029191017150878906, "step": 15003 }, { "epoch": 2.289276123046875e-05, "step": 15003, "training_step_time": 0.15695881843566895 }, { "epoch": 2.2894287109375e-05, "model_forward_time": 0.02435135841369629, "step": 15004 }, { "epoch": 2.2894287109375e-05, "step": 15004, "training_step_time": 0.14747929573059082 }, { "epoch": 2.289581298828125e-05, "model_forward_time": 0.02428150177001953, "step": 15005 }, { "epoch": 2.289581298828125e-05, "step": 15005, "training_step_time": 0.1345212459564209 }, { "epoch": 2.28973388671875e-05, "model_forward_time": 0.026450157165527344, "step": 15006 }, { "epoch": 2.28973388671875e-05, "step": 15006, "training_step_time": 0.11518311500549316 }, { "epoch": 2.289886474609375e-05, "model_forward_time": 0.025815725326538086, "step": 15007 }, { "epoch": 2.289886474609375e-05, "step": 15007, "training_step_time": 0.10572099685668945 }, { "epoch": 2.2900390625e-05, "model_forward_time": 0.026295900344848633, "step": 15008 }, { "epoch": 2.2900390625e-05, "step": 15008, "training_step_time": 0.10952973365783691 }, { "epoch": 2.290191650390625e-05, "model_forward_time": 0.025368213653564453, "step": 15009 }, { "epoch": 2.290191650390625e-05, "step": 15009, "training_step_time": 0.10861754417419434 }, { "epoch": 2.29034423828125e-05, "grad_norm": 0.18423643708229065, "learning_rate": 5.407403736353288e-05, "loss": 0.0112, "step": 15010 }, { "epoch": 2.29034423828125e-05, "model_forward_time": 0.025160551071166992, "step": 15010 }, { "epoch": 2.29034423828125e-05, "step": 15010, "training_step_time": 0.10837984085083008 }, { "epoch": 2.290496826171875e-05, "model_forward_time": 0.02501702308654785, "step": 15011 }, { "epoch": 2.290496826171875e-05, "step": 15011, "training_step_time": 0.10631418228149414 }, { "epoch": 2.2906494140625e-05, "model_forward_time": 0.025348424911499023, "step": 15012 }, { "epoch": 2.2906494140625e-05, "step": 15012, "training_step_time": 0.10901522636413574 }, { "epoch": 2.290802001953125e-05, "model_forward_time": 0.025160789489746094, "step": 15013 }, { "epoch": 2.290802001953125e-05, "step": 15013, "training_step_time": 0.10619091987609863 }, { "epoch": 2.29095458984375e-05, "model_forward_time": 0.025130510330200195, "step": 15014 }, { "epoch": 2.29095458984375e-05, "step": 15014, "training_step_time": 0.1072239875793457 }, { "epoch": 2.291107177734375e-05, "model_forward_time": 0.025676965713500977, "step": 15015 }, { "epoch": 2.291107177734375e-05, "step": 15015, "training_step_time": 0.11076855659484863 }, { "epoch": 2.291259765625e-05, "model_forward_time": 0.025240659713745117, "step": 15016 }, { "epoch": 2.291259765625e-05, "step": 15016, "training_step_time": 0.10893630981445312 }, { "epoch": 2.291412353515625e-05, "model_forward_time": 0.02554011344909668, "step": 15017 }, { "epoch": 2.291412353515625e-05, "step": 15017, "training_step_time": 0.11052870750427246 }, { "epoch": 2.29156494140625e-05, "model_forward_time": 0.0255129337310791, "step": 15018 }, { "epoch": 2.29156494140625e-05, "step": 15018, "training_step_time": 0.10647106170654297 }, { "epoch": 2.291717529296875e-05, "model_forward_time": 0.02864241600036621, "step": 15019 }, { "epoch": 2.291717529296875e-05, "step": 15019, "training_step_time": 0.11237406730651855 }, { "epoch": 2.2918701171875e-05, "grad_norm": 0.3490600287914276, "learning_rate": 5.401910250310961e-05, "loss": 0.0122, "step": 15020 }, { "epoch": 2.2918701171875e-05, "model_forward_time": 0.025377988815307617, "step": 15020 }, { "epoch": 2.2918701171875e-05, "step": 15020, "training_step_time": 0.10810637474060059 }, { "epoch": 2.292022705078125e-05, "model_forward_time": 0.02509331703186035, "step": 15021 }, { "epoch": 2.292022705078125e-05, "step": 15021, "training_step_time": 0.1086878776550293 }, { "epoch": 2.29217529296875e-05, "model_forward_time": 0.02539348602294922, "step": 15022 }, { "epoch": 2.29217529296875e-05, "step": 15022, "training_step_time": 0.10701227188110352 }, { "epoch": 2.292327880859375e-05, "model_forward_time": 0.02545928955078125, "step": 15023 }, { "epoch": 2.292327880859375e-05, "step": 15023, "training_step_time": 0.1645052433013916 }, { "epoch": 2.29248046875e-05, "model_forward_time": 0.024446725845336914, "step": 15024 }, { "epoch": 2.29248046875e-05, "step": 15024, "training_step_time": 0.14653420448303223 }, { "epoch": 2.292633056640625e-05, "model_forward_time": 0.024332761764526367, "step": 15025 }, { "epoch": 2.292633056640625e-05, "step": 15025, "training_step_time": 0.11445784568786621 }, { "epoch": 2.29278564453125e-05, "model_forward_time": 0.025975942611694336, "step": 15026 }, { "epoch": 2.29278564453125e-05, "step": 15026, "training_step_time": 0.11252903938293457 }, { "epoch": 2.292938232421875e-05, "model_forward_time": 0.02544713020324707, "step": 15027 }, { "epoch": 2.292938232421875e-05, "step": 15027, "training_step_time": 0.12750577926635742 }, { "epoch": 2.2930908203125e-05, "model_forward_time": 0.025244712829589844, "step": 15028 }, { "epoch": 2.2930908203125e-05, "step": 15028, "training_step_time": 0.10831785202026367 }, { "epoch": 2.293243408203125e-05, "model_forward_time": 0.025341033935546875, "step": 15029 }, { "epoch": 2.293243408203125e-05, "step": 15029, "training_step_time": 0.19951891899108887 }, { "epoch": 2.29339599609375e-05, "grad_norm": 0.4564681947231293, "learning_rate": 5.396416275909779e-05, "loss": 0.0127, "step": 15030 }, { "epoch": 2.29339599609375e-05, "model_forward_time": 0.02498793601989746, "step": 15030 }, { "epoch": 2.29339599609375e-05, "step": 15030, "training_step_time": 0.10516905784606934 }, { "epoch": 2.293548583984375e-05, "model_forward_time": 0.024854421615600586, "step": 15031 }, { "epoch": 2.293548583984375e-05, "step": 15031, "training_step_time": 0.11056303977966309 }, { "epoch": 2.293701171875e-05, "model_forward_time": 0.0254819393157959, "step": 15032 }, { "epoch": 2.293701171875e-05, "step": 15032, "training_step_time": 0.11067533493041992 }, { "epoch": 2.293853759765625e-05, "model_forward_time": 0.026250123977661133, "step": 15033 }, { "epoch": 2.293853759765625e-05, "step": 15033, "training_step_time": 0.12028074264526367 }, { "epoch": 2.29400634765625e-05, "model_forward_time": 0.028162479400634766, "step": 15034 }, { "epoch": 2.29400634765625e-05, "step": 15034, "training_step_time": 0.11025357246398926 }, { "epoch": 2.294158935546875e-05, "model_forward_time": 0.025998353958129883, "step": 15035 }, { "epoch": 2.294158935546875e-05, "step": 15035, "training_step_time": 0.11320853233337402 }, { "epoch": 2.2943115234375e-05, "model_forward_time": 0.02683877944946289, "step": 15036 }, { "epoch": 2.2943115234375e-05, "step": 15036, "training_step_time": 0.11117982864379883 }, { "epoch": 2.294464111328125e-05, "model_forward_time": 0.025893449783325195, "step": 15037 }, { "epoch": 2.294464111328125e-05, "step": 15037, "training_step_time": 0.11093521118164062 }, { "epoch": 2.29461669921875e-05, "model_forward_time": 0.025307655334472656, "step": 15038 }, { "epoch": 2.29461669921875e-05, "step": 15038, "training_step_time": 0.1437699794769287 }, { "epoch": 2.294769287109375e-05, "model_forward_time": 0.024954795837402344, "step": 15039 }, { "epoch": 2.294769287109375e-05, "step": 15039, "training_step_time": 0.11598753929138184 }, { "epoch": 2.294921875e-05, "grad_norm": 0.25832730531692505, "learning_rate": 5.390921819825445e-05, "loss": 0.0242, "step": 15040 }, { "epoch": 2.294921875e-05, "model_forward_time": 0.02530980110168457, "step": 15040 }, { "epoch": 2.294921875e-05, "step": 15040, "training_step_time": 0.21268057823181152 }, { "epoch": 2.295074462890625e-05, "model_forward_time": 0.026793956756591797, "step": 15041 }, { "epoch": 2.295074462890625e-05, "step": 15041, "training_step_time": 0.1333768367767334 }, { "epoch": 2.29522705078125e-05, "model_forward_time": 0.02501082420349121, "step": 15042 }, { "epoch": 2.29522705078125e-05, "step": 15042, "training_step_time": 0.1112372875213623 }, { "epoch": 2.295379638671875e-05, "model_forward_time": 0.025006532669067383, "step": 15043 }, { "epoch": 2.295379638671875e-05, "step": 15043, "training_step_time": 0.21079635620117188 }, { "epoch": 2.2955322265625e-05, "model_forward_time": 0.024823904037475586, "step": 15044 }, { "epoch": 2.2955322265625e-05, "step": 15044, "training_step_time": 0.10738348960876465 }, { "epoch": 2.295684814453125e-05, "model_forward_time": 0.025113821029663086, "step": 15045 }, { "epoch": 2.295684814453125e-05, "step": 15045, "training_step_time": 0.12389230728149414 }, { "epoch": 2.29583740234375e-05, "model_forward_time": 0.026228904724121094, "step": 15046 }, { "epoch": 2.29583740234375e-05, "step": 15046, "training_step_time": 0.21314024925231934 }, { "epoch": 2.295989990234375e-05, "model_forward_time": 0.02530384063720703, "step": 15047 }, { "epoch": 2.295989990234375e-05, "step": 15047, "training_step_time": 0.14976143836975098 }, { "epoch": 2.296142578125e-05, "model_forward_time": 0.024532079696655273, "step": 15048 }, { "epoch": 2.296142578125e-05, "step": 15048, "training_step_time": 0.1369328498840332 }, { "epoch": 2.296295166015625e-05, "model_forward_time": 0.024765729904174805, "step": 15049 }, { "epoch": 2.296295166015625e-05, "step": 15049, "training_step_time": 0.12376213073730469 }, { "epoch": 2.29644775390625e-05, "grad_norm": 0.45770174264907837, "learning_rate": 5.3854268887342374e-05, "loss": 0.0272, "step": 15050 }, { "epoch": 2.29644775390625e-05, "model_forward_time": 0.025024890899658203, "step": 15050 }, { "epoch": 2.29644775390625e-05, "step": 15050, "training_step_time": 0.11605238914489746 }, { "epoch": 2.296600341796875e-05, "model_forward_time": 0.024723052978515625, "step": 15051 }, { "epoch": 2.296600341796875e-05, "step": 15051, "training_step_time": 0.11722564697265625 }, { "epoch": 2.2967529296875e-05, "model_forward_time": 0.02539801597595215, "step": 15052 }, { "epoch": 2.2967529296875e-05, "step": 15052, "training_step_time": 0.10764002799987793 }, { "epoch": 2.296905517578125e-05, "model_forward_time": 0.025334596633911133, "step": 15053 }, { "epoch": 2.296905517578125e-05, "step": 15053, "training_step_time": 0.1038048267364502 }, { "epoch": 2.29705810546875e-05, "model_forward_time": 0.025327205657958984, "step": 15054 }, { "epoch": 2.29705810546875e-05, "step": 15054, "training_step_time": 0.10838985443115234 }, { "epoch": 2.297210693359375e-05, "model_forward_time": 0.0253598690032959, "step": 15055 }, { "epoch": 2.297210693359375e-05, "step": 15055, "training_step_time": 0.10774540901184082 }, { "epoch": 2.29736328125e-05, "model_forward_time": 0.02521228790283203, "step": 15056 }, { "epoch": 2.29736328125e-05, "step": 15056, "training_step_time": 0.11204814910888672 }, { "epoch": 2.297515869140625e-05, "model_forward_time": 0.025344133377075195, "step": 15057 }, { "epoch": 2.297515869140625e-05, "step": 15057, "training_step_time": 0.1063082218170166 }, { "epoch": 2.29766845703125e-05, "model_forward_time": 0.025377750396728516, "step": 15058 }, { "epoch": 2.29766845703125e-05, "step": 15058, "training_step_time": 0.10656857490539551 }, { "epoch": 2.297821044921875e-05, "model_forward_time": 0.02560138702392578, "step": 15059 }, { "epoch": 2.297821044921875e-05, "step": 15059, "training_step_time": 0.1133720874786377 }, { "epoch": 2.2979736328125e-05, "grad_norm": 0.3180738687515259, "learning_rate": 5.379931489313016e-05, "loss": 0.0244, "step": 15060 }, { "epoch": 2.2979736328125e-05, "model_forward_time": 0.025599002838134766, "step": 15060 }, { "epoch": 2.2979736328125e-05, "step": 15060, "training_step_time": 0.10819125175476074 }, { "epoch": 2.298126220703125e-05, "model_forward_time": 0.027921438217163086, "step": 15061 }, { "epoch": 2.298126220703125e-05, "step": 15061, "training_step_time": 0.11433076858520508 }, { "epoch": 2.29827880859375e-05, "model_forward_time": 0.024801254272460938, "step": 15062 }, { "epoch": 2.29827880859375e-05, "step": 15062, "training_step_time": 0.10566186904907227 }, { "epoch": 2.298431396484375e-05, "model_forward_time": 0.025398969650268555, "step": 15063 }, { "epoch": 2.298431396484375e-05, "step": 15063, "training_step_time": 0.11199212074279785 }, { "epoch": 2.298583984375e-05, "model_forward_time": 0.025867223739624023, "step": 15064 }, { "epoch": 2.298583984375e-05, "step": 15064, "training_step_time": 0.11232805252075195 }, { "epoch": 2.298736572265625e-05, "model_forward_time": 0.024958133697509766, "step": 15065 }, { "epoch": 2.298736572265625e-05, "step": 15065, "training_step_time": 0.1078646183013916 }, { "epoch": 2.29888916015625e-05, "model_forward_time": 0.025483131408691406, "step": 15066 }, { "epoch": 2.29888916015625e-05, "step": 15066, "training_step_time": 0.10941791534423828 }, { "epoch": 2.299041748046875e-05, "model_forward_time": 0.02559971809387207, "step": 15067 }, { "epoch": 2.299041748046875e-05, "step": 15067, "training_step_time": 0.10875630378723145 }, { "epoch": 2.2991943359375e-05, "model_forward_time": 0.025031089782714844, "step": 15068 }, { "epoch": 2.2991943359375e-05, "step": 15068, "training_step_time": 0.1502697467803955 }, { "epoch": 2.299346923828125e-05, "model_forward_time": 0.02503347396850586, "step": 15069 }, { "epoch": 2.299346923828125e-05, "step": 15069, "training_step_time": 0.12860774993896484 }, { "epoch": 2.29949951171875e-05, "grad_norm": 0.4590432941913605, "learning_rate": 5.3744356282392104e-05, "loss": 0.0161, "step": 15070 }, { "epoch": 2.29949951171875e-05, "model_forward_time": 0.02456808090209961, "step": 15070 }, { "epoch": 2.29949951171875e-05, "step": 15070, "training_step_time": 0.11590027809143066 }, { "epoch": 2.299652099609375e-05, "model_forward_time": 0.02557682991027832, "step": 15071 }, { "epoch": 2.299652099609375e-05, "step": 15071, "training_step_time": 0.1117405891418457 }, { "epoch": 2.2998046875e-05, "model_forward_time": 0.025438785552978516, "step": 15072 }, { "epoch": 2.2998046875e-05, "step": 15072, "training_step_time": 0.11348748207092285 }, { "epoch": 2.299957275390625e-05, "model_forward_time": 0.025197744369506836, "step": 15073 }, { "epoch": 2.299957275390625e-05, "step": 15073, "training_step_time": 0.10701990127563477 }, { "epoch": 2.30010986328125e-05, "model_forward_time": 0.024688720703125, "step": 15074 }, { "epoch": 2.30010986328125e-05, "step": 15074, "training_step_time": 0.19213581085205078 }, { "epoch": 2.300262451171875e-05, "model_forward_time": 0.023845672607421875, "step": 15075 }, { "epoch": 2.300262451171875e-05, "step": 15075, "training_step_time": 0.1041252613067627 }, { "epoch": 2.3004150390625e-05, "model_forward_time": 0.026081323623657227, "step": 15076 }, { "epoch": 2.3004150390625e-05, "step": 15076, "training_step_time": 0.10598015785217285 }, { "epoch": 2.300567626953125e-05, "model_forward_time": 0.025079965591430664, "step": 15077 }, { "epoch": 2.300567626953125e-05, "step": 15077, "training_step_time": 0.10759639739990234 }, { "epoch": 2.30072021484375e-05, "model_forward_time": 0.025138378143310547, "step": 15078 }, { "epoch": 2.30072021484375e-05, "step": 15078, "training_step_time": 0.1792159080505371 }, { "epoch": 2.300872802734375e-05, "model_forward_time": 0.024249553680419922, "step": 15079 }, { "epoch": 2.300872802734375e-05, "step": 15079, "training_step_time": 0.1941516399383545 }, { "epoch": 2.301025390625e-05, "grad_norm": 0.2518446743488312, "learning_rate": 5.368939312190808e-05, "loss": 0.0168, "step": 15080 }, { "epoch": 2.301025390625e-05, "model_forward_time": 0.023861408233642578, "step": 15080 }, { "epoch": 2.301025390625e-05, "step": 15080, "training_step_time": 0.18638324737548828 }, { "epoch": 2.301177978515625e-05, "model_forward_time": 0.024325132369995117, "step": 15081 }, { "epoch": 2.301177978515625e-05, "step": 15081, "training_step_time": 0.19497251510620117 }, { "epoch": 2.30133056640625e-05, "model_forward_time": 0.02428150177001953, "step": 15082 }, { "epoch": 2.30133056640625e-05, "step": 15082, "training_step_time": 0.17471981048583984 }, { "epoch": 2.301483154296875e-05, "model_forward_time": 0.024077177047729492, "step": 15083 }, { "epoch": 2.301483154296875e-05, "step": 15083, "training_step_time": 0.17238712310791016 }, { "epoch": 2.3016357421875e-05, "model_forward_time": 0.024082660675048828, "step": 15084 }, { "epoch": 2.3016357421875e-05, "step": 15084, "training_step_time": 0.11663174629211426 }, { "epoch": 2.301788330078125e-05, "model_forward_time": 0.024986982345581055, "step": 15085 }, { "epoch": 2.301788330078125e-05, "step": 15085, "training_step_time": 0.1058812141418457 }, { "epoch": 2.30194091796875e-05, "model_forward_time": 0.02472686767578125, "step": 15086 }, { "epoch": 2.30194091796875e-05, "step": 15086, "training_step_time": 0.15372014045715332 }, { "epoch": 2.302093505859375e-05, "model_forward_time": 0.0248258113861084, "step": 15087 }, { "epoch": 2.302093505859375e-05, "step": 15087, "training_step_time": 0.1794569492340088 }, { "epoch": 2.30224609375e-05, "model_forward_time": 0.024673938751220703, "step": 15088 }, { "epoch": 2.30224609375e-05, "step": 15088, "training_step_time": 0.11197328567504883 }, { "epoch": 2.302398681640625e-05, "model_forward_time": 0.024243831634521484, "step": 15089 }, { "epoch": 2.302398681640625e-05, "step": 15089, "training_step_time": 0.1715836524963379 }, { "epoch": 2.30255126953125e-05, "grad_norm": 0.372438907623291, "learning_rate": 5.363442547846356e-05, "loss": 0.0187, "step": 15090 }, { "epoch": 2.30255126953125e-05, "model_forward_time": 0.024494647979736328, "step": 15090 }, { "epoch": 2.30255126953125e-05, "step": 15090, "training_step_time": 0.20508861541748047 }, { "epoch": 2.302703857421875e-05, "model_forward_time": 0.02452683448791504, "step": 15091 }, { "epoch": 2.302703857421875e-05, "step": 15091, "training_step_time": 0.15358757972717285 }, { "epoch": 2.3028564453125e-05, "model_forward_time": 0.024537324905395508, "step": 15092 }, { "epoch": 2.3028564453125e-05, "step": 15092, "training_step_time": 0.10625314712524414 }, { "epoch": 2.303009033203125e-05, "model_forward_time": 0.024866342544555664, "step": 15093 }, { "epoch": 2.303009033203125e-05, "step": 15093, "training_step_time": 0.11246037483215332 }, { "epoch": 2.30316162109375e-05, "model_forward_time": 0.02501225471496582, "step": 15094 }, { "epoch": 2.30316162109375e-05, "step": 15094, "training_step_time": 0.11722922325134277 }, { "epoch": 2.303314208984375e-05, "model_forward_time": 0.024810075759887695, "step": 15095 }, { "epoch": 2.303314208984375e-05, "step": 15095, "training_step_time": 0.10576105117797852 }, { "epoch": 2.303466796875e-05, "model_forward_time": 0.025440692901611328, "step": 15096 }, { "epoch": 2.303466796875e-05, "step": 15096, "training_step_time": 0.10622239112854004 }, { "epoch": 2.303619384765625e-05, "model_forward_time": 0.025469541549682617, "step": 15097 }, { "epoch": 2.303619384765625e-05, "step": 15097, "training_step_time": 0.10528707504272461 }, { "epoch": 2.30377197265625e-05, "model_forward_time": 0.02519512176513672, "step": 15098 }, { "epoch": 2.30377197265625e-05, "step": 15098, "training_step_time": 0.1302187442779541 }, { "epoch": 2.303924560546875e-05, "model_forward_time": 0.025171756744384766, "step": 15099 }, { "epoch": 2.303924560546875e-05, "step": 15099, "training_step_time": 0.16975164413452148 }, { "epoch": 2.3040771484375e-05, "grad_norm": 0.20305180549621582, "learning_rate": 5.357945341884936e-05, "loss": 0.0138, "step": 15100 }, { "epoch": 2.3040771484375e-05, "model_forward_time": 0.02460002899169922, "step": 15100 }, { "epoch": 2.3040771484375e-05, "step": 15100, "training_step_time": 0.1485445499420166 }, { "epoch": 2.304229736328125e-05, "model_forward_time": 0.024235963821411133, "step": 15101 }, { "epoch": 2.304229736328125e-05, "step": 15101, "training_step_time": 0.14203429222106934 }, { "epoch": 2.30438232421875e-05, "model_forward_time": 0.024251461029052734, "step": 15102 }, { "epoch": 2.30438232421875e-05, "step": 15102, "training_step_time": 0.12809133529663086 }, { "epoch": 2.304534912109375e-05, "model_forward_time": 0.024077177047729492, "step": 15103 }, { "epoch": 2.304534912109375e-05, "step": 15103, "training_step_time": 0.12756061553955078 }, { "epoch": 2.3046875e-05, "model_forward_time": 0.024845600128173828, "step": 15104 }, { "epoch": 2.3046875e-05, "step": 15104, "training_step_time": 0.12728428840637207 }, { "epoch": 2.304840087890625e-05, "model_forward_time": 0.024758577346801758, "step": 15105 }, { "epoch": 2.304840087890625e-05, "step": 15105, "training_step_time": 0.11991763114929199 }, { "epoch": 2.30499267578125e-05, "model_forward_time": 0.025340795516967773, "step": 15106 }, { "epoch": 2.30499267578125e-05, "step": 15106, "training_step_time": 0.11484503746032715 }, { "epoch": 2.305145263671875e-05, "model_forward_time": 0.025386810302734375, "step": 15107 }, { "epoch": 2.305145263671875e-05, "step": 15107, "training_step_time": 0.11636590957641602 }, { "epoch": 2.3052978515625e-05, "model_forward_time": 0.025002479553222656, "step": 15108 }, { "epoch": 2.3052978515625e-05, "step": 15108, "training_step_time": 0.10548639297485352 }, { "epoch": 2.305450439453125e-05, "model_forward_time": 0.02538013458251953, "step": 15109 }, { "epoch": 2.305450439453125e-05, "step": 15109, "training_step_time": 0.10415124893188477 }, { "epoch": 2.30560302734375e-05, "grad_norm": 0.2843940556049347, "learning_rate": 5.352447700986173e-05, "loss": 0.0135, "step": 15110 }, { "epoch": 2.30560302734375e-05, "model_forward_time": 0.02521061897277832, "step": 15110 }, { "epoch": 2.30560302734375e-05, "step": 15110, "training_step_time": 0.10312724113464355 }, { "epoch": 2.305755615234375e-05, "model_forward_time": 0.025058269500732422, "step": 15111 }, { "epoch": 2.305755615234375e-05, "step": 15111, "training_step_time": 0.12854623794555664 }, { "epoch": 2.305908203125e-05, "model_forward_time": 0.02548694610595703, "step": 15112 }, { "epoch": 2.305908203125e-05, "step": 15112, "training_step_time": 0.10845613479614258 }, { "epoch": 2.306060791015625e-05, "model_forward_time": 0.02502608299255371, "step": 15113 }, { "epoch": 2.306060791015625e-05, "step": 15113, "training_step_time": 0.12392210960388184 }, { "epoch": 2.30621337890625e-05, "model_forward_time": 0.025203466415405273, "step": 15114 }, { "epoch": 2.30621337890625e-05, "step": 15114, "training_step_time": 0.11066484451293945 }, { "epoch": 2.306365966796875e-05, "model_forward_time": 0.02529740333557129, "step": 15115 }, { "epoch": 2.306365966796875e-05, "step": 15115, "training_step_time": 0.10944747924804688 }, { "epoch": 2.3065185546875e-05, "model_forward_time": 0.025159358978271484, "step": 15116 }, { "epoch": 2.3065185546875e-05, "step": 15116, "training_step_time": 0.19126367568969727 }, { "epoch": 2.306671142578125e-05, "model_forward_time": 0.02397012710571289, "step": 15117 }, { "epoch": 2.306671142578125e-05, "step": 15117, "training_step_time": 0.10267925262451172 }, { "epoch": 2.30682373046875e-05, "model_forward_time": 0.024417400360107422, "step": 15118 }, { "epoch": 2.30682373046875e-05, "step": 15118, "training_step_time": 0.1025233268737793 }, { "epoch": 2.306976318359375e-05, "model_forward_time": 0.02510976791381836, "step": 15119 }, { "epoch": 2.306976318359375e-05, "step": 15119, "training_step_time": 0.11516308784484863 }, { "epoch": 2.30712890625e-05, "grad_norm": 0.19091233611106873, "learning_rate": 5.3469496318302204e-05, "loss": 0.0098, "step": 15120 }, { "epoch": 2.30712890625e-05, "model_forward_time": 0.025023698806762695, "step": 15120 }, { "epoch": 2.30712890625e-05, "step": 15120, "training_step_time": 0.11725044250488281 }, { "epoch": 2.307281494140625e-05, "model_forward_time": 0.025522947311401367, "step": 15121 }, { "epoch": 2.307281494140625e-05, "step": 15121, "training_step_time": 0.12138772010803223 }, { "epoch": 2.30743408203125e-05, "model_forward_time": 0.024593353271484375, "step": 15122 }, { "epoch": 2.30743408203125e-05, "step": 15122, "training_step_time": 0.11642074584960938 }, { "epoch": 2.307586669921875e-05, "model_forward_time": 0.025285005569458008, "step": 15123 }, { "epoch": 2.307586669921875e-05, "step": 15123, "training_step_time": 0.11846733093261719 }, { "epoch": 2.3077392578125e-05, "model_forward_time": 0.02529120445251465, "step": 15124 }, { "epoch": 2.3077392578125e-05, "step": 15124, "training_step_time": 0.1178286075592041 }, { "epoch": 2.307891845703125e-05, "model_forward_time": 0.025361299514770508, "step": 15125 }, { "epoch": 2.307891845703125e-05, "step": 15125, "training_step_time": 0.1553044319152832 }, { "epoch": 2.30804443359375e-05, "model_forward_time": 0.024880647659301758, "step": 15126 }, { "epoch": 2.30804443359375e-05, "step": 15126, "training_step_time": 0.11622977256774902 }, { "epoch": 2.308197021484375e-05, "model_forward_time": 0.02504277229309082, "step": 15127 }, { "epoch": 2.308197021484375e-05, "step": 15127, "training_step_time": 0.11028909683227539 }, { "epoch": 2.308349609375e-05, "model_forward_time": 0.02505350112915039, "step": 15128 }, { "epoch": 2.308349609375e-05, "step": 15128, "training_step_time": 0.11510610580444336 }, { "epoch": 2.308502197265625e-05, "model_forward_time": 0.025426149368286133, "step": 15129 }, { "epoch": 2.308502197265625e-05, "step": 15129, "training_step_time": 0.13404321670532227 }, { "epoch": 2.30865478515625e-05, "grad_norm": 0.15558725595474243, "learning_rate": 5.341451141097751e-05, "loss": 0.0113, "step": 15130 }, { "epoch": 2.30865478515625e-05, "model_forward_time": 0.026026248931884766, "step": 15130 }, { "epoch": 2.30865478515625e-05, "step": 15130, "training_step_time": 0.23007774353027344 }, { "epoch": 2.308807373046875e-05, "model_forward_time": 0.024254798889160156, "step": 15131 }, { "epoch": 2.308807373046875e-05, "step": 15131, "training_step_time": 0.11007142066955566 }, { "epoch": 2.3089599609375e-05, "model_forward_time": 0.02466607093811035, "step": 15132 }, { "epoch": 2.3089599609375e-05, "step": 15132, "training_step_time": 0.20800542831420898 }, { "epoch": 2.309112548828125e-05, "model_forward_time": 0.024325847625732422, "step": 15133 }, { "epoch": 2.309112548828125e-05, "step": 15133, "training_step_time": 0.1589200496673584 }, { "epoch": 2.30926513671875e-05, "model_forward_time": 0.024413585662841797, "step": 15134 }, { "epoch": 2.30926513671875e-05, "step": 15134, "training_step_time": 0.20228028297424316 }, { "epoch": 2.309417724609375e-05, "model_forward_time": 0.024412155151367188, "step": 15135 }, { "epoch": 2.309417724609375e-05, "step": 15135, "training_step_time": 0.14936065673828125 }, { "epoch": 2.3095703125e-05, "model_forward_time": 0.024557113647460938, "step": 15136 }, { "epoch": 2.3095703125e-05, "step": 15136, "training_step_time": 0.10716509819030762 }, { "epoch": 2.309722900390625e-05, "model_forward_time": 0.024345874786376953, "step": 15137 }, { "epoch": 2.309722900390625e-05, "step": 15137, "training_step_time": 0.21418094635009766 }, { "epoch": 2.30987548828125e-05, "model_forward_time": 0.024988174438476562, "step": 15138 }, { "epoch": 2.30987548828125e-05, "step": 15138, "training_step_time": 0.1153714656829834 }, { "epoch": 2.310028076171875e-05, "model_forward_time": 0.02432107925415039, "step": 15139 }, { "epoch": 2.310028076171875e-05, "step": 15139, "training_step_time": 0.10955119132995605 }, { "epoch": 2.3101806640625e-05, "grad_norm": 0.18189500272274017, "learning_rate": 5.335952235469947e-05, "loss": 0.0063, "step": 15140 }, { "epoch": 2.3101806640625e-05, "model_forward_time": 0.025746583938598633, "step": 15140 }, { "epoch": 2.3101806640625e-05, "step": 15140, "training_step_time": 0.10930323600769043 }, { "epoch": 2.310333251953125e-05, "model_forward_time": 0.024839162826538086, "step": 15141 }, { "epoch": 2.310333251953125e-05, "step": 15141, "training_step_time": 0.10483169555664062 }, { "epoch": 2.31048583984375e-05, "model_forward_time": 0.024852752685546875, "step": 15142 }, { "epoch": 2.31048583984375e-05, "step": 15142, "training_step_time": 0.10447955131530762 }, { "epoch": 2.310638427734375e-05, "model_forward_time": 0.025101184844970703, "step": 15143 }, { "epoch": 2.310638427734375e-05, "step": 15143, "training_step_time": 0.10958504676818848 }, { "epoch": 2.310791015625e-05, "model_forward_time": 0.025983333587646484, "step": 15144 }, { "epoch": 2.310791015625e-05, "step": 15144, "training_step_time": 0.10749173164367676 }, { "epoch": 2.310943603515625e-05, "model_forward_time": 0.024897098541259766, "step": 15145 }, { "epoch": 2.310943603515625e-05, "step": 15145, "training_step_time": 0.10457134246826172 }, { "epoch": 2.31109619140625e-05, "model_forward_time": 0.025930166244506836, "step": 15146 }, { "epoch": 2.31109619140625e-05, "step": 15146, "training_step_time": 0.10493993759155273 }, { "epoch": 2.311248779296875e-05, "model_forward_time": 0.025051116943359375, "step": 15147 }, { "epoch": 2.311248779296875e-05, "step": 15147, "training_step_time": 0.10572504997253418 }, { "epoch": 2.3114013671875e-05, "model_forward_time": 0.02550220489501953, "step": 15148 }, { "epoch": 2.3114013671875e-05, "step": 15148, "training_step_time": 0.10783505439758301 }, { "epoch": 2.311553955078125e-05, "model_forward_time": 0.025368690490722656, "step": 15149 }, { "epoch": 2.311553955078125e-05, "step": 15149, "training_step_time": 0.1063392162322998 }, { "epoch": 2.31170654296875e-05, "grad_norm": 0.3034445345401764, "learning_rate": 5.330452921628497e-05, "loss": 0.0179, "step": 15150 }, { "epoch": 2.31170654296875e-05, "model_forward_time": 0.02475762367248535, "step": 15150 }, { "epoch": 2.31170654296875e-05, "step": 15150, "training_step_time": 0.10345005989074707 }, { "epoch": 2.311859130859375e-05, "model_forward_time": 0.025151968002319336, "step": 15151 }, { "epoch": 2.311859130859375e-05, "step": 15151, "training_step_time": 0.10395312309265137 }, { "epoch": 2.31201171875e-05, "model_forward_time": 0.02509450912475586, "step": 15152 }, { "epoch": 2.31201171875e-05, "step": 15152, "training_step_time": 0.10491251945495605 }, { "epoch": 2.312164306640625e-05, "model_forward_time": 0.02513575553894043, "step": 15153 }, { "epoch": 2.312164306640625e-05, "step": 15153, "training_step_time": 0.10374045372009277 }, { "epoch": 2.31231689453125e-05, "model_forward_time": 0.025278091430664062, "step": 15154 }, { "epoch": 2.31231689453125e-05, "step": 15154, "training_step_time": 0.11058187484741211 }, { "epoch": 2.312469482421875e-05, "model_forward_time": 0.024873018264770508, "step": 15155 }, { "epoch": 2.312469482421875e-05, "step": 15155, "training_step_time": 0.15862584114074707 }, { "epoch": 2.3126220703125e-05, "model_forward_time": 0.027033567428588867, "step": 15156 }, { "epoch": 2.3126220703125e-05, "step": 15156, "training_step_time": 0.13894367218017578 }, { "epoch": 2.312774658203125e-05, "model_forward_time": 0.024155378341674805, "step": 15157 }, { "epoch": 2.312774658203125e-05, "step": 15157, "training_step_time": 0.11385226249694824 }, { "epoch": 2.31292724609375e-05, "model_forward_time": 0.025581836700439453, "step": 15158 }, { "epoch": 2.31292724609375e-05, "step": 15158, "training_step_time": 0.12105393409729004 }, { "epoch": 2.313079833984375e-05, "model_forward_time": 0.025092124938964844, "step": 15159 }, { "epoch": 2.313079833984375e-05, "step": 15159, "training_step_time": 0.1070401668548584 }, { "epoch": 2.313232421875e-05, "grad_norm": 0.2653610408306122, "learning_rate": 5.32495320625559e-05, "loss": 0.0148, "step": 15160 }, { "epoch": 2.313232421875e-05, "model_forward_time": 0.027029991149902344, "step": 15160 }, { "epoch": 2.313232421875e-05, "step": 15160, "training_step_time": 0.1074836254119873 }, { "epoch": 2.313385009765625e-05, "model_forward_time": 0.02638387680053711, "step": 15161 }, { "epoch": 2.313385009765625e-05, "step": 15161, "training_step_time": 0.20430850982666016 }, { "epoch": 2.31353759765625e-05, "model_forward_time": 0.024222850799560547, "step": 15162 }, { "epoch": 2.31353759765625e-05, "step": 15162, "training_step_time": 0.11161994934082031 }, { "epoch": 2.313690185546875e-05, "model_forward_time": 0.02324390411376953, "step": 15163 }, { "epoch": 2.313690185546875e-05, "step": 15163, "training_step_time": 0.1516859531402588 }, { "epoch": 2.3138427734375e-05, "model_forward_time": 0.024242639541625977, "step": 15164 }, { "epoch": 2.3138427734375e-05, "step": 15164, "training_step_time": 0.16608548164367676 }, { "epoch": 2.313995361328125e-05, "model_forward_time": 0.023941516876220703, "step": 15165 }, { "epoch": 2.313995361328125e-05, "step": 15165, "training_step_time": 0.15635204315185547 }, { "epoch": 2.31414794921875e-05, "model_forward_time": 0.024400949478149414, "step": 15166 }, { "epoch": 2.31414794921875e-05, "step": 15166, "training_step_time": 0.15026259422302246 }, { "epoch": 2.314300537109375e-05, "model_forward_time": 0.024080753326416016, "step": 15167 }, { "epoch": 2.314300537109375e-05, "step": 15167, "training_step_time": 0.14571547508239746 }, { "epoch": 2.314453125e-05, "model_forward_time": 0.02380084991455078, "step": 15168 }, { "epoch": 2.314453125e-05, "step": 15168, "training_step_time": 0.13129019737243652 }, { "epoch": 2.314605712890625e-05, "model_forward_time": 0.024225234985351562, "step": 15169 }, { "epoch": 2.314605712890625e-05, "step": 15169, "training_step_time": 0.12425994873046875 }, { "epoch": 2.31475830078125e-05, "grad_norm": 0.2752803564071655, "learning_rate": 5.319453096033896e-05, "loss": 0.0201, "step": 15170 }, { "epoch": 2.31475830078125e-05, "model_forward_time": 0.024196863174438477, "step": 15170 }, { "epoch": 2.31475830078125e-05, "step": 15170, "training_step_time": 0.13992738723754883 }, { "epoch": 2.314910888671875e-05, "model_forward_time": 0.02509903907775879, "step": 15171 }, { "epoch": 2.314910888671875e-05, "step": 15171, "training_step_time": 0.1178598403930664 }, { "epoch": 2.3150634765625e-05, "model_forward_time": 0.025107622146606445, "step": 15172 }, { "epoch": 2.3150634765625e-05, "step": 15172, "training_step_time": 0.17634153366088867 }, { "epoch": 2.315216064453125e-05, "model_forward_time": 0.024816274642944336, "step": 15173 }, { "epoch": 2.315216064453125e-05, "step": 15173, "training_step_time": 0.16254186630249023 }, { "epoch": 2.31536865234375e-05, "model_forward_time": 0.02430248260498047, "step": 15174 }, { "epoch": 2.31536865234375e-05, "step": 15174, "training_step_time": 0.19335460662841797 }, { "epoch": 2.315521240234375e-05, "model_forward_time": 0.023804187774658203, "step": 15175 }, { "epoch": 2.315521240234375e-05, "step": 15175, "training_step_time": 0.14205002784729004 }, { "epoch": 2.315673828125e-05, "model_forward_time": 0.025174617767333984, "step": 15176 }, { "epoch": 2.315673828125e-05, "step": 15176, "training_step_time": 0.21821165084838867 }, { "epoch": 2.315826416015625e-05, "model_forward_time": 0.02447032928466797, "step": 15177 }, { "epoch": 2.315826416015625e-05, "step": 15177, "training_step_time": 0.11786389350891113 }, { "epoch": 2.31597900390625e-05, "model_forward_time": 0.024997234344482422, "step": 15178 }, { "epoch": 2.31597900390625e-05, "step": 15178, "training_step_time": 0.13414621353149414 }, { "epoch": 2.316131591796875e-05, "model_forward_time": 0.024641990661621094, "step": 15179 }, { "epoch": 2.316131591796875e-05, "step": 15179, "training_step_time": 0.11742496490478516 }, { "epoch": 2.3162841796875e-05, "grad_norm": 0.23647968471050262, "learning_rate": 5.313952597646568e-05, "loss": 0.0127, "step": 15180 }, { "epoch": 2.3162841796875e-05, "model_forward_time": 0.02506232261657715, "step": 15180 }, { "epoch": 2.3162841796875e-05, "step": 15180, "training_step_time": 0.16370511054992676 }, { "epoch": 2.316436767578125e-05, "model_forward_time": 0.024260282516479492, "step": 15181 }, { "epoch": 2.316436767578125e-05, "step": 15181, "training_step_time": 0.12792658805847168 }, { "epoch": 2.31658935546875e-05, "model_forward_time": 0.027574539184570312, "step": 15182 }, { "epoch": 2.31658935546875e-05, "step": 15182, "training_step_time": 0.11096715927124023 }, { "epoch": 2.316741943359375e-05, "model_forward_time": 0.02543163299560547, "step": 15183 }, { "epoch": 2.316741943359375e-05, "step": 15183, "training_step_time": 0.10979390144348145 }, { "epoch": 2.31689453125e-05, "model_forward_time": 0.02460479736328125, "step": 15184 }, { "epoch": 2.31689453125e-05, "step": 15184, "training_step_time": 0.11016511917114258 }, { "epoch": 2.317047119140625e-05, "model_forward_time": 0.025109052658081055, "step": 15185 }, { "epoch": 2.317047119140625e-05, "step": 15185, "training_step_time": 0.10927987098693848 }, { "epoch": 2.31719970703125e-05, "model_forward_time": 0.024995803833007812, "step": 15186 }, { "epoch": 2.31719970703125e-05, "step": 15186, "training_step_time": 0.10856270790100098 }, { "epoch": 2.317352294921875e-05, "model_forward_time": 0.025207996368408203, "step": 15187 }, { "epoch": 2.317352294921875e-05, "step": 15187, "training_step_time": 0.10843276977539062 }, { "epoch": 2.3175048828125e-05, "model_forward_time": 0.02490687370300293, "step": 15188 }, { "epoch": 2.3175048828125e-05, "step": 15188, "training_step_time": 0.1112217903137207 }, { "epoch": 2.317657470703125e-05, "model_forward_time": 0.0247952938079834, "step": 15189 }, { "epoch": 2.317657470703125e-05, "step": 15189, "training_step_time": 0.10732865333557129 }, { "epoch": 2.31781005859375e-05, "grad_norm": 0.268989622592926, "learning_rate": 5.308451717777228e-05, "loss": 0.0125, "step": 15190 }, { "epoch": 2.31781005859375e-05, "model_forward_time": 0.02546858787536621, "step": 15190 }, { "epoch": 2.31781005859375e-05, "step": 15190, "training_step_time": 0.1132197380065918 }, { "epoch": 2.317962646484375e-05, "model_forward_time": 0.02496647834777832, "step": 15191 }, { "epoch": 2.317962646484375e-05, "step": 15191, "training_step_time": 0.10580825805664062 }, { "epoch": 2.318115234375e-05, "model_forward_time": 0.02521204948425293, "step": 15192 }, { "epoch": 2.318115234375e-05, "step": 15192, "training_step_time": 0.10617494583129883 }, { "epoch": 2.318267822265625e-05, "model_forward_time": 0.024981021881103516, "step": 15193 }, { "epoch": 2.318267822265625e-05, "step": 15193, "training_step_time": 0.10784149169921875 }, { "epoch": 2.31842041015625e-05, "model_forward_time": 0.024806737899780273, "step": 15194 }, { "epoch": 2.31842041015625e-05, "step": 15194, "training_step_time": 0.11903071403503418 }, { "epoch": 2.318572998046875e-05, "model_forward_time": 0.02520608901977539, "step": 15195 }, { "epoch": 2.318572998046875e-05, "step": 15195, "training_step_time": 0.12225031852722168 }, { "epoch": 2.3187255859375e-05, "model_forward_time": 0.025177955627441406, "step": 15196 }, { "epoch": 2.3187255859375e-05, "step": 15196, "training_step_time": 0.11891865730285645 }, { "epoch": 2.318878173828125e-05, "model_forward_time": 0.02466726303100586, "step": 15197 }, { "epoch": 2.318878173828125e-05, "step": 15197, "training_step_time": 0.12197637557983398 }, { "epoch": 2.31903076171875e-05, "model_forward_time": 0.02492070198059082, "step": 15198 }, { "epoch": 2.31903076171875e-05, "step": 15198, "training_step_time": 0.1685185432434082 }, { "epoch": 2.319183349609375e-05, "model_forward_time": 0.024220705032348633, "step": 15199 }, { "epoch": 2.319183349609375e-05, "step": 15199, "training_step_time": 0.16828250885009766 }, { "epoch": 2.3193359375e-05, "grad_norm": 0.23509950935840607, "learning_rate": 5.3029504631099694e-05, "loss": 0.019, "step": 15200 }, { "epoch": 2.3193359375e-05, "model_forward_time": 0.026134252548217773, "step": 15200 }, { "epoch": 2.3193359375e-05, "step": 15200, "training_step_time": 0.11395716667175293 }, { "epoch": 2.319488525390625e-05, "model_forward_time": 0.024451017379760742, "step": 15201 }, { "epoch": 2.319488525390625e-05, "step": 15201, "training_step_time": 0.11915349960327148 }, { "epoch": 2.31964111328125e-05, "model_forward_time": 0.025002002716064453, "step": 15202 }, { "epoch": 2.31964111328125e-05, "step": 15202, "training_step_time": 0.11178994178771973 }, { "epoch": 2.319793701171875e-05, "model_forward_time": 0.025363683700561523, "step": 15203 }, { "epoch": 2.319793701171875e-05, "step": 15203, "training_step_time": 0.11108875274658203 }, { "epoch": 2.3199462890625e-05, "model_forward_time": 0.02533745765686035, "step": 15204 }, { "epoch": 2.3199462890625e-05, "step": 15204, "training_step_time": 0.19553828239440918 }, { "epoch": 2.320098876953125e-05, "model_forward_time": 0.02496957778930664, "step": 15205 }, { "epoch": 2.320098876953125e-05, "step": 15205, "training_step_time": 0.10851550102233887 }, { "epoch": 2.32025146484375e-05, "model_forward_time": 0.024639368057250977, "step": 15206 }, { "epoch": 2.32025146484375e-05, "step": 15206, "training_step_time": 0.10751748085021973 }, { "epoch": 2.320404052734375e-05, "model_forward_time": 0.025209426879882812, "step": 15207 }, { "epoch": 2.320404052734375e-05, "step": 15207, "training_step_time": 0.10505175590515137 }, { "epoch": 2.320556640625e-05, "model_forward_time": 0.02533864974975586, "step": 15208 }, { "epoch": 2.320556640625e-05, "step": 15208, "training_step_time": 0.10550045967102051 }, { "epoch": 2.320709228515625e-05, "model_forward_time": 0.02524542808532715, "step": 15209 }, { "epoch": 2.320709228515625e-05, "step": 15209, "training_step_time": 0.10498929023742676 }, { "epoch": 2.32086181640625e-05, "grad_norm": 0.2184288650751114, "learning_rate": 5.297448840329329e-05, "loss": 0.0138, "step": 15210 }, { "epoch": 2.32086181640625e-05, "model_forward_time": 0.025586366653442383, "step": 15210 }, { "epoch": 2.32086181640625e-05, "step": 15210, "training_step_time": 0.10554718971252441 }, { "epoch": 2.321014404296875e-05, "model_forward_time": 0.025174379348754883, "step": 15211 }, { "epoch": 2.321014404296875e-05, "step": 15211, "training_step_time": 0.10606169700622559 }, { "epoch": 2.3211669921875e-05, "model_forward_time": 0.025154829025268555, "step": 15212 }, { "epoch": 2.3211669921875e-05, "step": 15212, "training_step_time": 0.10537433624267578 }, { "epoch": 2.321319580078125e-05, "model_forward_time": 0.025281429290771484, "step": 15213 }, { "epoch": 2.321319580078125e-05, "step": 15213, "training_step_time": 0.10767698287963867 }, { "epoch": 2.32147216796875e-05, "model_forward_time": 0.02534317970275879, "step": 15214 }, { "epoch": 2.32147216796875e-05, "step": 15214, "training_step_time": 0.10593199729919434 }, { "epoch": 2.321624755859375e-05, "model_forward_time": 0.025388240814208984, "step": 15215 }, { "epoch": 2.321624755859375e-05, "step": 15215, "training_step_time": 0.18900823593139648 }, { "epoch": 2.32177734375e-05, "model_forward_time": 0.02385854721069336, "step": 15216 }, { "epoch": 2.32177734375e-05, "step": 15216, "training_step_time": 0.11395883560180664 }, { "epoch": 2.321929931640625e-05, "model_forward_time": 0.02419281005859375, "step": 15217 }, { "epoch": 2.321929931640625e-05, "step": 15217, "training_step_time": 0.10848593711853027 }, { "epoch": 2.32208251953125e-05, "model_forward_time": 0.025037050247192383, "step": 15218 }, { "epoch": 2.32208251953125e-05, "step": 15218, "training_step_time": 0.12380790710449219 }, { "epoch": 2.322235107421875e-05, "model_forward_time": 0.025401830673217773, "step": 15219 }, { "epoch": 2.322235107421875e-05, "step": 15219, "training_step_time": 0.10733461380004883 }, { "epoch": 2.3223876953125e-05, "grad_norm": 0.4464765787124634, "learning_rate": 5.2919468561203e-05, "loss": 0.0142, "step": 15220 }, { "epoch": 2.3223876953125e-05, "model_forward_time": 0.02494072914123535, "step": 15220 }, { "epoch": 2.3223876953125e-05, "step": 15220, "training_step_time": 0.1690807342529297 }, { "epoch": 2.322540283203125e-05, "model_forward_time": 0.02456521987915039, "step": 15221 }, { "epoch": 2.322540283203125e-05, "step": 15221, "training_step_time": 0.1689004898071289 }, { "epoch": 2.32269287109375e-05, "model_forward_time": 0.024642229080200195, "step": 15222 }, { "epoch": 2.32269287109375e-05, "step": 15222, "training_step_time": 0.1803112030029297 }, { "epoch": 2.322845458984375e-05, "model_forward_time": 0.024687767028808594, "step": 15223 }, { "epoch": 2.322845458984375e-05, "step": 15223, "training_step_time": 0.15085625648498535 }, { "epoch": 2.322998046875e-05, "model_forward_time": 0.024325847625732422, "step": 15224 }, { "epoch": 2.322998046875e-05, "step": 15224, "training_step_time": 0.1209559440612793 }, { "epoch": 2.323150634765625e-05, "model_forward_time": 0.02503824234008789, "step": 15225 }, { "epoch": 2.323150634765625e-05, "step": 15225, "training_step_time": 0.14713382720947266 }, { "epoch": 2.32330322265625e-05, "model_forward_time": 0.02477264404296875, "step": 15226 }, { "epoch": 2.32330322265625e-05, "step": 15226, "training_step_time": 0.1715548038482666 }, { "epoch": 2.323455810546875e-05, "model_forward_time": 0.025087594985961914, "step": 15227 }, { "epoch": 2.323455810546875e-05, "step": 15227, "training_step_time": 0.17395949363708496 }, { "epoch": 2.3236083984375e-05, "model_forward_time": 0.024259567260742188, "step": 15228 }, { "epoch": 2.3236083984375e-05, "step": 15228, "training_step_time": 0.10468769073486328 }, { "epoch": 2.323760986328125e-05, "model_forward_time": 0.024086475372314453, "step": 15229 }, { "epoch": 2.323760986328125e-05, "step": 15229, "training_step_time": 0.10098075866699219 }, { "epoch": 2.32391357421875e-05, "grad_norm": 0.2585608661174774, "learning_rate": 5.286444517168313e-05, "loss": 0.0078, "step": 15230 }, { "epoch": 2.32391357421875e-05, "model_forward_time": 0.025635480880737305, "step": 15230 }, { "epoch": 2.32391357421875e-05, "step": 15230, "training_step_time": 0.10502052307128906 }, { "epoch": 2.324066162109375e-05, "model_forward_time": 0.024561405181884766, "step": 15231 }, { "epoch": 2.324066162109375e-05, "step": 15231, "training_step_time": 0.17623448371887207 }, { "epoch": 2.32421875e-05, "model_forward_time": 0.024820566177368164, "step": 15232 }, { "epoch": 2.32421875e-05, "step": 15232, "training_step_time": 0.20193123817443848 }, { "epoch": 2.324371337890625e-05, "model_forward_time": 0.024186134338378906, "step": 15233 }, { "epoch": 2.324371337890625e-05, "step": 15233, "training_step_time": 0.19202160835266113 }, { "epoch": 2.32452392578125e-05, "model_forward_time": 0.024191617965698242, "step": 15234 }, { "epoch": 2.32452392578125e-05, "step": 15234, "training_step_time": 0.18738603591918945 }, { "epoch": 2.324676513671875e-05, "model_forward_time": 0.023966073989868164, "step": 15235 }, { "epoch": 2.324676513671875e-05, "step": 15235, "training_step_time": 0.17643141746520996 }, { "epoch": 2.3248291015625e-05, "model_forward_time": 0.024158477783203125, "step": 15236 }, { "epoch": 2.3248291015625e-05, "step": 15236, "training_step_time": 0.1663217544555664 }, { "epoch": 2.324981689453125e-05, "model_forward_time": 0.025075435638427734, "step": 15237 }, { "epoch": 2.324981689453125e-05, "step": 15237, "training_step_time": 0.14250564575195312 }, { "epoch": 2.32513427734375e-05, "model_forward_time": 0.024433612823486328, "step": 15238 }, { "epoch": 2.32513427734375e-05, "step": 15238, "training_step_time": 0.13890814781188965 }, { "epoch": 2.325286865234375e-05, "model_forward_time": 0.024560928344726562, "step": 15239 }, { "epoch": 2.325286865234375e-05, "step": 15239, "training_step_time": 0.13401579856872559 }, { "epoch": 2.325439453125e-05, "grad_norm": 0.1279037743806839, "learning_rate": 5.280941830159227e-05, "loss": 0.0105, "step": 15240 }, { "epoch": 2.325439453125e-05, "model_forward_time": 0.024589061737060547, "step": 15240 }, { "epoch": 2.325439453125e-05, "step": 15240, "training_step_time": 0.12030696868896484 }, { "epoch": 2.325592041015625e-05, "model_forward_time": 0.025700807571411133, "step": 15241 }, { "epoch": 2.325592041015625e-05, "step": 15241, "training_step_time": 0.12104487419128418 }, { "epoch": 2.32574462890625e-05, "model_forward_time": 0.025126934051513672, "step": 15242 }, { "epoch": 2.32574462890625e-05, "step": 15242, "training_step_time": 0.11581730842590332 }, { "epoch": 2.325897216796875e-05, "model_forward_time": 0.024884462356567383, "step": 15243 }, { "epoch": 2.325897216796875e-05, "step": 15243, "training_step_time": 0.10743594169616699 }, { "epoch": 2.3260498046875e-05, "model_forward_time": 0.025055646896362305, "step": 15244 }, { "epoch": 2.3260498046875e-05, "step": 15244, "training_step_time": 0.11369729042053223 }, { "epoch": 2.326202392578125e-05, "model_forward_time": 0.02525782585144043, "step": 15245 }, { "epoch": 2.326202392578125e-05, "step": 15245, "training_step_time": 0.10636377334594727 }, { "epoch": 2.32635498046875e-05, "model_forward_time": 0.025176525115966797, "step": 15246 }, { "epoch": 2.32635498046875e-05, "step": 15246, "training_step_time": 0.19860529899597168 }, { "epoch": 2.326507568359375e-05, "model_forward_time": 0.024476051330566406, "step": 15247 }, { "epoch": 2.326507568359375e-05, "step": 15247, "training_step_time": 0.10784411430358887 }, { "epoch": 2.32666015625e-05, "model_forward_time": 0.024445533752441406, "step": 15248 }, { "epoch": 2.32666015625e-05, "step": 15248, "training_step_time": 0.10091519355773926 }, { "epoch": 2.326812744140625e-05, "model_forward_time": 0.02532052993774414, "step": 15249 }, { "epoch": 2.326812744140625e-05, "step": 15249, "training_step_time": 0.10778450965881348 }, { "epoch": 2.32696533203125e-05, "grad_norm": 0.35009992122650146, "learning_rate": 5.2754388017793274e-05, "loss": 0.0257, "step": 15250 }, { "epoch": 2.32696533203125e-05, "model_forward_time": 0.024982690811157227, "step": 15250 }, { "epoch": 2.32696533203125e-05, "step": 15250, "training_step_time": 0.10418844223022461 }, { "epoch": 2.327117919921875e-05, "model_forward_time": 0.0249783992767334, "step": 15251 }, { "epoch": 2.327117919921875e-05, "step": 15251, "training_step_time": 0.103668212890625 }, { "epoch": 2.3272705078125e-05, "model_forward_time": 0.025068998336791992, "step": 15252 }, { "epoch": 2.3272705078125e-05, "step": 15252, "training_step_time": 0.10806918144226074 }, { "epoch": 2.327423095703125e-05, "model_forward_time": 0.024802446365356445, "step": 15253 }, { "epoch": 2.327423095703125e-05, "step": 15253, "training_step_time": 0.10371613502502441 }, { "epoch": 2.32757568359375e-05, "model_forward_time": 0.02511143684387207, "step": 15254 }, { "epoch": 2.32757568359375e-05, "step": 15254, "training_step_time": 0.10765218734741211 }, { "epoch": 2.327728271484375e-05, "model_forward_time": 0.025191783905029297, "step": 15255 }, { "epoch": 2.327728271484375e-05, "step": 15255, "training_step_time": 0.10999059677124023 }, { "epoch": 2.327880859375e-05, "model_forward_time": 0.025082826614379883, "step": 15256 }, { "epoch": 2.327880859375e-05, "step": 15256, "training_step_time": 0.10776138305664062 }, { "epoch": 2.328033447265625e-05, "model_forward_time": 0.024895429611206055, "step": 15257 }, { "epoch": 2.328033447265625e-05, "step": 15257, "training_step_time": 0.10508298873901367 }, { "epoch": 2.32818603515625e-05, "model_forward_time": 0.024883270263671875, "step": 15258 }, { "epoch": 2.32818603515625e-05, "step": 15258, "training_step_time": 0.12468743324279785 }, { "epoch": 2.328338623046875e-05, "model_forward_time": 0.025212526321411133, "step": 15259 }, { "epoch": 2.328338623046875e-05, "step": 15259, "training_step_time": 0.10891914367675781 }, { "epoch": 2.3284912109375e-05, "grad_norm": 0.35786086320877075, "learning_rate": 5.269935438715312e-05, "loss": 0.0167, "step": 15260 }, { "epoch": 2.3284912109375e-05, "model_forward_time": 0.025102615356445312, "step": 15260 }, { "epoch": 2.3284912109375e-05, "step": 15260, "training_step_time": 0.11223840713500977 }, { "epoch": 2.328643798828125e-05, "model_forward_time": 0.025061368942260742, "step": 15261 }, { "epoch": 2.328643798828125e-05, "step": 15261, "training_step_time": 0.1064004898071289 }, { "epoch": 2.32879638671875e-05, "model_forward_time": 0.024347543716430664, "step": 15262 }, { "epoch": 2.32879638671875e-05, "step": 15262, "training_step_time": 0.19633865356445312 }, { "epoch": 2.328948974609375e-05, "model_forward_time": 0.024576187133789062, "step": 15263 }, { "epoch": 2.328948974609375e-05, "step": 15263, "training_step_time": 0.1814260482788086 }, { "epoch": 2.3291015625e-05, "model_forward_time": 0.02428889274597168, "step": 15264 }, { "epoch": 2.3291015625e-05, "step": 15264, "training_step_time": 0.21654391288757324 }, { "epoch": 2.329254150390625e-05, "model_forward_time": 0.024482011795043945, "step": 15265 }, { "epoch": 2.329254150390625e-05, "step": 15265, "training_step_time": 0.11835718154907227 }, { "epoch": 2.32940673828125e-05, "model_forward_time": 0.024366378784179688, "step": 15266 }, { "epoch": 2.32940673828125e-05, "step": 15266, "training_step_time": 0.13567018508911133 }, { "epoch": 2.329559326171875e-05, "model_forward_time": 0.0249178409576416, "step": 15267 }, { "epoch": 2.329559326171875e-05, "step": 15267, "training_step_time": 0.16544771194458008 }, { "epoch": 2.3297119140625e-05, "model_forward_time": 0.02442169189453125, "step": 15268 }, { "epoch": 2.3297119140625e-05, "step": 15268, "training_step_time": 0.22284746170043945 }, { "epoch": 2.329864501953125e-05, "model_forward_time": 0.024158954620361328, "step": 15269 }, { "epoch": 2.329864501953125e-05, "step": 15269, "training_step_time": 0.11124777793884277 }, { "epoch": 2.33001708984375e-05, "grad_norm": 0.2568480670452118, "learning_rate": 5.264431747654284e-05, "loss": 0.0116, "step": 15270 }, { "epoch": 2.33001708984375e-05, "model_forward_time": 0.023680686950683594, "step": 15270 }, { "epoch": 2.33001708984375e-05, "step": 15270, "training_step_time": 0.10302042961120605 }, { "epoch": 2.330169677734375e-05, "model_forward_time": 0.027590513229370117, "step": 15271 }, { "epoch": 2.330169677734375e-05, "step": 15271, "training_step_time": 0.1077120304107666 }, { "epoch": 2.330322265625e-05, "model_forward_time": 0.02540135383605957, "step": 15272 }, { "epoch": 2.330322265625e-05, "step": 15272, "training_step_time": 0.10612607002258301 }, { "epoch": 2.330474853515625e-05, "model_forward_time": 0.02534174919128418, "step": 15273 }, { "epoch": 2.330474853515625e-05, "step": 15273, "training_step_time": 0.10607242584228516 }, { "epoch": 2.33062744140625e-05, "model_forward_time": 0.02555370330810547, "step": 15274 }, { "epoch": 2.33062744140625e-05, "step": 15274, "training_step_time": 0.10603666305541992 }, { "epoch": 2.330780029296875e-05, "model_forward_time": 0.025315046310424805, "step": 15275 }, { "epoch": 2.330780029296875e-05, "step": 15275, "training_step_time": 0.10519814491271973 }, { "epoch": 2.3309326171875e-05, "model_forward_time": 0.025450468063354492, "step": 15276 }, { "epoch": 2.3309326171875e-05, "step": 15276, "training_step_time": 0.10500073432922363 }, { "epoch": 2.331085205078125e-05, "model_forward_time": 0.025153398513793945, "step": 15277 }, { "epoch": 2.331085205078125e-05, "step": 15277, "training_step_time": 0.10687518119812012 }, { "epoch": 2.33123779296875e-05, "model_forward_time": 0.02491164207458496, "step": 15278 }, { "epoch": 2.33123779296875e-05, "step": 15278, "training_step_time": 0.10454821586608887 }, { "epoch": 2.331390380859375e-05, "model_forward_time": 0.02551555633544922, "step": 15279 }, { "epoch": 2.331390380859375e-05, "step": 15279, "training_step_time": 0.10577702522277832 }, { "epoch": 2.33154296875e-05, "grad_norm": 0.2536419630050659, "learning_rate": 5.258927735283748e-05, "loss": 0.0116, "step": 15280 }, { "epoch": 2.33154296875e-05, "model_forward_time": 0.025087833404541016, "step": 15280 }, { "epoch": 2.33154296875e-05, "step": 15280, "training_step_time": 0.1040959358215332 }, { "epoch": 2.331695556640625e-05, "model_forward_time": 0.024971961975097656, "step": 15281 }, { "epoch": 2.331695556640625e-05, "step": 15281, "training_step_time": 0.10511565208435059 }, { "epoch": 2.33184814453125e-05, "model_forward_time": 0.023833274841308594, "step": 15282 }, { "epoch": 2.33184814453125e-05, "step": 15282, "training_step_time": 0.10863137245178223 }, { "epoch": 2.332000732421875e-05, "model_forward_time": 0.02470111846923828, "step": 15283 }, { "epoch": 2.332000732421875e-05, "step": 15283, "training_step_time": 0.10718393325805664 }, { "epoch": 2.3321533203125e-05, "model_forward_time": 0.025243282318115234, "step": 15284 }, { "epoch": 2.3321533203125e-05, "step": 15284, "training_step_time": 0.1086874008178711 }, { "epoch": 2.332305908203125e-05, "model_forward_time": 0.025231361389160156, "step": 15285 }, { "epoch": 2.332305908203125e-05, "step": 15285, "training_step_time": 0.10646581649780273 }, { "epoch": 2.33245849609375e-05, "model_forward_time": 0.025090694427490234, "step": 15286 }, { "epoch": 2.33245849609375e-05, "step": 15286, "training_step_time": 0.10531091690063477 }, { "epoch": 2.332611083984375e-05, "model_forward_time": 0.025076866149902344, "step": 15287 }, { "epoch": 2.332611083984375e-05, "step": 15287, "training_step_time": 0.12819266319274902 }, { "epoch": 2.332763671875e-05, "model_forward_time": 0.026549577713012695, "step": 15288 }, { "epoch": 2.332763671875e-05, "step": 15288, "training_step_time": 0.1395878791809082 }, { "epoch": 2.332916259765625e-05, "model_forward_time": 0.02529454231262207, "step": 15289 }, { "epoch": 2.332916259765625e-05, "step": 15289, "training_step_time": 0.10957598686218262 }, { "epoch": 2.33306884765625e-05, "grad_norm": 0.20586350560188293, "learning_rate": 5.2534234082915976e-05, "loss": 0.0218, "step": 15290 }, { "epoch": 2.33306884765625e-05, "model_forward_time": 0.024724721908569336, "step": 15290 }, { "epoch": 2.33306884765625e-05, "step": 15290, "training_step_time": 0.1301717758178711 }, { "epoch": 2.333221435546875e-05, "model_forward_time": 0.02480340003967285, "step": 15291 }, { "epoch": 2.333221435546875e-05, "step": 15291, "training_step_time": 0.19103646278381348 }, { "epoch": 2.3333740234375e-05, "model_forward_time": 0.024306297302246094, "step": 15292 }, { "epoch": 2.3333740234375e-05, "step": 15292, "training_step_time": 0.18758130073547363 }, { "epoch": 2.333526611328125e-05, "model_forward_time": 0.023836135864257812, "step": 15293 }, { "epoch": 2.333526611328125e-05, "step": 15293, "training_step_time": 0.1793956756591797 }, { "epoch": 2.33367919921875e-05, "model_forward_time": 0.024411916732788086, "step": 15294 }, { "epoch": 2.33367919921875e-05, "step": 15294, "training_step_time": 0.16347241401672363 }, { "epoch": 2.333831787109375e-05, "model_forward_time": 0.02408623695373535, "step": 15295 }, { "epoch": 2.333831787109375e-05, "step": 15295, "training_step_time": 0.14354419708251953 }, { "epoch": 2.333984375e-05, "model_forward_time": 0.02458810806274414, "step": 15296 }, { "epoch": 2.333984375e-05, "step": 15296, "training_step_time": 0.13205933570861816 }, { "epoch": 2.334136962890625e-05, "model_forward_time": 0.024129629135131836, "step": 15297 }, { "epoch": 2.334136962890625e-05, "step": 15297, "training_step_time": 0.1281569004058838 }, { "epoch": 2.33428955078125e-05, "model_forward_time": 0.02498650550842285, "step": 15298 }, { "epoch": 2.33428955078125e-05, "step": 15298, "training_step_time": 0.12688708305358887 }, { "epoch": 2.334442138671875e-05, "model_forward_time": 0.024173736572265625, "step": 15299 }, { "epoch": 2.334442138671875e-05, "step": 15299, "training_step_time": 0.12340426445007324 }, { "epoch": 2.3345947265625e-05, "grad_norm": 0.42707836627960205, "learning_rate": 5.247918773366112e-05, "loss": 0.0148, "step": 15300 }, { "epoch": 2.3345947265625e-05, "model_forward_time": 0.024618864059448242, "step": 15300 }, { "epoch": 2.3345947265625e-05, "step": 15300, "training_step_time": 0.11664533615112305 }, { "epoch": 2.334747314453125e-05, "model_forward_time": 0.025237083435058594, "step": 15301 }, { "epoch": 2.334747314453125e-05, "step": 15301, "training_step_time": 0.2027144432067871 }, { "epoch": 2.33489990234375e-05, "model_forward_time": 0.024098634719848633, "step": 15302 }, { "epoch": 2.33489990234375e-05, "step": 15302, "training_step_time": 0.10950112342834473 }, { "epoch": 2.335052490234375e-05, "model_forward_time": 0.024495363235473633, "step": 15303 }, { "epoch": 2.335052490234375e-05, "step": 15303, "training_step_time": 0.1127464771270752 }, { "epoch": 2.335205078125e-05, "model_forward_time": 0.025002002716064453, "step": 15304 }, { "epoch": 2.335205078125e-05, "step": 15304, "training_step_time": 0.10552859306335449 }, { "epoch": 2.335357666015625e-05, "model_forward_time": 0.02428436279296875, "step": 15305 }, { "epoch": 2.335357666015625e-05, "step": 15305, "training_step_time": 0.20306682586669922 }, { "epoch": 2.33551025390625e-05, "model_forward_time": 0.024345874786376953, "step": 15306 }, { "epoch": 2.33551025390625e-05, "step": 15306, "training_step_time": 0.13819527626037598 }, { "epoch": 2.335662841796875e-05, "model_forward_time": 0.024289846420288086, "step": 15307 }, { "epoch": 2.335662841796875e-05, "step": 15307, "training_step_time": 0.22170662879943848 }, { "epoch": 2.3358154296875e-05, "model_forward_time": 0.02424025535583496, "step": 15308 }, { "epoch": 2.3358154296875e-05, "step": 15308, "training_step_time": 0.12711763381958008 }, { "epoch": 2.335968017578125e-05, "model_forward_time": 0.02436089515686035, "step": 15309 }, { "epoch": 2.335968017578125e-05, "step": 15309, "training_step_time": 0.12703967094421387 }, { "epoch": 2.33612060546875e-05, "grad_norm": 0.1862405240535736, "learning_rate": 5.242413837195938e-05, "loss": 0.0224, "step": 15310 }, { "epoch": 2.33612060546875e-05, "model_forward_time": 0.025122642517089844, "step": 15310 }, { "epoch": 2.33612060546875e-05, "step": 15310, "training_step_time": 0.1587541103363037 }, { "epoch": 2.336273193359375e-05, "model_forward_time": 0.024602651596069336, "step": 15311 }, { "epoch": 2.336273193359375e-05, "step": 15311, "training_step_time": 0.10514330863952637 }, { "epoch": 2.33642578125e-05, "model_forward_time": 0.025187015533447266, "step": 15312 }, { "epoch": 2.33642578125e-05, "step": 15312, "training_step_time": 0.11492061614990234 }, { "epoch": 2.336578369140625e-05, "model_forward_time": 0.024924278259277344, "step": 15313 }, { "epoch": 2.336578369140625e-05, "step": 15313, "training_step_time": 0.12243032455444336 }, { "epoch": 2.33673095703125e-05, "model_forward_time": 0.024816513061523438, "step": 15314 }, { "epoch": 2.33673095703125e-05, "step": 15314, "training_step_time": 0.1029207706451416 }, { "epoch": 2.336883544921875e-05, "model_forward_time": 0.02472090721130371, "step": 15315 }, { "epoch": 2.336883544921875e-05, "step": 15315, "training_step_time": 0.10407686233520508 }, { "epoch": 2.3370361328125e-05, "model_forward_time": 0.025075674057006836, "step": 15316 }, { "epoch": 2.3370361328125e-05, "step": 15316, "training_step_time": 0.10959339141845703 }, { "epoch": 2.337188720703125e-05, "model_forward_time": 0.025055408477783203, "step": 15317 }, { "epoch": 2.337188720703125e-05, "step": 15317, "training_step_time": 0.10413765907287598 }, { "epoch": 2.33734130859375e-05, "model_forward_time": 0.024980545043945312, "step": 15318 }, { "epoch": 2.33734130859375e-05, "step": 15318, "training_step_time": 0.10468339920043945 }, { "epoch": 2.337493896484375e-05, "model_forward_time": 0.025492191314697266, "step": 15319 }, { "epoch": 2.337493896484375e-05, "step": 15319, "training_step_time": 0.1098322868347168 }, { "epoch": 2.337646484375e-05, "grad_norm": 0.18742430210113525, "learning_rate": 5.2369086064700945e-05, "loss": 0.0098, "step": 15320 }, { "epoch": 2.337646484375e-05, "model_forward_time": 0.025102853775024414, "step": 15320 }, { "epoch": 2.337646484375e-05, "step": 15320, "training_step_time": 0.10607647895812988 }, { "epoch": 2.337799072265625e-05, "model_forward_time": 0.025090932846069336, "step": 15321 }, { "epoch": 2.337799072265625e-05, "step": 15321, "training_step_time": 0.10474205017089844 }, { "epoch": 2.33795166015625e-05, "model_forward_time": 0.025668859481811523, "step": 15322 }, { "epoch": 2.33795166015625e-05, "step": 15322, "training_step_time": 0.10908031463623047 }, { "epoch": 2.338104248046875e-05, "model_forward_time": 0.025102853775024414, "step": 15323 }, { "epoch": 2.338104248046875e-05, "step": 15323, "training_step_time": 0.1050269603729248 }, { "epoch": 2.3382568359375e-05, "model_forward_time": 0.025225162506103516, "step": 15324 }, { "epoch": 2.3382568359375e-05, "step": 15324, "training_step_time": 0.10441994667053223 }, { "epoch": 2.338409423828125e-05, "model_forward_time": 0.025038480758666992, "step": 15325 }, { "epoch": 2.338409423828125e-05, "step": 15325, "training_step_time": 0.11023640632629395 }, { "epoch": 2.33856201171875e-05, "model_forward_time": 0.024993896484375, "step": 15326 }, { "epoch": 2.33856201171875e-05, "step": 15326, "training_step_time": 0.13028454780578613 }, { "epoch": 2.338714599609375e-05, "model_forward_time": 0.024782896041870117, "step": 15327 }, { "epoch": 2.338714599609375e-05, "step": 15327, "training_step_time": 0.13089704513549805 }, { "epoch": 2.3388671875e-05, "model_forward_time": 0.02513885498046875, "step": 15328 }, { "epoch": 2.3388671875e-05, "step": 15328, "training_step_time": 0.1268014907836914 }, { "epoch": 2.339019775390625e-05, "model_forward_time": 0.02509903907775879, "step": 15329 }, { "epoch": 2.339019775390625e-05, "step": 15329, "training_step_time": 0.11759662628173828 }, { "epoch": 2.33917236328125e-05, "grad_norm": 0.3768186867237091, "learning_rate": 5.231403087877955e-05, "loss": 0.0103, "step": 15330 }, { "epoch": 2.33917236328125e-05, "model_forward_time": 0.025177955627441406, "step": 15330 }, { "epoch": 2.33917236328125e-05, "step": 15330, "training_step_time": 0.16248250007629395 }, { "epoch": 2.339324951171875e-05, "model_forward_time": 0.024389982223510742, "step": 15331 }, { "epoch": 2.339324951171875e-05, "step": 15331, "training_step_time": 0.13031840324401855 }, { "epoch": 2.3394775390625e-05, "model_forward_time": 0.02442455291748047, "step": 15332 }, { "epoch": 2.3394775390625e-05, "step": 15332, "training_step_time": 0.11825394630432129 }, { "epoch": 2.339630126953125e-05, "model_forward_time": 0.024667739868164062, "step": 15333 }, { "epoch": 2.339630126953125e-05, "step": 15333, "training_step_time": 0.11692261695861816 }, { "epoch": 2.33978271484375e-05, "model_forward_time": 0.024834871292114258, "step": 15334 }, { "epoch": 2.33978271484375e-05, "step": 15334, "training_step_time": 0.16986608505249023 }, { "epoch": 2.339935302734375e-05, "model_forward_time": 0.02391648292541504, "step": 15335 }, { "epoch": 2.339935302734375e-05, "step": 15335, "training_step_time": 0.13137102127075195 }, { "epoch": 2.340087890625e-05, "model_forward_time": 0.024741411209106445, "step": 15336 }, { "epoch": 2.340087890625e-05, "step": 15336, "training_step_time": 0.10733795166015625 }, { "epoch": 2.340240478515625e-05, "model_forward_time": 0.025008678436279297, "step": 15337 }, { "epoch": 2.340240478515625e-05, "step": 15337, "training_step_time": 0.10665702819824219 }, { "epoch": 2.34039306640625e-05, "model_forward_time": 0.02646803855895996, "step": 15338 }, { "epoch": 2.34039306640625e-05, "step": 15338, "training_step_time": 0.1087789535522461 }, { "epoch": 2.340545654296875e-05, "model_forward_time": 0.025359630584716797, "step": 15339 }, { "epoch": 2.340545654296875e-05, "step": 15339, "training_step_time": 0.10595822334289551 }, { "epoch": 2.3406982421875e-05, "grad_norm": 0.3194389045238495, "learning_rate": 5.225897288109245e-05, "loss": 0.0134, "step": 15340 }, { "epoch": 2.3406982421875e-05, "model_forward_time": 0.025155067443847656, "step": 15340 }, { "epoch": 2.3406982421875e-05, "step": 15340, "training_step_time": 0.10541176795959473 }, { "epoch": 2.340850830078125e-05, "model_forward_time": 0.024828433990478516, "step": 15341 }, { "epoch": 2.340850830078125e-05, "step": 15341, "training_step_time": 0.10384440422058105 }, { "epoch": 2.34100341796875e-05, "model_forward_time": 0.025161266326904297, "step": 15342 }, { "epoch": 2.34100341796875e-05, "step": 15342, "training_step_time": 0.10907888412475586 }, { "epoch": 2.341156005859375e-05, "model_forward_time": 0.024771690368652344, "step": 15343 }, { "epoch": 2.341156005859375e-05, "step": 15343, "training_step_time": 0.10642671585083008 }, { "epoch": 2.34130859375e-05, "model_forward_time": 0.025197505950927734, "step": 15344 }, { "epoch": 2.34130859375e-05, "step": 15344, "training_step_time": 0.1052391529083252 }, { "epoch": 2.341461181640625e-05, "model_forward_time": 0.025090932846069336, "step": 15345 }, { "epoch": 2.341461181640625e-05, "step": 15345, "training_step_time": 0.10569882392883301 }, { "epoch": 2.34161376953125e-05, "model_forward_time": 0.025136709213256836, "step": 15346 }, { "epoch": 2.34161376953125e-05, "step": 15346, "training_step_time": 0.10583758354187012 }, { "epoch": 2.341766357421875e-05, "model_forward_time": 0.025170087814331055, "step": 15347 }, { "epoch": 2.341766357421875e-05, "step": 15347, "training_step_time": 0.10538554191589355 }, { "epoch": 2.3419189453125e-05, "model_forward_time": 0.025414705276489258, "step": 15348 }, { "epoch": 2.3419189453125e-05, "step": 15348, "training_step_time": 0.11744141578674316 }, { "epoch": 2.342071533203125e-05, "model_forward_time": 0.025246858596801758, "step": 15349 }, { "epoch": 2.342071533203125e-05, "step": 15349, "training_step_time": 0.1069784164428711 }, { "epoch": 2.34222412109375e-05, "grad_norm": 0.26905763149261475, "learning_rate": 5.220391213854028e-05, "loss": 0.0184, "step": 15350 }, { "epoch": 2.34222412109375e-05, "model_forward_time": 0.025348424911499023, "step": 15350 }, { "epoch": 2.34222412109375e-05, "step": 15350, "training_step_time": 0.10698223114013672 }, { "epoch": 2.342376708984375e-05, "model_forward_time": 0.02601456642150879, "step": 15351 }, { "epoch": 2.342376708984375e-05, "step": 15351, "training_step_time": 0.1059103012084961 }, { "epoch": 2.342529296875e-05, "model_forward_time": 0.02536630630493164, "step": 15352 }, { "epoch": 2.342529296875e-05, "step": 15352, "training_step_time": 0.1172034740447998 }, { "epoch": 2.342681884765625e-05, "model_forward_time": 0.025052785873413086, "step": 15353 }, { "epoch": 2.342681884765625e-05, "step": 15353, "training_step_time": 0.16534662246704102 }, { "epoch": 2.34283447265625e-05, "model_forward_time": 0.024302244186401367, "step": 15354 }, { "epoch": 2.34283447265625e-05, "step": 15354, "training_step_time": 0.16155099868774414 }, { "epoch": 2.342987060546875e-05, "model_forward_time": 0.023742198944091797, "step": 15355 }, { "epoch": 2.342987060546875e-05, "step": 15355, "training_step_time": 0.1527423858642578 }, { "epoch": 2.3431396484375e-05, "model_forward_time": 0.02551412582397461, "step": 15356 }, { "epoch": 2.3431396484375e-05, "step": 15356, "training_step_time": 0.12360501289367676 }, { "epoch": 2.343292236328125e-05, "model_forward_time": 0.024333477020263672, "step": 15357 }, { "epoch": 2.343292236328125e-05, "step": 15357, "training_step_time": 0.1407303810119629 }, { "epoch": 2.34344482421875e-05, "model_forward_time": 0.02489304542541504, "step": 15358 }, { "epoch": 2.34344482421875e-05, "step": 15358, "training_step_time": 0.15712380409240723 }, { "epoch": 2.343597412109375e-05, "model_forward_time": 0.024536848068237305, "step": 15359 }, { "epoch": 2.343597412109375e-05, "step": 15359, "training_step_time": 0.10138440132141113 }, { "epoch": 2.34375e-05, "grad_norm": 0.23495899140834808, "learning_rate": 5.214884871802703e-05, "loss": 0.011, "step": 15360 }, { "epoch": 2.34375e-05, "model_forward_time": 0.02471184730529785, "step": 15360 }, { "epoch": 2.34375e-05, "step": 15360, "training_step_time": 0.11548018455505371 }, { "epoch": 2.343902587890625e-05, "model_forward_time": 0.02496027946472168, "step": 15361 }, { "epoch": 2.343902587890625e-05, "step": 15361, "training_step_time": 0.12210655212402344 }, { "epoch": 2.34405517578125e-05, "model_forward_time": 0.025074481964111328, "step": 15362 }, { "epoch": 2.34405517578125e-05, "step": 15362, "training_step_time": 0.11411142349243164 }, { "epoch": 2.344207763671875e-05, "model_forward_time": 0.024842023849487305, "step": 15363 }, { "epoch": 2.344207763671875e-05, "step": 15363, "training_step_time": 0.11220860481262207 }, { "epoch": 2.3443603515625e-05, "model_forward_time": 0.02507781982421875, "step": 15364 }, { "epoch": 2.3443603515625e-05, "step": 15364, "training_step_time": 0.10889554023742676 }, { "epoch": 2.344512939453125e-05, "model_forward_time": 0.025470972061157227, "step": 15365 }, { "epoch": 2.344512939453125e-05, "step": 15365, "training_step_time": 0.10918879508972168 }, { "epoch": 2.34466552734375e-05, "model_forward_time": 0.02514028549194336, "step": 15366 }, { "epoch": 2.34466552734375e-05, "step": 15366, "training_step_time": 0.11275744438171387 }, { "epoch": 2.344818115234375e-05, "model_forward_time": 0.025383472442626953, "step": 15367 }, { "epoch": 2.344818115234375e-05, "step": 15367, "training_step_time": 0.11038017272949219 }, { "epoch": 2.344970703125e-05, "model_forward_time": 0.025271177291870117, "step": 15368 }, { "epoch": 2.344970703125e-05, "step": 15368, "training_step_time": 0.11092162132263184 }, { "epoch": 2.345123291015625e-05, "model_forward_time": 0.02517414093017578, "step": 15369 }, { "epoch": 2.345123291015625e-05, "step": 15369, "training_step_time": 0.11031627655029297 }, { "epoch": 2.34527587890625e-05, "grad_norm": 0.16505533456802368, "learning_rate": 5.209378268645998e-05, "loss": 0.0132, "step": 15370 }, { "epoch": 2.34527587890625e-05, "model_forward_time": 0.025294065475463867, "step": 15370 }, { "epoch": 2.34527587890625e-05, "step": 15370, "training_step_time": 0.10870862007141113 }, { "epoch": 2.345428466796875e-05, "model_forward_time": 0.024875640869140625, "step": 15371 }, { "epoch": 2.345428466796875e-05, "step": 15371, "training_step_time": 0.10671424865722656 }, { "epoch": 2.3455810546875e-05, "model_forward_time": 0.025078773498535156, "step": 15372 }, { "epoch": 2.3455810546875e-05, "step": 15372, "training_step_time": 0.1061561107635498 }, { "epoch": 2.345733642578125e-05, "model_forward_time": 0.025143861770629883, "step": 15373 }, { "epoch": 2.345733642578125e-05, "step": 15373, "training_step_time": 0.10863304138183594 }, { "epoch": 2.34588623046875e-05, "model_forward_time": 0.024934053421020508, "step": 15374 }, { "epoch": 2.34588623046875e-05, "step": 15374, "training_step_time": 0.10528159141540527 }, { "epoch": 2.346038818359375e-05, "model_forward_time": 0.02546405792236328, "step": 15375 }, { "epoch": 2.346038818359375e-05, "step": 15375, "training_step_time": 0.10586833953857422 }, { "epoch": 2.34619140625e-05, "model_forward_time": 0.025496244430541992, "step": 15376 }, { "epoch": 2.34619140625e-05, "step": 15376, "training_step_time": 0.10705113410949707 }, { "epoch": 2.346343994140625e-05, "model_forward_time": 0.02477574348449707, "step": 15377 }, { "epoch": 2.346343994140625e-05, "step": 15377, "training_step_time": 0.10455822944641113 }, { "epoch": 2.34649658203125e-05, "model_forward_time": 0.024817943572998047, "step": 15378 }, { "epoch": 2.34649658203125e-05, "step": 15378, "training_step_time": 0.11150622367858887 }, { "epoch": 2.346649169921875e-05, "model_forward_time": 0.024812698364257812, "step": 15379 }, { "epoch": 2.346649169921875e-05, "step": 15379, "training_step_time": 0.15322542190551758 }, { "epoch": 2.3468017578125e-05, "grad_norm": 0.2848520576953888, "learning_rate": 5.203871411074954e-05, "loss": 0.0138, "step": 15380 }, { "epoch": 2.3468017578125e-05, "model_forward_time": 0.024263381958007812, "step": 15380 }, { "epoch": 2.3468017578125e-05, "step": 15380, "training_step_time": 0.10468792915344238 }, { "epoch": 2.346954345703125e-05, "model_forward_time": 0.02417922019958496, "step": 15381 }, { "epoch": 2.346954345703125e-05, "step": 15381, "training_step_time": 0.11133933067321777 }, { "epoch": 2.34710693359375e-05, "model_forward_time": 0.025397539138793945, "step": 15382 }, { "epoch": 2.34710693359375e-05, "step": 15382, "training_step_time": 0.10789775848388672 }, { "epoch": 2.347259521484375e-05, "model_forward_time": 0.024869680404663086, "step": 15383 }, { "epoch": 2.347259521484375e-05, "step": 15383, "training_step_time": 0.10707378387451172 }, { "epoch": 2.347412109375e-05, "model_forward_time": 0.025076627731323242, "step": 15384 }, { "epoch": 2.347412109375e-05, "step": 15384, "training_step_time": 0.19950199127197266 }, { "epoch": 2.347564697265625e-05, "model_forward_time": 0.024183273315429688, "step": 15385 }, { "epoch": 2.347564697265625e-05, "step": 15385, "training_step_time": 0.10201859474182129 }, { "epoch": 2.34771728515625e-05, "model_forward_time": 0.024294137954711914, "step": 15386 }, { "epoch": 2.34771728515625e-05, "step": 15386, "training_step_time": 0.1039879322052002 }, { "epoch": 2.347869873046875e-05, "model_forward_time": 0.02482318878173828, "step": 15387 }, { "epoch": 2.347869873046875e-05, "step": 15387, "training_step_time": 0.10565757751464844 }, { "epoch": 2.3480224609375e-05, "model_forward_time": 0.025242090225219727, "step": 15388 }, { "epoch": 2.3480224609375e-05, "step": 15388, "training_step_time": 0.10662460327148438 }, { "epoch": 2.348175048828125e-05, "model_forward_time": 0.02550506591796875, "step": 15389 }, { "epoch": 2.348175048828125e-05, "step": 15389, "training_step_time": 0.1093130111694336 }, { "epoch": 2.34832763671875e-05, "grad_norm": 0.298772394657135, "learning_rate": 5.198364305780922e-05, "loss": 0.0117, "step": 15390 }, { "epoch": 2.34832763671875e-05, "model_forward_time": 0.025131940841674805, "step": 15390 }, { "epoch": 2.34832763671875e-05, "step": 15390, "training_step_time": 0.10651803016662598 }, { "epoch": 2.348480224609375e-05, "model_forward_time": 0.025292634963989258, "step": 15391 }, { "epoch": 2.348480224609375e-05, "step": 15391, "training_step_time": 0.10826468467712402 }, { "epoch": 2.3486328125e-05, "model_forward_time": 0.02551889419555664, "step": 15392 }, { "epoch": 2.3486328125e-05, "step": 15392, "training_step_time": 0.10725760459899902 }, { "epoch": 2.348785400390625e-05, "model_forward_time": 0.02572035789489746, "step": 15393 }, { "epoch": 2.348785400390625e-05, "step": 15393, "training_step_time": 0.11358070373535156 }, { "epoch": 2.34893798828125e-05, "model_forward_time": 0.025273561477661133, "step": 15394 }, { "epoch": 2.34893798828125e-05, "step": 15394, "training_step_time": 0.1069033145904541 }, { "epoch": 2.349090576171875e-05, "model_forward_time": 0.025171518325805664, "step": 15395 }, { "epoch": 2.349090576171875e-05, "step": 15395, "training_step_time": 0.10815834999084473 }, { "epoch": 2.3492431640625e-05, "model_forward_time": 0.025238752365112305, "step": 15396 }, { "epoch": 2.3492431640625e-05, "step": 15396, "training_step_time": 0.16638398170471191 }, { "epoch": 2.349395751953125e-05, "model_forward_time": 0.025639057159423828, "step": 15397 }, { "epoch": 2.349395751953125e-05, "step": 15397, "training_step_time": 0.10660839080810547 }, { "epoch": 2.34954833984375e-05, "model_forward_time": 0.024918794631958008, "step": 15398 }, { "epoch": 2.34954833984375e-05, "step": 15398, "training_step_time": 0.14927196502685547 }, { "epoch": 2.349700927734375e-05, "model_forward_time": 0.024569272994995117, "step": 15399 }, { "epoch": 2.349700927734375e-05, "step": 15399, "training_step_time": 0.12423467636108398 }, { "epoch": 2.349853515625e-05, "grad_norm": 0.3625675141811371, "learning_rate": 5.1928569594555524e-05, "loss": 0.0119, "step": 15400 }, { "epoch": 2.349853515625e-05, "model_forward_time": 0.02469778060913086, "step": 15400 }, { "epoch": 2.349853515625e-05, "step": 15400, "training_step_time": 0.1961045265197754 }, { "epoch": 2.350006103515625e-05, "model_forward_time": 0.024578332901000977, "step": 15401 }, { "epoch": 2.350006103515625e-05, "step": 15401, "training_step_time": 0.14148569107055664 }, { "epoch": 2.35015869140625e-05, "model_forward_time": 0.024995803833007812, "step": 15402 }, { "epoch": 2.35015869140625e-05, "step": 15402, "training_step_time": 0.10483098030090332 }, { "epoch": 2.350311279296875e-05, "model_forward_time": 0.025381088256835938, "step": 15403 }, { "epoch": 2.350311279296875e-05, "step": 15403, "training_step_time": 0.21323180198669434 }, { "epoch": 2.3504638671875e-05, "model_forward_time": 0.02424168586730957, "step": 15404 }, { "epoch": 2.3504638671875e-05, "step": 15404, "training_step_time": 0.11415362358093262 }, { "epoch": 2.350616455078125e-05, "model_forward_time": 0.024085283279418945, "step": 15405 }, { "epoch": 2.350616455078125e-05, "step": 15405, "training_step_time": 0.13407683372497559 }, { "epoch": 2.35076904296875e-05, "model_forward_time": 0.025089502334594727, "step": 15406 }, { "epoch": 2.35076904296875e-05, "step": 15406, "training_step_time": 0.13432741165161133 }, { "epoch": 2.350921630859375e-05, "model_forward_time": 0.02424168586730957, "step": 15407 }, { "epoch": 2.350921630859375e-05, "step": 15407, "training_step_time": 0.11794853210449219 }, { "epoch": 2.35107421875e-05, "model_forward_time": 0.024641036987304688, "step": 15408 }, { "epoch": 2.35107421875e-05, "step": 15408, "training_step_time": 0.12234663963317871 }, { "epoch": 2.351226806640625e-05, "model_forward_time": 0.025035619735717773, "step": 15409 }, { "epoch": 2.351226806640625e-05, "step": 15409, "training_step_time": 0.10802412033081055 }, { "epoch": 2.35137939453125e-05, "grad_norm": 0.5025345683097839, "learning_rate": 5.187349378790793e-05, "loss": 0.0096, "step": 15410 }, { "epoch": 2.35137939453125e-05, "model_forward_time": 0.02489638328552246, "step": 15410 }, { "epoch": 2.35137939453125e-05, "step": 15410, "training_step_time": 0.10715770721435547 }, { "epoch": 2.351531982421875e-05, "model_forward_time": 0.025928258895874023, "step": 15411 }, { "epoch": 2.351531982421875e-05, "step": 15411, "training_step_time": 0.11005735397338867 }, { "epoch": 2.3516845703125e-05, "model_forward_time": 0.024924516677856445, "step": 15412 }, { "epoch": 2.3516845703125e-05, "step": 15412, "training_step_time": 0.1216881275177002 }, { "epoch": 2.351837158203125e-05, "model_forward_time": 0.025143146514892578, "step": 15413 }, { "epoch": 2.351837158203125e-05, "step": 15413, "training_step_time": 0.12044548988342285 }, { "epoch": 2.35198974609375e-05, "model_forward_time": 0.024954795837402344, "step": 15414 }, { "epoch": 2.35198974609375e-05, "step": 15414, "training_step_time": 0.11371207237243652 }, { "epoch": 2.352142333984375e-05, "model_forward_time": 0.024860143661499023, "step": 15415 }, { "epoch": 2.352142333984375e-05, "step": 15415, "training_step_time": 0.12073802947998047 }, { "epoch": 2.352294921875e-05, "model_forward_time": 0.02498793601989746, "step": 15416 }, { "epoch": 2.352294921875e-05, "step": 15416, "training_step_time": 0.1166830062866211 }, { "epoch": 2.352447509765625e-05, "model_forward_time": 0.02482151985168457, "step": 15417 }, { "epoch": 2.352447509765625e-05, "step": 15417, "training_step_time": 0.1142582893371582 }, { "epoch": 2.35260009765625e-05, "model_forward_time": 0.024872303009033203, "step": 15418 }, { "epoch": 2.35260009765625e-05, "step": 15418, "training_step_time": 0.11376190185546875 }, { "epoch": 2.352752685546875e-05, "model_forward_time": 0.025324106216430664, "step": 15419 }, { "epoch": 2.352752685546875e-05, "step": 15419, "training_step_time": 0.11222314834594727 }, { "epoch": 2.3529052734375e-05, "grad_norm": 0.18463116884231567, "learning_rate": 5.1818415704788725e-05, "loss": 0.0134, "step": 15420 }, { "epoch": 2.3529052734375e-05, "model_forward_time": 0.024056196212768555, "step": 15420 }, { "epoch": 2.3529052734375e-05, "step": 15420, "training_step_time": 0.1104884147644043 }, { "epoch": 2.353057861328125e-05, "model_forward_time": 0.025332927703857422, "step": 15421 }, { "epoch": 2.353057861328125e-05, "step": 15421, "training_step_time": 0.1106576919555664 }, { "epoch": 2.35321044921875e-05, "model_forward_time": 0.02504897117614746, "step": 15422 }, { "epoch": 2.35321044921875e-05, "step": 15422, "training_step_time": 0.10846090316772461 }, { "epoch": 2.353363037109375e-05, "model_forward_time": 0.024890899658203125, "step": 15423 }, { "epoch": 2.353363037109375e-05, "step": 15423, "training_step_time": 0.10877013206481934 }, { "epoch": 2.353515625e-05, "model_forward_time": 0.024968862533569336, "step": 15424 }, { "epoch": 2.353515625e-05, "step": 15424, "training_step_time": 0.13439416885375977 }, { "epoch": 2.353668212890625e-05, "model_forward_time": 0.025145292282104492, "step": 15425 }, { "epoch": 2.353668212890625e-05, "step": 15425, "training_step_time": 0.12939810752868652 }, { "epoch": 2.35382080078125e-05, "model_forward_time": 0.02478194236755371, "step": 15426 }, { "epoch": 2.35382080078125e-05, "step": 15426, "training_step_time": 0.11025047302246094 }, { "epoch": 2.353973388671875e-05, "model_forward_time": 0.02718830108642578, "step": 15427 }, { "epoch": 2.353973388671875e-05, "step": 15427, "training_step_time": 0.10894656181335449 }, { "epoch": 2.3541259765625e-05, "model_forward_time": 0.025003433227539062, "step": 15428 }, { "epoch": 2.3541259765625e-05, "step": 15428, "training_step_time": 0.1192939281463623 }, { "epoch": 2.354278564453125e-05, "model_forward_time": 0.024866342544555664, "step": 15429 }, { "epoch": 2.354278564453125e-05, "step": 15429, "training_step_time": 0.11080098152160645 }, { "epoch": 2.35443115234375e-05, "grad_norm": 0.2784884572029114, "learning_rate": 5.176333541212296e-05, "loss": 0.0306, "step": 15430 }, { "epoch": 2.35443115234375e-05, "model_forward_time": 0.02485799789428711, "step": 15430 }, { "epoch": 2.35443115234375e-05, "step": 15430, "training_step_time": 0.18803691864013672 }, { "epoch": 2.354583740234375e-05, "model_forward_time": 0.024337291717529297, "step": 15431 }, { "epoch": 2.354583740234375e-05, "step": 15431, "training_step_time": 0.1038210391998291 }, { "epoch": 2.354736328125e-05, "model_forward_time": 0.024437904357910156, "step": 15432 }, { "epoch": 2.354736328125e-05, "step": 15432, "training_step_time": 0.10624289512634277 }, { "epoch": 2.354888916015625e-05, "model_forward_time": 0.02502727508544922, "step": 15433 }, { "epoch": 2.354888916015625e-05, "step": 15433, "training_step_time": 0.10673403739929199 }, { "epoch": 2.35504150390625e-05, "model_forward_time": 0.02527904510498047, "step": 15434 }, { "epoch": 2.35504150390625e-05, "step": 15434, "training_step_time": 0.10990214347839355 }, { "epoch": 2.355194091796875e-05, "model_forward_time": 0.025089025497436523, "step": 15435 }, { "epoch": 2.355194091796875e-05, "step": 15435, "training_step_time": 0.10812997817993164 }, { "epoch": 2.3553466796875e-05, "model_forward_time": 0.024912357330322266, "step": 15436 }, { "epoch": 2.3553466796875e-05, "step": 15436, "training_step_time": 0.11075687408447266 }, { "epoch": 2.355499267578125e-05, "model_forward_time": 0.024885177612304688, "step": 15437 }, { "epoch": 2.355499267578125e-05, "step": 15437, "training_step_time": 0.11005043983459473 }, { "epoch": 2.35565185546875e-05, "model_forward_time": 0.02490067481994629, "step": 15438 }, { "epoch": 2.35565185546875e-05, "step": 15438, "training_step_time": 0.10965943336486816 }, { "epoch": 2.355804443359375e-05, "model_forward_time": 0.023903369903564453, "step": 15439 }, { "epoch": 2.355804443359375e-05, "step": 15439, "training_step_time": 0.10749006271362305 }, { "epoch": 2.35595703125e-05, "grad_norm": 0.2178107351064682, "learning_rate": 5.170825297683841e-05, "loss": 0.0108, "step": 15440 }, { "epoch": 2.35595703125e-05, "model_forward_time": 0.024161577224731445, "step": 15440 }, { "epoch": 2.35595703125e-05, "step": 15440, "training_step_time": 0.1110990047454834 }, { "epoch": 2.356109619140625e-05, "model_forward_time": 0.024830102920532227, "step": 15441 }, { "epoch": 2.356109619140625e-05, "step": 15441, "training_step_time": 0.1069638729095459 }, { "epoch": 2.35626220703125e-05, "model_forward_time": 0.024884700775146484, "step": 15442 }, { "epoch": 2.35626220703125e-05, "step": 15442, "training_step_time": 0.18027305603027344 }, { "epoch": 2.356414794921875e-05, "model_forward_time": 0.02498769760131836, "step": 15443 }, { "epoch": 2.356414794921875e-05, "step": 15443, "training_step_time": 0.10463571548461914 }, { "epoch": 2.3565673828125e-05, "model_forward_time": 0.024384260177612305, "step": 15444 }, { "epoch": 2.3565673828125e-05, "step": 15444, "training_step_time": 0.19267678260803223 }, { "epoch": 2.356719970703125e-05, "model_forward_time": 0.0242311954498291, "step": 15445 }, { "epoch": 2.356719970703125e-05, "step": 15445, "training_step_time": 0.13937997817993164 }, { "epoch": 2.35687255859375e-05, "model_forward_time": 0.024295806884765625, "step": 15446 }, { "epoch": 2.35687255859375e-05, "step": 15446, "training_step_time": 0.11492729187011719 }, { "epoch": 2.357025146484375e-05, "model_forward_time": 0.025031089782714844, "step": 15447 }, { "epoch": 2.357025146484375e-05, "step": 15447, "training_step_time": 0.1697549819946289 }, { "epoch": 2.357177734375e-05, "model_forward_time": 0.024139881134033203, "step": 15448 }, { "epoch": 2.357177734375e-05, "step": 15448, "training_step_time": 0.1727585792541504 }, { "epoch": 2.357330322265625e-05, "model_forward_time": 0.024634838104248047, "step": 15449 }, { "epoch": 2.357330322265625e-05, "step": 15449, "training_step_time": 0.14006805419921875 }, { "epoch": 2.35748291015625e-05, "grad_norm": 0.3177327811717987, "learning_rate": 5.165316846586541e-05, "loss": 0.0209, "step": 15450 }, { "epoch": 2.35748291015625e-05, "model_forward_time": 0.024422168731689453, "step": 15450 }, { "epoch": 2.35748291015625e-05, "step": 15450, "training_step_time": 0.10347270965576172 }, { "epoch": 2.357635498046875e-05, "model_forward_time": 0.02494335174560547, "step": 15451 }, { "epoch": 2.357635498046875e-05, "step": 15451, "training_step_time": 0.11974620819091797 }, { "epoch": 2.3577880859375e-05, "model_forward_time": 0.024838924407958984, "step": 15452 }, { "epoch": 2.3577880859375e-05, "step": 15452, "training_step_time": 0.10344910621643066 }, { "epoch": 2.357940673828125e-05, "model_forward_time": 0.025091886520385742, "step": 15453 }, { "epoch": 2.357940673828125e-05, "step": 15453, "training_step_time": 0.1819746494293213 }, { "epoch": 2.35809326171875e-05, "model_forward_time": 0.02439284324645996, "step": 15454 }, { "epoch": 2.35809326171875e-05, "step": 15454, "training_step_time": 0.1127784252166748 }, { "epoch": 2.358245849609375e-05, "model_forward_time": 0.024183273315429688, "step": 15455 }, { "epoch": 2.358245849609375e-05, "step": 15455, "training_step_time": 0.1116793155670166 }, { "epoch": 2.3583984375e-05, "model_forward_time": 0.02536940574645996, "step": 15456 }, { "epoch": 2.3583984375e-05, "step": 15456, "training_step_time": 0.10589981079101562 }, { "epoch": 2.358551025390625e-05, "model_forward_time": 0.02509307861328125, "step": 15457 }, { "epoch": 2.358551025390625e-05, "step": 15457, "training_step_time": 0.1077585220336914 }, { "epoch": 2.35870361328125e-05, "model_forward_time": 0.02546548843383789, "step": 15458 }, { "epoch": 2.35870361328125e-05, "step": 15458, "training_step_time": 0.10547542572021484 }, { "epoch": 2.358856201171875e-05, "model_forward_time": 0.025015830993652344, "step": 15459 }, { "epoch": 2.358856201171875e-05, "step": 15459, "training_step_time": 0.10639286041259766 }, { "epoch": 2.3590087890625e-05, "grad_norm": 0.3070053458213806, "learning_rate": 5.1598081946136843e-05, "loss": 0.0119, "step": 15460 }, { "epoch": 2.3590087890625e-05, "model_forward_time": 0.025297880172729492, "step": 15460 }, { "epoch": 2.3590087890625e-05, "step": 15460, "training_step_time": 0.1054983139038086 }, { "epoch": 2.359161376953125e-05, "model_forward_time": 0.02512669563293457, "step": 15461 }, { "epoch": 2.359161376953125e-05, "step": 15461, "training_step_time": 0.10627937316894531 }, { "epoch": 2.35931396484375e-05, "model_forward_time": 0.025159358978271484, "step": 15462 }, { "epoch": 2.35931396484375e-05, "step": 15462, "training_step_time": 0.10619521141052246 }, { "epoch": 2.359466552734375e-05, "model_forward_time": 0.024899721145629883, "step": 15463 }, { "epoch": 2.359466552734375e-05, "step": 15463, "training_step_time": 0.10640764236450195 }, { "epoch": 2.359619140625e-05, "model_forward_time": 0.024675607681274414, "step": 15464 }, { "epoch": 2.359619140625e-05, "step": 15464, "training_step_time": 0.1460740566253662 }, { "epoch": 2.359771728515625e-05, "model_forward_time": 0.02475285530090332, "step": 15465 }, { "epoch": 2.359771728515625e-05, "step": 15465, "training_step_time": 0.1727917194366455 }, { "epoch": 2.35992431640625e-05, "model_forward_time": 0.0242769718170166, "step": 15466 }, { "epoch": 2.35992431640625e-05, "step": 15466, "training_step_time": 0.16758012771606445 }, { "epoch": 2.360076904296875e-05, "model_forward_time": 0.023667335510253906, "step": 15467 }, { "epoch": 2.360076904296875e-05, "step": 15467, "training_step_time": 0.15587496757507324 }, { "epoch": 2.3602294921875e-05, "model_forward_time": 0.023774385452270508, "step": 15468 }, { "epoch": 2.3602294921875e-05, "step": 15468, "training_step_time": 0.14188408851623535 }, { "epoch": 2.360382080078125e-05, "model_forward_time": 0.0244598388671875, "step": 15469 }, { "epoch": 2.360382080078125e-05, "step": 15469, "training_step_time": 0.22607922554016113 }, { "epoch": 2.36053466796875e-05, "grad_norm": 0.5303618907928467, "learning_rate": 5.154299348458803e-05, "loss": 0.0125, "step": 15470 }, { "epoch": 2.36053466796875e-05, "model_forward_time": 0.024691343307495117, "step": 15470 }, { "epoch": 2.36053466796875e-05, "step": 15470, "training_step_time": 0.12068867683410645 }, { "epoch": 2.360687255859375e-05, "model_forward_time": 0.02361154556274414, "step": 15471 }, { "epoch": 2.360687255859375e-05, "step": 15471, "training_step_time": 0.1164555549621582 }, { "epoch": 2.36083984375e-05, "model_forward_time": 0.025020599365234375, "step": 15472 }, { "epoch": 2.36083984375e-05, "step": 15472, "training_step_time": 0.12052369117736816 }, { "epoch": 2.360992431640625e-05, "model_forward_time": 0.025152921676635742, "step": 15473 }, { "epoch": 2.360992431640625e-05, "step": 15473, "training_step_time": 0.1830449104309082 }, { "epoch": 2.36114501953125e-05, "model_forward_time": 0.02413034439086914, "step": 15474 }, { "epoch": 2.36114501953125e-05, "step": 15474, "training_step_time": 0.11108589172363281 }, { "epoch": 2.361297607421875e-05, "model_forward_time": 0.0240480899810791, "step": 15475 }, { "epoch": 2.361297607421875e-05, "step": 15475, "training_step_time": 0.10810565948486328 }, { "epoch": 2.3614501953125e-05, "model_forward_time": 0.024749040603637695, "step": 15476 }, { "epoch": 2.3614501953125e-05, "step": 15476, "training_step_time": 0.10934638977050781 }, { "epoch": 2.361602783203125e-05, "model_forward_time": 0.025372743606567383, "step": 15477 }, { "epoch": 2.361602783203125e-05, "step": 15477, "training_step_time": 0.10945510864257812 }, { "epoch": 2.36175537109375e-05, "model_forward_time": 0.024855375289916992, "step": 15478 }, { "epoch": 2.36175537109375e-05, "step": 15478, "training_step_time": 0.10562300682067871 }, { "epoch": 2.361907958984375e-05, "model_forward_time": 0.025145769119262695, "step": 15479 }, { "epoch": 2.361907958984375e-05, "step": 15479, "training_step_time": 0.10640716552734375 }, { "epoch": 2.362060546875e-05, "grad_norm": 0.2705172896385193, "learning_rate": 5.148790314815663e-05, "loss": 0.0162, "step": 15480 }, { "epoch": 2.362060546875e-05, "model_forward_time": 0.02538013458251953, "step": 15480 }, { "epoch": 2.362060546875e-05, "step": 15480, "training_step_time": 0.10561037063598633 }, { "epoch": 2.362213134765625e-05, "model_forward_time": 0.02546858787536621, "step": 15481 }, { "epoch": 2.362213134765625e-05, "step": 15481, "training_step_time": 0.10766482353210449 }, { "epoch": 2.36236572265625e-05, "model_forward_time": 0.025583744049072266, "step": 15482 }, { "epoch": 2.36236572265625e-05, "step": 15482, "training_step_time": 0.10739254951477051 }, { "epoch": 2.362518310546875e-05, "model_forward_time": 0.025444507598876953, "step": 15483 }, { "epoch": 2.362518310546875e-05, "step": 15483, "training_step_time": 0.10892939567565918 }, { "epoch": 2.3626708984375e-05, "model_forward_time": 0.02527332305908203, "step": 15484 }, { "epoch": 2.3626708984375e-05, "step": 15484, "training_step_time": 0.10937261581420898 }, { "epoch": 2.362823486328125e-05, "model_forward_time": 0.02500295639038086, "step": 15485 }, { "epoch": 2.362823486328125e-05, "step": 15485, "training_step_time": 0.10414910316467285 }, { "epoch": 2.36297607421875e-05, "model_forward_time": 0.025390148162841797, "step": 15486 }, { "epoch": 2.36297607421875e-05, "step": 15486, "training_step_time": 0.21254229545593262 }, { "epoch": 2.363128662109375e-05, "model_forward_time": 0.025146961212158203, "step": 15487 }, { "epoch": 2.363128662109375e-05, "step": 15487, "training_step_time": 0.10316276550292969 }, { "epoch": 2.36328125e-05, "model_forward_time": 0.024444580078125, "step": 15488 }, { "epoch": 2.36328125e-05, "step": 15488, "training_step_time": 0.21838974952697754 }, { "epoch": 2.363433837890625e-05, "model_forward_time": 0.024268388748168945, "step": 15489 }, { "epoch": 2.363433837890625e-05, "step": 15489, "training_step_time": 0.12634849548339844 }, { "epoch": 2.36358642578125e-05, "grad_norm": 0.456437885761261, "learning_rate": 5.143281100378261e-05, "loss": 0.0164, "step": 15490 }, { "epoch": 2.36358642578125e-05, "model_forward_time": 0.02475738525390625, "step": 15490 }, { "epoch": 2.36358642578125e-05, "step": 15490, "training_step_time": 0.1128537654876709 }, { "epoch": 2.363739013671875e-05, "model_forward_time": 0.025415897369384766, "step": 15491 }, { "epoch": 2.363739013671875e-05, "step": 15491, "training_step_time": 0.12503910064697266 }, { "epoch": 2.3638916015625e-05, "model_forward_time": 0.025249719619750977, "step": 15492 }, { "epoch": 2.3638916015625e-05, "step": 15492, "training_step_time": 0.11187577247619629 }, { "epoch": 2.364044189453125e-05, "model_forward_time": 0.02574634552001953, "step": 15493 }, { "epoch": 2.364044189453125e-05, "step": 15493, "training_step_time": 0.1807117462158203 }, { "epoch": 2.36419677734375e-05, "model_forward_time": 0.024758577346801758, "step": 15494 }, { "epoch": 2.36419677734375e-05, "step": 15494, "training_step_time": 0.10839438438415527 }, { "epoch": 2.364349365234375e-05, "model_forward_time": 0.024535417556762695, "step": 15495 }, { "epoch": 2.364349365234375e-05, "step": 15495, "training_step_time": 0.11020088195800781 }, { "epoch": 2.364501953125e-05, "model_forward_time": 0.024944305419921875, "step": 15496 }, { "epoch": 2.364501953125e-05, "step": 15496, "training_step_time": 0.12137317657470703 }, { "epoch": 2.364654541015625e-05, "model_forward_time": 0.025732994079589844, "step": 15497 }, { "epoch": 2.364654541015625e-05, "step": 15497, "training_step_time": 0.11068940162658691 }, { "epoch": 2.36480712890625e-05, "model_forward_time": 0.025120973587036133, "step": 15498 }, { "epoch": 2.36480712890625e-05, "step": 15498, "training_step_time": 0.11432290077209473 }, { "epoch": 2.364959716796875e-05, "model_forward_time": 0.024804115295410156, "step": 15499 }, { "epoch": 2.364959716796875e-05, "step": 15499, "training_step_time": 0.16653013229370117 }, { "epoch": 2.3651123046875e-05, "grad_norm": 0.25273188948631287, "learning_rate": 5.1377717118408105e-05, "loss": 0.0126, "step": 15500 }, { "epoch": 2.3651123046875e-05, "model_forward_time": 0.02519965171813965, "step": 15500 }, { "epoch": 2.3651123046875e-05, "step": 15500, "training_step_time": 0.10695862770080566 }, { "epoch": 2.365264892578125e-05, "model_forward_time": 0.024699926376342773, "step": 15501 }, { "epoch": 2.365264892578125e-05, "step": 15501, "training_step_time": 0.10287117958068848 }, { "epoch": 2.36541748046875e-05, "model_forward_time": 0.025287389755249023, "step": 15502 }, { "epoch": 2.36541748046875e-05, "step": 15502, "training_step_time": 0.10683393478393555 }, { "epoch": 2.365570068359375e-05, "model_forward_time": 0.025455474853515625, "step": 15503 }, { "epoch": 2.365570068359375e-05, "step": 15503, "training_step_time": 0.10776019096374512 }, { "epoch": 2.36572265625e-05, "model_forward_time": 0.02508997917175293, "step": 15504 }, { "epoch": 2.36572265625e-05, "step": 15504, "training_step_time": 0.10859990119934082 }, { "epoch": 2.365875244140625e-05, "model_forward_time": 0.02528238296508789, "step": 15505 }, { "epoch": 2.365875244140625e-05, "step": 15505, "training_step_time": 0.1126713752746582 }, { "epoch": 2.36602783203125e-05, "model_forward_time": 0.02522754669189453, "step": 15506 }, { "epoch": 2.36602783203125e-05, "step": 15506, "training_step_time": 0.10576677322387695 }, { "epoch": 2.366180419921875e-05, "model_forward_time": 0.02542281150817871, "step": 15507 }, { "epoch": 2.366180419921875e-05, "step": 15507, "training_step_time": 0.1065669059753418 }, { "epoch": 2.3663330078125e-05, "model_forward_time": 0.02556920051574707, "step": 15508 }, { "epoch": 2.3663330078125e-05, "step": 15508, "training_step_time": 0.10675168037414551 }, { "epoch": 2.366485595703125e-05, "model_forward_time": 0.025197267532348633, "step": 15509 }, { "epoch": 2.366485595703125e-05, "step": 15509, "training_step_time": 0.10640859603881836 }, { "epoch": 2.36663818359375e-05, "grad_norm": 0.23745200037956238, "learning_rate": 5.132262155897739e-05, "loss": 0.0165, "step": 15510 }, { "epoch": 2.36663818359375e-05, "model_forward_time": 0.025296449661254883, "step": 15510 }, { "epoch": 2.36663818359375e-05, "step": 15510, "training_step_time": 0.10574531555175781 }, { "epoch": 2.366790771484375e-05, "model_forward_time": 0.025619983673095703, "step": 15511 }, { "epoch": 2.366790771484375e-05, "step": 15511, "training_step_time": 0.10712242126464844 }, { "epoch": 2.366943359375e-05, "model_forward_time": 0.024913549423217773, "step": 15512 }, { "epoch": 2.366943359375e-05, "step": 15512, "training_step_time": 0.1060788631439209 }, { "epoch": 2.367095947265625e-05, "model_forward_time": 0.025278568267822266, "step": 15513 }, { "epoch": 2.367095947265625e-05, "step": 15513, "training_step_time": 0.10665512084960938 }, { "epoch": 2.36724853515625e-05, "model_forward_time": 0.025328874588012695, "step": 15514 }, { "epoch": 2.36724853515625e-05, "step": 15514, "training_step_time": 0.10864830017089844 }, { "epoch": 2.367401123046875e-05, "model_forward_time": 0.02547430992126465, "step": 15515 }, { "epoch": 2.367401123046875e-05, "step": 15515, "training_step_time": 0.13143396377563477 }, { "epoch": 2.3675537109375e-05, "model_forward_time": 0.026354074478149414, "step": 15516 }, { "epoch": 2.3675537109375e-05, "step": 15516, "training_step_time": 0.1341395378112793 }, { "epoch": 2.367706298828125e-05, "model_forward_time": 0.024558067321777344, "step": 15517 }, { "epoch": 2.367706298828125e-05, "step": 15517, "training_step_time": 0.11649513244628906 }, { "epoch": 2.36785888671875e-05, "model_forward_time": 0.026198863983154297, "step": 15518 }, { "epoch": 2.36785888671875e-05, "step": 15518, "training_step_time": 0.11956572532653809 }, { "epoch": 2.368011474609375e-05, "model_forward_time": 0.02507328987121582, "step": 15519 }, { "epoch": 2.368011474609375e-05, "step": 15519, "training_step_time": 0.1077735424041748 }, { "epoch": 2.3681640625e-05, "grad_norm": 0.2525326609611511, "learning_rate": 5.1267524392436784e-05, "loss": 0.0076, "step": 15520 }, { "epoch": 2.3681640625e-05, "model_forward_time": 0.025011301040649414, "step": 15520 }, { "epoch": 2.3681640625e-05, "step": 15520, "training_step_time": 0.15593647956848145 }, { "epoch": 2.368316650390625e-05, "model_forward_time": 0.024903059005737305, "step": 15521 }, { "epoch": 2.368316650390625e-05, "step": 15521, "training_step_time": 0.14361357688903809 }, { "epoch": 2.36846923828125e-05, "model_forward_time": 0.02489614486694336, "step": 15522 }, { "epoch": 2.36846923828125e-05, "step": 15522, "training_step_time": 0.10280609130859375 }, { "epoch": 2.368621826171875e-05, "model_forward_time": 0.025716304779052734, "step": 15523 }, { "epoch": 2.368621826171875e-05, "step": 15523, "training_step_time": 0.10572409629821777 }, { "epoch": 2.3687744140625e-05, "model_forward_time": 0.025220394134521484, "step": 15524 }, { "epoch": 2.3687744140625e-05, "step": 15524, "training_step_time": 0.10412740707397461 }, { "epoch": 2.368927001953125e-05, "model_forward_time": 0.024684906005859375, "step": 15525 }, { "epoch": 2.368927001953125e-05, "step": 15525, "training_step_time": 0.10775518417358398 }, { "epoch": 2.36907958984375e-05, "model_forward_time": 0.025216102600097656, "step": 15526 }, { "epoch": 2.36907958984375e-05, "step": 15526, "training_step_time": 0.10562849044799805 }, { "epoch": 2.369232177734375e-05, "model_forward_time": 0.02550339698791504, "step": 15527 }, { "epoch": 2.369232177734375e-05, "step": 15527, "training_step_time": 0.10631871223449707 }, { "epoch": 2.369384765625e-05, "model_forward_time": 0.02595376968383789, "step": 15528 }, { "epoch": 2.369384765625e-05, "step": 15528, "training_step_time": 0.10671257972717285 }, { "epoch": 2.369537353515625e-05, "model_forward_time": 0.02534341812133789, "step": 15529 }, { "epoch": 2.369537353515625e-05, "step": 15529, "training_step_time": 0.1061403751373291 }, { "epoch": 2.36968994140625e-05, "grad_norm": 0.19660969078540802, "learning_rate": 5.12124256857345e-05, "loss": 0.0084, "step": 15530 }, { "epoch": 2.36968994140625e-05, "model_forward_time": 0.025191307067871094, "step": 15530 }, { "epoch": 2.36968994140625e-05, "step": 15530, "training_step_time": 0.1143960952758789 }, { "epoch": 2.369842529296875e-05, "model_forward_time": 0.025026321411132812, "step": 15531 }, { "epoch": 2.369842529296875e-05, "step": 15531, "training_step_time": 0.1423177719116211 }, { "epoch": 2.3699951171875e-05, "model_forward_time": 0.02529621124267578, "step": 15532 }, { "epoch": 2.3699951171875e-05, "step": 15532, "training_step_time": 0.1184380054473877 }, { "epoch": 2.370147705078125e-05, "model_forward_time": 0.025246381759643555, "step": 15533 }, { "epoch": 2.370147705078125e-05, "step": 15533, "training_step_time": 0.11681675910949707 }, { "epoch": 2.37030029296875e-05, "model_forward_time": 0.025043487548828125, "step": 15534 }, { "epoch": 2.37030029296875e-05, "step": 15534, "training_step_time": 0.12097930908203125 }, { "epoch": 2.370452880859375e-05, "model_forward_time": 0.025690555572509766, "step": 15535 }, { "epoch": 2.370452880859375e-05, "step": 15535, "training_step_time": 0.18161416053771973 }, { "epoch": 2.37060546875e-05, "model_forward_time": 0.024939537048339844, "step": 15536 }, { "epoch": 2.37060546875e-05, "step": 15536, "training_step_time": 0.15705394744873047 }, { "epoch": 2.370758056640625e-05, "model_forward_time": 0.024589061737060547, "step": 15537 }, { "epoch": 2.370758056640625e-05, "step": 15537, "training_step_time": 0.14492130279541016 }, { "epoch": 2.37091064453125e-05, "model_forward_time": 0.024574995040893555, "step": 15538 }, { "epoch": 2.37091064453125e-05, "step": 15538, "training_step_time": 0.19661617279052734 }, { "epoch": 2.371063232421875e-05, "model_forward_time": 0.024836063385009766, "step": 15539 }, { "epoch": 2.371063232421875e-05, "step": 15539, "training_step_time": 0.11748814582824707 }, { "epoch": 2.3712158203125e-05, "grad_norm": 0.22174404561519623, "learning_rate": 5.1157325505820694e-05, "loss": 0.0107, "step": 15540 }, { "epoch": 2.3712158203125e-05, "model_forward_time": 0.02497577667236328, "step": 15540 }, { "epoch": 2.3712158203125e-05, "step": 15540, "training_step_time": 0.17559480667114258 }, { "epoch": 2.371368408203125e-05, "model_forward_time": 0.027292490005493164, "step": 15541 }, { "epoch": 2.371368408203125e-05, "step": 15541, "training_step_time": 0.12818336486816406 }, { "epoch": 2.37152099609375e-05, "model_forward_time": 0.024483442306518555, "step": 15542 }, { "epoch": 2.37152099609375e-05, "step": 15542, "training_step_time": 0.13470458984375 }, { "epoch": 2.371673583984375e-05, "model_forward_time": 0.0247952938079834, "step": 15543 }, { "epoch": 2.371673583984375e-05, "step": 15543, "training_step_time": 0.10858464241027832 }, { "epoch": 2.371826171875e-05, "model_forward_time": 0.025104284286499023, "step": 15544 }, { "epoch": 2.371826171875e-05, "step": 15544, "training_step_time": 0.1763756275177002 }, { "epoch": 2.371978759765625e-05, "model_forward_time": 0.02468395233154297, "step": 15545 }, { "epoch": 2.371978759765625e-05, "step": 15545, "training_step_time": 0.13909649848937988 }, { "epoch": 2.37213134765625e-05, "model_forward_time": 0.024155616760253906, "step": 15546 }, { "epoch": 2.37213134765625e-05, "step": 15546, "training_step_time": 0.11394286155700684 }, { "epoch": 2.372283935546875e-05, "model_forward_time": 0.024597644805908203, "step": 15547 }, { "epoch": 2.372283935546875e-05, "step": 15547, "training_step_time": 0.10602164268493652 }, { "epoch": 2.3724365234375e-05, "model_forward_time": 0.02496170997619629, "step": 15548 }, { "epoch": 2.3724365234375e-05, "step": 15548, "training_step_time": 0.104339599609375 }, { "epoch": 2.372589111328125e-05, "model_forward_time": 0.024941444396972656, "step": 15549 }, { "epoch": 2.372589111328125e-05, "step": 15549, "training_step_time": 0.10719060897827148 }, { "epoch": 2.37274169921875e-05, "grad_norm": 0.3350104093551636, "learning_rate": 5.110222391964728e-05, "loss": 0.0111, "step": 15550 }, { "epoch": 2.37274169921875e-05, "model_forward_time": 0.02505970001220703, "step": 15550 }, { "epoch": 2.37274169921875e-05, "step": 15550, "training_step_time": 0.1048574447631836 }, { "epoch": 2.372894287109375e-05, "model_forward_time": 0.025135040283203125, "step": 15551 }, { "epoch": 2.372894287109375e-05, "step": 15551, "training_step_time": 0.10508251190185547 }, { "epoch": 2.373046875e-05, "model_forward_time": 0.0250399112701416, "step": 15552 }, { "epoch": 2.373046875e-05, "step": 15552, "training_step_time": 0.10562539100646973 }, { "epoch": 2.373199462890625e-05, "model_forward_time": 0.02492499351501465, "step": 15553 }, { "epoch": 2.373199462890625e-05, "step": 15553, "training_step_time": 0.10793876647949219 }, { "epoch": 2.37335205078125e-05, "model_forward_time": 0.025185585021972656, "step": 15554 }, { "epoch": 2.37335205078125e-05, "step": 15554, "training_step_time": 0.10675525665283203 }, { "epoch": 2.373504638671875e-05, "model_forward_time": 0.024840116500854492, "step": 15555 }, { "epoch": 2.373504638671875e-05, "step": 15555, "training_step_time": 0.10751819610595703 }, { "epoch": 2.3736572265625e-05, "model_forward_time": 0.024757862091064453, "step": 15556 }, { "epoch": 2.3736572265625e-05, "step": 15556, "training_step_time": 0.10419392585754395 }, { "epoch": 2.373809814453125e-05, "model_forward_time": 0.02526688575744629, "step": 15557 }, { "epoch": 2.373809814453125e-05, "step": 15557, "training_step_time": 0.1046595573425293 }, { "epoch": 2.37396240234375e-05, "model_forward_time": 0.025385618209838867, "step": 15558 }, { "epoch": 2.37396240234375e-05, "step": 15558, "training_step_time": 0.10466217994689941 }, { "epoch": 2.374114990234375e-05, "model_forward_time": 0.0250852108001709, "step": 15559 }, { "epoch": 2.374114990234375e-05, "step": 15559, "training_step_time": 0.10519838333129883 }, { "epoch": 2.374267578125e-05, "grad_norm": 0.3381047546863556, "learning_rate": 5.104712099416785e-05, "loss": 0.0144, "step": 15560 }, { "epoch": 2.374267578125e-05, "model_forward_time": 0.025135040283203125, "step": 15560 }, { "epoch": 2.374267578125e-05, "step": 15560, "training_step_time": 0.10407280921936035 }, { "epoch": 2.374420166015625e-05, "model_forward_time": 0.025406837463378906, "step": 15561 }, { "epoch": 2.374420166015625e-05, "step": 15561, "training_step_time": 0.18298101425170898 }, { "epoch": 2.37457275390625e-05, "model_forward_time": 0.024218082427978516, "step": 15562 }, { "epoch": 2.37457275390625e-05, "step": 15562, "training_step_time": 0.14161419868469238 }, { "epoch": 2.374725341796875e-05, "model_forward_time": 0.024161577224731445, "step": 15563 }, { "epoch": 2.374725341796875e-05, "step": 15563, "training_step_time": 0.1076512336730957 }, { "epoch": 2.3748779296875e-05, "model_forward_time": 0.025122880935668945, "step": 15564 }, { "epoch": 2.3748779296875e-05, "step": 15564, "training_step_time": 0.10458135604858398 }, { "epoch": 2.375030517578125e-05, "model_forward_time": 0.02505016326904297, "step": 15565 }, { "epoch": 2.375030517578125e-05, "step": 15565, "training_step_time": 0.11544489860534668 }, { "epoch": 2.37518310546875e-05, "model_forward_time": 0.025093793869018555, "step": 15566 }, { "epoch": 2.37518310546875e-05, "step": 15566, "training_step_time": 0.10781002044677734 }, { "epoch": 2.375335693359375e-05, "model_forward_time": 0.02479076385498047, "step": 15567 }, { "epoch": 2.375335693359375e-05, "step": 15567, "training_step_time": 0.19495177268981934 }, { "epoch": 2.37548828125e-05, "model_forward_time": 0.024281978607177734, "step": 15568 }, { "epoch": 2.37548828125e-05, "step": 15568, "training_step_time": 0.10193657875061035 }, { "epoch": 2.375640869140625e-05, "model_forward_time": 0.024460792541503906, "step": 15569 }, { "epoch": 2.375640869140625e-05, "step": 15569, "training_step_time": 0.10332179069519043 }, { "epoch": 2.37579345703125e-05, "grad_norm": 0.4016200304031372, "learning_rate": 5.0992016796337686e-05, "loss": 0.0166, "step": 15570 }, { "epoch": 2.37579345703125e-05, "model_forward_time": 0.02515554428100586, "step": 15570 }, { "epoch": 2.37579345703125e-05, "step": 15570, "training_step_time": 0.10767722129821777 }, { "epoch": 2.375946044921875e-05, "model_forward_time": 0.02510213851928711, "step": 15571 }, { "epoch": 2.375946044921875e-05, "step": 15571, "training_step_time": 0.10695695877075195 }, { "epoch": 2.3760986328125e-05, "model_forward_time": 0.02525019645690918, "step": 15572 }, { "epoch": 2.3760986328125e-05, "step": 15572, "training_step_time": 0.17679834365844727 }, { "epoch": 2.376251220703125e-05, "model_forward_time": 0.02431774139404297, "step": 15573 }, { "epoch": 2.376251220703125e-05, "step": 15573, "training_step_time": 0.18374896049499512 }, { "epoch": 2.37640380859375e-05, "model_forward_time": 0.02478766441345215, "step": 15574 }, { "epoch": 2.37640380859375e-05, "step": 15574, "training_step_time": 0.17661786079406738 }, { "epoch": 2.376556396484375e-05, "model_forward_time": 0.023332834243774414, "step": 15575 }, { "epoch": 2.376556396484375e-05, "step": 15575, "training_step_time": 0.15988945960998535 }, { "epoch": 2.376708984375e-05, "model_forward_time": 0.023907899856567383, "step": 15576 }, { "epoch": 2.376708984375e-05, "step": 15576, "training_step_time": 0.15201449394226074 }, { "epoch": 2.376861572265625e-05, "model_forward_time": 0.024202585220336914, "step": 15577 }, { "epoch": 2.376861572265625e-05, "step": 15577, "training_step_time": 0.1348402500152588 }, { "epoch": 2.37701416015625e-05, "model_forward_time": 0.02507925033569336, "step": 15578 }, { "epoch": 2.37701416015625e-05, "step": 15578, "training_step_time": 0.12445616722106934 }, { "epoch": 2.377166748046875e-05, "model_forward_time": 0.024689912796020508, "step": 15579 }, { "epoch": 2.377166748046875e-05, "step": 15579, "training_step_time": 0.17964577674865723 }, { "epoch": 2.3773193359375e-05, "grad_norm": 0.35026347637176514, "learning_rate": 5.093691139311356e-05, "loss": 0.0204, "step": 15580 }, { "epoch": 2.3773193359375e-05, "model_forward_time": 0.025986433029174805, "step": 15580 }, { "epoch": 2.3773193359375e-05, "step": 15580, "training_step_time": 0.14970684051513672 }, { "epoch": 2.377471923828125e-05, "model_forward_time": 0.024339914321899414, "step": 15581 }, { "epoch": 2.377471923828125e-05, "step": 15581, "training_step_time": 0.12225461006164551 }, { "epoch": 2.37762451171875e-05, "model_forward_time": 0.024596691131591797, "step": 15582 }, { "epoch": 2.37762451171875e-05, "step": 15582, "training_step_time": 0.13561606407165527 }, { "epoch": 2.377777099609375e-05, "model_forward_time": 0.024923086166381836, "step": 15583 }, { "epoch": 2.377777099609375e-05, "step": 15583, "training_step_time": 0.17176580429077148 }, { "epoch": 2.3779296875e-05, "model_forward_time": 0.024042367935180664, "step": 15584 }, { "epoch": 2.3779296875e-05, "step": 15584, "training_step_time": 0.2165834903717041 }, { "epoch": 2.378082275390625e-05, "model_forward_time": 0.024832963943481445, "step": 15585 }, { "epoch": 2.378082275390625e-05, "step": 15585, "training_step_time": 0.11080408096313477 }, { "epoch": 2.37823486328125e-05, "model_forward_time": 0.02445220947265625, "step": 15586 }, { "epoch": 2.37823486328125e-05, "step": 15586, "training_step_time": 0.14115309715270996 }, { "epoch": 2.378387451171875e-05, "model_forward_time": 0.025157928466796875, "step": 15587 }, { "epoch": 2.378387451171875e-05, "step": 15587, "training_step_time": 0.16283750534057617 }, { "epoch": 2.3785400390625e-05, "model_forward_time": 0.025167226791381836, "step": 15588 }, { "epoch": 2.3785400390625e-05, "step": 15588, "training_step_time": 0.2176222801208496 }, { "epoch": 2.378692626953125e-05, "model_forward_time": 0.024124622344970703, "step": 15589 }, { "epoch": 2.378692626953125e-05, "step": 15589, "training_step_time": 0.10517382621765137 }, { "epoch": 2.37884521484375e-05, "grad_norm": 0.2896489202976227, "learning_rate": 5.088180485145378e-05, "loss": 0.0114, "step": 15590 }, { "epoch": 2.37884521484375e-05, "model_forward_time": 0.024439334869384766, "step": 15590 }, { "epoch": 2.37884521484375e-05, "step": 15590, "training_step_time": 0.10289835929870605 }, { "epoch": 2.378997802734375e-05, "model_forward_time": 0.025264739990234375, "step": 15591 }, { "epoch": 2.378997802734375e-05, "step": 15591, "training_step_time": 0.1031949520111084 }, { "epoch": 2.379150390625e-05, "model_forward_time": 0.025368213653564453, "step": 15592 }, { "epoch": 2.379150390625e-05, "step": 15592, "training_step_time": 0.10387086868286133 }, { "epoch": 2.379302978515625e-05, "model_forward_time": 0.02516484260559082, "step": 15593 }, { "epoch": 2.379302978515625e-05, "step": 15593, "training_step_time": 0.10605406761169434 }, { "epoch": 2.37945556640625e-05, "model_forward_time": 0.025130033493041992, "step": 15594 }, { "epoch": 2.37945556640625e-05, "step": 15594, "training_step_time": 0.10422801971435547 }, { "epoch": 2.379608154296875e-05, "model_forward_time": 0.02509903907775879, "step": 15595 }, { "epoch": 2.379608154296875e-05, "step": 15595, "training_step_time": 0.11664748191833496 }, { "epoch": 2.3797607421875e-05, "model_forward_time": 0.025197744369506836, "step": 15596 }, { "epoch": 2.3797607421875e-05, "step": 15596, "training_step_time": 0.13244318962097168 }, { "epoch": 2.379913330078125e-05, "model_forward_time": 0.02502298355102539, "step": 15597 }, { "epoch": 2.379913330078125e-05, "step": 15597, "training_step_time": 0.1247413158416748 }, { "epoch": 2.38006591796875e-05, "model_forward_time": 0.024704456329345703, "step": 15598 }, { "epoch": 2.38006591796875e-05, "step": 15598, "training_step_time": 0.1258080005645752 }, { "epoch": 2.380218505859375e-05, "model_forward_time": 0.02482008934020996, "step": 15599 }, { "epoch": 2.380218505859375e-05, "step": 15599, "training_step_time": 0.12758708000183105 }, { "epoch": 2.38037109375e-05, "grad_norm": 0.16501736640930176, "learning_rate": 5.0826697238317935e-05, "loss": 0.0101, "step": 15600 }, { "epoch": 2.38037109375e-05, "model_forward_time": 0.024660587310791016, "step": 15600 }, { "epoch": 2.38037109375e-05, "step": 15600, "training_step_time": 0.1298818588256836 }, { "epoch": 2.380523681640625e-05, "model_forward_time": 0.02463245391845703, "step": 15601 }, { "epoch": 2.380523681640625e-05, "step": 15601, "training_step_time": 0.1278674602508545 }, { "epoch": 2.38067626953125e-05, "model_forward_time": 0.025204896926879883, "step": 15602 }, { "epoch": 2.38067626953125e-05, "step": 15602, "training_step_time": 0.12260246276855469 }, { "epoch": 2.380828857421875e-05, "model_forward_time": 0.02535843849182129, "step": 15603 }, { "epoch": 2.380828857421875e-05, "step": 15603, "training_step_time": 0.1605980396270752 }, { "epoch": 2.3809814453125e-05, "model_forward_time": 0.024189233779907227, "step": 15604 }, { "epoch": 2.3809814453125e-05, "step": 15604, "training_step_time": 0.13411641120910645 }, { "epoch": 2.381134033203125e-05, "model_forward_time": 0.024723529815673828, "step": 15605 }, { "epoch": 2.381134033203125e-05, "step": 15605, "training_step_time": 0.11251378059387207 }, { "epoch": 2.38128662109375e-05, "model_forward_time": 0.025576353073120117, "step": 15606 }, { "epoch": 2.38128662109375e-05, "step": 15606, "training_step_time": 0.11512970924377441 }, { "epoch": 2.381439208984375e-05, "model_forward_time": 0.025397539138793945, "step": 15607 }, { "epoch": 2.381439208984375e-05, "step": 15607, "training_step_time": 0.11396622657775879 }, { "epoch": 2.381591796875e-05, "model_forward_time": 0.02524399757385254, "step": 15608 }, { "epoch": 2.381591796875e-05, "step": 15608, "training_step_time": 0.1091468334197998 }, { "epoch": 2.381744384765625e-05, "model_forward_time": 0.02525162696838379, "step": 15609 }, { "epoch": 2.381744384765625e-05, "step": 15609, "training_step_time": 0.18956470489501953 }, { "epoch": 2.38189697265625e-05, "grad_norm": 0.2760356068611145, "learning_rate": 5.077158862066699e-05, "loss": 0.0159, "step": 15610 }, { "epoch": 2.38189697265625e-05, "model_forward_time": 0.024942636489868164, "step": 15610 }, { "epoch": 2.38189697265625e-05, "step": 15610, "training_step_time": 0.10869026184082031 }, { "epoch": 2.382049560546875e-05, "model_forward_time": 0.024528980255126953, "step": 15611 }, { "epoch": 2.382049560546875e-05, "step": 15611, "training_step_time": 0.10511445999145508 }, { "epoch": 2.3822021484375e-05, "model_forward_time": 0.025424957275390625, "step": 15612 }, { "epoch": 2.3822021484375e-05, "step": 15612, "training_step_time": 0.10639476776123047 }, { "epoch": 2.382354736328125e-05, "model_forward_time": 0.02530527114868164, "step": 15613 }, { "epoch": 2.382354736328125e-05, "step": 15613, "training_step_time": 0.1054840087890625 }, { "epoch": 2.38250732421875e-05, "model_forward_time": 0.025310754776000977, "step": 15614 }, { "epoch": 2.38250732421875e-05, "step": 15614, "training_step_time": 0.1055150032043457 }, { "epoch": 2.382659912109375e-05, "model_forward_time": 0.025423049926757812, "step": 15615 }, { "epoch": 2.382659912109375e-05, "step": 15615, "training_step_time": 0.10662078857421875 }, { "epoch": 2.3828125e-05, "model_forward_time": 0.025767087936401367, "step": 15616 }, { "epoch": 2.3828125e-05, "step": 15616, "training_step_time": 0.1063082218170166 }, { "epoch": 2.382965087890625e-05, "model_forward_time": 0.025623798370361328, "step": 15617 }, { "epoch": 2.382965087890625e-05, "step": 15617, "training_step_time": 0.10524296760559082 }, { "epoch": 2.38311767578125e-05, "model_forward_time": 0.025317907333374023, "step": 15618 }, { "epoch": 2.38311767578125e-05, "step": 15618, "training_step_time": 0.10434246063232422 }, { "epoch": 2.383270263671875e-05, "model_forward_time": 0.025449752807617188, "step": 15619 }, { "epoch": 2.383270263671875e-05, "step": 15619, "training_step_time": 0.10587954521179199 }, { "epoch": 2.3834228515625e-05, "grad_norm": 0.2215505689382553, "learning_rate": 5.071647906546312e-05, "loss": 0.0085, "step": 15620 }, { "epoch": 2.3834228515625e-05, "model_forward_time": 0.025365352630615234, "step": 15620 }, { "epoch": 2.3834228515625e-05, "step": 15620, "training_step_time": 0.10589313507080078 }, { "epoch": 2.383575439453125e-05, "model_forward_time": 0.025188922882080078, "step": 15621 }, { "epoch": 2.383575439453125e-05, "step": 15621, "training_step_time": 0.10543441772460938 }, { "epoch": 2.38372802734375e-05, "model_forward_time": 0.025354385375976562, "step": 15622 }, { "epoch": 2.38372802734375e-05, "step": 15622, "training_step_time": 0.2088792324066162 }, { "epoch": 2.383880615234375e-05, "model_forward_time": 0.024627685546875, "step": 15623 }, { "epoch": 2.383880615234375e-05, "step": 15623, "training_step_time": 0.20875072479248047 }, { "epoch": 2.384033203125e-05, "model_forward_time": 0.024672508239746094, "step": 15624 }, { "epoch": 2.384033203125e-05, "step": 15624, "training_step_time": 0.14546966552734375 }, { "epoch": 2.384185791015625e-05, "model_forward_time": 0.02501964569091797, "step": 15625 }, { "epoch": 2.384185791015625e-05, "step": 15625, "training_step_time": 0.12274909019470215 }, { "epoch": 2.38433837890625e-05, "model_forward_time": 0.02538156509399414, "step": 15626 }, { "epoch": 2.38433837890625e-05, "step": 15626, "training_step_time": 0.11711645126342773 }, { "epoch": 2.384490966796875e-05, "model_forward_time": 0.025151491165161133, "step": 15627 }, { "epoch": 2.384490966796875e-05, "step": 15627, "training_step_time": 0.19344210624694824 }, { "epoch": 2.3846435546875e-05, "model_forward_time": 0.02436089515686035, "step": 15628 }, { "epoch": 2.3846435546875e-05, "step": 15628, "training_step_time": 0.22031569480895996 }, { "epoch": 2.384796142578125e-05, "model_forward_time": 0.0250244140625, "step": 15629 }, { "epoch": 2.384796142578125e-05, "step": 15629, "training_step_time": 0.1169736385345459 }, { "epoch": 2.38494873046875e-05, "grad_norm": 0.26977258920669556, "learning_rate": 5.066136863966963e-05, "loss": 0.013, "step": 15630 }, { "epoch": 2.38494873046875e-05, "model_forward_time": 0.024294614791870117, "step": 15630 }, { "epoch": 2.38494873046875e-05, "step": 15630, "training_step_time": 0.1336665153503418 }, { "epoch": 2.385101318359375e-05, "model_forward_time": 0.025176525115966797, "step": 15631 }, { "epoch": 2.385101318359375e-05, "step": 15631, "training_step_time": 0.15837621688842773 }, { "epoch": 2.38525390625e-05, "model_forward_time": 0.024575471878051758, "step": 15632 }, { "epoch": 2.38525390625e-05, "step": 15632, "training_step_time": 0.22232365608215332 }, { "epoch": 2.385406494140625e-05, "model_forward_time": 0.02417302131652832, "step": 15633 }, { "epoch": 2.385406494140625e-05, "step": 15633, "training_step_time": 0.11557555198669434 }, { "epoch": 2.38555908203125e-05, "model_forward_time": 0.024214982986450195, "step": 15634 }, { "epoch": 2.38555908203125e-05, "step": 15634, "training_step_time": 0.10381317138671875 }, { "epoch": 2.385711669921875e-05, "model_forward_time": 0.02511119842529297, "step": 15635 }, { "epoch": 2.385711669921875e-05, "step": 15635, "training_step_time": 0.10708165168762207 }, { "epoch": 2.3858642578125e-05, "model_forward_time": 0.025550365447998047, "step": 15636 }, { "epoch": 2.3858642578125e-05, "step": 15636, "training_step_time": 0.1111898422241211 }, { "epoch": 2.386016845703125e-05, "model_forward_time": 0.025110483169555664, "step": 15637 }, { "epoch": 2.386016845703125e-05, "step": 15637, "training_step_time": 0.1081082820892334 }, { "epoch": 2.38616943359375e-05, "model_forward_time": 0.025223255157470703, "step": 15638 }, { "epoch": 2.38616943359375e-05, "step": 15638, "training_step_time": 0.1057896614074707 }, { "epoch": 2.386322021484375e-05, "model_forward_time": 0.0266420841217041, "step": 15639 }, { "epoch": 2.386322021484375e-05, "step": 15639, "training_step_time": 0.10665774345397949 }, { "epoch": 2.386474609375e-05, "grad_norm": 0.23721297085285187, "learning_rate": 5.0606257410250866e-05, "loss": 0.0118, "step": 15640 }, { "epoch": 2.386474609375e-05, "model_forward_time": 0.0257110595703125, "step": 15640 }, { "epoch": 2.386474609375e-05, "step": 15640, "training_step_time": 0.10663628578186035 }, { "epoch": 2.386627197265625e-05, "model_forward_time": 0.025228500366210938, "step": 15641 }, { "epoch": 2.386627197265625e-05, "step": 15641, "training_step_time": 0.10524153709411621 }, { "epoch": 2.38677978515625e-05, "model_forward_time": 0.025109052658081055, "step": 15642 }, { "epoch": 2.38677978515625e-05, "step": 15642, "training_step_time": 0.10617804527282715 }, { "epoch": 2.386932373046875e-05, "model_forward_time": 0.025444984436035156, "step": 15643 }, { "epoch": 2.386932373046875e-05, "step": 15643, "training_step_time": 0.10571408271789551 }, { "epoch": 2.3870849609375e-05, "model_forward_time": 0.024799823760986328, "step": 15644 }, { "epoch": 2.3870849609375e-05, "step": 15644, "training_step_time": 0.10444450378417969 }, { "epoch": 2.387237548828125e-05, "model_forward_time": 0.025690317153930664, "step": 15645 }, { "epoch": 2.387237548828125e-05, "step": 15645, "training_step_time": 0.10672354698181152 }, { "epoch": 2.38739013671875e-05, "model_forward_time": 0.0254669189453125, "step": 15646 }, { "epoch": 2.38739013671875e-05, "step": 15646, "training_step_time": 0.10465312004089355 }, { "epoch": 2.387542724609375e-05, "model_forward_time": 0.02547621726989746, "step": 15647 }, { "epoch": 2.387542724609375e-05, "step": 15647, "training_step_time": 0.10809636116027832 }, { "epoch": 2.3876953125e-05, "model_forward_time": 0.025116920471191406, "step": 15648 }, { "epoch": 2.3876953125e-05, "step": 15648, "training_step_time": 0.10679912567138672 }, { "epoch": 2.387847900390625e-05, "model_forward_time": 0.0255887508392334, "step": 15649 }, { "epoch": 2.387847900390625e-05, "step": 15649, "training_step_time": 0.10684967041015625 }, { "epoch": 2.38800048828125e-05, "grad_norm": 0.12469741702079773, "learning_rate": 5.0551145444172186e-05, "loss": 0.0124, "step": 15650 }, { "epoch": 2.38800048828125e-05, "model_forward_time": 0.025740861892700195, "step": 15650 }, { "epoch": 2.38800048828125e-05, "step": 15650, "training_step_time": 0.12513995170593262 }, { "epoch": 2.388153076171875e-05, "model_forward_time": 0.025266408920288086, "step": 15651 }, { "epoch": 2.388153076171875e-05, "step": 15651, "training_step_time": 0.10985207557678223 }, { "epoch": 2.3883056640625e-05, "model_forward_time": 0.02570033073425293, "step": 15652 }, { "epoch": 2.3883056640625e-05, "step": 15652, "training_step_time": 0.11091756820678711 }, { "epoch": 2.388458251953125e-05, "model_forward_time": 0.025640487670898438, "step": 15653 }, { "epoch": 2.388458251953125e-05, "step": 15653, "training_step_time": 0.1194925308227539 }, { "epoch": 2.38861083984375e-05, "model_forward_time": 0.02581954002380371, "step": 15654 }, { "epoch": 2.38861083984375e-05, "step": 15654, "training_step_time": 0.10608649253845215 }, { "epoch": 2.388763427734375e-05, "model_forward_time": 0.025385141372680664, "step": 15655 }, { "epoch": 2.388763427734375e-05, "step": 15655, "training_step_time": 0.18947172164916992 }, { "epoch": 2.388916015625e-05, "model_forward_time": 0.025116682052612305, "step": 15656 }, { "epoch": 2.388916015625e-05, "step": 15656, "training_step_time": 0.10415387153625488 }, { "epoch": 2.389068603515625e-05, "model_forward_time": 0.02492046356201172, "step": 15657 }, { "epoch": 2.389068603515625e-05, "step": 15657, "training_step_time": 0.1020052433013916 }, { "epoch": 2.38922119140625e-05, "model_forward_time": 0.02547311782836914, "step": 15658 }, { "epoch": 2.38922119140625e-05, "step": 15658, "training_step_time": 0.10596990585327148 }, { "epoch": 2.389373779296875e-05, "model_forward_time": 0.025180578231811523, "step": 15659 }, { "epoch": 2.389373779296875e-05, "step": 15659, "training_step_time": 0.10626721382141113 }, { "epoch": 2.3895263671875e-05, "grad_norm": 0.3579079210758209, "learning_rate": 5.0496032808399815e-05, "loss": 0.0146, "step": 15660 }, { "epoch": 2.3895263671875e-05, "model_forward_time": 0.025380373001098633, "step": 15660 }, { "epoch": 2.3895263671875e-05, "step": 15660, "training_step_time": 0.10590934753417969 }, { "epoch": 2.389678955078125e-05, "model_forward_time": 0.02711939811706543, "step": 15661 }, { "epoch": 2.389678955078125e-05, "step": 15661, "training_step_time": 0.10676169395446777 }, { "epoch": 2.38983154296875e-05, "model_forward_time": 0.025482892990112305, "step": 15662 }, { "epoch": 2.38983154296875e-05, "step": 15662, "training_step_time": 0.10722780227661133 }, { "epoch": 2.389984130859375e-05, "model_forward_time": 0.025774717330932617, "step": 15663 }, { "epoch": 2.389984130859375e-05, "step": 15663, "training_step_time": 0.10567951202392578 }, { "epoch": 2.39013671875e-05, "model_forward_time": 0.025280237197875977, "step": 15664 }, { "epoch": 2.39013671875e-05, "step": 15664, "training_step_time": 0.10564327239990234 }, { "epoch": 2.390289306640625e-05, "model_forward_time": 0.025325298309326172, "step": 15665 }, { "epoch": 2.390289306640625e-05, "step": 15665, "training_step_time": 0.10602641105651855 }, { "epoch": 2.39044189453125e-05, "model_forward_time": 0.02639174461364746, "step": 15666 }, { "epoch": 2.39044189453125e-05, "step": 15666, "training_step_time": 0.10949540138244629 }, { "epoch": 2.390594482421875e-05, "model_forward_time": 0.02569580078125, "step": 15667 }, { "epoch": 2.390594482421875e-05, "step": 15667, "training_step_time": 0.10619878768920898 }, { "epoch": 2.3907470703125e-05, "model_forward_time": 0.025929927825927734, "step": 15668 }, { "epoch": 2.3907470703125e-05, "step": 15668, "training_step_time": 0.1085813045501709 }, { "epoch": 2.390899658203125e-05, "model_forward_time": 0.02612757682800293, "step": 15669 }, { "epoch": 2.390899658203125e-05, "step": 15669, "training_step_time": 0.1049036979675293 }, { "epoch": 2.39105224609375e-05, "grad_norm": 0.3659554421901703, "learning_rate": 5.0440919569900835e-05, "loss": 0.0121, "step": 15670 }, { "epoch": 2.39105224609375e-05, "model_forward_time": 0.024453163146972656, "step": 15670 }, { "epoch": 2.39105224609375e-05, "step": 15670, "training_step_time": 0.14203572273254395 }, { "epoch": 2.391204833984375e-05, "model_forward_time": 0.025274276733398438, "step": 15671 }, { "epoch": 2.391204833984375e-05, "step": 15671, "training_step_time": 0.11493611335754395 }, { "epoch": 2.391357421875e-05, "model_forward_time": 0.025693893432617188, "step": 15672 }, { "epoch": 2.391357421875e-05, "step": 15672, "training_step_time": 0.19053339958190918 }, { "epoch": 2.391510009765625e-05, "model_forward_time": 0.024596691131591797, "step": 15673 }, { "epoch": 2.391510009765625e-05, "step": 15673, "training_step_time": 0.17298626899719238 }, { "epoch": 2.39166259765625e-05, "model_forward_time": 0.02466559410095215, "step": 15674 }, { "epoch": 2.39166259765625e-05, "step": 15674, "training_step_time": 0.16990995407104492 }, { "epoch": 2.391815185546875e-05, "model_forward_time": 0.024994611740112305, "step": 15675 }, { "epoch": 2.391815185546875e-05, "step": 15675, "training_step_time": 0.1241142749786377 }, { "epoch": 2.3919677734375e-05, "model_forward_time": 0.024948596954345703, "step": 15676 }, { "epoch": 2.3919677734375e-05, "step": 15676, "training_step_time": 0.1110687255859375 }, { "epoch": 2.392120361328125e-05, "model_forward_time": 0.02531599998474121, "step": 15677 }, { "epoch": 2.392120361328125e-05, "step": 15677, "training_step_time": 0.13848352432250977 }, { "epoch": 2.39227294921875e-05, "model_forward_time": 0.026212453842163086, "step": 15678 }, { "epoch": 2.39227294921875e-05, "step": 15678, "training_step_time": 0.1585986614227295 }, { "epoch": 2.392425537109375e-05, "model_forward_time": 0.024724245071411133, "step": 15679 }, { "epoch": 2.392425537109375e-05, "step": 15679, "training_step_time": 0.17809128761291504 }, { "epoch": 2.392578125e-05, "grad_norm": 0.20290325582027435, "learning_rate": 5.038580579564298e-05, "loss": 0.0081, "step": 15680 }, { "epoch": 2.392578125e-05, "model_forward_time": 0.024472951889038086, "step": 15680 }, { "epoch": 2.392578125e-05, "step": 15680, "training_step_time": 0.16106295585632324 }, { "epoch": 2.392730712890625e-05, "model_forward_time": 0.024515867233276367, "step": 15681 }, { "epoch": 2.392730712890625e-05, "step": 15681, "training_step_time": 0.10512518882751465 }, { "epoch": 2.39288330078125e-05, "model_forward_time": 0.025154829025268555, "step": 15682 }, { "epoch": 2.39288330078125e-05, "step": 15682, "training_step_time": 0.10563302040100098 }, { "epoch": 2.393035888671875e-05, "model_forward_time": 0.025299787521362305, "step": 15683 }, { "epoch": 2.393035888671875e-05, "step": 15683, "training_step_time": 0.1065983772277832 }, { "epoch": 2.3931884765625e-05, "model_forward_time": 0.02611517906188965, "step": 15684 }, { "epoch": 2.3931884765625e-05, "step": 15684, "training_step_time": 0.11059141159057617 }, { "epoch": 2.393341064453125e-05, "model_forward_time": 0.02445840835571289, "step": 15685 }, { "epoch": 2.393341064453125e-05, "step": 15685, "training_step_time": 0.11269688606262207 }, { "epoch": 2.39349365234375e-05, "model_forward_time": 0.0266571044921875, "step": 15686 }, { "epoch": 2.39349365234375e-05, "step": 15686, "training_step_time": 0.1074678897857666 }, { "epoch": 2.393646240234375e-05, "model_forward_time": 0.02553415298461914, "step": 15687 }, { "epoch": 2.393646240234375e-05, "step": 15687, "training_step_time": 0.10420751571655273 }, { "epoch": 2.393798828125e-05, "model_forward_time": 0.025606870651245117, "step": 15688 }, { "epoch": 2.393798828125e-05, "step": 15688, "training_step_time": 0.10428428649902344 }, { "epoch": 2.393951416015625e-05, "model_forward_time": 0.025835752487182617, "step": 15689 }, { "epoch": 2.393951416015625e-05, "step": 15689, "training_step_time": 0.10545182228088379 }, { "epoch": 2.39410400390625e-05, "grad_norm": 0.1512109339237213, "learning_rate": 5.033069155259471e-05, "loss": 0.0118, "step": 15690 }, { "epoch": 2.39410400390625e-05, "model_forward_time": 0.02567434310913086, "step": 15690 }, { "epoch": 2.39410400390625e-05, "step": 15690, "training_step_time": 0.10708451271057129 }, { "epoch": 2.394256591796875e-05, "model_forward_time": 0.025406837463378906, "step": 15691 }, { "epoch": 2.394256591796875e-05, "step": 15691, "training_step_time": 0.10428786277770996 }, { "epoch": 2.3944091796875e-05, "model_forward_time": 0.0257265567779541, "step": 15692 }, { "epoch": 2.3944091796875e-05, "step": 15692, "training_step_time": 0.10521364212036133 }, { "epoch": 2.394561767578125e-05, "model_forward_time": 0.025312423706054688, "step": 15693 }, { "epoch": 2.394561767578125e-05, "step": 15693, "training_step_time": 0.1078634262084961 }, { "epoch": 2.39471435546875e-05, "model_forward_time": 0.026050567626953125, "step": 15694 }, { "epoch": 2.39471435546875e-05, "step": 15694, "training_step_time": 0.1060945987701416 }, { "epoch": 2.394866943359375e-05, "model_forward_time": 0.02522110939025879, "step": 15695 }, { "epoch": 2.394866943359375e-05, "step": 15695, "training_step_time": 0.1257786750793457 }, { "epoch": 2.39501953125e-05, "model_forward_time": 0.025350093841552734, "step": 15696 }, { "epoch": 2.39501953125e-05, "step": 15696, "training_step_time": 0.1403203010559082 }, { "epoch": 2.395172119140625e-05, "model_forward_time": 0.025290489196777344, "step": 15697 }, { "epoch": 2.395172119140625e-05, "step": 15697, "training_step_time": 0.10766243934631348 }, { "epoch": 2.39532470703125e-05, "model_forward_time": 0.02566242218017578, "step": 15698 }, { "epoch": 2.39532470703125e-05, "step": 15698, "training_step_time": 0.1178436279296875 }, { "epoch": 2.395477294921875e-05, "model_forward_time": 0.02545452117919922, "step": 15699 }, { "epoch": 2.395477294921875e-05, "step": 15699, "training_step_time": 0.11051583290100098 }, { "epoch": 2.3956298828125e-05, "grad_norm": 0.37655261158943176, "learning_rate": 5.027557690772503e-05, "loss": 0.0153, "step": 15700 }, { "epoch": 2.3956298828125e-05, "model_forward_time": 0.02591085433959961, "step": 15700 }, { "epoch": 2.3956298828125e-05, "step": 15700, "training_step_time": 0.10465764999389648 }, { "epoch": 2.395782470703125e-05, "model_forward_time": 0.026204347610473633, "step": 15701 }, { "epoch": 2.395782470703125e-05, "step": 15701, "training_step_time": 0.19267773628234863 }, { "epoch": 2.39593505859375e-05, "model_forward_time": 0.025120973587036133, "step": 15702 }, { "epoch": 2.39593505859375e-05, "step": 15702, "training_step_time": 0.10290408134460449 }, { "epoch": 2.396087646484375e-05, "model_forward_time": 0.025056123733520508, "step": 15703 }, { "epoch": 2.396087646484375e-05, "step": 15703, "training_step_time": 0.10310602188110352 }, { "epoch": 2.396240234375e-05, "model_forward_time": 0.02574896812438965, "step": 15704 }, { "epoch": 2.396240234375e-05, "step": 15704, "training_step_time": 0.10790634155273438 }, { "epoch": 2.396392822265625e-05, "model_forward_time": 0.025341510772705078, "step": 15705 }, { "epoch": 2.396392822265625e-05, "step": 15705, "training_step_time": 0.17306828498840332 }, { "epoch": 2.39654541015625e-05, "model_forward_time": 0.02524876594543457, "step": 15706 }, { "epoch": 2.39654541015625e-05, "step": 15706, "training_step_time": 0.1821305751800537 }, { "epoch": 2.396697998046875e-05, "model_forward_time": 0.024592876434326172, "step": 15707 }, { "epoch": 2.396697998046875e-05, "step": 15707, "training_step_time": 0.1700141429901123 }, { "epoch": 2.3968505859375e-05, "model_forward_time": 0.02700018882751465, "step": 15708 }, { "epoch": 2.3968505859375e-05, "step": 15708, "training_step_time": 0.16891098022460938 }, { "epoch": 2.397003173828125e-05, "model_forward_time": 0.024342060089111328, "step": 15709 }, { "epoch": 2.397003173828125e-05, "step": 15709, "training_step_time": 0.15590286254882812 }, { "epoch": 2.39715576171875e-05, "grad_norm": 0.19053003191947937, "learning_rate": 5.0220461928003406e-05, "loss": 0.0157, "step": 15710 }, { "epoch": 2.39715576171875e-05, "model_forward_time": 0.024369001388549805, "step": 15710 }, { "epoch": 2.39715576171875e-05, "step": 15710, "training_step_time": 0.14293909072875977 }, { "epoch": 2.397308349609375e-05, "model_forward_time": 0.024281024932861328, "step": 15711 }, { "epoch": 2.397308349609375e-05, "step": 15711, "training_step_time": 0.13981842994689941 }, { "epoch": 2.3974609375e-05, "model_forward_time": 0.02434992790222168, "step": 15712 }, { "epoch": 2.3974609375e-05, "step": 15712, "training_step_time": 0.1496868133544922 }, { "epoch": 2.397613525390625e-05, "model_forward_time": 0.024439096450805664, "step": 15713 }, { "epoch": 2.397613525390625e-05, "step": 15713, "training_step_time": 0.10545682907104492 }, { "epoch": 2.39776611328125e-05, "model_forward_time": 0.025188922882080078, "step": 15714 }, { "epoch": 2.39776611328125e-05, "step": 15714, "training_step_time": 0.1512157917022705 }, { "epoch": 2.397918701171875e-05, "model_forward_time": 0.025780916213989258, "step": 15715 }, { "epoch": 2.397918701171875e-05, "step": 15715, "training_step_time": 0.19835686683654785 }, { "epoch": 2.3980712890625e-05, "model_forward_time": 0.024177074432373047, "step": 15716 }, { "epoch": 2.3980712890625e-05, "step": 15716, "training_step_time": 0.12496805191040039 }, { "epoch": 2.398223876953125e-05, "model_forward_time": 0.025367259979248047, "step": 15717 }, { "epoch": 2.398223876953125e-05, "step": 15717, "training_step_time": 0.10844254493713379 }, { "epoch": 2.39837646484375e-05, "model_forward_time": 0.02589869499206543, "step": 15718 }, { "epoch": 2.39837646484375e-05, "step": 15718, "training_step_time": 0.12061381340026855 }, { "epoch": 2.398529052734375e-05, "model_forward_time": 0.02615499496459961, "step": 15719 }, { "epoch": 2.398529052734375e-05, "step": 15719, "training_step_time": 0.18742942810058594 }, { "epoch": 2.398681640625e-05, "grad_norm": 0.20142294466495514, "learning_rate": 5.016534668039976e-05, "loss": 0.0081, "step": 15720 }, { "epoch": 2.398681640625e-05, "model_forward_time": 0.024407148361206055, "step": 15720 }, { "epoch": 2.398681640625e-05, "step": 15720, "training_step_time": 0.11683297157287598 }, { "epoch": 2.398834228515625e-05, "model_forward_time": 0.024501562118530273, "step": 15721 }, { "epoch": 2.398834228515625e-05, "step": 15721, "training_step_time": 0.130723237991333 }, { "epoch": 2.39898681640625e-05, "model_forward_time": 0.0258944034576416, "step": 15722 }, { "epoch": 2.39898681640625e-05, "step": 15722, "training_step_time": 0.1405935287475586 }, { "epoch": 2.399139404296875e-05, "model_forward_time": 0.025165319442749023, "step": 15723 }, { "epoch": 2.399139404296875e-05, "step": 15723, "training_step_time": 0.12010598182678223 }, { "epoch": 2.3992919921875e-05, "model_forward_time": 0.025543212890625, "step": 15724 }, { "epoch": 2.3992919921875e-05, "step": 15724, "training_step_time": 0.12578797340393066 }, { "epoch": 2.399444580078125e-05, "model_forward_time": 0.025387287139892578, "step": 15725 }, { "epoch": 2.399444580078125e-05, "step": 15725, "training_step_time": 0.1106715202331543 }, { "epoch": 2.39959716796875e-05, "model_forward_time": 0.02564239501953125, "step": 15726 }, { "epoch": 2.39959716796875e-05, "step": 15726, "training_step_time": 0.10779571533203125 }, { "epoch": 2.399749755859375e-05, "model_forward_time": 0.02602839469909668, "step": 15727 }, { "epoch": 2.399749755859375e-05, "step": 15727, "training_step_time": 0.10991978645324707 }, { "epoch": 2.39990234375e-05, "model_forward_time": 0.025864362716674805, "step": 15728 }, { "epoch": 2.39990234375e-05, "step": 15728, "training_step_time": 0.10994291305541992 }, { "epoch": 2.400054931640625e-05, "model_forward_time": 0.02585434913635254, "step": 15729 }, { "epoch": 2.400054931640625e-05, "step": 15729, "training_step_time": 0.1133272647857666 }, { "epoch": 2.40020751953125e-05, "grad_norm": 0.406608521938324, "learning_rate": 5.011023123188431e-05, "loss": 0.0189, "step": 15730 }, { "epoch": 2.40020751953125e-05, "model_forward_time": 0.02548384666442871, "step": 15730 }, { "epoch": 2.40020751953125e-05, "step": 15730, "training_step_time": 0.1119084358215332 }, { "epoch": 2.400360107421875e-05, "model_forward_time": 0.02577519416809082, "step": 15731 }, { "epoch": 2.400360107421875e-05, "step": 15731, "training_step_time": 0.10664033889770508 }, { "epoch": 2.4005126953125e-05, "model_forward_time": 0.025902509689331055, "step": 15732 }, { "epoch": 2.4005126953125e-05, "step": 15732, "training_step_time": 0.11297035217285156 }, { "epoch": 2.400665283203125e-05, "model_forward_time": 0.026175737380981445, "step": 15733 }, { "epoch": 2.400665283203125e-05, "step": 15733, "training_step_time": 0.10593867301940918 }, { "epoch": 2.40081787109375e-05, "model_forward_time": 0.025608539581298828, "step": 15734 }, { "epoch": 2.40081787109375e-05, "step": 15734, "training_step_time": 0.10799932479858398 }, { "epoch": 2.400970458984375e-05, "model_forward_time": 0.02581620216369629, "step": 15735 }, { "epoch": 2.400970458984375e-05, "step": 15735, "training_step_time": 0.10684680938720703 }, { "epoch": 2.401123046875e-05, "model_forward_time": 0.025358915328979492, "step": 15736 }, { "epoch": 2.401123046875e-05, "step": 15736, "training_step_time": 0.10815548896789551 }, { "epoch": 2.401275634765625e-05, "model_forward_time": 0.02623271942138672, "step": 15737 }, { "epoch": 2.401275634765625e-05, "step": 15737, "training_step_time": 0.10978078842163086 }, { "epoch": 2.40142822265625e-05, "model_forward_time": 0.02579784393310547, "step": 15738 }, { "epoch": 2.40142822265625e-05, "step": 15738, "training_step_time": 0.1045680046081543 }, { "epoch": 2.401580810546875e-05, "model_forward_time": 0.02588510513305664, "step": 15739 }, { "epoch": 2.401580810546875e-05, "step": 15739, "training_step_time": 0.1911606788635254 }, { "epoch": 2.4017333984375e-05, "grad_norm": 0.2088557779788971, "learning_rate": 5.005511564942751e-05, "loss": 0.0093, "step": 15740 }, { "epoch": 2.4017333984375e-05, "model_forward_time": 0.02411818504333496, "step": 15740 }, { "epoch": 2.4017333984375e-05, "step": 15740, "training_step_time": 0.13492608070373535 }, { "epoch": 2.401885986328125e-05, "model_forward_time": 0.02290797233581543, "step": 15741 }, { "epoch": 2.401885986328125e-05, "step": 15741, "training_step_time": 0.10648155212402344 }, { "epoch": 2.40203857421875e-05, "model_forward_time": 0.025433063507080078, "step": 15742 }, { "epoch": 2.40203857421875e-05, "step": 15742, "training_step_time": 0.12204146385192871 }, { "epoch": 2.402191162109375e-05, "model_forward_time": 0.025731563568115234, "step": 15743 }, { "epoch": 2.402191162109375e-05, "step": 15743, "training_step_time": 0.10908007621765137 }, { "epoch": 2.40234375e-05, "model_forward_time": 0.029944896697998047, "step": 15744 }, { "epoch": 2.40234375e-05, "step": 15744, "training_step_time": 0.10880923271179199 }, { "epoch": 2.402496337890625e-05, "model_forward_time": 0.025807619094848633, "step": 15745 }, { "epoch": 2.402496337890625e-05, "step": 15745, "training_step_time": 0.19451689720153809 }, { "epoch": 2.40264892578125e-05, "model_forward_time": 0.02525162696838379, "step": 15746 }, { "epoch": 2.40264892578125e-05, "step": 15746, "training_step_time": 0.10396194458007812 }, { "epoch": 2.402801513671875e-05, "model_forward_time": 0.025376319885253906, "step": 15747 }, { "epoch": 2.402801513671875e-05, "step": 15747, "training_step_time": 0.10477805137634277 }, { "epoch": 2.4029541015625e-05, "model_forward_time": 0.025945663452148438, "step": 15748 }, { "epoch": 2.4029541015625e-05, "step": 15748, "training_step_time": 0.10656166076660156 }, { "epoch": 2.403106689453125e-05, "model_forward_time": 0.025737285614013672, "step": 15749 }, { "epoch": 2.403106689453125e-05, "step": 15749, "training_step_time": 0.10791826248168945 }, { "epoch": 2.40325927734375e-05, "grad_norm": 0.17635443806648254, "learning_rate": 5e-05, "loss": 0.014, "step": 15750 }, { "epoch": 2.40325927734375e-05, "model_forward_time": 0.02617049217224121, "step": 15750 }, { "epoch": 2.40325927734375e-05, "step": 15750, "training_step_time": 0.10543346405029297 }, { "epoch": 2.403411865234375e-05, "model_forward_time": 0.02562546730041504, "step": 15751 }, { "epoch": 2.403411865234375e-05, "step": 15751, "training_step_time": 0.10540437698364258 }, { "epoch": 2.403564453125e-05, "model_forward_time": 0.028939247131347656, "step": 15752 }, { "epoch": 2.403564453125e-05, "step": 15752, "training_step_time": 0.11156201362609863 }, { "epoch": 2.403717041015625e-05, "model_forward_time": 0.025876283645629883, "step": 15753 }, { "epoch": 2.403717041015625e-05, "step": 15753, "training_step_time": 0.1595165729522705 }, { "epoch": 2.40386962890625e-05, "model_forward_time": 0.025485992431640625, "step": 15754 }, { "epoch": 2.40386962890625e-05, "step": 15754, "training_step_time": 0.12966632843017578 }, { "epoch": 2.404022216796875e-05, "model_forward_time": 0.0247652530670166, "step": 15755 }, { "epoch": 2.404022216796875e-05, "step": 15755, "training_step_time": 0.10863614082336426 }, { "epoch": 2.4041748046875e-05, "model_forward_time": 0.02574443817138672, "step": 15756 }, { "epoch": 2.4041748046875e-05, "step": 15756, "training_step_time": 0.10725879669189453 }, { "epoch": 2.404327392578125e-05, "model_forward_time": 0.025397300720214844, "step": 15757 }, { "epoch": 2.404327392578125e-05, "step": 15757, "training_step_time": 0.10614991188049316 }, { "epoch": 2.40447998046875e-05, "model_forward_time": 0.025533437728881836, "step": 15758 }, { "epoch": 2.40447998046875e-05, "step": 15758, "training_step_time": 0.10601568222045898 }, { "epoch": 2.404632568359375e-05, "model_forward_time": 0.02973031997680664, "step": 15759 }, { "epoch": 2.404632568359375e-05, "step": 15759, "training_step_time": 0.1150655746459961 }, { "epoch": 2.40478515625e-05, "grad_norm": 0.3995972275733948, "learning_rate": 4.994488435057251e-05, "loss": 0.0122, "step": 15760 }, { "epoch": 2.40478515625e-05, "model_forward_time": 0.025236129760742188, "step": 15760 }, { "epoch": 2.40478515625e-05, "step": 15760, "training_step_time": 0.20906376838684082 }, { "epoch": 2.404937744140625e-05, "model_forward_time": 0.0255582332611084, "step": 15761 }, { "epoch": 2.404937744140625e-05, "step": 15761, "training_step_time": 0.17047595977783203 }, { "epoch": 2.40509033203125e-05, "model_forward_time": 0.0250546932220459, "step": 15762 }, { "epoch": 2.40509033203125e-05, "step": 15762, "training_step_time": 0.15946364402770996 }, { "epoch": 2.405242919921875e-05, "model_forward_time": 0.024927854537963867, "step": 15763 }, { "epoch": 2.405242919921875e-05, "step": 15763, "training_step_time": 0.16434407234191895 }, { "epoch": 2.4053955078125e-05, "model_forward_time": 0.024791955947875977, "step": 15764 }, { "epoch": 2.4053955078125e-05, "step": 15764, "training_step_time": 0.16209888458251953 }, { "epoch": 2.405548095703125e-05, "model_forward_time": 0.025361061096191406, "step": 15765 }, { "epoch": 2.405548095703125e-05, "step": 15765, "training_step_time": 0.11711454391479492 }, { "epoch": 2.40570068359375e-05, "model_forward_time": 0.025193452835083008, "step": 15766 }, { "epoch": 2.40570068359375e-05, "step": 15766, "training_step_time": 0.13407039642333984 }, { "epoch": 2.405853271484375e-05, "model_forward_time": 0.025889158248901367, "step": 15767 }, { "epoch": 2.405853271484375e-05, "step": 15767, "training_step_time": 0.15701603889465332 }, { "epoch": 2.406005859375e-05, "model_forward_time": 0.025141239166259766, "step": 15768 }, { "epoch": 2.406005859375e-05, "step": 15768, "training_step_time": 0.10872602462768555 }, { "epoch": 2.406158447265625e-05, "model_forward_time": 0.02553534507751465, "step": 15769 }, { "epoch": 2.406158447265625e-05, "step": 15769, "training_step_time": 0.12163162231445312 }, { "epoch": 2.40631103515625e-05, "grad_norm": 0.25937697291374207, "learning_rate": 4.988976876811571e-05, "loss": 0.0142, "step": 15770 }, { "epoch": 2.40631103515625e-05, "model_forward_time": 0.025218486785888672, "step": 15770 }, { "epoch": 2.40631103515625e-05, "step": 15770, "training_step_time": 0.10687446594238281 }, { "epoch": 2.406463623046875e-05, "model_forward_time": 0.026124000549316406, "step": 15771 }, { "epoch": 2.406463623046875e-05, "step": 15771, "training_step_time": 0.10693860054016113 }, { "epoch": 2.4066162109375e-05, "model_forward_time": 0.025829315185546875, "step": 15772 }, { "epoch": 2.4066162109375e-05, "step": 15772, "training_step_time": 0.10928893089294434 }, { "epoch": 2.406768798828125e-05, "model_forward_time": 0.026218175888061523, "step": 15773 }, { "epoch": 2.406768798828125e-05, "step": 15773, "training_step_time": 0.10986065864562988 }, { "epoch": 2.40692138671875e-05, "model_forward_time": 0.026322364807128906, "step": 15774 }, { "epoch": 2.40692138671875e-05, "step": 15774, "training_step_time": 0.10667562484741211 }, { "epoch": 2.407073974609375e-05, "model_forward_time": 0.02584385871887207, "step": 15775 }, { "epoch": 2.407073974609375e-05, "step": 15775, "training_step_time": 0.10597348213195801 }, { "epoch": 2.4072265625e-05, "model_forward_time": 0.02616286277770996, "step": 15776 }, { "epoch": 2.4072265625e-05, "step": 15776, "training_step_time": 0.10861873626708984 }, { "epoch": 2.407379150390625e-05, "model_forward_time": 0.025615215301513672, "step": 15777 }, { "epoch": 2.407379150390625e-05, "step": 15777, "training_step_time": 0.10670042037963867 }, { "epoch": 2.40753173828125e-05, "model_forward_time": 0.025829315185546875, "step": 15778 }, { "epoch": 2.40753173828125e-05, "step": 15778, "training_step_time": 0.11101603507995605 }, { "epoch": 2.407684326171875e-05, "model_forward_time": 0.02596449851989746, "step": 15779 }, { "epoch": 2.407684326171875e-05, "step": 15779, "training_step_time": 0.1047966480255127 }, { "epoch": 2.4078369140625e-05, "grad_norm": 0.25760772824287415, "learning_rate": 4.9834653319600246e-05, "loss": 0.0136, "step": 15780 }, { "epoch": 2.4078369140625e-05, "model_forward_time": 0.025949954986572266, "step": 15780 }, { "epoch": 2.4078369140625e-05, "step": 15780, "training_step_time": 0.1060335636138916 }, { "epoch": 2.407989501953125e-05, "model_forward_time": 0.02608633041381836, "step": 15781 }, { "epoch": 2.407989501953125e-05, "step": 15781, "training_step_time": 0.10626053810119629 }, { "epoch": 2.40814208984375e-05, "model_forward_time": 0.025837182998657227, "step": 15782 }, { "epoch": 2.40814208984375e-05, "step": 15782, "training_step_time": 0.10515642166137695 }, { "epoch": 2.408294677734375e-05, "model_forward_time": 0.027230024337768555, "step": 15783 }, { "epoch": 2.408294677734375e-05, "step": 15783, "training_step_time": 0.10765790939331055 }, { "epoch": 2.408447265625e-05, "model_forward_time": 0.026217937469482422, "step": 15784 }, { "epoch": 2.408447265625e-05, "step": 15784, "training_step_time": 0.10561633110046387 }, { "epoch": 2.408599853515625e-05, "model_forward_time": 0.026778697967529297, "step": 15785 }, { "epoch": 2.408599853515625e-05, "step": 15785, "training_step_time": 0.11178970336914062 }, { "epoch": 2.40875244140625e-05, "model_forward_time": 0.025687694549560547, "step": 15786 }, { "epoch": 2.40875244140625e-05, "step": 15786, "training_step_time": 0.14815092086791992 }, { "epoch": 2.408905029296875e-05, "model_forward_time": 0.026078224182128906, "step": 15787 }, { "epoch": 2.408905029296875e-05, "step": 15787, "training_step_time": 0.10693025588989258 }, { "epoch": 2.4090576171875e-05, "model_forward_time": 0.025758981704711914, "step": 15788 }, { "epoch": 2.4090576171875e-05, "step": 15788, "training_step_time": 0.11509013175964355 }, { "epoch": 2.409210205078125e-05, "model_forward_time": 0.029214859008789062, "step": 15789 }, { "epoch": 2.409210205078125e-05, "step": 15789, "training_step_time": 0.112762451171875 }, { "epoch": 2.40936279296875e-05, "grad_norm": 0.3325439691543579, "learning_rate": 4.97795380719966e-05, "loss": 0.021, "step": 15790 }, { "epoch": 2.40936279296875e-05, "model_forward_time": 0.02478194236755371, "step": 15790 }, { "epoch": 2.40936279296875e-05, "step": 15790, "training_step_time": 0.11779904365539551 }, { "epoch": 2.409515380859375e-05, "model_forward_time": 0.02554774284362793, "step": 15791 }, { "epoch": 2.409515380859375e-05, "step": 15791, "training_step_time": 0.17894721031188965 }, { "epoch": 2.40966796875e-05, "model_forward_time": 0.024927377700805664, "step": 15792 }, { "epoch": 2.40966796875e-05, "step": 15792, "training_step_time": 0.11855912208557129 }, { "epoch": 2.409820556640625e-05, "model_forward_time": 0.025980234146118164, "step": 15793 }, { "epoch": 2.409820556640625e-05, "step": 15793, "training_step_time": 0.12667489051818848 }, { "epoch": 2.40997314453125e-05, "model_forward_time": 0.025071382522583008, "step": 15794 }, { "epoch": 2.40997314453125e-05, "step": 15794, "training_step_time": 0.11824154853820801 }, { "epoch": 2.410125732421875e-05, "model_forward_time": 0.025911331176757812, "step": 15795 }, { "epoch": 2.410125732421875e-05, "step": 15795, "training_step_time": 0.11380553245544434 }, { "epoch": 2.4102783203125e-05, "model_forward_time": 0.025363683700561523, "step": 15796 }, { "epoch": 2.4102783203125e-05, "step": 15796, "training_step_time": 0.11666440963745117 }, { "epoch": 2.410430908203125e-05, "model_forward_time": 0.025876760482788086, "step": 15797 }, { "epoch": 2.410430908203125e-05, "step": 15797, "training_step_time": 0.10966110229492188 }, { "epoch": 2.41058349609375e-05, "model_forward_time": 0.026041269302368164, "step": 15798 }, { "epoch": 2.41058349609375e-05, "step": 15798, "training_step_time": 0.1086728572845459 }, { "epoch": 2.410736083984375e-05, "model_forward_time": 0.025957822799682617, "step": 15799 }, { "epoch": 2.410736083984375e-05, "step": 15799, "training_step_time": 0.10919523239135742 }, { "epoch": 2.410888671875e-05, "grad_norm": 0.2951965034008026, "learning_rate": 4.972442309227498e-05, "loss": 0.0148, "step": 15800 }, { "epoch": 2.410888671875e-05, "model_forward_time": 0.025502681732177734, "step": 15800 }, { "epoch": 2.410888671875e-05, "step": 15800, "training_step_time": 0.10789346694946289 }, { "epoch": 2.411041259765625e-05, "model_forward_time": 0.02546858787536621, "step": 15801 }, { "epoch": 2.411041259765625e-05, "step": 15801, "training_step_time": 0.10580611228942871 }, { "epoch": 2.41119384765625e-05, "model_forward_time": 0.025721311569213867, "step": 15802 }, { "epoch": 2.41119384765625e-05, "step": 15802, "training_step_time": 0.10697054862976074 }, { "epoch": 2.411346435546875e-05, "model_forward_time": 0.025745153427124023, "step": 15803 }, { "epoch": 2.411346435546875e-05, "step": 15803, "training_step_time": 0.10657548904418945 }, { "epoch": 2.4114990234375e-05, "model_forward_time": 0.025782346725463867, "step": 15804 }, { "epoch": 2.4114990234375e-05, "step": 15804, "training_step_time": 0.10963892936706543 }, { "epoch": 2.411651611328125e-05, "model_forward_time": 0.025864601135253906, "step": 15805 }, { "epoch": 2.411651611328125e-05, "step": 15805, "training_step_time": 0.11355066299438477 }, { "epoch": 2.41180419921875e-05, "model_forward_time": 0.02550530433654785, "step": 15806 }, { "epoch": 2.41180419921875e-05, "step": 15806, "training_step_time": 0.19260001182556152 }, { "epoch": 2.411956787109375e-05, "model_forward_time": 0.025295495986938477, "step": 15807 }, { "epoch": 2.411956787109375e-05, "step": 15807, "training_step_time": 0.20040321350097656 }, { "epoch": 2.412109375e-05, "model_forward_time": 0.025110244750976562, "step": 15808 }, { "epoch": 2.412109375e-05, "step": 15808, "training_step_time": 0.19210124015808105 }, { "epoch": 2.412261962890625e-05, "model_forward_time": 0.024487972259521484, "step": 15809 }, { "epoch": 2.412261962890625e-05, "step": 15809, "training_step_time": 0.16778779029846191 }, { "epoch": 2.41241455078125e-05, "grad_norm": 0.2948176860809326, "learning_rate": 4.96693084474053e-05, "loss": 0.0137, "step": 15810 }, { "epoch": 2.41241455078125e-05, "model_forward_time": 0.027551889419555664, "step": 15810 }, { "epoch": 2.41241455078125e-05, "step": 15810, "training_step_time": 0.12750530242919922 }, { "epoch": 2.412567138671875e-05, "model_forward_time": 0.025095224380493164, "step": 15811 }, { "epoch": 2.412567138671875e-05, "step": 15811, "training_step_time": 0.11366772651672363 }, { "epoch": 2.4127197265625e-05, "model_forward_time": 0.025704622268676758, "step": 15812 }, { "epoch": 2.4127197265625e-05, "step": 15812, "training_step_time": 0.12529611587524414 }, { "epoch": 2.412872314453125e-05, "model_forward_time": 0.025648117065429688, "step": 15813 }, { "epoch": 2.412872314453125e-05, "step": 15813, "training_step_time": 0.1047525405883789 }, { "epoch": 2.41302490234375e-05, "model_forward_time": 0.025472402572631836, "step": 15814 }, { "epoch": 2.41302490234375e-05, "step": 15814, "training_step_time": 0.1850287914276123 }, { "epoch": 2.413177490234375e-05, "model_forward_time": 0.02468585968017578, "step": 15815 }, { "epoch": 2.413177490234375e-05, "step": 15815, "training_step_time": 0.16116023063659668 }, { "epoch": 2.413330078125e-05, "model_forward_time": 0.0242311954498291, "step": 15816 }, { "epoch": 2.413330078125e-05, "step": 15816, "training_step_time": 0.188004732131958 }, { "epoch": 2.413482666015625e-05, "model_forward_time": 0.024767637252807617, "step": 15817 }, { "epoch": 2.413482666015625e-05, "step": 15817, "training_step_time": 0.17100310325622559 }, { "epoch": 2.41363525390625e-05, "model_forward_time": 0.02544546127319336, "step": 15818 }, { "epoch": 2.41363525390625e-05, "step": 15818, "training_step_time": 0.1612837314605713 }, { "epoch": 2.413787841796875e-05, "model_forward_time": 0.024760723114013672, "step": 15819 }, { "epoch": 2.413787841796875e-05, "step": 15819, "training_step_time": 0.13944172859191895 }, { "epoch": 2.4139404296875e-05, "grad_norm": 0.21831856667995453, "learning_rate": 4.961419420435703e-05, "loss": 0.0125, "step": 15820 }, { "epoch": 2.4139404296875e-05, "model_forward_time": 0.024768829345703125, "step": 15820 }, { "epoch": 2.4139404296875e-05, "step": 15820, "training_step_time": 0.13277482986450195 }, { "epoch": 2.414093017578125e-05, "model_forward_time": 0.02490711212158203, "step": 15821 }, { "epoch": 2.414093017578125e-05, "step": 15821, "training_step_time": 0.12941956520080566 }, { "epoch": 2.41424560546875e-05, "model_forward_time": 0.02538466453552246, "step": 15822 }, { "epoch": 2.41424560546875e-05, "step": 15822, "training_step_time": 0.12295937538146973 }, { "epoch": 2.414398193359375e-05, "model_forward_time": 0.027056455612182617, "step": 15823 }, { "epoch": 2.414398193359375e-05, "step": 15823, "training_step_time": 0.12282776832580566 }, { "epoch": 2.41455078125e-05, "model_forward_time": 0.025541067123413086, "step": 15824 }, { "epoch": 2.41455078125e-05, "step": 15824, "training_step_time": 0.11903858184814453 }, { "epoch": 2.414703369140625e-05, "model_forward_time": 0.02582693099975586, "step": 15825 }, { "epoch": 2.414703369140625e-05, "step": 15825, "training_step_time": 0.11393070220947266 }, { "epoch": 2.41485595703125e-05, "model_forward_time": 0.025495290756225586, "step": 15826 }, { "epoch": 2.41485595703125e-05, "step": 15826, "training_step_time": 0.1136636734008789 }, { "epoch": 2.415008544921875e-05, "model_forward_time": 0.025578022003173828, "step": 15827 }, { "epoch": 2.415008544921875e-05, "step": 15827, "training_step_time": 0.15861845016479492 }, { "epoch": 2.4151611328125e-05, "model_forward_time": 0.024882078170776367, "step": 15828 }, { "epoch": 2.4151611328125e-05, "step": 15828, "training_step_time": 0.17461276054382324 }, { "epoch": 2.415313720703125e-05, "model_forward_time": 0.02426290512084961, "step": 15829 }, { "epoch": 2.415313720703125e-05, "step": 15829, "training_step_time": 0.10796284675598145 }, { "epoch": 2.41546630859375e-05, "grad_norm": 0.41024482250213623, "learning_rate": 4.955908043009917e-05, "loss": 0.0123, "step": 15830 }, { "epoch": 2.41546630859375e-05, "model_forward_time": 0.025498390197753906, "step": 15830 }, { "epoch": 2.41546630859375e-05, "step": 15830, "training_step_time": 0.1199045181274414 }, { "epoch": 2.415618896484375e-05, "model_forward_time": 0.025540590286254883, "step": 15831 }, { "epoch": 2.415618896484375e-05, "step": 15831, "training_step_time": 0.11087441444396973 }, { "epoch": 2.415771484375e-05, "model_forward_time": 0.025449037551879883, "step": 15832 }, { "epoch": 2.415771484375e-05, "step": 15832, "training_step_time": 0.10480070114135742 }, { "epoch": 2.415924072265625e-05, "model_forward_time": 0.025753498077392578, "step": 15833 }, { "epoch": 2.415924072265625e-05, "step": 15833, "training_step_time": 0.19542288780212402 }, { "epoch": 2.41607666015625e-05, "model_forward_time": 0.02477288246154785, "step": 15834 }, { "epoch": 2.41607666015625e-05, "step": 15834, "training_step_time": 0.10563373565673828 }, { "epoch": 2.416229248046875e-05, "model_forward_time": 0.02510356903076172, "step": 15835 }, { "epoch": 2.416229248046875e-05, "step": 15835, "training_step_time": 0.10892748832702637 }, { "epoch": 2.4163818359375e-05, "model_forward_time": 0.025130033493041992, "step": 15836 }, { "epoch": 2.4163818359375e-05, "step": 15836, "training_step_time": 0.10665225982666016 }, { "epoch": 2.416534423828125e-05, "model_forward_time": 0.025502443313598633, "step": 15837 }, { "epoch": 2.416534423828125e-05, "step": 15837, "training_step_time": 0.10480070114135742 }, { "epoch": 2.41668701171875e-05, "model_forward_time": 0.028617143630981445, "step": 15838 }, { "epoch": 2.41668701171875e-05, "step": 15838, "training_step_time": 0.10938143730163574 }, { "epoch": 2.416839599609375e-05, "model_forward_time": 0.025852441787719727, "step": 15839 }, { "epoch": 2.416839599609375e-05, "step": 15839, "training_step_time": 0.10679483413696289 }, { "epoch": 2.4169921875e-05, "grad_norm": 0.17590944468975067, "learning_rate": 4.950396719160018e-05, "loss": 0.013, "step": 15840 }, { "epoch": 2.4169921875e-05, "model_forward_time": 0.025693178176879883, "step": 15840 }, { "epoch": 2.4169921875e-05, "step": 15840, "training_step_time": 0.10664200782775879 }, { "epoch": 2.417144775390625e-05, "model_forward_time": 0.025706768035888672, "step": 15841 }, { "epoch": 2.417144775390625e-05, "step": 15841, "training_step_time": 0.10791993141174316 }, { "epoch": 2.41729736328125e-05, "model_forward_time": 0.02624201774597168, "step": 15842 }, { "epoch": 2.41729736328125e-05, "step": 15842, "training_step_time": 0.10647845268249512 }, { "epoch": 2.417449951171875e-05, "model_forward_time": 0.02576589584350586, "step": 15843 }, { "epoch": 2.417449951171875e-05, "step": 15843, "training_step_time": 0.1066436767578125 }, { "epoch": 2.4176025390625e-05, "model_forward_time": 0.025748729705810547, "step": 15844 }, { "epoch": 2.4176025390625e-05, "step": 15844, "training_step_time": 0.10545134544372559 }, { "epoch": 2.417755126953125e-05, "model_forward_time": 0.025636672973632812, "step": 15845 }, { "epoch": 2.417755126953125e-05, "step": 15845, "training_step_time": 0.10648918151855469 }, { "epoch": 2.41790771484375e-05, "model_forward_time": 0.026278972625732422, "step": 15846 }, { "epoch": 2.41790771484375e-05, "step": 15846, "training_step_time": 0.10998773574829102 }, { "epoch": 2.418060302734375e-05, "model_forward_time": 0.026279687881469727, "step": 15847 }, { "epoch": 2.418060302734375e-05, "step": 15847, "training_step_time": 0.10999917984008789 }, { "epoch": 2.418212890625e-05, "model_forward_time": 0.02591681480407715, "step": 15848 }, { "epoch": 2.418212890625e-05, "step": 15848, "training_step_time": 0.15461254119873047 }, { "epoch": 2.418365478515625e-05, "model_forward_time": 0.02440500259399414, "step": 15849 }, { "epoch": 2.418365478515625e-05, "step": 15849, "training_step_time": 0.10561490058898926 }, { "epoch": 2.41851806640625e-05, "grad_norm": 0.26678934693336487, "learning_rate": 4.9448854555827825e-05, "loss": 0.0117, "step": 15850 }, { "epoch": 2.41851806640625e-05, "model_forward_time": 0.024710416793823242, "step": 15850 }, { "epoch": 2.41851806640625e-05, "step": 15850, "training_step_time": 0.16521763801574707 }, { "epoch": 2.418670654296875e-05, "model_forward_time": 0.024506330490112305, "step": 15851 }, { "epoch": 2.418670654296875e-05, "step": 15851, "training_step_time": 0.19717860221862793 }, { "epoch": 2.4188232421875e-05, "model_forward_time": 0.024341821670532227, "step": 15852 }, { "epoch": 2.4188232421875e-05, "step": 15852, "training_step_time": 0.14354300498962402 }, { "epoch": 2.418975830078125e-05, "model_forward_time": 0.02642035484313965, "step": 15853 }, { "epoch": 2.418975830078125e-05, "step": 15853, "training_step_time": 0.1900327205657959 }, { "epoch": 2.41912841796875e-05, "model_forward_time": 0.02536320686340332, "step": 15854 }, { "epoch": 2.41912841796875e-05, "step": 15854, "training_step_time": 0.17298412322998047 }, { "epoch": 2.419281005859375e-05, "model_forward_time": 0.025203704833984375, "step": 15855 }, { "epoch": 2.419281005859375e-05, "step": 15855, "training_step_time": 0.10773253440856934 }, { "epoch": 2.41943359375e-05, "model_forward_time": 0.02529430389404297, "step": 15856 }, { "epoch": 2.41943359375e-05, "step": 15856, "training_step_time": 0.12476611137390137 }, { "epoch": 2.419586181640625e-05, "model_forward_time": 0.02646183967590332, "step": 15857 }, { "epoch": 2.419586181640625e-05, "step": 15857, "training_step_time": 0.1109309196472168 }, { "epoch": 2.41973876953125e-05, "model_forward_time": 0.026561975479125977, "step": 15858 }, { "epoch": 2.41973876953125e-05, "step": 15858, "training_step_time": 0.11260294914245605 }, { "epoch": 2.419891357421875e-05, "model_forward_time": 0.025049686431884766, "step": 15859 }, { "epoch": 2.419891357421875e-05, "step": 15859, "training_step_time": 0.19774889945983887 }, { "epoch": 2.4200439453125e-05, "grad_norm": 0.35972481966018677, "learning_rate": 4.9393742589749145e-05, "loss": 0.0126, "step": 15860 }, { "epoch": 2.4200439453125e-05, "model_forward_time": 0.02472972869873047, "step": 15860 }, { "epoch": 2.4200439453125e-05, "step": 15860, "training_step_time": 0.10699343681335449 }, { "epoch": 2.420196533203125e-05, "model_forward_time": 0.02517223358154297, "step": 15861 }, { "epoch": 2.420196533203125e-05, "step": 15861, "training_step_time": 0.10393333435058594 }, { "epoch": 2.42034912109375e-05, "model_forward_time": 0.025421619415283203, "step": 15862 }, { "epoch": 2.42034912109375e-05, "step": 15862, "training_step_time": 0.10455727577209473 }, { "epoch": 2.420501708984375e-05, "model_forward_time": 0.025794506072998047, "step": 15863 }, { "epoch": 2.420501708984375e-05, "step": 15863, "training_step_time": 0.10572934150695801 }, { "epoch": 2.420654296875e-05, "model_forward_time": 0.024958372116088867, "step": 15864 }, { "epoch": 2.420654296875e-05, "step": 15864, "training_step_time": 0.10541963577270508 }, { "epoch": 2.420806884765625e-05, "model_forward_time": 0.025580644607543945, "step": 15865 }, { "epoch": 2.420806884765625e-05, "step": 15865, "training_step_time": 0.10817766189575195 }, { "epoch": 2.42095947265625e-05, "model_forward_time": 0.025849342346191406, "step": 15866 }, { "epoch": 2.42095947265625e-05, "step": 15866, "training_step_time": 0.10609817504882812 }, { "epoch": 2.421112060546875e-05, "model_forward_time": 0.025813817977905273, "step": 15867 }, { "epoch": 2.421112060546875e-05, "step": 15867, "training_step_time": 0.10567402839660645 }, { "epoch": 2.4212646484375e-05, "model_forward_time": 0.02597522735595703, "step": 15868 }, { "epoch": 2.4212646484375e-05, "step": 15868, "training_step_time": 0.10711789131164551 }, { "epoch": 2.421417236328125e-05, "model_forward_time": 0.025913715362548828, "step": 15869 }, { "epoch": 2.421417236328125e-05, "step": 15869, "training_step_time": 0.10759878158569336 }, { "epoch": 2.42156982421875e-05, "grad_norm": 0.32081305980682373, "learning_rate": 4.93386313603304e-05, "loss": 0.0175, "step": 15870 }, { "epoch": 2.42156982421875e-05, "model_forward_time": 0.025621652603149414, "step": 15870 }, { "epoch": 2.42156982421875e-05, "step": 15870, "training_step_time": 0.10730576515197754 }, { "epoch": 2.421722412109375e-05, "model_forward_time": 0.025763988494873047, "step": 15871 }, { "epoch": 2.421722412109375e-05, "step": 15871, "training_step_time": 0.1067664623260498 }, { "epoch": 2.421875e-05, "model_forward_time": 0.025990962982177734, "step": 15872 }, { "epoch": 2.421875e-05, "step": 15872, "training_step_time": 0.1060187816619873 }, { "epoch": 2.422027587890625e-05, "model_forward_time": 0.026315689086914062, "step": 15873 }, { "epoch": 2.422027587890625e-05, "step": 15873, "training_step_time": 0.1778872013092041 }, { "epoch": 2.42218017578125e-05, "model_forward_time": 0.025370121002197266, "step": 15874 }, { "epoch": 2.42218017578125e-05, "step": 15874, "training_step_time": 0.13401436805725098 }, { "epoch": 2.422332763671875e-05, "model_forward_time": 0.025191783905029297, "step": 15875 }, { "epoch": 2.422332763671875e-05, "step": 15875, "training_step_time": 0.1104578971862793 }, { "epoch": 2.4224853515625e-05, "model_forward_time": 0.026271581649780273, "step": 15876 }, { "epoch": 2.4224853515625e-05, "step": 15876, "training_step_time": 0.11933588981628418 }, { "epoch": 2.422637939453125e-05, "model_forward_time": 0.025708675384521484, "step": 15877 }, { "epoch": 2.422637939453125e-05, "step": 15877, "training_step_time": 0.10750508308410645 }, { "epoch": 2.42279052734375e-05, "model_forward_time": 0.02553844451904297, "step": 15878 }, { "epoch": 2.42279052734375e-05, "step": 15878, "training_step_time": 0.1074683666229248 }, { "epoch": 2.422943115234375e-05, "model_forward_time": 0.025675058364868164, "step": 15879 }, { "epoch": 2.422943115234375e-05, "step": 15879, "training_step_time": 0.19632577896118164 }, { "epoch": 2.423095703125e-05, "grad_norm": 0.384095162153244, "learning_rate": 4.9283520934536904e-05, "loss": 0.0164, "step": 15880 }, { "epoch": 2.423095703125e-05, "model_forward_time": 0.024239778518676758, "step": 15880 }, { "epoch": 2.423095703125e-05, "step": 15880, "training_step_time": 0.10271501541137695 }, { "epoch": 2.423248291015625e-05, "model_forward_time": 0.02489304542541504, "step": 15881 }, { "epoch": 2.423248291015625e-05, "step": 15881, "training_step_time": 0.10219669342041016 }, { "epoch": 2.42340087890625e-05, "model_forward_time": 0.025671005249023438, "step": 15882 }, { "epoch": 2.42340087890625e-05, "step": 15882, "training_step_time": 0.10906648635864258 }, { "epoch": 2.423553466796875e-05, "model_forward_time": 0.025266170501708984, "step": 15883 }, { "epoch": 2.423553466796875e-05, "step": 15883, "training_step_time": 0.10835576057434082 }, { "epoch": 2.4237060546875e-05, "model_forward_time": 0.025645971298217773, "step": 15884 }, { "epoch": 2.4237060546875e-05, "step": 15884, "training_step_time": 0.10694122314453125 }, { "epoch": 2.423858642578125e-05, "model_forward_time": 0.026115894317626953, "step": 15885 }, { "epoch": 2.423858642578125e-05, "step": 15885, "training_step_time": 0.10697221755981445 }, { "epoch": 2.42401123046875e-05, "model_forward_time": 0.025562286376953125, "step": 15886 }, { "epoch": 2.42401123046875e-05, "step": 15886, "training_step_time": 0.10576319694519043 }, { "epoch": 2.424163818359375e-05, "model_forward_time": 0.025818586349487305, "step": 15887 }, { "epoch": 2.424163818359375e-05, "step": 15887, "training_step_time": 0.10538721084594727 }, { "epoch": 2.42431640625e-05, "model_forward_time": 0.025905847549438477, "step": 15888 }, { "epoch": 2.42431640625e-05, "step": 15888, "training_step_time": 0.10671162605285645 }, { "epoch": 2.424468994140625e-05, "model_forward_time": 0.025387287139892578, "step": 15889 }, { "epoch": 2.424468994140625e-05, "step": 15889, "training_step_time": 0.10831189155578613 }, { "epoch": 2.42462158203125e-05, "grad_norm": 0.20762024819850922, "learning_rate": 4.9228411379333014e-05, "loss": 0.0127, "step": 15890 }, { "epoch": 2.42462158203125e-05, "model_forward_time": 0.026102066040039062, "step": 15890 }, { "epoch": 2.42462158203125e-05, "step": 15890, "training_step_time": 0.10633206367492676 }, { "epoch": 2.424774169921875e-05, "model_forward_time": 0.027546167373657227, "step": 15891 }, { "epoch": 2.424774169921875e-05, "step": 15891, "training_step_time": 0.10753345489501953 }, { "epoch": 2.4249267578125e-05, "model_forward_time": 0.026748180389404297, "step": 15892 }, { "epoch": 2.4249267578125e-05, "step": 15892, "training_step_time": 0.11024665832519531 }, { "epoch": 2.425079345703125e-05, "model_forward_time": 0.025822877883911133, "step": 15893 }, { "epoch": 2.425079345703125e-05, "step": 15893, "training_step_time": 0.10959315299987793 }, { "epoch": 2.42523193359375e-05, "model_forward_time": 0.025696992874145508, "step": 15894 }, { "epoch": 2.42523193359375e-05, "step": 15894, "training_step_time": 0.10599088668823242 }, { "epoch": 2.425384521484375e-05, "model_forward_time": 0.02540755271911621, "step": 15895 }, { "epoch": 2.425384521484375e-05, "step": 15895, "training_step_time": 0.18365001678466797 }, { "epoch": 2.425537109375e-05, "model_forward_time": 0.025188684463500977, "step": 15896 }, { "epoch": 2.425537109375e-05, "step": 15896, "training_step_time": 0.135390043258667 }, { "epoch": 2.425689697265625e-05, "model_forward_time": 0.025185585021972656, "step": 15897 }, { "epoch": 2.425689697265625e-05, "step": 15897, "training_step_time": 0.10508251190185547 }, { "epoch": 2.42584228515625e-05, "model_forward_time": 0.025966644287109375, "step": 15898 }, { "epoch": 2.42584228515625e-05, "step": 15898, "training_step_time": 0.13190364837646484 }, { "epoch": 2.425994873046875e-05, "model_forward_time": 0.029868364334106445, "step": 15899 }, { "epoch": 2.425994873046875e-05, "step": 15899, "training_step_time": 0.21393394470214844 }, { "epoch": 2.4261474609375e-05, "grad_norm": 0.14643548429012299, "learning_rate": 4.917330276168208e-05, "loss": 0.007, "step": 15900 }, { "epoch": 2.4261474609375e-05, "model_forward_time": 0.025090932846069336, "step": 15900 }, { "epoch": 2.4261474609375e-05, "step": 15900, "training_step_time": 0.10868263244628906 }, { "epoch": 2.426300048828125e-05, "model_forward_time": 0.02563619613647461, "step": 15901 }, { "epoch": 2.426300048828125e-05, "step": 15901, "training_step_time": 0.11607813835144043 }, { "epoch": 2.42645263671875e-05, "model_forward_time": 0.02555394172668457, "step": 15902 }, { "epoch": 2.42645263671875e-05, "step": 15902, "training_step_time": 0.17038440704345703 }, { "epoch": 2.426605224609375e-05, "model_forward_time": 0.025087594985961914, "step": 15903 }, { "epoch": 2.426605224609375e-05, "step": 15903, "training_step_time": 0.1440563201904297 }, { "epoch": 2.4267578125e-05, "model_forward_time": 0.02509760856628418, "step": 15904 }, { "epoch": 2.4267578125e-05, "step": 15904, "training_step_time": 0.12248110771179199 }, { "epoch": 2.426910400390625e-05, "model_forward_time": 0.02478957176208496, "step": 15905 }, { "epoch": 2.426910400390625e-05, "step": 15905, "training_step_time": 0.10920500755310059 }, { "epoch": 2.42706298828125e-05, "model_forward_time": 0.02589106559753418, "step": 15906 }, { "epoch": 2.42706298828125e-05, "step": 15906, "training_step_time": 0.10769796371459961 }, { "epoch": 2.427215576171875e-05, "model_forward_time": 0.025074005126953125, "step": 15907 }, { "epoch": 2.427215576171875e-05, "step": 15907, "training_step_time": 0.10381889343261719 }, { "epoch": 2.4273681640625e-05, "model_forward_time": 0.025251150131225586, "step": 15908 }, { "epoch": 2.4273681640625e-05, "step": 15908, "training_step_time": 0.10627913475036621 }, { "epoch": 2.427520751953125e-05, "model_forward_time": 0.02614879608154297, "step": 15909 }, { "epoch": 2.427520751953125e-05, "step": 15909, "training_step_time": 0.11140656471252441 }, { "epoch": 2.42767333984375e-05, "grad_norm": 0.30145934224128723, "learning_rate": 4.911819514854624e-05, "loss": 0.0098, "step": 15910 }, { "epoch": 2.42767333984375e-05, "model_forward_time": 0.02497696876525879, "step": 15910 }, { "epoch": 2.42767333984375e-05, "step": 15910, "training_step_time": 0.10891985893249512 }, { "epoch": 2.427825927734375e-05, "model_forward_time": 0.025133132934570312, "step": 15911 }, { "epoch": 2.427825927734375e-05, "step": 15911, "training_step_time": 0.10997700691223145 }, { "epoch": 2.427978515625e-05, "model_forward_time": 0.025237321853637695, "step": 15912 }, { "epoch": 2.427978515625e-05, "step": 15912, "training_step_time": 0.10519576072692871 }, { "epoch": 2.428131103515625e-05, "model_forward_time": 0.025793075561523438, "step": 15913 }, { "epoch": 2.428131103515625e-05, "step": 15913, "training_step_time": 0.10698103904724121 }, { "epoch": 2.42828369140625e-05, "model_forward_time": 0.02942371368408203, "step": 15914 }, { "epoch": 2.42828369140625e-05, "step": 15914, "training_step_time": 0.11082577705383301 }, { "epoch": 2.428436279296875e-05, "model_forward_time": 0.0254364013671875, "step": 15915 }, { "epoch": 2.428436279296875e-05, "step": 15915, "training_step_time": 0.1064605712890625 }, { "epoch": 2.4285888671875e-05, "model_forward_time": 0.025509119033813477, "step": 15916 }, { "epoch": 2.4285888671875e-05, "step": 15916, "training_step_time": 0.1069495677947998 }, { "epoch": 2.428741455078125e-05, "model_forward_time": 0.025258779525756836, "step": 15917 }, { "epoch": 2.428741455078125e-05, "step": 15917, "training_step_time": 0.10472822189331055 }, { "epoch": 2.42889404296875e-05, "model_forward_time": 0.025116443634033203, "step": 15918 }, { "epoch": 2.42889404296875e-05, "step": 15918, "training_step_time": 0.10599899291992188 }, { "epoch": 2.429046630859375e-05, "model_forward_time": 0.025040388107299805, "step": 15919 }, { "epoch": 2.429046630859375e-05, "step": 15919, "training_step_time": 0.10509538650512695 }, { "epoch": 2.42919921875e-05, "grad_norm": 0.1788942962884903, "learning_rate": 4.906308860688645e-05, "loss": 0.0122, "step": 15920 }, { "epoch": 2.42919921875e-05, "model_forward_time": 0.024704694747924805, "step": 15920 }, { "epoch": 2.42919921875e-05, "step": 15920, "training_step_time": 0.10908269882202148 }, { "epoch": 2.429351806640625e-05, "model_forward_time": 0.024132966995239258, "step": 15921 }, { "epoch": 2.429351806640625e-05, "step": 15921, "training_step_time": 0.10468530654907227 }, { "epoch": 2.42950439453125e-05, "model_forward_time": 0.025097370147705078, "step": 15922 }, { "epoch": 2.42950439453125e-05, "step": 15922, "training_step_time": 0.13061738014221191 }, { "epoch": 2.429656982421875e-05, "model_forward_time": 0.024858713150024414, "step": 15923 }, { "epoch": 2.429656982421875e-05, "step": 15923, "training_step_time": 0.11765551567077637 }, { "epoch": 2.4298095703125e-05, "model_forward_time": 0.025319576263427734, "step": 15924 }, { "epoch": 2.4298095703125e-05, "step": 15924, "training_step_time": 0.10913586616516113 }, { "epoch": 2.429962158203125e-05, "model_forward_time": 0.025531530380249023, "step": 15925 }, { "epoch": 2.429962158203125e-05, "step": 15925, "training_step_time": 0.11077141761779785 }, { "epoch": 2.43011474609375e-05, "model_forward_time": 0.02533721923828125, "step": 15926 }, { "epoch": 2.43011474609375e-05, "step": 15926, "training_step_time": 0.10660672187805176 }, { "epoch": 2.430267333984375e-05, "model_forward_time": 0.02531909942626953, "step": 15927 }, { "epoch": 2.430267333984375e-05, "step": 15927, "training_step_time": 0.19421911239624023 }, { "epoch": 2.430419921875e-05, "model_forward_time": 0.025098562240600586, "step": 15928 }, { "epoch": 2.430419921875e-05, "step": 15928, "training_step_time": 0.10227751731872559 }, { "epoch": 2.430572509765625e-05, "model_forward_time": 0.02476191520690918, "step": 15929 }, { "epoch": 2.430572509765625e-05, "step": 15929, "training_step_time": 0.10433697700500488 }, { "epoch": 2.43072509765625e-05, "grad_norm": 0.32863739132881165, "learning_rate": 4.9007983203662326e-05, "loss": 0.0148, "step": 15930 }, { "epoch": 2.43072509765625e-05, "model_forward_time": 0.026124238967895508, "step": 15930 }, { "epoch": 2.43072509765625e-05, "step": 15930, "training_step_time": 0.10723996162414551 }, { "epoch": 2.430877685546875e-05, "model_forward_time": 0.02552962303161621, "step": 15931 }, { "epoch": 2.430877685546875e-05, "step": 15931, "training_step_time": 0.10885930061340332 }, { "epoch": 2.4310302734375e-05, "model_forward_time": 0.026043415069580078, "step": 15932 }, { "epoch": 2.4310302734375e-05, "step": 15932, "training_step_time": 0.10613417625427246 }, { "epoch": 2.431182861328125e-05, "model_forward_time": 0.02562427520751953, "step": 15933 }, { "epoch": 2.431182861328125e-05, "step": 15933, "training_step_time": 0.10567808151245117 }, { "epoch": 2.43133544921875e-05, "model_forward_time": 0.02583456039428711, "step": 15934 }, { "epoch": 2.43133544921875e-05, "step": 15934, "training_step_time": 0.10648989677429199 }, { "epoch": 2.431488037109375e-05, "model_forward_time": 0.025661706924438477, "step": 15935 }, { "epoch": 2.431488037109375e-05, "step": 15935, "training_step_time": 0.10765194892883301 }, { "epoch": 2.431640625e-05, "model_forward_time": 0.024990081787109375, "step": 15936 }, { "epoch": 2.431640625e-05, "step": 15936, "training_step_time": 0.10599589347839355 }, { "epoch": 2.431793212890625e-05, "model_forward_time": 0.025968313217163086, "step": 15937 }, { "epoch": 2.431793212890625e-05, "step": 15937, "training_step_time": 0.10855555534362793 }, { "epoch": 2.43194580078125e-05, "model_forward_time": 0.02735447883605957, "step": 15938 }, { "epoch": 2.43194580078125e-05, "step": 15938, "training_step_time": 0.10811400413513184 }, { "epoch": 2.432098388671875e-05, "model_forward_time": 0.026804208755493164, "step": 15939 }, { "epoch": 2.432098388671875e-05, "step": 15939, "training_step_time": 0.10663986206054688 }, { "epoch": 2.4322509765625e-05, "grad_norm": 0.2623797655105591, "learning_rate": 4.895287900583216e-05, "loss": 0.0122, "step": 15940 }, { "epoch": 2.4322509765625e-05, "model_forward_time": 0.02431941032409668, "step": 15940 }, { "epoch": 2.4322509765625e-05, "step": 15940, "training_step_time": 0.10401558876037598 }, { "epoch": 2.432403564453125e-05, "model_forward_time": 0.024582862854003906, "step": 15941 }, { "epoch": 2.432403564453125e-05, "step": 15941, "training_step_time": 0.10406661033630371 }, { "epoch": 2.43255615234375e-05, "model_forward_time": 0.026092052459716797, "step": 15942 }, { "epoch": 2.43255615234375e-05, "step": 15942, "training_step_time": 0.10527443885803223 }, { "epoch": 2.432708740234375e-05, "model_forward_time": 0.02536296844482422, "step": 15943 }, { "epoch": 2.432708740234375e-05, "step": 15943, "training_step_time": 0.1746978759765625 }, { "epoch": 2.432861328125e-05, "model_forward_time": 0.02556014060974121, "step": 15944 }, { "epoch": 2.432861328125e-05, "step": 15944, "training_step_time": 0.17778635025024414 }, { "epoch": 2.433013916015625e-05, "model_forward_time": 0.02498459815979004, "step": 15945 }, { "epoch": 2.433013916015625e-05, "step": 15945, "training_step_time": 0.19803738594055176 }, { "epoch": 2.43316650390625e-05, "model_forward_time": 0.025725603103637695, "step": 15946 }, { "epoch": 2.43316650390625e-05, "step": 15946, "training_step_time": 0.16976666450500488 }, { "epoch": 2.433319091796875e-05, "model_forward_time": 0.024913787841796875, "step": 15947 }, { "epoch": 2.433319091796875e-05, "step": 15947, "training_step_time": 0.1595759391784668 }, { "epoch": 2.4334716796875e-05, "model_forward_time": 0.024934768676757812, "step": 15948 }, { "epoch": 2.4334716796875e-05, "step": 15948, "training_step_time": 0.11550354957580566 }, { "epoch": 2.433624267578125e-05, "model_forward_time": 0.025606870651245117, "step": 15949 }, { "epoch": 2.433624267578125e-05, "step": 15949, "training_step_time": 0.13918447494506836 }, { "epoch": 2.43377685546875e-05, "grad_norm": 0.15143872797489166, "learning_rate": 4.889777608035273e-05, "loss": 0.0077, "step": 15950 }, { "epoch": 2.43377685546875e-05, "model_forward_time": 0.025627613067626953, "step": 15950 }, { "epoch": 2.43377685546875e-05, "step": 15950, "training_step_time": 0.11227560043334961 }, { "epoch": 2.433929443359375e-05, "model_forward_time": 0.025737524032592773, "step": 15951 }, { "epoch": 2.433929443359375e-05, "step": 15951, "training_step_time": 0.17915630340576172 }, { "epoch": 2.43408203125e-05, "model_forward_time": 0.025168418884277344, "step": 15952 }, { "epoch": 2.43408203125e-05, "step": 15952, "training_step_time": 0.1336383819580078 }, { "epoch": 2.434234619140625e-05, "model_forward_time": 0.02490830421447754, "step": 15953 }, { "epoch": 2.434234619140625e-05, "step": 15953, "training_step_time": 0.11654448509216309 }, { "epoch": 2.43438720703125e-05, "model_forward_time": 0.02537083625793457, "step": 15954 }, { "epoch": 2.43438720703125e-05, "step": 15954, "training_step_time": 0.10313296318054199 }, { "epoch": 2.434539794921875e-05, "model_forward_time": 0.025387048721313477, "step": 15955 }, { "epoch": 2.434539794921875e-05, "step": 15955, "training_step_time": 0.10349178314208984 }, { "epoch": 2.4346923828125e-05, "model_forward_time": 0.02561783790588379, "step": 15956 }, { "epoch": 2.4346923828125e-05, "step": 15956, "training_step_time": 0.10423946380615234 }, { "epoch": 2.434844970703125e-05, "model_forward_time": 0.025623798370361328, "step": 15957 }, { "epoch": 2.434844970703125e-05, "step": 15957, "training_step_time": 0.10730814933776855 }, { "epoch": 2.43499755859375e-05, "model_forward_time": 0.02522563934326172, "step": 15958 }, { "epoch": 2.43499755859375e-05, "step": 15958, "training_step_time": 0.11627459526062012 }, { "epoch": 2.435150146484375e-05, "model_forward_time": 0.02457427978515625, "step": 15959 }, { "epoch": 2.435150146484375e-05, "step": 15959, "training_step_time": 0.1127774715423584 }, { "epoch": 2.435302734375e-05, "grad_norm": 0.13048915565013885, "learning_rate": 4.884267449417931e-05, "loss": 0.0109, "step": 15960 }, { "epoch": 2.435302734375e-05, "model_forward_time": 0.026351451873779297, "step": 15960 }, { "epoch": 2.435302734375e-05, "step": 15960, "training_step_time": 0.11735105514526367 }, { "epoch": 2.435455322265625e-05, "model_forward_time": 0.025661468505859375, "step": 15961 }, { "epoch": 2.435455322265625e-05, "step": 15961, "training_step_time": 0.1125333309173584 }, { "epoch": 2.43560791015625e-05, "model_forward_time": 0.025895357131958008, "step": 15962 }, { "epoch": 2.43560791015625e-05, "step": 15962, "training_step_time": 0.1161501407623291 }, { "epoch": 2.435760498046875e-05, "model_forward_time": 0.025594472885131836, "step": 15963 }, { "epoch": 2.435760498046875e-05, "step": 15963, "training_step_time": 0.11189889907836914 }, { "epoch": 2.4359130859375e-05, "model_forward_time": 0.02572154998779297, "step": 15964 }, { "epoch": 2.4359130859375e-05, "step": 15964, "training_step_time": 0.11070418357849121 }, { "epoch": 2.436065673828125e-05, "model_forward_time": 0.025930404663085938, "step": 15965 }, { "epoch": 2.436065673828125e-05, "step": 15965, "training_step_time": 0.11066246032714844 }, { "epoch": 2.43621826171875e-05, "model_forward_time": 0.025638580322265625, "step": 15966 }, { "epoch": 2.43621826171875e-05, "step": 15966, "training_step_time": 0.1066124439239502 }, { "epoch": 2.436370849609375e-05, "model_forward_time": 0.02590799331665039, "step": 15967 }, { "epoch": 2.436370849609375e-05, "step": 15967, "training_step_time": 0.14440560340881348 }, { "epoch": 2.4365234375e-05, "model_forward_time": 0.026114702224731445, "step": 15968 }, { "epoch": 2.4365234375e-05, "step": 15968, "training_step_time": 0.14677739143371582 }, { "epoch": 2.436676025390625e-05, "model_forward_time": 0.025386333465576172, "step": 15969 }, { "epoch": 2.436676025390625e-05, "step": 15969, "training_step_time": 0.10649895668029785 }, { "epoch": 2.43682861328125e-05, "grad_norm": 0.4667963683605194, "learning_rate": 4.878757431426551e-05, "loss": 0.0173, "step": 15970 }, { "epoch": 2.43682861328125e-05, "model_forward_time": 0.026465177536010742, "step": 15970 }, { "epoch": 2.43682861328125e-05, "step": 15970, "training_step_time": 0.10843157768249512 }, { "epoch": 2.436981201171875e-05, "model_forward_time": 0.025693416595458984, "step": 15971 }, { "epoch": 2.436981201171875e-05, "step": 15971, "training_step_time": 0.10870504379272461 }, { "epoch": 2.4371337890625e-05, "model_forward_time": 0.025734424591064453, "step": 15972 }, { "epoch": 2.4371337890625e-05, "step": 15972, "training_step_time": 0.10793256759643555 }, { "epoch": 2.437286376953125e-05, "model_forward_time": 0.025585412979125977, "step": 15973 }, { "epoch": 2.437286376953125e-05, "step": 15973, "training_step_time": 0.19218230247497559 }, { "epoch": 2.43743896484375e-05, "model_forward_time": 0.02491140365600586, "step": 15974 }, { "epoch": 2.43743896484375e-05, "step": 15974, "training_step_time": 0.10451340675354004 }, { "epoch": 2.437591552734375e-05, "model_forward_time": 0.027354001998901367, "step": 15975 }, { "epoch": 2.437591552734375e-05, "step": 15975, "training_step_time": 0.10729265213012695 }, { "epoch": 2.437744140625e-05, "model_forward_time": 0.025938987731933594, "step": 15976 }, { "epoch": 2.437744140625e-05, "step": 15976, "training_step_time": 0.10894656181335449 }, { "epoch": 2.437896728515625e-05, "model_forward_time": 0.02557539939880371, "step": 15977 }, { "epoch": 2.437896728515625e-05, "step": 15977, "training_step_time": 0.10466217994689941 }, { "epoch": 2.43804931640625e-05, "model_forward_time": 0.02507948875427246, "step": 15978 }, { "epoch": 2.43804931640625e-05, "step": 15978, "training_step_time": 0.10676407814025879 }, { "epoch": 2.438201904296875e-05, "model_forward_time": 0.025479555130004883, "step": 15979 }, { "epoch": 2.438201904296875e-05, "step": 15979, "training_step_time": 0.10506010055541992 }, { "epoch": 2.4383544921875e-05, "grad_norm": 0.29334574937820435, "learning_rate": 4.873247560756324e-05, "loss": 0.0098, "step": 15980 }, { "epoch": 2.4383544921875e-05, "model_forward_time": 0.025799274444580078, "step": 15980 }, { "epoch": 2.4383544921875e-05, "step": 15980, "training_step_time": 0.10599017143249512 }, { "epoch": 2.438507080078125e-05, "model_forward_time": 0.026062488555908203, "step": 15981 }, { "epoch": 2.438507080078125e-05, "step": 15981, "training_step_time": 0.1067500114440918 }, { "epoch": 2.43865966796875e-05, "model_forward_time": 0.025292396545410156, "step": 15982 }, { "epoch": 2.43865966796875e-05, "step": 15982, "training_step_time": 0.1045982837677002 }, { "epoch": 2.438812255859375e-05, "model_forward_time": 0.02561354637145996, "step": 15983 }, { "epoch": 2.438812255859375e-05, "step": 15983, "training_step_time": 0.10487008094787598 }, { "epoch": 2.43896484375e-05, "model_forward_time": 0.026061296463012695, "step": 15984 }, { "epoch": 2.43896484375e-05, "step": 15984, "training_step_time": 0.10548281669616699 }, { "epoch": 2.439117431640625e-05, "model_forward_time": 0.025626659393310547, "step": 15985 }, { "epoch": 2.439117431640625e-05, "step": 15985, "training_step_time": 0.10392022132873535 }, { "epoch": 2.43927001953125e-05, "model_forward_time": 0.02581048011779785, "step": 15986 }, { "epoch": 2.43927001953125e-05, "step": 15986, "training_step_time": 0.10550999641418457 }, { "epoch": 2.439422607421875e-05, "model_forward_time": 0.025572538375854492, "step": 15987 }, { "epoch": 2.439422607421875e-05, "step": 15987, "training_step_time": 0.10541439056396484 }, { "epoch": 2.4395751953125e-05, "model_forward_time": 0.025936603546142578, "step": 15988 }, { "epoch": 2.4395751953125e-05, "step": 15988, "training_step_time": 0.10556578636169434 }, { "epoch": 2.439727783203125e-05, "model_forward_time": 0.02664804458618164, "step": 15989 }, { "epoch": 2.439727783203125e-05, "step": 15989, "training_step_time": 0.10661935806274414 }, { "epoch": 2.43988037109375e-05, "grad_norm": 0.17938953638076782, "learning_rate": 4.867737844102261e-05, "loss": 0.0184, "step": 15990 }, { "epoch": 2.43988037109375e-05, "model_forward_time": 0.02633047103881836, "step": 15990 }, { "epoch": 2.43988037109375e-05, "step": 15990, "training_step_time": 0.18438220024108887 }, { "epoch": 2.440032958984375e-05, "model_forward_time": 0.0251009464263916, "step": 15991 }, { "epoch": 2.440032958984375e-05, "step": 15991, "training_step_time": 0.12546944618225098 }, { "epoch": 2.440185546875e-05, "model_forward_time": 0.02520918846130371, "step": 15992 }, { "epoch": 2.440185546875e-05, "step": 15992, "training_step_time": 0.11509394645690918 }, { "epoch": 2.440338134765625e-05, "model_forward_time": 0.026113033294677734, "step": 15993 }, { "epoch": 2.440338134765625e-05, "step": 15993, "training_step_time": 0.1378002166748047 }, { "epoch": 2.44049072265625e-05, "model_forward_time": 0.025772571563720703, "step": 15994 }, { "epoch": 2.44049072265625e-05, "step": 15994, "training_step_time": 0.10792136192321777 }, { "epoch": 2.440643310546875e-05, "model_forward_time": 0.02566218376159668, "step": 15995 }, { "epoch": 2.440643310546875e-05, "step": 15995, "training_step_time": 0.20623564720153809 }, { "epoch": 2.4407958984375e-05, "model_forward_time": 0.025394916534423828, "step": 15996 }, { "epoch": 2.4407958984375e-05, "step": 15996, "training_step_time": 0.11642718315124512 }, { "epoch": 2.440948486328125e-05, "model_forward_time": 0.025074005126953125, "step": 15997 }, { "epoch": 2.440948486328125e-05, "step": 15997, "training_step_time": 0.1303861141204834 }, { "epoch": 2.44110107421875e-05, "model_forward_time": 0.025536537170410156, "step": 15998 }, { "epoch": 2.44110107421875e-05, "step": 15998, "training_step_time": 0.1416034698486328 }, { "epoch": 2.441253662109375e-05, "model_forward_time": 0.02530837059020996, "step": 15999 }, { "epoch": 2.441253662109375e-05, "step": 15999, "training_step_time": 0.11546540260314941 }, { "epoch": 2.44140625e-05, "grad_norm": 0.31458476185798645, "learning_rate": 4.8622282881591906e-05, "loss": 0.0117, "step": 16000 }, { "epoch": 2.44140625e-05, "model_forward_time": 0.026625871658325195, "step": 16000 }, { "epoch": 2.44140625e-05, "step": 16000, "training_step_time": 0.11011123657226562 }, { "epoch": 2.441558837890625e-05, "model_forward_time": 0.023554325103759766, "step": 16001 }, { "epoch": 2.441558837890625e-05, "step": 16001, "training_step_time": 0.15520930290222168 }, { "epoch": 2.44171142578125e-05, "model_forward_time": 0.024297237396240234, "step": 16002 }, { "epoch": 2.44171142578125e-05, "step": 16002, "training_step_time": 0.13415908813476562 }, { "epoch": 2.441864013671875e-05, "model_forward_time": 0.02445244789123535, "step": 16003 }, { "epoch": 2.441864013671875e-05, "step": 16003, "training_step_time": 0.16687917709350586 }, { "epoch": 2.4420166015625e-05, "model_forward_time": 0.024739980697631836, "step": 16004 }, { "epoch": 2.4420166015625e-05, "step": 16004, "training_step_time": 0.13107967376708984 }, { "epoch": 2.442169189453125e-05, "model_forward_time": 0.024922609329223633, "step": 16005 }, { "epoch": 2.442169189453125e-05, "step": 16005, "training_step_time": 0.11837315559387207 }, { "epoch": 2.44232177734375e-05, "model_forward_time": 0.025043964385986328, "step": 16006 }, { "epoch": 2.44232177734375e-05, "step": 16006, "training_step_time": 0.14241456985473633 }, { "epoch": 2.442474365234375e-05, "model_forward_time": 0.02635049819946289, "step": 16007 }, { "epoch": 2.442474365234375e-05, "step": 16007, "training_step_time": 0.1542069911956787 }, { "epoch": 2.442626953125e-05, "model_forward_time": 0.024143218994140625, "step": 16008 }, { "epoch": 2.442626953125e-05, "step": 16008, "training_step_time": 0.22746992111206055 }, { "epoch": 2.442779541015625e-05, "model_forward_time": 0.02458500862121582, "step": 16009 }, { "epoch": 2.442779541015625e-05, "step": 16009, "training_step_time": 0.1101388931274414 }, { "epoch": 2.44293212890625e-05, "grad_norm": 0.28905314207077026, "learning_rate": 4.85671889962174e-05, "loss": 0.0126, "step": 16010 }, { "epoch": 2.44293212890625e-05, "model_forward_time": 0.024885177612304688, "step": 16010 }, { "epoch": 2.44293212890625e-05, "step": 16010, "training_step_time": 0.1119379997253418 }, { "epoch": 2.443084716796875e-05, "model_forward_time": 0.025030851364135742, "step": 16011 }, { "epoch": 2.443084716796875e-05, "step": 16011, "training_step_time": 0.10663294792175293 }, { "epoch": 2.4432373046875e-05, "model_forward_time": 0.02551102638244629, "step": 16012 }, { "epoch": 2.4432373046875e-05, "step": 16012, "training_step_time": 0.10792016983032227 }, { "epoch": 2.443389892578125e-05, "model_forward_time": 0.02524113655090332, "step": 16013 }, { "epoch": 2.443389892578125e-05, "step": 16013, "training_step_time": 0.11145853996276855 }, { "epoch": 2.44354248046875e-05, "model_forward_time": 0.02544546127319336, "step": 16014 }, { "epoch": 2.44354248046875e-05, "step": 16014, "training_step_time": 0.10948348045349121 }, { "epoch": 2.443695068359375e-05, "model_forward_time": 0.025015830993652344, "step": 16015 }, { "epoch": 2.443695068359375e-05, "step": 16015, "training_step_time": 0.10814213752746582 }, { "epoch": 2.44384765625e-05, "model_forward_time": 0.02673625946044922, "step": 16016 }, { "epoch": 2.44384765625e-05, "step": 16016, "training_step_time": 0.11029171943664551 }, { "epoch": 2.444000244140625e-05, "model_forward_time": 0.02514338493347168, "step": 16017 }, { "epoch": 2.444000244140625e-05, "step": 16017, "training_step_time": 0.10781049728393555 }, { "epoch": 2.44415283203125e-05, "model_forward_time": 0.02511143684387207, "step": 16018 }, { "epoch": 2.44415283203125e-05, "step": 16018, "training_step_time": 0.10732817649841309 }, { "epoch": 2.444305419921875e-05, "model_forward_time": 0.02526378631591797, "step": 16019 }, { "epoch": 2.444305419921875e-05, "step": 16019, "training_step_time": 0.1095438003540039 }, { "epoch": 2.4444580078125e-05, "grad_norm": 0.22905737161636353, "learning_rate": 4.851209685184338e-05, "loss": 0.0104, "step": 16020 }, { "epoch": 2.4444580078125e-05, "model_forward_time": 0.024835824966430664, "step": 16020 }, { "epoch": 2.4444580078125e-05, "step": 16020, "training_step_time": 0.10628747940063477 }, { "epoch": 2.444610595703125e-05, "model_forward_time": 0.024904966354370117, "step": 16021 }, { "epoch": 2.444610595703125e-05, "step": 16021, "training_step_time": 0.10562610626220703 }, { "epoch": 2.44476318359375e-05, "model_forward_time": 0.02544426918029785, "step": 16022 }, { "epoch": 2.44476318359375e-05, "step": 16022, "training_step_time": 0.10908746719360352 }, { "epoch": 2.444915771484375e-05, "model_forward_time": 0.025531768798828125, "step": 16023 }, { "epoch": 2.444915771484375e-05, "step": 16023, "training_step_time": 0.18296432495117188 }, { "epoch": 2.445068359375e-05, "model_forward_time": 0.02452850341796875, "step": 16024 }, { "epoch": 2.445068359375e-05, "step": 16024, "training_step_time": 0.20955276489257812 }, { "epoch": 2.445220947265625e-05, "model_forward_time": 0.02400803565979004, "step": 16025 }, { "epoch": 2.445220947265625e-05, "step": 16025, "training_step_time": 0.2079789638519287 }, { "epoch": 2.44537353515625e-05, "model_forward_time": 0.023763179779052734, "step": 16026 }, { "epoch": 2.44537353515625e-05, "step": 16026, "training_step_time": 0.20606422424316406 }, { "epoch": 2.445526123046875e-05, "model_forward_time": 0.024510860443115234, "step": 16027 }, { "epoch": 2.445526123046875e-05, "step": 16027, "training_step_time": 0.1596519947052002 }, { "epoch": 2.4456787109375e-05, "model_forward_time": 0.02387404441833496, "step": 16028 }, { "epoch": 2.4456787109375e-05, "step": 16028, "training_step_time": 0.1474475860595703 }, { "epoch": 2.445831298828125e-05, "model_forward_time": 0.0247194766998291, "step": 16029 }, { "epoch": 2.445831298828125e-05, "step": 16029, "training_step_time": 0.1114950180053711 }, { "epoch": 2.44598388671875e-05, "grad_norm": 0.12559278309345245, "learning_rate": 4.845700651541199e-05, "loss": 0.0129, "step": 16030 }, { "epoch": 2.44598388671875e-05, "model_forward_time": 0.024735689163208008, "step": 16030 }, { "epoch": 2.44598388671875e-05, "step": 16030, "training_step_time": 0.10233592987060547 }, { "epoch": 2.446136474609375e-05, "model_forward_time": 0.024773120880126953, "step": 16031 }, { "epoch": 2.446136474609375e-05, "step": 16031, "training_step_time": 0.10128474235534668 }, { "epoch": 2.4462890625e-05, "model_forward_time": 0.024929523468017578, "step": 16032 }, { "epoch": 2.4462890625e-05, "step": 16032, "training_step_time": 0.10394048690795898 }, { "epoch": 2.446441650390625e-05, "model_forward_time": 0.02707695960998535, "step": 16033 }, { "epoch": 2.446441650390625e-05, "step": 16033, "training_step_time": 0.1057279109954834 }, { "epoch": 2.44659423828125e-05, "model_forward_time": 0.025636911392211914, "step": 16034 }, { "epoch": 2.44659423828125e-05, "step": 16034, "training_step_time": 0.10531949996948242 }, { "epoch": 2.446746826171875e-05, "model_forward_time": 0.025171518325805664, "step": 16035 }, { "epoch": 2.446746826171875e-05, "step": 16035, "training_step_time": 0.1037743091583252 }, { "epoch": 2.4468994140625e-05, "model_forward_time": 0.02550506591796875, "step": 16036 }, { "epoch": 2.4468994140625e-05, "step": 16036, "training_step_time": 0.10723590850830078 }, { "epoch": 2.447052001953125e-05, "model_forward_time": 0.025185585021972656, "step": 16037 }, { "epoch": 2.447052001953125e-05, "step": 16037, "training_step_time": 0.1066126823425293 }, { "epoch": 2.44720458984375e-05, "model_forward_time": 0.02517557144165039, "step": 16038 }, { "epoch": 2.44720458984375e-05, "step": 16038, "training_step_time": 0.10784101486206055 }, { "epoch": 2.447357177734375e-05, "model_forward_time": 0.02515125274658203, "step": 16039 }, { "epoch": 2.447357177734375e-05, "step": 16039, "training_step_time": 0.10886144638061523 }, { "epoch": 2.447509765625e-05, "grad_norm": 0.17869317531585693, "learning_rate": 4.840191805386315e-05, "loss": 0.0105, "step": 16040 }, { "epoch": 2.447509765625e-05, "model_forward_time": 0.024691104888916016, "step": 16040 }, { "epoch": 2.447509765625e-05, "step": 16040, "training_step_time": 0.10958528518676758 }, { "epoch": 2.447662353515625e-05, "model_forward_time": 0.025226354598999023, "step": 16041 }, { "epoch": 2.447662353515625e-05, "step": 16041, "training_step_time": 0.10772013664245605 }, { "epoch": 2.44781494140625e-05, "model_forward_time": 0.025742292404174805, "step": 16042 }, { "epoch": 2.44781494140625e-05, "step": 16042, "training_step_time": 0.10521101951599121 }, { "epoch": 2.447967529296875e-05, "model_forward_time": 0.024721622467041016, "step": 16043 }, { "epoch": 2.447967529296875e-05, "step": 16043, "training_step_time": 0.11118674278259277 }, { "epoch": 2.4481201171875e-05, "model_forward_time": 0.024915456771850586, "step": 16044 }, { "epoch": 2.4481201171875e-05, "step": 16044, "training_step_time": 0.14704394340515137 }, { "epoch": 2.448272705078125e-05, "model_forward_time": 0.024907827377319336, "step": 16045 }, { "epoch": 2.448272705078125e-05, "step": 16045, "training_step_time": 0.142988920211792 }, { "epoch": 2.44842529296875e-05, "model_forward_time": 0.02510213851928711, "step": 16046 }, { "epoch": 2.44842529296875e-05, "step": 16046, "training_step_time": 0.19709253311157227 }, { "epoch": 2.448577880859375e-05, "model_forward_time": 0.0263364315032959, "step": 16047 }, { "epoch": 2.448577880859375e-05, "step": 16047, "training_step_time": 0.14141154289245605 }, { "epoch": 2.44873046875e-05, "model_forward_time": 0.024976491928100586, "step": 16048 }, { "epoch": 2.44873046875e-05, "step": 16048, "training_step_time": 0.13085460662841797 }, { "epoch": 2.448883056640625e-05, "model_forward_time": 0.02457737922668457, "step": 16049 }, { "epoch": 2.448883056640625e-05, "step": 16049, "training_step_time": 0.11881160736083984 }, { "epoch": 2.44903564453125e-05, "grad_norm": 0.19629864394664764, "learning_rate": 4.834683153413459e-05, "loss": 0.0104, "step": 16050 }, { "epoch": 2.44903564453125e-05, "model_forward_time": 0.025118589401245117, "step": 16050 }, { "epoch": 2.44903564453125e-05, "step": 16050, "training_step_time": 0.13252973556518555 }, { "epoch": 2.449188232421875e-05, "model_forward_time": 0.024988412857055664, "step": 16051 }, { "epoch": 2.449188232421875e-05, "step": 16051, "training_step_time": 0.1635758876800537 }, { "epoch": 2.4493408203125e-05, "model_forward_time": 0.024254560470581055, "step": 16052 }, { "epoch": 2.4493408203125e-05, "step": 16052, "training_step_time": 0.22408628463745117 }, { "epoch": 2.449493408203125e-05, "model_forward_time": 0.02434825897216797, "step": 16053 }, { "epoch": 2.449493408203125e-05, "step": 16053, "training_step_time": 0.10737228393554688 }, { "epoch": 2.44964599609375e-05, "model_forward_time": 0.024283409118652344, "step": 16054 }, { "epoch": 2.44964599609375e-05, "step": 16054, "training_step_time": 0.1102597713470459 }, { "epoch": 2.449798583984375e-05, "model_forward_time": 0.0248873233795166, "step": 16055 }, { "epoch": 2.449798583984375e-05, "step": 16055, "training_step_time": 0.10745763778686523 }, { "epoch": 2.449951171875e-05, "model_forward_time": 0.025278568267822266, "step": 16056 }, { "epoch": 2.449951171875e-05, "step": 16056, "training_step_time": 0.10583353042602539 }, { "epoch": 2.450103759765625e-05, "model_forward_time": 0.025668859481811523, "step": 16057 }, { "epoch": 2.450103759765625e-05, "step": 16057, "training_step_time": 0.1065981388092041 }, { "epoch": 2.45025634765625e-05, "model_forward_time": 0.025279998779296875, "step": 16058 }, { "epoch": 2.45025634765625e-05, "step": 16058, "training_step_time": 0.1078941822052002 }, { "epoch": 2.450408935546875e-05, "model_forward_time": 0.02539801597595215, "step": 16059 }, { "epoch": 2.450408935546875e-05, "step": 16059, "training_step_time": 0.10501527786254883 }, { "epoch": 2.4505615234375e-05, "grad_norm": 0.2447129338979721, "learning_rate": 4.82917470231616e-05, "loss": 0.0103, "step": 16060 }, { "epoch": 2.4505615234375e-05, "model_forward_time": 0.02509760856628418, "step": 16060 }, { "epoch": 2.4505615234375e-05, "step": 16060, "training_step_time": 0.10478544235229492 }, { "epoch": 2.450714111328125e-05, "model_forward_time": 0.024875402450561523, "step": 16061 }, { "epoch": 2.450714111328125e-05, "step": 16061, "training_step_time": 0.10678672790527344 }, { "epoch": 2.45086669921875e-05, "model_forward_time": 0.024808168411254883, "step": 16062 }, { "epoch": 2.45086669921875e-05, "step": 16062, "training_step_time": 0.10939908027648926 }, { "epoch": 2.451019287109375e-05, "model_forward_time": 0.02469491958618164, "step": 16063 }, { "epoch": 2.451019287109375e-05, "step": 16063, "training_step_time": 0.1053767204284668 }, { "epoch": 2.451171875e-05, "model_forward_time": 0.025281429290771484, "step": 16064 }, { "epoch": 2.451171875e-05, "step": 16064, "training_step_time": 0.10822534561157227 }, { "epoch": 2.451324462890625e-05, "model_forward_time": 0.024911165237426758, "step": 16065 }, { "epoch": 2.451324462890625e-05, "step": 16065, "training_step_time": 0.10770344734191895 }, { "epoch": 2.45147705078125e-05, "model_forward_time": 0.025319576263427734, "step": 16066 }, { "epoch": 2.45147705078125e-05, "step": 16066, "training_step_time": 0.1389482021331787 }, { "epoch": 2.451629638671875e-05, "model_forward_time": 0.025473594665527344, "step": 16067 }, { "epoch": 2.451629638671875e-05, "step": 16067, "training_step_time": 0.1396324634552002 }, { "epoch": 2.4517822265625e-05, "model_forward_time": 0.02412867546081543, "step": 16068 }, { "epoch": 2.4517822265625e-05, "step": 16068, "training_step_time": 0.10547018051147461 }, { "epoch": 2.451934814453125e-05, "model_forward_time": 0.024769306182861328, "step": 16069 }, { "epoch": 2.451934814453125e-05, "step": 16069, "training_step_time": 0.1202085018157959 }, { "epoch": 2.45208740234375e-05, "grad_norm": 0.34994441270828247, "learning_rate": 4.823666458787705e-05, "loss": 0.0098, "step": 16070 }, { "epoch": 2.45208740234375e-05, "model_forward_time": 0.025077104568481445, "step": 16070 }, { "epoch": 2.45208740234375e-05, "step": 16070, "training_step_time": 0.11086845397949219 }, { "epoch": 2.452239990234375e-05, "model_forward_time": 0.025316476821899414, "step": 16071 }, { "epoch": 2.452239990234375e-05, "step": 16071, "training_step_time": 0.10955166816711426 }, { "epoch": 2.452392578125e-05, "model_forward_time": 0.025339365005493164, "step": 16072 }, { "epoch": 2.452392578125e-05, "step": 16072, "training_step_time": 0.20117926597595215 }, { "epoch": 2.452545166015625e-05, "model_forward_time": 0.02426004409790039, "step": 16073 }, { "epoch": 2.452545166015625e-05, "step": 16073, "training_step_time": 0.1071014404296875 }, { "epoch": 2.45269775390625e-05, "model_forward_time": 0.024445772171020508, "step": 16074 }, { "epoch": 2.45269775390625e-05, "step": 16074, "training_step_time": 0.10209822654724121 }, { "epoch": 2.452850341796875e-05, "model_forward_time": 0.024713754653930664, "step": 16075 }, { "epoch": 2.452850341796875e-05, "step": 16075, "training_step_time": 0.1036672592163086 }, { "epoch": 2.4530029296875e-05, "model_forward_time": 0.02493453025817871, "step": 16076 }, { "epoch": 2.4530029296875e-05, "step": 16076, "training_step_time": 0.10753417015075684 }, { "epoch": 2.453155517578125e-05, "model_forward_time": 0.025128841400146484, "step": 16077 }, { "epoch": 2.453155517578125e-05, "step": 16077, "training_step_time": 0.1046915054321289 }, { "epoch": 2.45330810546875e-05, "model_forward_time": 0.025221586227416992, "step": 16078 }, { "epoch": 2.45330810546875e-05, "step": 16078, "training_step_time": 0.10518550872802734 }, { "epoch": 2.453460693359375e-05, "model_forward_time": 0.025501728057861328, "step": 16079 }, { "epoch": 2.453460693359375e-05, "step": 16079, "training_step_time": 0.10677289962768555 }, { "epoch": 2.45361328125e-05, "grad_norm": 0.3603598177433014, "learning_rate": 4.818158429521129e-05, "loss": 0.0118, "step": 16080 }, { "epoch": 2.45361328125e-05, "model_forward_time": 0.0250699520111084, "step": 16080 }, { "epoch": 2.45361328125e-05, "step": 16080, "training_step_time": 0.10611152648925781 }, { "epoch": 2.453765869140625e-05, "model_forward_time": 0.0255281925201416, "step": 16081 }, { "epoch": 2.453765869140625e-05, "step": 16081, "training_step_time": 0.1064307689666748 }, { "epoch": 2.45391845703125e-05, "model_forward_time": 0.024738073348999023, "step": 16082 }, { "epoch": 2.45391845703125e-05, "step": 16082, "training_step_time": 0.10457611083984375 }, { "epoch": 2.454071044921875e-05, "model_forward_time": 0.025248050689697266, "step": 16083 }, { "epoch": 2.454071044921875e-05, "step": 16083, "training_step_time": 0.10298562049865723 }, { "epoch": 2.4542236328125e-05, "model_forward_time": 0.025495290756225586, "step": 16084 }, { "epoch": 2.4542236328125e-05, "step": 16084, "training_step_time": 0.10384058952331543 }, { "epoch": 2.454376220703125e-05, "model_forward_time": 0.025461196899414062, "step": 16085 }, { "epoch": 2.454376220703125e-05, "step": 16085, "training_step_time": 0.10553216934204102 }, { "epoch": 2.45452880859375e-05, "model_forward_time": 0.025313377380371094, "step": 16086 }, { "epoch": 2.45452880859375e-05, "step": 16086, "training_step_time": 0.10474443435668945 }, { "epoch": 2.454681396484375e-05, "model_forward_time": 0.025262117385864258, "step": 16087 }, { "epoch": 2.454681396484375e-05, "step": 16087, "training_step_time": 0.11038088798522949 }, { "epoch": 2.454833984375e-05, "model_forward_time": 0.025281906127929688, "step": 16088 }, { "epoch": 2.454833984375e-05, "step": 16088, "training_step_time": 0.12487149238586426 }, { "epoch": 2.454986572265625e-05, "model_forward_time": 0.025722265243530273, "step": 16089 }, { "epoch": 2.454986572265625e-05, "step": 16089, "training_step_time": 0.10833883285522461 }, { "epoch": 2.45513916015625e-05, "grad_norm": 0.3049020767211914, "learning_rate": 4.812650621209209e-05, "loss": 0.0128, "step": 16090 }, { "epoch": 2.45513916015625e-05, "model_forward_time": 0.028198957443237305, "step": 16090 }, { "epoch": 2.45513916015625e-05, "step": 16090, "training_step_time": 0.14754581451416016 }, { "epoch": 2.455291748046875e-05, "model_forward_time": 0.024680614471435547, "step": 16091 }, { "epoch": 2.455291748046875e-05, "step": 16091, "training_step_time": 0.18529558181762695 }, { "epoch": 2.4554443359375e-05, "model_forward_time": 0.02428436279296875, "step": 16092 }, { "epoch": 2.4554443359375e-05, "step": 16092, "training_step_time": 0.224656343460083 }, { "epoch": 2.455596923828125e-05, "model_forward_time": 0.02434372901916504, "step": 16093 }, { "epoch": 2.455596923828125e-05, "step": 16093, "training_step_time": 0.12918615341186523 }, { "epoch": 2.45574951171875e-05, "model_forward_time": 0.024224281311035156, "step": 16094 }, { "epoch": 2.45574951171875e-05, "step": 16094, "training_step_time": 0.21254897117614746 }, { "epoch": 2.455902099609375e-05, "model_forward_time": 0.02459120750427246, "step": 16095 }, { "epoch": 2.455902099609375e-05, "step": 16095, "training_step_time": 0.1494593620300293 }, { "epoch": 2.4560546875e-05, "model_forward_time": 0.0243682861328125, "step": 16096 }, { "epoch": 2.4560546875e-05, "step": 16096, "training_step_time": 0.1251683235168457 }, { "epoch": 2.456207275390625e-05, "model_forward_time": 0.02452540397644043, "step": 16097 }, { "epoch": 2.456207275390625e-05, "step": 16097, "training_step_time": 0.14014649391174316 }, { "epoch": 2.45635986328125e-05, "model_forward_time": 0.024792909622192383, "step": 16098 }, { "epoch": 2.45635986328125e-05, "step": 16098, "training_step_time": 0.2160184383392334 }, { "epoch": 2.456512451171875e-05, "model_forward_time": 0.024190187454223633, "step": 16099 }, { "epoch": 2.456512451171875e-05, "step": 16099, "training_step_time": 0.10810136795043945 }, { "epoch": 2.4566650390625e-05, "grad_norm": 0.2999719977378845, "learning_rate": 4.8071430405444474e-05, "loss": 0.0142, "step": 16100 }, { "epoch": 2.4566650390625e-05, "model_forward_time": 0.024098873138427734, "step": 16100 }, { "epoch": 2.4566650390625e-05, "step": 16100, "training_step_time": 0.10593509674072266 }, { "epoch": 2.456817626953125e-05, "model_forward_time": 0.025705575942993164, "step": 16101 }, { "epoch": 2.456817626953125e-05, "step": 16101, "training_step_time": 0.11299729347229004 }, { "epoch": 2.45697021484375e-05, "model_forward_time": 0.02547454833984375, "step": 16102 }, { "epoch": 2.45697021484375e-05, "step": 16102, "training_step_time": 0.1086578369140625 }, { "epoch": 2.457122802734375e-05, "model_forward_time": 0.024875402450561523, "step": 16103 }, { "epoch": 2.457122802734375e-05, "step": 16103, "training_step_time": 0.10395574569702148 }, { "epoch": 2.457275390625e-05, "model_forward_time": 0.025222063064575195, "step": 16104 }, { "epoch": 2.457275390625e-05, "step": 16104, "training_step_time": 0.1070709228515625 }, { "epoch": 2.457427978515625e-05, "model_forward_time": 0.025050640106201172, "step": 16105 }, { "epoch": 2.457427978515625e-05, "step": 16105, "training_step_time": 0.10502123832702637 }, { "epoch": 2.45758056640625e-05, "model_forward_time": 0.02514481544494629, "step": 16106 }, { "epoch": 2.45758056640625e-05, "step": 16106, "training_step_time": 0.10582542419433594 }, { "epoch": 2.457733154296875e-05, "model_forward_time": 0.024735689163208008, "step": 16107 }, { "epoch": 2.457733154296875e-05, "step": 16107, "training_step_time": 0.10391736030578613 }, { "epoch": 2.4578857421875e-05, "model_forward_time": 0.027152538299560547, "step": 16108 }, { "epoch": 2.4578857421875e-05, "step": 16108, "training_step_time": 0.11144232749938965 }, { "epoch": 2.458038330078125e-05, "model_forward_time": 0.0252993106842041, "step": 16109 }, { "epoch": 2.458038330078125e-05, "step": 16109, "training_step_time": 0.10557746887207031 }, { "epoch": 2.45819091796875e-05, "grad_norm": 0.2928496301174164, "learning_rate": 4.801635694219079e-05, "loss": 0.0138, "step": 16110 }, { "epoch": 2.45819091796875e-05, "model_forward_time": 0.028730392456054688, "step": 16110 }, { "epoch": 2.45819091796875e-05, "step": 16110, "training_step_time": 0.10891079902648926 }, { "epoch": 2.458343505859375e-05, "model_forward_time": 0.0252683162689209, "step": 16111 }, { "epoch": 2.458343505859375e-05, "step": 16111, "training_step_time": 0.1058034896850586 }, { "epoch": 2.45849609375e-05, "model_forward_time": 0.025069713592529297, "step": 16112 }, { "epoch": 2.45849609375e-05, "step": 16112, "training_step_time": 0.14451146125793457 }, { "epoch": 2.458648681640625e-05, "model_forward_time": 0.02535080909729004, "step": 16113 }, { "epoch": 2.458648681640625e-05, "step": 16113, "training_step_time": 0.13773822784423828 }, { "epoch": 2.45880126953125e-05, "model_forward_time": 0.025204181671142578, "step": 16114 }, { "epoch": 2.45880126953125e-05, "step": 16114, "training_step_time": 0.1137995719909668 }, { "epoch": 2.458953857421875e-05, "model_forward_time": 0.02487349510192871, "step": 16115 }, { "epoch": 2.458953857421875e-05, "step": 16115, "training_step_time": 0.1198887825012207 }, { "epoch": 2.4591064453125e-05, "model_forward_time": 0.024190902709960938, "step": 16116 }, { "epoch": 2.4591064453125e-05, "step": 16116, "training_step_time": 0.12017822265625 }, { "epoch": 2.459259033203125e-05, "model_forward_time": 0.024064302444458008, "step": 16117 }, { "epoch": 2.459259033203125e-05, "step": 16117, "training_step_time": 0.18487882614135742 }, { "epoch": 2.45941162109375e-05, "model_forward_time": 0.02485513687133789, "step": 16118 }, { "epoch": 2.45941162109375e-05, "step": 16118, "training_step_time": 0.12007832527160645 }, { "epoch": 2.459564208984375e-05, "model_forward_time": 0.024615764617919922, "step": 16119 }, { "epoch": 2.459564208984375e-05, "step": 16119, "training_step_time": 0.10958337783813477 }, { "epoch": 2.459716796875e-05, "grad_norm": 0.33059120178222656, "learning_rate": 4.7961285889250475e-05, "loss": 0.0176, "step": 16120 }, { "epoch": 2.459716796875e-05, "model_forward_time": 0.025120973587036133, "step": 16120 }, { "epoch": 2.459716796875e-05, "step": 16120, "training_step_time": 0.10580110549926758 }, { "epoch": 2.459869384765625e-05, "model_forward_time": 0.02521371841430664, "step": 16121 }, { "epoch": 2.459869384765625e-05, "step": 16121, "training_step_time": 0.10853123664855957 }, { "epoch": 2.46002197265625e-05, "model_forward_time": 0.02423262596130371, "step": 16122 }, { "epoch": 2.46002197265625e-05, "step": 16122, "training_step_time": 0.10614967346191406 }, { "epoch": 2.460174560546875e-05, "model_forward_time": 0.025251150131225586, "step": 16123 }, { "epoch": 2.460174560546875e-05, "step": 16123, "training_step_time": 0.11067581176757812 }, { "epoch": 2.4603271484375e-05, "model_forward_time": 0.024424314498901367, "step": 16124 }, { "epoch": 2.4603271484375e-05, "step": 16124, "training_step_time": 0.10618782043457031 }, { "epoch": 2.460479736328125e-05, "model_forward_time": 0.023900747299194336, "step": 16125 }, { "epoch": 2.460479736328125e-05, "step": 16125, "training_step_time": 0.10705137252807617 }, { "epoch": 2.46063232421875e-05, "model_forward_time": 0.024680137634277344, "step": 16126 }, { "epoch": 2.46063232421875e-05, "step": 16126, "training_step_time": 0.10939192771911621 }, { "epoch": 2.460784912109375e-05, "model_forward_time": 0.02499699592590332, "step": 16127 }, { "epoch": 2.460784912109375e-05, "step": 16127, "training_step_time": 0.1062169075012207 }, { "epoch": 2.4609375e-05, "model_forward_time": 0.024322509765625, "step": 16128 }, { "epoch": 2.4609375e-05, "step": 16128, "training_step_time": 0.10481142997741699 }, { "epoch": 2.461090087890625e-05, "model_forward_time": 0.02375054359436035, "step": 16129 }, { "epoch": 2.461090087890625e-05, "step": 16129, "training_step_time": 0.10536932945251465 }, { "epoch": 2.46124267578125e-05, "grad_norm": 0.544135570526123, "learning_rate": 4.790621731354003e-05, "loss": 0.0164, "step": 16130 }, { "epoch": 2.46124267578125e-05, "model_forward_time": 0.024738073348999023, "step": 16130 }, { "epoch": 2.46124267578125e-05, "step": 16130, "training_step_time": 0.10423541069030762 }, { "epoch": 2.461395263671875e-05, "model_forward_time": 0.024841785430908203, "step": 16131 }, { "epoch": 2.461395263671875e-05, "step": 16131, "training_step_time": 0.10787677764892578 }, { "epoch": 2.4615478515625e-05, "model_forward_time": 0.02568793296813965, "step": 16132 }, { "epoch": 2.4615478515625e-05, "step": 16132, "training_step_time": 0.10695147514343262 }, { "epoch": 2.461700439453125e-05, "model_forward_time": 0.025285005569458008, "step": 16133 }, { "epoch": 2.461700439453125e-05, "step": 16133, "training_step_time": 0.10512733459472656 }, { "epoch": 2.46185302734375e-05, "model_forward_time": 0.025441884994506836, "step": 16134 }, { "epoch": 2.46185302734375e-05, "step": 16134, "training_step_time": 0.10851693153381348 }, { "epoch": 2.462005615234375e-05, "model_forward_time": 0.026524782180786133, "step": 16135 }, { "epoch": 2.462005615234375e-05, "step": 16135, "training_step_time": 0.11173510551452637 }, { "epoch": 2.462158203125e-05, "model_forward_time": 0.02531719207763672, "step": 16136 }, { "epoch": 2.462158203125e-05, "step": 16136, "training_step_time": 0.14018917083740234 }, { "epoch": 2.462310791015625e-05, "model_forward_time": 0.026691675186157227, "step": 16137 }, { "epoch": 2.462310791015625e-05, "step": 16137, "training_step_time": 0.18733811378479004 }, { "epoch": 2.46246337890625e-05, "model_forward_time": 0.02558588981628418, "step": 16138 }, { "epoch": 2.46246337890625e-05, "step": 16138, "training_step_time": 0.11570215225219727 }, { "epoch": 2.462615966796875e-05, "model_forward_time": 0.025201082229614258, "step": 16139 }, { "epoch": 2.462615966796875e-05, "step": 16139, "training_step_time": 0.12868285179138184 }, { "epoch": 2.4627685546875e-05, "grad_norm": 0.3250410854816437, "learning_rate": 4.785115128197298e-05, "loss": 0.0098, "step": 16140 }, { "epoch": 2.4627685546875e-05, "model_forward_time": 0.025620698928833008, "step": 16140 }, { "epoch": 2.4627685546875e-05, "step": 16140, "training_step_time": 0.11954641342163086 }, { "epoch": 2.462921142578125e-05, "model_forward_time": 0.026640892028808594, "step": 16141 }, { "epoch": 2.462921142578125e-05, "step": 16141, "training_step_time": 0.14858341217041016 }, { "epoch": 2.46307373046875e-05, "model_forward_time": 0.02458810806274414, "step": 16142 }, { "epoch": 2.46307373046875e-05, "step": 16142, "training_step_time": 0.1560044288635254 }, { "epoch": 2.463226318359375e-05, "model_forward_time": 0.023381948471069336, "step": 16143 }, { "epoch": 2.463226318359375e-05, "step": 16143, "training_step_time": 0.15671443939208984 }, { "epoch": 2.46337890625e-05, "model_forward_time": 0.025445938110351562, "step": 16144 }, { "epoch": 2.46337890625e-05, "step": 16144, "training_step_time": 0.1066136360168457 }, { "epoch": 2.463531494140625e-05, "model_forward_time": 0.025538206100463867, "step": 16145 }, { "epoch": 2.463531494140625e-05, "step": 16145, "training_step_time": 0.21464848518371582 }, { "epoch": 2.46368408203125e-05, "model_forward_time": 0.025081157684326172, "step": 16146 }, { "epoch": 2.46368408203125e-05, "step": 16146, "training_step_time": 0.11975431442260742 }, { "epoch": 2.463836669921875e-05, "model_forward_time": 0.026146650314331055, "step": 16147 }, { "epoch": 2.463836669921875e-05, "step": 16147, "training_step_time": 0.12375187873840332 }, { "epoch": 2.4639892578125e-05, "model_forward_time": 0.025850772857666016, "step": 16148 }, { "epoch": 2.4639892578125e-05, "step": 16148, "training_step_time": 0.11661672592163086 }, { "epoch": 2.464141845703125e-05, "model_forward_time": 0.02549600601196289, "step": 16149 }, { "epoch": 2.464141845703125e-05, "step": 16149, "training_step_time": 0.11336874961853027 }, { "epoch": 2.46429443359375e-05, "grad_norm": 0.22780925035476685, "learning_rate": 4.779608786145974e-05, "loss": 0.0165, "step": 16150 }, { "epoch": 2.46429443359375e-05, "model_forward_time": 0.026188135147094727, "step": 16150 }, { "epoch": 2.46429443359375e-05, "step": 16150, "training_step_time": 0.11464548110961914 }, { "epoch": 2.464447021484375e-05, "model_forward_time": 0.02583622932434082, "step": 16151 }, { "epoch": 2.464447021484375e-05, "step": 16151, "training_step_time": 0.10999727249145508 }, { "epoch": 2.464599609375e-05, "model_forward_time": 0.025386810302734375, "step": 16152 }, { "epoch": 2.464599609375e-05, "step": 16152, "training_step_time": 0.10936284065246582 }, { "epoch": 2.464752197265625e-05, "model_forward_time": 0.026138782501220703, "step": 16153 }, { "epoch": 2.464752197265625e-05, "step": 16153, "training_step_time": 0.11400222778320312 }, { "epoch": 2.46490478515625e-05, "model_forward_time": 0.02529168128967285, "step": 16154 }, { "epoch": 2.46490478515625e-05, "step": 16154, "training_step_time": 0.11008834838867188 }, { "epoch": 2.465057373046875e-05, "model_forward_time": 0.02493572235107422, "step": 16155 }, { "epoch": 2.465057373046875e-05, "step": 16155, "training_step_time": 0.11313605308532715 }, { "epoch": 2.4652099609375e-05, "model_forward_time": 0.025528430938720703, "step": 16156 }, { "epoch": 2.4652099609375e-05, "step": 16156, "training_step_time": 0.11084222793579102 }, { "epoch": 2.465362548828125e-05, "model_forward_time": 0.025157928466796875, "step": 16157 }, { "epoch": 2.465362548828125e-05, "step": 16157, "training_step_time": 0.10454368591308594 }, { "epoch": 2.46551513671875e-05, "model_forward_time": 0.025124788284301758, "step": 16158 }, { "epoch": 2.46551513671875e-05, "step": 16158, "training_step_time": 0.15764188766479492 }, { "epoch": 2.465667724609375e-05, "model_forward_time": 0.024669885635375977, "step": 16159 }, { "epoch": 2.465667724609375e-05, "step": 16159, "training_step_time": 0.13961529731750488 }, { "epoch": 2.4658203125e-05, "grad_norm": 0.24136672914028168, "learning_rate": 4.774102711890756e-05, "loss": 0.0139, "step": 16160 }, { "epoch": 2.4658203125e-05, "model_forward_time": 0.024538278579711914, "step": 16160 }, { "epoch": 2.4658203125e-05, "step": 16160, "training_step_time": 0.10979533195495605 }, { "epoch": 2.465972900390625e-05, "model_forward_time": 0.024740219116210938, "step": 16161 }, { "epoch": 2.465972900390625e-05, "step": 16161, "training_step_time": 0.11514472961425781 }, { "epoch": 2.46612548828125e-05, "model_forward_time": 0.0251772403717041, "step": 16162 }, { "epoch": 2.46612548828125e-05, "step": 16162, "training_step_time": 0.1228785514831543 }, { "epoch": 2.466278076171875e-05, "model_forward_time": 0.026154518127441406, "step": 16163 }, { "epoch": 2.466278076171875e-05, "step": 16163, "training_step_time": 0.10581207275390625 }, { "epoch": 2.4664306640625e-05, "model_forward_time": 0.025814056396484375, "step": 16164 }, { "epoch": 2.4664306640625e-05, "step": 16164, "training_step_time": 0.1941380500793457 }, { "epoch": 2.466583251953125e-05, "model_forward_time": 0.025127887725830078, "step": 16165 }, { "epoch": 2.466583251953125e-05, "step": 16165, "training_step_time": 0.10600805282592773 }, { "epoch": 2.46673583984375e-05, "model_forward_time": 0.025033235549926758, "step": 16166 }, { "epoch": 2.46673583984375e-05, "step": 16166, "training_step_time": 0.10438394546508789 }, { "epoch": 2.466888427734375e-05, "model_forward_time": 0.02610039710998535, "step": 16167 }, { "epoch": 2.466888427734375e-05, "step": 16167, "training_step_time": 0.10724472999572754 }, { "epoch": 2.467041015625e-05, "model_forward_time": 0.02712869644165039, "step": 16168 }, { "epoch": 2.467041015625e-05, "step": 16168, "training_step_time": 0.10924911499023438 }, { "epoch": 2.467193603515625e-05, "model_forward_time": 0.026007652282714844, "step": 16169 }, { "epoch": 2.467193603515625e-05, "step": 16169, "training_step_time": 0.10770177841186523 }, { "epoch": 2.46734619140625e-05, "grad_norm": 0.4083085060119629, "learning_rate": 4.7685969121220456e-05, "loss": 0.0118, "step": 16170 }, { "epoch": 2.46734619140625e-05, "model_forward_time": 0.025290966033935547, "step": 16170 }, { "epoch": 2.46734619140625e-05, "step": 16170, "training_step_time": 0.10418009757995605 }, { "epoch": 2.467498779296875e-05, "model_forward_time": 0.02587413787841797, "step": 16171 }, { "epoch": 2.467498779296875e-05, "step": 16171, "training_step_time": 0.10625100135803223 }, { "epoch": 2.4676513671875e-05, "model_forward_time": 0.025673627853393555, "step": 16172 }, { "epoch": 2.4676513671875e-05, "step": 16172, "training_step_time": 0.1051933765411377 }, { "epoch": 2.467803955078125e-05, "model_forward_time": 0.025649070739746094, "step": 16173 }, { "epoch": 2.467803955078125e-05, "step": 16173, "training_step_time": 0.10828709602355957 }, { "epoch": 2.46795654296875e-05, "model_forward_time": 0.025463104248046875, "step": 16174 }, { "epoch": 2.46795654296875e-05, "step": 16174, "training_step_time": 0.1094357967376709 }, { "epoch": 2.468109130859375e-05, "model_forward_time": 0.025511741638183594, "step": 16175 }, { "epoch": 2.468109130859375e-05, "step": 16175, "training_step_time": 0.11036467552185059 }, { "epoch": 2.46826171875e-05, "model_forward_time": 0.025599002838134766, "step": 16176 }, { "epoch": 2.46826171875e-05, "step": 16176, "training_step_time": 0.10901355743408203 }, { "epoch": 2.468414306640625e-05, "model_forward_time": 0.025568485260009766, "step": 16177 }, { "epoch": 2.468414306640625e-05, "step": 16177, "training_step_time": 0.10729050636291504 }, { "epoch": 2.46856689453125e-05, "model_forward_time": 0.02558445930480957, "step": 16178 }, { "epoch": 2.46856689453125e-05, "step": 16178, "training_step_time": 0.10770964622497559 }, { "epoch": 2.468719482421875e-05, "model_forward_time": 0.02515268325805664, "step": 16179 }, { "epoch": 2.468719482421875e-05, "step": 16179, "training_step_time": 0.11266899108886719 }, { "epoch": 2.4688720703125e-05, "grad_norm": 0.2024080604314804, "learning_rate": 4.7630913935299066e-05, "loss": 0.0076, "step": 16180 }, { "epoch": 2.4688720703125e-05, "model_forward_time": 0.02666330337524414, "step": 16180 }, { "epoch": 2.4688720703125e-05, "step": 16180, "training_step_time": 0.10637784004211426 }, { "epoch": 2.469024658203125e-05, "model_forward_time": 0.025818824768066406, "step": 16181 }, { "epoch": 2.469024658203125e-05, "step": 16181, "training_step_time": 0.10465383529663086 }, { "epoch": 2.46917724609375e-05, "model_forward_time": 0.025391340255737305, "step": 16182 }, { "epoch": 2.46917724609375e-05, "step": 16182, "training_step_time": 0.1235666275024414 }, { "epoch": 2.469329833984375e-05, "model_forward_time": 0.025562524795532227, "step": 16183 }, { "epoch": 2.469329833984375e-05, "step": 16183, "training_step_time": 0.12334942817687988 }, { "epoch": 2.469482421875e-05, "model_forward_time": 0.02539825439453125, "step": 16184 }, { "epoch": 2.469482421875e-05, "step": 16184, "training_step_time": 0.11265730857849121 }, { "epoch": 2.469635009765625e-05, "model_forward_time": 0.025794029235839844, "step": 16185 }, { "epoch": 2.469635009765625e-05, "step": 16185, "training_step_time": 0.12052798271179199 }, { "epoch": 2.46978759765625e-05, "model_forward_time": 0.029140949249267578, "step": 16186 }, { "epoch": 2.46978759765625e-05, "step": 16186, "training_step_time": 0.11626005172729492 }, { "epoch": 2.469940185546875e-05, "model_forward_time": 0.027049779891967773, "step": 16187 }, { "epoch": 2.469940185546875e-05, "step": 16187, "training_step_time": 0.19759511947631836 }, { "epoch": 2.4700927734375e-05, "model_forward_time": 0.02585911750793457, "step": 16188 }, { "epoch": 2.4700927734375e-05, "step": 16188, "training_step_time": 0.1720418930053711 }, { "epoch": 2.470245361328125e-05, "model_forward_time": 0.025360822677612305, "step": 16189 }, { "epoch": 2.470245361328125e-05, "step": 16189, "training_step_time": 0.1560804843902588 }, { "epoch": 2.47039794921875e-05, "grad_norm": 0.1812436431646347, "learning_rate": 4.7575861628040635e-05, "loss": 0.0112, "step": 16190 }, { "epoch": 2.47039794921875e-05, "model_forward_time": 0.024546384811401367, "step": 16190 }, { "epoch": 2.47039794921875e-05, "step": 16190, "training_step_time": 0.1588447093963623 }, { "epoch": 2.470550537109375e-05, "model_forward_time": 0.02393651008605957, "step": 16191 }, { "epoch": 2.470550537109375e-05, "step": 16191, "training_step_time": 0.13080954551696777 }, { "epoch": 2.470703125e-05, "model_forward_time": 0.024852275848388672, "step": 16192 }, { "epoch": 2.470703125e-05, "step": 16192, "training_step_time": 0.11185646057128906 }, { "epoch": 2.470855712890625e-05, "model_forward_time": 0.026625633239746094, "step": 16193 }, { "epoch": 2.470855712890625e-05, "step": 16193, "training_step_time": 0.10564064979553223 }, { "epoch": 2.47100830078125e-05, "model_forward_time": 0.02526068687438965, "step": 16194 }, { "epoch": 2.47100830078125e-05, "step": 16194, "training_step_time": 0.10600495338439941 }, { "epoch": 2.471160888671875e-05, "model_forward_time": 0.02494978904724121, "step": 16195 }, { "epoch": 2.471160888671875e-05, "step": 16195, "training_step_time": 0.10504436492919922 }, { "epoch": 2.4713134765625e-05, "model_forward_time": 0.024843931198120117, "step": 16196 }, { "epoch": 2.4713134765625e-05, "step": 16196, "training_step_time": 0.10647225379943848 }, { "epoch": 2.471466064453125e-05, "model_forward_time": 0.02707505226135254, "step": 16197 }, { "epoch": 2.471466064453125e-05, "step": 16197, "training_step_time": 0.1067650318145752 }, { "epoch": 2.47161865234375e-05, "model_forward_time": 0.024207592010498047, "step": 16198 }, { "epoch": 2.47161865234375e-05, "step": 16198, "training_step_time": 0.10439109802246094 }, { "epoch": 2.471771240234375e-05, "model_forward_time": 0.025090456008911133, "step": 16199 }, { "epoch": 2.471771240234375e-05, "step": 16199, "training_step_time": 0.1062307357788086 }, { "epoch": 2.471923828125e-05, "grad_norm": 0.25928398966789246, "learning_rate": 4.7520812266338885e-05, "loss": 0.0125, "step": 16200 }, { "epoch": 2.471923828125e-05, "model_forward_time": 0.02549576759338379, "step": 16200 }, { "epoch": 2.471923828125e-05, "step": 16200, "training_step_time": 0.10840487480163574 }, { "epoch": 2.472076416015625e-05, "model_forward_time": 0.025158166885375977, "step": 16201 }, { "epoch": 2.472076416015625e-05, "step": 16201, "training_step_time": 0.11226058006286621 }, { "epoch": 2.47222900390625e-05, "model_forward_time": 0.02422642707824707, "step": 16202 }, { "epoch": 2.47222900390625e-05, "step": 16202, "training_step_time": 0.11089587211608887 }, { "epoch": 2.472381591796875e-05, "model_forward_time": 0.025419235229492188, "step": 16203 }, { "epoch": 2.472381591796875e-05, "step": 16203, "training_step_time": 0.10629701614379883 }, { "epoch": 2.4725341796875e-05, "model_forward_time": 0.025270700454711914, "step": 16204 }, { "epoch": 2.4725341796875e-05, "step": 16204, "training_step_time": 0.10700774192810059 }, { "epoch": 2.472686767578125e-05, "model_forward_time": 0.025156021118164062, "step": 16205 }, { "epoch": 2.472686767578125e-05, "step": 16205, "training_step_time": 0.15929818153381348 }, { "epoch": 2.47283935546875e-05, "model_forward_time": 0.024748563766479492, "step": 16206 }, { "epoch": 2.47283935546875e-05, "step": 16206, "training_step_time": 0.13684296607971191 }, { "epoch": 2.472991943359375e-05, "model_forward_time": 0.024466753005981445, "step": 16207 }, { "epoch": 2.472991943359375e-05, "step": 16207, "training_step_time": 0.10899686813354492 }, { "epoch": 2.47314453125e-05, "model_forward_time": 0.025307655334472656, "step": 16208 }, { "epoch": 2.47314453125e-05, "step": 16208, "training_step_time": 0.12186980247497559 }, { "epoch": 2.473297119140625e-05, "model_forward_time": 0.02525639533996582, "step": 16209 }, { "epoch": 2.473297119140625e-05, "step": 16209, "training_step_time": 0.1183323860168457 }, { "epoch": 2.47344970703125e-05, "grad_norm": 0.20085115730762482, "learning_rate": 4.746576591708403e-05, "loss": 0.0173, "step": 16210 }, { "epoch": 2.47344970703125e-05, "model_forward_time": 0.024528026580810547, "step": 16210 }, { "epoch": 2.47344970703125e-05, "step": 16210, "training_step_time": 0.1824934482574463 }, { "epoch": 2.473602294921875e-05, "model_forward_time": 0.024796724319458008, "step": 16211 }, { "epoch": 2.473602294921875e-05, "step": 16211, "training_step_time": 0.12742853164672852 }, { "epoch": 2.4737548828125e-05, "model_forward_time": 0.023262739181518555, "step": 16212 }, { "epoch": 2.4737548828125e-05, "step": 16212, "training_step_time": 0.1163945198059082 }, { "epoch": 2.473907470703125e-05, "model_forward_time": 0.02364373207092285, "step": 16213 }, { "epoch": 2.473907470703125e-05, "step": 16213, "training_step_time": 0.11248302459716797 }, { "epoch": 2.47406005859375e-05, "model_forward_time": 0.02513265609741211, "step": 16214 }, { "epoch": 2.47406005859375e-05, "step": 16214, "training_step_time": 0.1156315803527832 }, { "epoch": 2.474212646484375e-05, "model_forward_time": 0.025038957595825195, "step": 16215 }, { "epoch": 2.474212646484375e-05, "step": 16215, "training_step_time": 0.11132192611694336 }, { "epoch": 2.474365234375e-05, "model_forward_time": 0.024923086166381836, "step": 16216 }, { "epoch": 2.474365234375e-05, "step": 16216, "training_step_time": 0.11100625991821289 }, { "epoch": 2.474517822265625e-05, "model_forward_time": 0.025243043899536133, "step": 16217 }, { "epoch": 2.474517822265625e-05, "step": 16217, "training_step_time": 0.10742473602294922 }, { "epoch": 2.47467041015625e-05, "model_forward_time": 0.02485966682434082, "step": 16218 }, { "epoch": 2.47467041015625e-05, "step": 16218, "training_step_time": 0.10725021362304688 }, { "epoch": 2.474822998046875e-05, "model_forward_time": 0.025301694869995117, "step": 16219 }, { "epoch": 2.474822998046875e-05, "step": 16219, "training_step_time": 0.10614800453186035 }, { "epoch": 2.4749755859375e-05, "grad_norm": 0.19542065262794495, "learning_rate": 4.741072264716252e-05, "loss": 0.0091, "step": 16220 }, { "epoch": 2.4749755859375e-05, "model_forward_time": 0.024325132369995117, "step": 16220 }, { "epoch": 2.4749755859375e-05, "step": 16220, "training_step_time": 0.10867118835449219 }, { "epoch": 2.475128173828125e-05, "model_forward_time": 0.024011611938476562, "step": 16221 }, { "epoch": 2.475128173828125e-05, "step": 16221, "training_step_time": 0.10500812530517578 }, { "epoch": 2.47528076171875e-05, "model_forward_time": 0.02426433563232422, "step": 16222 }, { "epoch": 2.47528076171875e-05, "step": 16222, "training_step_time": 0.10507440567016602 }, { "epoch": 2.475433349609375e-05, "model_forward_time": 0.02481698989868164, "step": 16223 }, { "epoch": 2.475433349609375e-05, "step": 16223, "training_step_time": 0.10480046272277832 }, { "epoch": 2.4755859375e-05, "model_forward_time": 0.024898290634155273, "step": 16224 }, { "epoch": 2.4755859375e-05, "step": 16224, "training_step_time": 0.10490274429321289 }, { "epoch": 2.475738525390625e-05, "model_forward_time": 0.02657914161682129, "step": 16225 }, { "epoch": 2.475738525390625e-05, "step": 16225, "training_step_time": 0.11086845397949219 }, { "epoch": 2.47589111328125e-05, "model_forward_time": 0.024889707565307617, "step": 16226 }, { "epoch": 2.47589111328125e-05, "step": 16226, "training_step_time": 0.11124300956726074 }, { "epoch": 2.476043701171875e-05, "model_forward_time": 0.02503037452697754, "step": 16227 }, { "epoch": 2.476043701171875e-05, "step": 16227, "training_step_time": 0.10475730895996094 }, { "epoch": 2.4761962890625e-05, "model_forward_time": 0.026055574417114258, "step": 16228 }, { "epoch": 2.4761962890625e-05, "step": 16228, "training_step_time": 0.10592436790466309 }, { "epoch": 2.476348876953125e-05, "model_forward_time": 0.024195194244384766, "step": 16229 }, { "epoch": 2.476348876953125e-05, "step": 16229, "training_step_time": 0.19240927696228027 }, { "epoch": 2.47650146484375e-05, "grad_norm": 0.18954356014728546, "learning_rate": 4.735568252345718e-05, "loss": 0.0205, "step": 16230 }, { "epoch": 2.47650146484375e-05, "model_forward_time": 0.024260282516479492, "step": 16230 }, { "epoch": 2.47650146484375e-05, "step": 16230, "training_step_time": 0.1103520393371582 }, { "epoch": 2.476654052734375e-05, "model_forward_time": 0.024547815322875977, "step": 16231 }, { "epoch": 2.476654052734375e-05, "step": 16231, "training_step_time": 0.20546698570251465 }, { "epoch": 2.476806640625e-05, "model_forward_time": 0.024348974227905273, "step": 16232 }, { "epoch": 2.476806640625e-05, "step": 16232, "training_step_time": 0.1531684398651123 }, { "epoch": 2.476959228515625e-05, "model_forward_time": 0.02470707893371582, "step": 16233 }, { "epoch": 2.476959228515625e-05, "step": 16233, "training_step_time": 0.1925342082977295 }, { "epoch": 2.47711181640625e-05, "model_forward_time": 0.02418684959411621, "step": 16234 }, { "epoch": 2.47711181640625e-05, "step": 16234, "training_step_time": 0.1765756607055664 }, { "epoch": 2.477264404296875e-05, "model_forward_time": 0.024838924407958984, "step": 16235 }, { "epoch": 2.477264404296875e-05, "step": 16235, "training_step_time": 0.14877724647521973 }, { "epoch": 2.4774169921875e-05, "model_forward_time": 0.02419567108154297, "step": 16236 }, { "epoch": 2.4774169921875e-05, "step": 16236, "training_step_time": 0.2134108543395996 }, { "epoch": 2.477569580078125e-05, "model_forward_time": 0.025240182876586914, "step": 16237 }, { "epoch": 2.477569580078125e-05, "step": 16237, "training_step_time": 0.1162571907043457 }, { "epoch": 2.47772216796875e-05, "model_forward_time": 0.02402663230895996, "step": 16238 }, { "epoch": 2.47772216796875e-05, "step": 16238, "training_step_time": 0.10306692123413086 }, { "epoch": 2.477874755859375e-05, "model_forward_time": 0.025136470794677734, "step": 16239 }, { "epoch": 2.477874755859375e-05, "step": 16239, "training_step_time": 0.1045379638671875 }, { "epoch": 2.47802734375e-05, "grad_norm": 0.46666330099105835, "learning_rate": 4.7300645612846907e-05, "loss": 0.0177, "step": 16240 }, { "epoch": 2.47802734375e-05, "model_forward_time": 0.02404022216796875, "step": 16240 }, { "epoch": 2.47802734375e-05, "step": 16240, "training_step_time": 0.11048674583435059 }, { "epoch": 2.478179931640625e-05, "model_forward_time": 0.025072336196899414, "step": 16241 }, { "epoch": 2.478179931640625e-05, "step": 16241, "training_step_time": 0.10549473762512207 }, { "epoch": 2.47833251953125e-05, "model_forward_time": 0.024855375289916992, "step": 16242 }, { "epoch": 2.47833251953125e-05, "step": 16242, "training_step_time": 0.10568571090698242 }, { "epoch": 2.478485107421875e-05, "model_forward_time": 0.02508711814880371, "step": 16243 }, { "epoch": 2.478485107421875e-05, "step": 16243, "training_step_time": 0.10364198684692383 }, { "epoch": 2.4786376953125e-05, "model_forward_time": 0.024747371673583984, "step": 16244 }, { "epoch": 2.4786376953125e-05, "step": 16244, "training_step_time": 0.10334634780883789 }, { "epoch": 2.478790283203125e-05, "model_forward_time": 0.025084972381591797, "step": 16245 }, { "epoch": 2.478790283203125e-05, "step": 16245, "training_step_time": 0.10556483268737793 }, { "epoch": 2.47894287109375e-05, "model_forward_time": 0.025446653366088867, "step": 16246 }, { "epoch": 2.47894287109375e-05, "step": 16246, "training_step_time": 0.10393023490905762 }, { "epoch": 2.479095458984375e-05, "model_forward_time": 0.025079011917114258, "step": 16247 }, { "epoch": 2.479095458984375e-05, "step": 16247, "training_step_time": 0.10346174240112305 }, { "epoch": 2.479248046875e-05, "model_forward_time": 0.025050640106201172, "step": 16248 }, { "epoch": 2.479248046875e-05, "step": 16248, "training_step_time": 0.10455822944641113 }, { "epoch": 2.479400634765625e-05, "model_forward_time": 0.02494955062866211, "step": 16249 }, { "epoch": 2.479400634765625e-05, "step": 16249, "training_step_time": 0.10714197158813477 }, { "epoch": 2.47955322265625e-05, "grad_norm": 0.2493179738521576, "learning_rate": 4.7245611982206724e-05, "loss": 0.0144, "step": 16250 }, { "epoch": 2.47955322265625e-05, "model_forward_time": 0.025229454040527344, "step": 16250 }, { "epoch": 2.47955322265625e-05, "step": 16250, "training_step_time": 0.18499398231506348 }, { "epoch": 2.479705810546875e-05, "model_forward_time": 0.02404928207397461, "step": 16251 }, { "epoch": 2.479705810546875e-05, "step": 16251, "training_step_time": 0.14815235137939453 }, { "epoch": 2.4798583984375e-05, "model_forward_time": 0.02343606948852539, "step": 16252 }, { "epoch": 2.4798583984375e-05, "step": 16252, "training_step_time": 0.10651707649230957 }, { "epoch": 2.480010986328125e-05, "model_forward_time": 0.02485060691833496, "step": 16253 }, { "epoch": 2.480010986328125e-05, "step": 16253, "training_step_time": 0.13423585891723633 }, { "epoch": 2.48016357421875e-05, "model_forward_time": 0.024404525756835938, "step": 16254 }, { "epoch": 2.48016357421875e-05, "step": 16254, "training_step_time": 0.16561031341552734 }, { "epoch": 2.480316162109375e-05, "model_forward_time": 0.024136066436767578, "step": 16255 }, { "epoch": 2.480316162109375e-05, "step": 16255, "training_step_time": 0.14639639854431152 }, { "epoch": 2.48046875e-05, "model_forward_time": 0.024193525314331055, "step": 16256 }, { "epoch": 2.48046875e-05, "step": 16256, "training_step_time": 0.12460637092590332 }, { "epoch": 2.480621337890625e-05, "model_forward_time": 0.024394512176513672, "step": 16257 }, { "epoch": 2.480621337890625e-05, "step": 16257, "training_step_time": 0.12795114517211914 }, { "epoch": 2.48077392578125e-05, "model_forward_time": 0.025166034698486328, "step": 16258 }, { "epoch": 2.48077392578125e-05, "step": 16258, "training_step_time": 0.11902475357055664 }, { "epoch": 2.480926513671875e-05, "model_forward_time": 0.025125980377197266, "step": 16259 }, { "epoch": 2.480926513671875e-05, "step": 16259, "training_step_time": 0.1196904182434082 }, { "epoch": 2.4810791015625e-05, "grad_norm": 0.41187161207199097, "learning_rate": 4.7190581698407725e-05, "loss": 0.0178, "step": 16260 }, { "epoch": 2.4810791015625e-05, "model_forward_time": 0.024883031845092773, "step": 16260 }, { "epoch": 2.4810791015625e-05, "step": 16260, "training_step_time": 0.11699986457824707 }, { "epoch": 2.481231689453125e-05, "model_forward_time": 0.025297880172729492, "step": 16261 }, { "epoch": 2.481231689453125e-05, "step": 16261, "training_step_time": 0.11632728576660156 }, { "epoch": 2.48138427734375e-05, "model_forward_time": 0.025328397750854492, "step": 16262 }, { "epoch": 2.48138427734375e-05, "step": 16262, "training_step_time": 0.1151590347290039 }, { "epoch": 2.481536865234375e-05, "model_forward_time": 0.025298357009887695, "step": 16263 }, { "epoch": 2.481536865234375e-05, "step": 16263, "training_step_time": 0.11055874824523926 }, { "epoch": 2.481689453125e-05, "model_forward_time": 0.023949623107910156, "step": 16264 }, { "epoch": 2.481689453125e-05, "step": 16264, "training_step_time": 0.10680747032165527 }, { "epoch": 2.481842041015625e-05, "model_forward_time": 0.024205446243286133, "step": 16265 }, { "epoch": 2.481842041015625e-05, "step": 16265, "training_step_time": 0.10588288307189941 }, { "epoch": 2.48199462890625e-05, "model_forward_time": 0.025264978408813477, "step": 16266 }, { "epoch": 2.48199462890625e-05, "step": 16266, "training_step_time": 0.1061089038848877 }, { "epoch": 2.482147216796875e-05, "model_forward_time": 0.025129079818725586, "step": 16267 }, { "epoch": 2.482147216796875e-05, "step": 16267, "training_step_time": 0.10616922378540039 }, { "epoch": 2.4822998046875e-05, "model_forward_time": 0.025879859924316406, "step": 16268 }, { "epoch": 2.4822998046875e-05, "step": 16268, "training_step_time": 0.10933637619018555 }, { "epoch": 2.482452392578125e-05, "model_forward_time": 0.02556324005126953, "step": 16269 }, { "epoch": 2.482452392578125e-05, "step": 16269, "training_step_time": 0.10754871368408203 }, { "epoch": 2.48260498046875e-05, "grad_norm": 0.29778537154197693, "learning_rate": 4.713555482831688e-05, "loss": 0.0116, "step": 16270 }, { "epoch": 2.48260498046875e-05, "model_forward_time": 0.02476024627685547, "step": 16270 }, { "epoch": 2.48260498046875e-05, "step": 16270, "training_step_time": 0.10511088371276855 }, { "epoch": 2.482757568359375e-05, "model_forward_time": 0.024984359741210938, "step": 16271 }, { "epoch": 2.482757568359375e-05, "step": 16271, "training_step_time": 0.10584664344787598 }, { "epoch": 2.48291015625e-05, "model_forward_time": 0.024882793426513672, "step": 16272 }, { "epoch": 2.48291015625e-05, "step": 16272, "training_step_time": 0.1046748161315918 }, { "epoch": 2.483062744140625e-05, "model_forward_time": 0.025183677673339844, "step": 16273 }, { "epoch": 2.483062744140625e-05, "step": 16273, "training_step_time": 0.10336112976074219 }, { "epoch": 2.48321533203125e-05, "model_forward_time": 0.024118423461914062, "step": 16274 }, { "epoch": 2.48321533203125e-05, "step": 16274, "training_step_time": 0.13229107856750488 }, { "epoch": 2.483367919921875e-05, "model_forward_time": 0.025916576385498047, "step": 16275 }, { "epoch": 2.483367919921875e-05, "step": 16275, "training_step_time": 0.12342047691345215 }, { "epoch": 2.4835205078125e-05, "model_forward_time": 0.02852344512939453, "step": 16276 }, { "epoch": 2.4835205078125e-05, "step": 16276, "training_step_time": 0.1071012020111084 }, { "epoch": 2.483673095703125e-05, "model_forward_time": 0.026875019073486328, "step": 16277 }, { "epoch": 2.483673095703125e-05, "step": 16277, "training_step_time": 0.13825321197509766 }, { "epoch": 2.48382568359375e-05, "model_forward_time": 0.026206493377685547, "step": 16278 }, { "epoch": 2.48382568359375e-05, "step": 16278, "training_step_time": 0.19133424758911133 }, { "epoch": 2.483978271484375e-05, "model_forward_time": 0.02421116828918457, "step": 16279 }, { "epoch": 2.483978271484375e-05, "step": 16279, "training_step_time": 0.15685462951660156 }, { "epoch": 2.484130859375e-05, "grad_norm": 0.24624891579151154, "learning_rate": 4.708053143879701e-05, "loss": 0.016, "step": 16280 }, { "epoch": 2.484130859375e-05, "model_forward_time": 0.023868560791015625, "step": 16280 }, { "epoch": 2.484130859375e-05, "step": 16280, "training_step_time": 0.13067984580993652 }, { "epoch": 2.484283447265625e-05, "model_forward_time": 0.0245361328125, "step": 16281 }, { "epoch": 2.484283447265625e-05, "step": 16281, "training_step_time": 0.12362837791442871 }, { "epoch": 2.48443603515625e-05, "model_forward_time": 0.02461719512939453, "step": 16282 }, { "epoch": 2.48443603515625e-05, "step": 16282, "training_step_time": 0.20903635025024414 }, { "epoch": 2.484588623046875e-05, "model_forward_time": 0.02420830726623535, "step": 16283 }, { "epoch": 2.484588623046875e-05, "step": 16283, "training_step_time": 0.11212754249572754 }, { "epoch": 2.4847412109375e-05, "model_forward_time": 0.02399158477783203, "step": 16284 }, { "epoch": 2.4847412109375e-05, "step": 16284, "training_step_time": 0.10303592681884766 }, { "epoch": 2.484893798828125e-05, "model_forward_time": 0.024989604949951172, "step": 16285 }, { "epoch": 2.484893798828125e-05, "step": 16285, "training_step_time": 0.10585665702819824 }, { "epoch": 2.48504638671875e-05, "model_forward_time": 0.025114774703979492, "step": 16286 }, { "epoch": 2.48504638671875e-05, "step": 16286, "training_step_time": 0.10547614097595215 }, { "epoch": 2.485198974609375e-05, "model_forward_time": 0.024865150451660156, "step": 16287 }, { "epoch": 2.485198974609375e-05, "step": 16287, "training_step_time": 0.10534405708312988 }, { "epoch": 2.4853515625e-05, "model_forward_time": 0.024904966354370117, "step": 16288 }, { "epoch": 2.4853515625e-05, "step": 16288, "training_step_time": 0.10594511032104492 }, { "epoch": 2.485504150390625e-05, "model_forward_time": 0.025199174880981445, "step": 16289 }, { "epoch": 2.485504150390625e-05, "step": 16289, "training_step_time": 0.10417437553405762 }, { "epoch": 2.48565673828125e-05, "grad_norm": 0.1607915312051773, "learning_rate": 4.702551159670672e-05, "loss": 0.0117, "step": 16290 }, { "epoch": 2.48565673828125e-05, "model_forward_time": 0.024855852127075195, "step": 16290 }, { "epoch": 2.48565673828125e-05, "step": 16290, "training_step_time": 0.10786008834838867 }, { "epoch": 2.485809326171875e-05, "model_forward_time": 0.024591684341430664, "step": 16291 }, { "epoch": 2.485809326171875e-05, "step": 16291, "training_step_time": 0.10679364204406738 }, { "epoch": 2.4859619140625e-05, "model_forward_time": 0.023889541625976562, "step": 16292 }, { "epoch": 2.4859619140625e-05, "step": 16292, "training_step_time": 0.10312747955322266 }, { "epoch": 2.486114501953125e-05, "model_forward_time": 0.024994373321533203, "step": 16293 }, { "epoch": 2.486114501953125e-05, "step": 16293, "training_step_time": 0.10481023788452148 }, { "epoch": 2.48626708984375e-05, "model_forward_time": 0.025351524353027344, "step": 16294 }, { "epoch": 2.48626708984375e-05, "step": 16294, "training_step_time": 0.10539054870605469 }, { "epoch": 2.486419677734375e-05, "model_forward_time": 0.025487899780273438, "step": 16295 }, { "epoch": 2.486419677734375e-05, "step": 16295, "training_step_time": 0.10578775405883789 }, { "epoch": 2.486572265625e-05, "model_forward_time": 0.024945497512817383, "step": 16296 }, { "epoch": 2.486572265625e-05, "step": 16296, "training_step_time": 0.19185590744018555 }, { "epoch": 2.486724853515625e-05, "model_forward_time": 0.024527788162231445, "step": 16297 }, { "epoch": 2.486724853515625e-05, "step": 16297, "training_step_time": 0.13971376419067383 }, { "epoch": 2.48687744140625e-05, "model_forward_time": 0.02409672737121582, "step": 16298 }, { "epoch": 2.48687744140625e-05, "step": 16298, "training_step_time": 0.11168146133422852 }, { "epoch": 2.487030029296875e-05, "model_forward_time": 0.02462482452392578, "step": 16299 }, { "epoch": 2.487030029296875e-05, "step": 16299, "training_step_time": 0.11505866050720215 }, { "epoch": 2.4871826171875e-05, "grad_norm": 0.15231294929981232, "learning_rate": 4.697049536890033e-05, "loss": 0.0067, "step": 16300 }, { "epoch": 2.4871826171875e-05, "model_forward_time": 0.025057315826416016, "step": 16300 }, { "epoch": 2.4871826171875e-05, "step": 16300, "training_step_time": 0.11022377014160156 }, { "epoch": 2.487335205078125e-05, "model_forward_time": 0.02479267120361328, "step": 16301 }, { "epoch": 2.487335205078125e-05, "step": 16301, "training_step_time": 0.10480833053588867 }, { "epoch": 2.48748779296875e-05, "model_forward_time": 0.024980783462524414, "step": 16302 }, { "epoch": 2.48748779296875e-05, "step": 16302, "training_step_time": 0.1955420970916748 }, { "epoch": 2.487640380859375e-05, "model_forward_time": 0.02412867546081543, "step": 16303 }, { "epoch": 2.487640380859375e-05, "step": 16303, "training_step_time": 0.10394859313964844 }, { "epoch": 2.48779296875e-05, "model_forward_time": 0.024408340454101562, "step": 16304 }, { "epoch": 2.48779296875e-05, "step": 16304, "training_step_time": 0.10170817375183105 }, { "epoch": 2.487945556640625e-05, "model_forward_time": 0.024803876876831055, "step": 16305 }, { "epoch": 2.487945556640625e-05, "step": 16305, "training_step_time": 0.1048121452331543 }, { "epoch": 2.48809814453125e-05, "model_forward_time": 0.02779698371887207, "step": 16306 }, { "epoch": 2.48809814453125e-05, "step": 16306, "training_step_time": 0.10891389846801758 }, { "epoch": 2.488250732421875e-05, "model_forward_time": 0.02533578872680664, "step": 16307 }, { "epoch": 2.488250732421875e-05, "step": 16307, "training_step_time": 0.10491514205932617 }, { "epoch": 2.4884033203125e-05, "model_forward_time": 0.025112628936767578, "step": 16308 }, { "epoch": 2.4884033203125e-05, "step": 16308, "training_step_time": 0.10504293441772461 }, { "epoch": 2.488555908203125e-05, "model_forward_time": 0.025204896926879883, "step": 16309 }, { "epoch": 2.488555908203125e-05, "step": 16309, "training_step_time": 0.10461282730102539 }, { "epoch": 2.48870849609375e-05, "grad_norm": 0.3484882712364197, "learning_rate": 4.691548282222771e-05, "loss": 0.0161, "step": 16310 }, { "epoch": 2.48870849609375e-05, "model_forward_time": 0.024941682815551758, "step": 16310 }, { "epoch": 2.48870849609375e-05, "step": 16310, "training_step_time": 0.10410261154174805 }, { "epoch": 2.488861083984375e-05, "model_forward_time": 0.025418996810913086, "step": 16311 }, { "epoch": 2.488861083984375e-05, "step": 16311, "training_step_time": 0.10486364364624023 }, { "epoch": 2.489013671875e-05, "model_forward_time": 0.02506732940673828, "step": 16312 }, { "epoch": 2.489013671875e-05, "step": 16312, "training_step_time": 0.10433101654052734 }, { "epoch": 2.489166259765625e-05, "model_forward_time": 0.025041580200195312, "step": 16313 }, { "epoch": 2.489166259765625e-05, "step": 16313, "training_step_time": 0.105072021484375 }, { "epoch": 2.48931884765625e-05, "model_forward_time": 0.02478504180908203, "step": 16314 }, { "epoch": 2.48931884765625e-05, "step": 16314, "training_step_time": 0.10431265830993652 }, { "epoch": 2.489471435546875e-05, "model_forward_time": 0.025422096252441406, "step": 16315 }, { "epoch": 2.489471435546875e-05, "step": 16315, "training_step_time": 0.10451745986938477 }, { "epoch": 2.4896240234375e-05, "model_forward_time": 0.024912118911743164, "step": 16316 }, { "epoch": 2.4896240234375e-05, "step": 16316, "training_step_time": 0.11055994033813477 }, { "epoch": 2.489776611328125e-05, "model_forward_time": 0.02508997917175293, "step": 16317 }, { "epoch": 2.489776611328125e-05, "step": 16317, "training_step_time": 0.1047368049621582 }, { "epoch": 2.48992919921875e-05, "model_forward_time": 0.02500462532043457, "step": 16318 }, { "epoch": 2.48992919921875e-05, "step": 16318, "training_step_time": 0.1147313117980957 }, { "epoch": 2.490081787109375e-05, "model_forward_time": 0.025377273559570312, "step": 16319 }, { "epoch": 2.490081787109375e-05, "step": 16319, "training_step_time": 0.1119074821472168 }, { "epoch": 2.490234375e-05, "grad_norm": 0.3668254017829895, "learning_rate": 4.6860474023534335e-05, "loss": 0.0101, "step": 16320 }, { "epoch": 2.490234375e-05, "model_forward_time": 0.024182796478271484, "step": 16320 }, { "epoch": 2.490234375e-05, "step": 16320, "training_step_time": 0.10673284530639648 }, { "epoch": 2.490386962890625e-05, "model_forward_time": 0.025019407272338867, "step": 16321 }, { "epoch": 2.490386962890625e-05, "step": 16321, "training_step_time": 0.1049048900604248 }, { "epoch": 2.49053955078125e-05, "model_forward_time": 0.024164438247680664, "step": 16322 }, { "epoch": 2.49053955078125e-05, "step": 16322, "training_step_time": 0.15128326416015625 }, { "epoch": 2.490692138671875e-05, "model_forward_time": 0.02541065216064453, "step": 16323 }, { "epoch": 2.490692138671875e-05, "step": 16323, "training_step_time": 0.10972213745117188 }, { "epoch": 2.4908447265625e-05, "model_forward_time": 0.02537703514099121, "step": 16324 }, { "epoch": 2.4908447265625e-05, "step": 16324, "training_step_time": 0.16443753242492676 }, { "epoch": 2.490997314453125e-05, "model_forward_time": 0.02503347396850586, "step": 16325 }, { "epoch": 2.490997314453125e-05, "step": 16325, "training_step_time": 0.17136788368225098 }, { "epoch": 2.49114990234375e-05, "model_forward_time": 0.024497270584106445, "step": 16326 }, { "epoch": 2.49114990234375e-05, "step": 16326, "training_step_time": 0.17516231536865234 }, { "epoch": 2.491302490234375e-05, "model_forward_time": 0.024019956588745117, "step": 16327 }, { "epoch": 2.491302490234375e-05, "step": 16327, "training_step_time": 0.1611323356628418 }, { "epoch": 2.491455078125e-05, "model_forward_time": 0.0241086483001709, "step": 16328 }, { "epoch": 2.491455078125e-05, "step": 16328, "training_step_time": 0.19383645057678223 }, { "epoch": 2.491607666015625e-05, "model_forward_time": 0.024566650390625, "step": 16329 }, { "epoch": 2.491607666015625e-05, "step": 16329, "training_step_time": 0.10863876342773438 }, { "epoch": 2.49176025390625e-05, "grad_norm": 0.2264672964811325, "learning_rate": 4.680546903966106e-05, "loss": 0.0184, "step": 16330 }, { "epoch": 2.49176025390625e-05, "model_forward_time": 0.024892091751098633, "step": 16330 }, { "epoch": 2.49176025390625e-05, "step": 16330, "training_step_time": 0.10461735725402832 }, { "epoch": 2.491912841796875e-05, "model_forward_time": 0.025025129318237305, "step": 16331 }, { "epoch": 2.491912841796875e-05, "step": 16331, "training_step_time": 0.10393857955932617 }, { "epoch": 2.4920654296875e-05, "model_forward_time": 0.024837017059326172, "step": 16332 }, { "epoch": 2.4920654296875e-05, "step": 16332, "training_step_time": 0.10575008392333984 }, { "epoch": 2.492218017578125e-05, "model_forward_time": 0.026929140090942383, "step": 16333 }, { "epoch": 2.492218017578125e-05, "step": 16333, "training_step_time": 0.10875463485717773 }, { "epoch": 2.49237060546875e-05, "model_forward_time": 0.025187969207763672, "step": 16334 }, { "epoch": 2.49237060546875e-05, "step": 16334, "training_step_time": 0.1756439208984375 }, { "epoch": 2.492523193359375e-05, "model_forward_time": 0.02461981773376465, "step": 16335 }, { "epoch": 2.492523193359375e-05, "step": 16335, "training_step_time": 0.1862964630126953 }, { "epoch": 2.49267578125e-05, "model_forward_time": 0.024547576904296875, "step": 16336 }, { "epoch": 2.49267578125e-05, "step": 16336, "training_step_time": 0.18993282318115234 }, { "epoch": 2.492828369140625e-05, "model_forward_time": 0.024155855178833008, "step": 16337 }, { "epoch": 2.492828369140625e-05, "step": 16337, "training_step_time": 0.17677855491638184 }, { "epoch": 2.49298095703125e-05, "model_forward_time": 0.024112701416015625, "step": 16338 }, { "epoch": 2.49298095703125e-05, "step": 16338, "training_step_time": 0.17261219024658203 }, { "epoch": 2.493133544921875e-05, "model_forward_time": 0.024508953094482422, "step": 16339 }, { "epoch": 2.493133544921875e-05, "step": 16339, "training_step_time": 0.15276527404785156 }, { "epoch": 2.4932861328125e-05, "grad_norm": 0.2060825675725937, "learning_rate": 4.6750467937444115e-05, "loss": 0.0092, "step": 16340 }, { "epoch": 2.4932861328125e-05, "model_forward_time": 0.023807287216186523, "step": 16340 }, { "epoch": 2.4932861328125e-05, "step": 16340, "training_step_time": 0.13687825202941895 }, { "epoch": 2.493438720703125e-05, "model_forward_time": 0.024261951446533203, "step": 16341 }, { "epoch": 2.493438720703125e-05, "step": 16341, "training_step_time": 0.14150047302246094 }, { "epoch": 2.49359130859375e-05, "model_forward_time": 0.02357625961303711, "step": 16342 }, { "epoch": 2.49359130859375e-05, "step": 16342, "training_step_time": 0.1299598217010498 }, { "epoch": 2.493743896484375e-05, "model_forward_time": 0.023953676223754883, "step": 16343 }, { "epoch": 2.493743896484375e-05, "step": 16343, "training_step_time": 0.12199187278747559 }, { "epoch": 2.493896484375e-05, "model_forward_time": 0.024519681930541992, "step": 16344 }, { "epoch": 2.493896484375e-05, "step": 16344, "training_step_time": 0.15520262718200684 }, { "epoch": 2.494049072265625e-05, "model_forward_time": 0.024527549743652344, "step": 16345 }, { "epoch": 2.494049072265625e-05, "step": 16345, "training_step_time": 0.14768505096435547 }, { "epoch": 2.49420166015625e-05, "model_forward_time": 0.02425408363342285, "step": 16346 }, { "epoch": 2.49420166015625e-05, "step": 16346, "training_step_time": 0.112396240234375 }, { "epoch": 2.494354248046875e-05, "model_forward_time": 0.023823261260986328, "step": 16347 }, { "epoch": 2.494354248046875e-05, "step": 16347, "training_step_time": 0.11086869239807129 }, { "epoch": 2.4945068359375e-05, "model_forward_time": 0.025020122528076172, "step": 16348 }, { "epoch": 2.4945068359375e-05, "step": 16348, "training_step_time": 0.10997748374938965 }, { "epoch": 2.494659423828125e-05, "model_forward_time": 0.025551795959472656, "step": 16349 }, { "epoch": 2.494659423828125e-05, "step": 16349, "training_step_time": 0.10901117324829102 }, { "epoch": 2.49481201171875e-05, "grad_norm": 0.11935984343290329, "learning_rate": 4.669547078371504e-05, "loss": 0.0065, "step": 16350 }, { "epoch": 2.49481201171875e-05, "model_forward_time": 0.025210142135620117, "step": 16350 }, { "epoch": 2.49481201171875e-05, "step": 16350, "training_step_time": 0.10570192337036133 }, { "epoch": 2.494964599609375e-05, "model_forward_time": 0.02564096450805664, "step": 16351 }, { "epoch": 2.494964599609375e-05, "step": 16351, "training_step_time": 0.10630607604980469 }, { "epoch": 2.4951171875e-05, "model_forward_time": 0.02502918243408203, "step": 16352 }, { "epoch": 2.4951171875e-05, "step": 16352, "training_step_time": 0.10651874542236328 }, { "epoch": 2.495269775390625e-05, "model_forward_time": 0.02509450912475586, "step": 16353 }, { "epoch": 2.495269775390625e-05, "step": 16353, "training_step_time": 0.10657787322998047 }, { "epoch": 2.49542236328125e-05, "model_forward_time": 0.025289058685302734, "step": 16354 }, { "epoch": 2.49542236328125e-05, "step": 16354, "training_step_time": 0.10852885246276855 }, { "epoch": 2.495574951171875e-05, "model_forward_time": 0.026223182678222656, "step": 16355 }, { "epoch": 2.495574951171875e-05, "step": 16355, "training_step_time": 0.10669922828674316 }, { "epoch": 2.4957275390625e-05, "model_forward_time": 0.025287628173828125, "step": 16356 }, { "epoch": 2.4957275390625e-05, "step": 16356, "training_step_time": 0.1076345443725586 }, { "epoch": 2.495880126953125e-05, "model_forward_time": 0.025488853454589844, "step": 16357 }, { "epoch": 2.495880126953125e-05, "step": 16357, "training_step_time": 0.10650825500488281 }, { "epoch": 2.49603271484375e-05, "model_forward_time": 0.023928403854370117, "step": 16358 }, { "epoch": 2.49603271484375e-05, "step": 16358, "training_step_time": 0.10686659812927246 }, { "epoch": 2.496185302734375e-05, "model_forward_time": 0.02517867088317871, "step": 16359 }, { "epoch": 2.496185302734375e-05, "step": 16359, "training_step_time": 0.10415863990783691 }, { "epoch": 2.496337890625e-05, "grad_norm": 0.2253582924604416, "learning_rate": 4.664047764530055e-05, "loss": 0.0135, "step": 16360 }, { "epoch": 2.496337890625e-05, "model_forward_time": 0.025079727172851562, "step": 16360 }, { "epoch": 2.496337890625e-05, "step": 16360, "training_step_time": 0.10331606864929199 }, { "epoch": 2.496490478515625e-05, "model_forward_time": 0.025379419326782227, "step": 16361 }, { "epoch": 2.496490478515625e-05, "step": 16361, "training_step_time": 0.10523080825805664 }, { "epoch": 2.49664306640625e-05, "model_forward_time": 0.02500295639038086, "step": 16362 }, { "epoch": 2.49664306640625e-05, "step": 16362, "training_step_time": 0.8561389446258545 }, { "epoch": 2.496795654296875e-05, "model_forward_time": 0.022899866104125977, "step": 16363 }, { "epoch": 2.496795654296875e-05, "step": 16363, "training_step_time": 0.1220083236694336 }, { "epoch": 2.4969482421875e-05, "model_forward_time": 0.02422332763671875, "step": 16364 }, { "epoch": 2.4969482421875e-05, "step": 16364, "training_step_time": 0.18590235710144043 }, { "epoch": 2.497100830078125e-05, "model_forward_time": 0.02471017837524414, "step": 16365 }, { "epoch": 2.497100830078125e-05, "step": 16365, "training_step_time": 0.1924910545349121 }, { "epoch": 2.49725341796875e-05, "model_forward_time": 0.024799108505249023, "step": 16366 }, { "epoch": 2.49725341796875e-05, "step": 16366, "training_step_time": 0.17697477340698242 }, { "epoch": 2.497406005859375e-05, "model_forward_time": 0.024113178253173828, "step": 16367 }, { "epoch": 2.497406005859375e-05, "step": 16367, "training_step_time": 0.20777678489685059 }, { "epoch": 2.49755859375e-05, "model_forward_time": 0.024667739868164062, "step": 16368 }, { "epoch": 2.49755859375e-05, "step": 16368, "training_step_time": 0.10934662818908691 }, { "epoch": 2.497711181640625e-05, "model_forward_time": 0.024131059646606445, "step": 16369 }, { "epoch": 2.497711181640625e-05, "step": 16369, "training_step_time": 0.1007537841796875 }, { "epoch": 2.49786376953125e-05, "grad_norm": 0.1973024159669876, "learning_rate": 4.65854885890225e-05, "loss": 0.0171, "step": 16370 }, { "epoch": 2.49786376953125e-05, "model_forward_time": 0.025710582733154297, "step": 16370 }, { "epoch": 2.49786376953125e-05, "step": 16370, "training_step_time": 0.10565590858459473 }, { "epoch": 2.498016357421875e-05, "model_forward_time": 0.025020360946655273, "step": 16371 }, { "epoch": 2.498016357421875e-05, "step": 16371, "training_step_time": 0.10427546501159668 }, { "epoch": 2.4981689453125e-05, "model_forward_time": 0.025077104568481445, "step": 16372 }, { "epoch": 2.4981689453125e-05, "step": 16372, "training_step_time": 0.10362625122070312 }, { "epoch": 2.498321533203125e-05, "model_forward_time": 0.02566361427307129, "step": 16373 }, { "epoch": 2.498321533203125e-05, "step": 16373, "training_step_time": 0.11781764030456543 }, { "epoch": 2.49847412109375e-05, "model_forward_time": 0.024481534957885742, "step": 16374 }, { "epoch": 2.49847412109375e-05, "step": 16374, "training_step_time": 0.1341838836669922 }, { "epoch": 2.498626708984375e-05, "model_forward_time": 0.02390313148498535, "step": 16375 }, { "epoch": 2.498626708984375e-05, "step": 16375, "training_step_time": 0.12933087348937988 }, { "epoch": 2.498779296875e-05, "model_forward_time": 0.023952007293701172, "step": 16376 }, { "epoch": 2.498779296875e-05, "step": 16376, "training_step_time": 0.11882829666137695 }, { "epoch": 2.498931884765625e-05, "model_forward_time": 0.024495363235473633, "step": 16377 }, { "epoch": 2.498931884765625e-05, "step": 16377, "training_step_time": 0.11801695823669434 }, { "epoch": 2.49908447265625e-05, "model_forward_time": 0.024218082427978516, "step": 16378 }, { "epoch": 2.49908447265625e-05, "step": 16378, "training_step_time": 0.11306476593017578 }, { "epoch": 2.499237060546875e-05, "model_forward_time": 0.024240493774414062, "step": 16379 }, { "epoch": 2.499237060546875e-05, "step": 16379, "training_step_time": 0.11334037780761719 }, { "epoch": 2.4993896484375e-05, "grad_norm": 0.18750359117984772, "learning_rate": 4.65305036816978e-05, "loss": 0.0112, "step": 16380 }, { "epoch": 2.4993896484375e-05, "model_forward_time": 0.024095535278320312, "step": 16380 }, { "epoch": 2.4993896484375e-05, "step": 16380, "training_step_time": 0.1091609001159668 }, { "epoch": 2.499542236328125e-05, "model_forward_time": 0.024363994598388672, "step": 16381 }, { "epoch": 2.499542236328125e-05, "step": 16381, "training_step_time": 0.11114931106567383 }, { "epoch": 2.49969482421875e-05, "model_forward_time": 0.02513265609741211, "step": 16382 }, { "epoch": 2.49969482421875e-05, "step": 16382, "training_step_time": 0.14131546020507812 }, { "epoch": 2.499847412109375e-05, "model_forward_time": 0.025200605392456055, "step": 16383 }, { "epoch": 2.499847412109375e-05, "step": 16383, "training_step_time": 0.13706445693969727 }, { "epoch": 2.5e-05, "model_forward_time": 0.0248410701751709, "step": 16384 }, { "epoch": 2.5e-05, "step": 16384, "training_step_time": 0.10546612739562988 }, { "epoch": 2.500152587890625e-05, "model_forward_time": 0.02525472640991211, "step": 16385 }, { "epoch": 2.500152587890625e-05, "step": 16385, "training_step_time": 0.11875677108764648 }, { "epoch": 2.50030517578125e-05, "model_forward_time": 0.025292396545410156, "step": 16386 }, { "epoch": 2.50030517578125e-05, "step": 16386, "training_step_time": 0.11366009712219238 }, { "epoch": 2.500457763671875e-05, "model_forward_time": 0.025298357009887695, "step": 16387 }, { "epoch": 2.500457763671875e-05, "step": 16387, "training_step_time": 0.10927557945251465 }, { "epoch": 2.5006103515625e-05, "model_forward_time": 0.024949312210083008, "step": 16388 }, { "epoch": 2.5006103515625e-05, "step": 16388, "training_step_time": 0.19842839241027832 }, { "epoch": 2.500762939453125e-05, "model_forward_time": 0.02506852149963379, "step": 16389 }, { "epoch": 2.500762939453125e-05, "step": 16389, "training_step_time": 0.1086266040802002 }, { "epoch": 2.50091552734375e-05, "grad_norm": 0.21019835770130157, "learning_rate": 4.647552299013828e-05, "loss": 0.0173, "step": 16390 }, { "epoch": 2.50091552734375e-05, "model_forward_time": 0.023973464965820312, "step": 16390 }, { "epoch": 2.50091552734375e-05, "step": 16390, "training_step_time": 0.10193824768066406 }, { "epoch": 2.501068115234375e-05, "model_forward_time": 0.025185108184814453, "step": 16391 }, { "epoch": 2.501068115234375e-05, "step": 16391, "training_step_time": 0.10439562797546387 }, { "epoch": 2.501220703125e-05, "model_forward_time": 0.025111675262451172, "step": 16392 }, { "epoch": 2.501220703125e-05, "step": 16392, "training_step_time": 0.10789251327514648 }, { "epoch": 2.501373291015625e-05, "model_forward_time": 0.02472543716430664, "step": 16393 }, { "epoch": 2.501373291015625e-05, "step": 16393, "training_step_time": 0.10443997383117676 }, { "epoch": 2.50152587890625e-05, "model_forward_time": 0.02505183219909668, "step": 16394 }, { "epoch": 2.50152587890625e-05, "step": 16394, "training_step_time": 0.10407376289367676 }, { "epoch": 2.501678466796875e-05, "model_forward_time": 0.025483369827270508, "step": 16395 }, { "epoch": 2.501678466796875e-05, "step": 16395, "training_step_time": 0.10547423362731934 }, { "epoch": 2.5018310546875e-05, "model_forward_time": 0.0256044864654541, "step": 16396 }, { "epoch": 2.5018310546875e-05, "step": 16396, "training_step_time": 0.1062936782836914 }, { "epoch": 2.501983642578125e-05, "model_forward_time": 0.025485992431640625, "step": 16397 }, { "epoch": 2.501983642578125e-05, "step": 16397, "training_step_time": 0.10991168022155762 }, { "epoch": 2.50213623046875e-05, "model_forward_time": 0.025321245193481445, "step": 16398 }, { "epoch": 2.50213623046875e-05, "step": 16398, "training_step_time": 0.10649967193603516 }, { "epoch": 2.502288818359375e-05, "model_forward_time": 0.025323152542114258, "step": 16399 }, { "epoch": 2.502288818359375e-05, "step": 16399, "training_step_time": 0.10945630073547363 }, { "epoch": 2.50244140625e-05, "grad_norm": 0.35515621304512024, "learning_rate": 4.642054658115067e-05, "loss": 0.0111, "step": 16400 }, { "epoch": 2.50244140625e-05, "model_forward_time": 0.02395486831665039, "step": 16400 }, { "epoch": 2.50244140625e-05, "step": 16400, "training_step_time": 0.10367679595947266 }, { "epoch": 2.502593994140625e-05, "model_forward_time": 0.024101734161376953, "step": 16401 }, { "epoch": 2.502593994140625e-05, "step": 16401, "training_step_time": 0.10614180564880371 }, { "epoch": 2.50274658203125e-05, "model_forward_time": 0.02769947052001953, "step": 16402 }, { "epoch": 2.50274658203125e-05, "step": 16402, "training_step_time": 0.10841131210327148 }, { "epoch": 2.502899169921875e-05, "model_forward_time": 0.025760412216186523, "step": 16403 }, { "epoch": 2.502899169921875e-05, "step": 16403, "training_step_time": 0.10672712326049805 }, { "epoch": 2.5030517578125e-05, "model_forward_time": 0.025777101516723633, "step": 16404 }, { "epoch": 2.5030517578125e-05, "step": 16404, "training_step_time": 0.10654926300048828 }, { "epoch": 2.503204345703125e-05, "model_forward_time": 0.025480985641479492, "step": 16405 }, { "epoch": 2.503204345703125e-05, "step": 16405, "training_step_time": 0.10590529441833496 }, { "epoch": 2.50335693359375e-05, "model_forward_time": 0.025279521942138672, "step": 16406 }, { "epoch": 2.50335693359375e-05, "step": 16406, "training_step_time": 0.10498547554016113 }, { "epoch": 2.503509521484375e-05, "model_forward_time": 0.026096105575561523, "step": 16407 }, { "epoch": 2.503509521484375e-05, "step": 16407, "training_step_time": 0.1059730052947998 }, { "epoch": 2.503662109375e-05, "model_forward_time": 0.026453018188476562, "step": 16408 }, { "epoch": 2.503662109375e-05, "step": 16408, "training_step_time": 0.14661693572998047 }, { "epoch": 2.503814697265625e-05, "model_forward_time": 0.024920940399169922, "step": 16409 }, { "epoch": 2.503814697265625e-05, "step": 16409, "training_step_time": 0.20054221153259277 }, { "epoch": 2.50396728515625e-05, "grad_norm": 0.29080188274383545, "learning_rate": 4.6365574521536445e-05, "loss": 0.013, "step": 16410 }, { "epoch": 2.50396728515625e-05, "model_forward_time": 0.024312734603881836, "step": 16410 }, { "epoch": 2.50396728515625e-05, "step": 16410, "training_step_time": 0.21537327766418457 }, { "epoch": 2.504119873046875e-05, "model_forward_time": 0.023350000381469727, "step": 16411 }, { "epoch": 2.504119873046875e-05, "step": 16411, "training_step_time": 0.1923351287841797 }, { "epoch": 2.5042724609375e-05, "model_forward_time": 0.02477264404296875, "step": 16412 }, { "epoch": 2.5042724609375e-05, "step": 16412, "training_step_time": 0.17514443397521973 }, { "epoch": 2.504425048828125e-05, "model_forward_time": 0.024207592010498047, "step": 16413 }, { "epoch": 2.504425048828125e-05, "step": 16413, "training_step_time": 0.19513440132141113 }, { "epoch": 2.50457763671875e-05, "model_forward_time": 0.024522066116333008, "step": 16414 }, { "epoch": 2.50457763671875e-05, "step": 16414, "training_step_time": 0.11477231979370117 }, { "epoch": 2.504730224609375e-05, "model_forward_time": 0.024348735809326172, "step": 16415 }, { "epoch": 2.504730224609375e-05, "step": 16415, "training_step_time": 0.10291814804077148 }, { "epoch": 2.5048828125e-05, "model_forward_time": 0.025489091873168945, "step": 16416 }, { "epoch": 2.5048828125e-05, "step": 16416, "training_step_time": 0.11387300491333008 }, { "epoch": 2.505035400390625e-05, "model_forward_time": 0.025764942169189453, "step": 16417 }, { "epoch": 2.505035400390625e-05, "step": 16417, "training_step_time": 0.11059021949768066 }, { "epoch": 2.50518798828125e-05, "model_forward_time": 0.025543212890625, "step": 16418 }, { "epoch": 2.50518798828125e-05, "step": 16418, "training_step_time": 0.11300849914550781 }, { "epoch": 2.505340576171875e-05, "model_forward_time": 0.02530503273010254, "step": 16419 }, { "epoch": 2.505340576171875e-05, "step": 16419, "training_step_time": 0.10860490798950195 }, { "epoch": 2.5054931640625e-05, "grad_norm": 0.4580700397491455, "learning_rate": 4.631060687809191e-05, "loss": 0.0124, "step": 16420 }, { "epoch": 2.5054931640625e-05, "model_forward_time": 0.02600264549255371, "step": 16420 }, { "epoch": 2.5054931640625e-05, "step": 16420, "training_step_time": 0.10634422302246094 }, { "epoch": 2.505645751953125e-05, "model_forward_time": 0.025363683700561523, "step": 16421 }, { "epoch": 2.505645751953125e-05, "step": 16421, "training_step_time": 0.10703110694885254 }, { "epoch": 2.50579833984375e-05, "model_forward_time": 0.025494098663330078, "step": 16422 }, { "epoch": 2.50579833984375e-05, "step": 16422, "training_step_time": 0.9178316593170166 }, { "epoch": 2.505950927734375e-05, "model_forward_time": 0.02284836769104004, "step": 16423 }, { "epoch": 2.505950927734375e-05, "step": 16423, "training_step_time": 0.16357779502868652 }, { "epoch": 2.506103515625e-05, "model_forward_time": 0.02417278289794922, "step": 16424 }, { "epoch": 2.506103515625e-05, "step": 16424, "training_step_time": 0.15090036392211914 }, { "epoch": 2.506256103515625e-05, "model_forward_time": 0.023791790008544922, "step": 16425 }, { "epoch": 2.506256103515625e-05, "step": 16425, "training_step_time": 0.11537337303161621 }, { "epoch": 2.50640869140625e-05, "model_forward_time": 0.02497553825378418, "step": 16426 }, { "epoch": 2.50640869140625e-05, "step": 16426, "training_step_time": 0.10822105407714844 }, { "epoch": 2.506561279296875e-05, "model_forward_time": 0.02534937858581543, "step": 16427 }, { "epoch": 2.506561279296875e-05, "step": 16427, "training_step_time": 0.19776582717895508 }, { "epoch": 2.5067138671875e-05, "model_forward_time": 0.02429938316345215, "step": 16428 }, { "epoch": 2.5067138671875e-05, "step": 16428, "training_step_time": 0.10302066802978516 }, { "epoch": 2.506866455078125e-05, "model_forward_time": 0.024543046951293945, "step": 16429 }, { "epoch": 2.506866455078125e-05, "step": 16429, "training_step_time": 0.1020200252532959 }, { "epoch": 2.50701904296875e-05, "grad_norm": 0.3575209081172943, "learning_rate": 4.625564371760791e-05, "loss": 0.0162, "step": 16430 }, { "epoch": 2.50701904296875e-05, "model_forward_time": 0.025224685668945312, "step": 16430 }, { "epoch": 2.50701904296875e-05, "step": 16430, "training_step_time": 0.1049802303314209 }, { "epoch": 2.507171630859375e-05, "model_forward_time": 0.025543689727783203, "step": 16431 }, { "epoch": 2.507171630859375e-05, "step": 16431, "training_step_time": 0.10553622245788574 }, { "epoch": 2.50732421875e-05, "model_forward_time": 0.025386333465576172, "step": 16432 }, { "epoch": 2.50732421875e-05, "step": 16432, "training_step_time": 0.10555195808410645 }, { "epoch": 2.507476806640625e-05, "model_forward_time": 0.025124549865722656, "step": 16433 }, { "epoch": 2.507476806640625e-05, "step": 16433, "training_step_time": 0.10695457458496094 }, { "epoch": 2.50762939453125e-05, "model_forward_time": 0.025166749954223633, "step": 16434 }, { "epoch": 2.50762939453125e-05, "step": 16434, "training_step_time": 0.10552239418029785 }, { "epoch": 2.507781982421875e-05, "model_forward_time": 0.025157451629638672, "step": 16435 }, { "epoch": 2.507781982421875e-05, "step": 16435, "training_step_time": 0.10619926452636719 }, { "epoch": 2.5079345703125e-05, "model_forward_time": 0.025560617446899414, "step": 16436 }, { "epoch": 2.5079345703125e-05, "step": 16436, "training_step_time": 0.10657930374145508 }, { "epoch": 2.508087158203125e-05, "model_forward_time": 0.025399208068847656, "step": 16437 }, { "epoch": 2.508087158203125e-05, "step": 16437, "training_step_time": 0.10880327224731445 }, { "epoch": 2.50823974609375e-05, "model_forward_time": 0.025549888610839844, "step": 16438 }, { "epoch": 2.50823974609375e-05, "step": 16438, "training_step_time": 0.10569071769714355 }, { "epoch": 2.508392333984375e-05, "model_forward_time": 0.025377750396728516, "step": 16439 }, { "epoch": 2.508392333984375e-05, "step": 16439, "training_step_time": 0.10698080062866211 }, { "epoch": 2.508544921875e-05, "grad_norm": 0.2672197222709656, "learning_rate": 4.620068510686985e-05, "loss": 0.0136, "step": 16440 }, { "epoch": 2.508544921875e-05, "model_forward_time": 0.02522110939025879, "step": 16440 }, { "epoch": 2.508544921875e-05, "step": 16440, "training_step_time": 0.1342785358428955 }, { "epoch": 2.508697509765625e-05, "model_forward_time": 0.025295495986938477, "step": 16441 }, { "epoch": 2.508697509765625e-05, "step": 16441, "training_step_time": 0.1684868335723877 }, { "epoch": 2.50885009765625e-05, "model_forward_time": 0.024318218231201172, "step": 16442 }, { "epoch": 2.50885009765625e-05, "step": 16442, "training_step_time": 0.15961003303527832 }, { "epoch": 2.509002685546875e-05, "model_forward_time": 0.023906230926513672, "step": 16443 }, { "epoch": 2.509002685546875e-05, "step": 16443, "training_step_time": 0.140031099319458 }, { "epoch": 2.5091552734375e-05, "model_forward_time": 0.024425506591796875, "step": 16444 }, { "epoch": 2.5091552734375e-05, "step": 16444, "training_step_time": 0.13741731643676758 }, { "epoch": 2.509307861328125e-05, "model_forward_time": 0.0252687931060791, "step": 16445 }, { "epoch": 2.509307861328125e-05, "step": 16445, "training_step_time": 0.12011456489562988 }, { "epoch": 2.50946044921875e-05, "model_forward_time": 0.024673938751220703, "step": 16446 }, { "epoch": 2.50946044921875e-05, "step": 16446, "training_step_time": 0.1358191967010498 }, { "epoch": 2.509613037109375e-05, "model_forward_time": 0.024561643600463867, "step": 16447 }, { "epoch": 2.509613037109375e-05, "step": 16447, "training_step_time": 0.15794754028320312 }, { "epoch": 2.509765625e-05, "model_forward_time": 0.024929285049438477, "step": 16448 }, { "epoch": 2.509765625e-05, "step": 16448, "training_step_time": 0.19464588165283203 }, { "epoch": 2.509918212890625e-05, "model_forward_time": 0.024489641189575195, "step": 16449 }, { "epoch": 2.509918212890625e-05, "step": 16449, "training_step_time": 0.1565990447998047 }, { "epoch": 2.51007080078125e-05, "grad_norm": 0.32794129848480225, "learning_rate": 4.6145731112657644e-05, "loss": 0.0124, "step": 16450 }, { "epoch": 2.51007080078125e-05, "model_forward_time": 0.024987220764160156, "step": 16450 }, { "epoch": 2.51007080078125e-05, "step": 16450, "training_step_time": 0.19224977493286133 }, { "epoch": 2.510223388671875e-05, "model_forward_time": 0.025913715362548828, "step": 16451 }, { "epoch": 2.510223388671875e-05, "step": 16451, "training_step_time": 0.1502819061279297 }, { "epoch": 2.5103759765625e-05, "model_forward_time": 0.02418661117553711, "step": 16452 }, { "epoch": 2.5103759765625e-05, "step": 16452, "training_step_time": 0.1773838996887207 }, { "epoch": 2.510528564453125e-05, "model_forward_time": 0.024291038513183594, "step": 16453 }, { "epoch": 2.510528564453125e-05, "step": 16453, "training_step_time": 0.10554051399230957 }, { "epoch": 2.51068115234375e-05, "model_forward_time": 0.024457693099975586, "step": 16454 }, { "epoch": 2.51068115234375e-05, "step": 16454, "training_step_time": 0.10473227500915527 }, { "epoch": 2.510833740234375e-05, "model_forward_time": 0.025315046310424805, "step": 16455 }, { "epoch": 2.510833740234375e-05, "step": 16455, "training_step_time": 0.10741019248962402 }, { "epoch": 2.510986328125e-05, "model_forward_time": 0.025682926177978516, "step": 16456 }, { "epoch": 2.510986328125e-05, "step": 16456, "training_step_time": 0.11321353912353516 }, { "epoch": 2.511138916015625e-05, "model_forward_time": 0.025025129318237305, "step": 16457 }, { "epoch": 2.511138916015625e-05, "step": 16457, "training_step_time": 0.10503840446472168 }, { "epoch": 2.51129150390625e-05, "model_forward_time": 0.025183439254760742, "step": 16458 }, { "epoch": 2.51129150390625e-05, "step": 16458, "training_step_time": 0.10402584075927734 }, { "epoch": 2.511444091796875e-05, "model_forward_time": 0.025130271911621094, "step": 16459 }, { "epoch": 2.511444091796875e-05, "step": 16459, "training_step_time": 0.10849952697753906 }, { "epoch": 2.5115966796875e-05, "grad_norm": 0.31576505303382874, "learning_rate": 4.609078180174555e-05, "loss": 0.0181, "step": 16460 }, { "epoch": 2.5115966796875e-05, "model_forward_time": 0.025750398635864258, "step": 16460 }, { "epoch": 2.5115966796875e-05, "step": 16460, "training_step_time": 0.10608148574829102 }, { "epoch": 2.511749267578125e-05, "model_forward_time": 0.025476694107055664, "step": 16461 }, { "epoch": 2.511749267578125e-05, "step": 16461, "training_step_time": 0.10701918601989746 }, { "epoch": 2.51190185546875e-05, "model_forward_time": 0.026217937469482422, "step": 16462 }, { "epoch": 2.51190185546875e-05, "step": 16462, "training_step_time": 0.10647058486938477 }, { "epoch": 2.512054443359375e-05, "model_forward_time": 0.02526712417602539, "step": 16463 }, { "epoch": 2.512054443359375e-05, "step": 16463, "training_step_time": 0.10477113723754883 }, { "epoch": 2.51220703125e-05, "model_forward_time": 0.02513861656188965, "step": 16464 }, { "epoch": 2.51220703125e-05, "step": 16464, "training_step_time": 0.10711383819580078 }, { "epoch": 2.512359619140625e-05, "model_forward_time": 0.025604724884033203, "step": 16465 }, { "epoch": 2.512359619140625e-05, "step": 16465, "training_step_time": 0.11089730262756348 }, { "epoch": 2.51251220703125e-05, "model_forward_time": 0.02483367919921875, "step": 16466 }, { "epoch": 2.51251220703125e-05, "step": 16466, "training_step_time": 0.10695266723632812 }, { "epoch": 2.512664794921875e-05, "model_forward_time": 0.02513289451599121, "step": 16467 }, { "epoch": 2.512664794921875e-05, "step": 16467, "training_step_time": 0.1735219955444336 }, { "epoch": 2.5128173828125e-05, "model_forward_time": 0.025539636611938477, "step": 16468 }, { "epoch": 2.5128173828125e-05, "step": 16468, "training_step_time": 0.1385173797607422 }, { "epoch": 2.512969970703125e-05, "model_forward_time": 0.02483367919921875, "step": 16469 }, { "epoch": 2.512969970703125e-05, "step": 16469, "training_step_time": 0.10190558433532715 }, { "epoch": 2.51312255859375e-05, "grad_norm": 0.27324163913726807, "learning_rate": 4.60358372409022e-05, "loss": 0.011, "step": 16470 }, { "epoch": 2.51312255859375e-05, "model_forward_time": 0.024628877639770508, "step": 16470 }, { "epoch": 2.51312255859375e-05, "step": 16470, "training_step_time": 0.11878824234008789 }, { "epoch": 2.513275146484375e-05, "model_forward_time": 0.025277376174926758, "step": 16471 }, { "epoch": 2.513275146484375e-05, "step": 16471, "training_step_time": 0.11488461494445801 }, { "epoch": 2.513427734375e-05, "model_forward_time": 0.02507805824279785, "step": 16472 }, { "epoch": 2.513427734375e-05, "step": 16472, "training_step_time": 0.10796904563903809 }, { "epoch": 2.513580322265625e-05, "model_forward_time": 0.02526092529296875, "step": 16473 }, { "epoch": 2.513580322265625e-05, "step": 16473, "training_step_time": 0.18858838081359863 }, { "epoch": 2.51373291015625e-05, "model_forward_time": 0.02453160285949707, "step": 16474 }, { "epoch": 2.51373291015625e-05, "step": 16474, "training_step_time": 0.1021580696105957 }, { "epoch": 2.513885498046875e-05, "model_forward_time": 0.024401187896728516, "step": 16475 }, { "epoch": 2.513885498046875e-05, "step": 16475, "training_step_time": 0.10171961784362793 }, { "epoch": 2.5140380859375e-05, "model_forward_time": 0.0251007080078125, "step": 16476 }, { "epoch": 2.5140380859375e-05, "step": 16476, "training_step_time": 0.10509967803955078 }, { "epoch": 2.514190673828125e-05, "model_forward_time": 0.024897336959838867, "step": 16477 }, { "epoch": 2.514190673828125e-05, "step": 16477, "training_step_time": 0.10453486442565918 }, { "epoch": 2.51434326171875e-05, "model_forward_time": 0.02522420883178711, "step": 16478 }, { "epoch": 2.51434326171875e-05, "step": 16478, "training_step_time": 0.10608386993408203 }, { "epoch": 2.514495849609375e-05, "model_forward_time": 0.025086402893066406, "step": 16479 }, { "epoch": 2.514495849609375e-05, "step": 16479, "training_step_time": 0.10670804977416992 }, { "epoch": 2.5146484375e-05, "grad_norm": 0.36495304107666016, "learning_rate": 4.598089749689041e-05, "loss": 0.0188, "step": 16480 }, { "epoch": 2.5146484375e-05, "model_forward_time": 0.025043725967407227, "step": 16480 }, { "epoch": 2.5146484375e-05, "step": 16480, "training_step_time": 0.11182355880737305 }, { "epoch": 2.514801025390625e-05, "model_forward_time": 0.025373220443725586, "step": 16481 }, { "epoch": 2.514801025390625e-05, "step": 16481, "training_step_time": 0.11329817771911621 }, { "epoch": 2.51495361328125e-05, "model_forward_time": 0.025304079055786133, "step": 16482 }, { "epoch": 2.51495361328125e-05, "step": 16482, "training_step_time": 0.11450672149658203 }, { "epoch": 2.515106201171875e-05, "model_forward_time": 0.025184154510498047, "step": 16483 }, { "epoch": 2.515106201171875e-05, "step": 16483, "training_step_time": 0.1051628589630127 }, { "epoch": 2.5152587890625e-05, "model_forward_time": 0.02497553825378418, "step": 16484 }, { "epoch": 2.5152587890625e-05, "step": 16484, "training_step_time": 0.10339093208312988 }, { "epoch": 2.515411376953125e-05, "model_forward_time": 0.024957895278930664, "step": 16485 }, { "epoch": 2.515411376953125e-05, "step": 16485, "training_step_time": 0.10459566116333008 }, { "epoch": 2.51556396484375e-05, "model_forward_time": 0.025227785110473633, "step": 16486 }, { "epoch": 2.51556396484375e-05, "step": 16486, "training_step_time": 0.10567498207092285 }, { "epoch": 2.515716552734375e-05, "model_forward_time": 0.025365591049194336, "step": 16487 }, { "epoch": 2.515716552734375e-05, "step": 16487, "training_step_time": 0.10596203804016113 }, { "epoch": 2.515869140625e-05, "model_forward_time": 0.025220394134521484, "step": 16488 }, { "epoch": 2.515869140625e-05, "step": 16488, "training_step_time": 0.1044306755065918 }, { "epoch": 2.516021728515625e-05, "model_forward_time": 0.0256500244140625, "step": 16489 }, { "epoch": 2.516021728515625e-05, "step": 16489, "training_step_time": 0.10474944114685059 }, { "epoch": 2.51617431640625e-05, "grad_norm": 0.27902287244796753, "learning_rate": 4.5925962636467126e-05, "loss": 0.0157, "step": 16490 }, { "epoch": 2.51617431640625e-05, "model_forward_time": 0.025027036666870117, "step": 16490 }, { "epoch": 2.51617431640625e-05, "step": 16490, "training_step_time": 0.10479950904846191 }, { "epoch": 2.516326904296875e-05, "model_forward_time": 0.025645971298217773, "step": 16491 }, { "epoch": 2.516326904296875e-05, "step": 16491, "training_step_time": 0.10541296005249023 }, { "epoch": 2.5164794921875e-05, "model_forward_time": 0.02505207061767578, "step": 16492 }, { "epoch": 2.5164794921875e-05, "step": 16492, "training_step_time": 0.10613512992858887 }, { "epoch": 2.516632080078125e-05, "model_forward_time": 0.02463698387145996, "step": 16493 }, { "epoch": 2.516632080078125e-05, "step": 16493, "training_step_time": 0.1469099521636963 }, { "epoch": 2.51678466796875e-05, "model_forward_time": 0.024808168411254883, "step": 16494 }, { "epoch": 2.51678466796875e-05, "step": 16494, "training_step_time": 0.16462421417236328 }, { "epoch": 2.516937255859375e-05, "model_forward_time": 0.02490520477294922, "step": 16495 }, { "epoch": 2.516937255859375e-05, "step": 16495, "training_step_time": 0.12042093276977539 }, { "epoch": 2.51708984375e-05, "model_forward_time": 0.024792194366455078, "step": 16496 }, { "epoch": 2.51708984375e-05, "step": 16496, "training_step_time": 0.1595468521118164 }, { "epoch": 2.517242431640625e-05, "model_forward_time": 0.024524688720703125, "step": 16497 }, { "epoch": 2.517242431640625e-05, "step": 16497, "training_step_time": 0.16651201248168945 }, { "epoch": 2.51739501953125e-05, "model_forward_time": 0.024406909942626953, "step": 16498 }, { "epoch": 2.51739501953125e-05, "step": 16498, "training_step_time": 0.1763901710510254 }, { "epoch": 2.517547607421875e-05, "model_forward_time": 0.02459883689880371, "step": 16499 }, { "epoch": 2.517547607421875e-05, "step": 16499, "training_step_time": 0.1804189682006836 }, { "epoch": 2.5177001953125e-05, "grad_norm": 0.2936546802520752, "learning_rate": 4.5871032726383386e-05, "loss": 0.0093, "step": 16500 }, { "epoch": 2.5177001953125e-05, "model_forward_time": 0.024168729782104492, "step": 16500 }, { "epoch": 2.5177001953125e-05, "step": 16500, "training_step_time": 0.10477113723754883 }, { "epoch": 2.517852783203125e-05, "model_forward_time": 0.02460312843322754, "step": 16501 }, { "epoch": 2.517852783203125e-05, "step": 16501, "training_step_time": 0.10196876525878906 }, { "epoch": 2.51800537109375e-05, "model_forward_time": 0.025241613388061523, "step": 16502 }, { "epoch": 2.51800537109375e-05, "step": 16502, "training_step_time": 0.10885930061340332 }, { "epoch": 2.518157958984375e-05, "model_forward_time": 0.025164365768432617, "step": 16503 }, { "epoch": 2.518157958984375e-05, "step": 16503, "training_step_time": 0.10882854461669922 }, { "epoch": 2.518310546875e-05, "model_forward_time": 0.025022506713867188, "step": 16504 }, { "epoch": 2.518310546875e-05, "step": 16504, "training_step_time": 0.1059410572052002 }, { "epoch": 2.518463134765625e-05, "model_forward_time": 0.02507948875427246, "step": 16505 }, { "epoch": 2.518463134765625e-05, "step": 16505, "training_step_time": 0.11070466041564941 }, { "epoch": 2.51861572265625e-05, "model_forward_time": 0.025187969207763672, "step": 16506 }, { "epoch": 2.51861572265625e-05, "step": 16506, "training_step_time": 0.12397480010986328 }, { "epoch": 2.518768310546875e-05, "model_forward_time": 0.024918556213378906, "step": 16507 }, { "epoch": 2.518768310546875e-05, "step": 16507, "training_step_time": 0.11527156829833984 }, { "epoch": 2.5189208984375e-05, "model_forward_time": 0.024707794189453125, "step": 16508 }, { "epoch": 2.5189208984375e-05, "step": 16508, "training_step_time": 0.11465907096862793 }, { "epoch": 2.519073486328125e-05, "model_forward_time": 0.02501821517944336, "step": 16509 }, { "epoch": 2.519073486328125e-05, "step": 16509, "training_step_time": 0.11523771286010742 }, { "epoch": 2.51922607421875e-05, "grad_norm": 0.30701643228530884, "learning_rate": 4.5816107833384234e-05, "loss": 0.0117, "step": 16510 }, { "epoch": 2.51922607421875e-05, "model_forward_time": 0.02506399154663086, "step": 16510 }, { "epoch": 2.51922607421875e-05, "step": 16510, "training_step_time": 0.11409473419189453 }, { "epoch": 2.519378662109375e-05, "model_forward_time": 0.0250399112701416, "step": 16511 }, { "epoch": 2.519378662109375e-05, "step": 16511, "training_step_time": 0.11527800559997559 }, { "epoch": 2.51953125e-05, "model_forward_time": 0.024981260299682617, "step": 16512 }, { "epoch": 2.51953125e-05, "step": 16512, "training_step_time": 0.10932755470275879 }, { "epoch": 2.519683837890625e-05, "model_forward_time": 0.025107622146606445, "step": 16513 }, { "epoch": 2.519683837890625e-05, "step": 16513, "training_step_time": 0.10806441307067871 }, { "epoch": 2.51983642578125e-05, "model_forward_time": 0.02554798126220703, "step": 16514 }, { "epoch": 2.51983642578125e-05, "step": 16514, "training_step_time": 0.1866769790649414 }, { "epoch": 2.519989013671875e-05, "model_forward_time": 0.0245513916015625, "step": 16515 }, { "epoch": 2.519989013671875e-05, "step": 16515, "training_step_time": 0.1503283977508545 }, { "epoch": 2.5201416015625e-05, "model_forward_time": 0.026794910430908203, "step": 16516 }, { "epoch": 2.5201416015625e-05, "step": 16516, "training_step_time": 0.10719966888427734 }, { "epoch": 2.520294189453125e-05, "model_forward_time": 0.02485489845275879, "step": 16517 }, { "epoch": 2.520294189453125e-05, "step": 16517, "training_step_time": 0.10767030715942383 }, { "epoch": 2.52044677734375e-05, "model_forward_time": 0.02543783187866211, "step": 16518 }, { "epoch": 2.52044677734375e-05, "step": 16518, "training_step_time": 0.11243295669555664 }, { "epoch": 2.520599365234375e-05, "model_forward_time": 0.025179147720336914, "step": 16519 }, { "epoch": 2.520599365234375e-05, "step": 16519, "training_step_time": 0.10486125946044922 }, { "epoch": 2.520751953125e-05, "grad_norm": 0.2752593457698822, "learning_rate": 4.576118802420856e-05, "loss": 0.0147, "step": 16520 }, { "epoch": 2.520751953125e-05, "model_forward_time": 0.025122404098510742, "step": 16520 }, { "epoch": 2.520751953125e-05, "step": 16520, "training_step_time": 0.19008755683898926 }, { "epoch": 2.520904541015625e-05, "model_forward_time": 0.024200439453125, "step": 16521 }, { "epoch": 2.520904541015625e-05, "step": 16521, "training_step_time": 0.10243892669677734 }, { "epoch": 2.52105712890625e-05, "model_forward_time": 0.024297237396240234, "step": 16522 }, { "epoch": 2.52105712890625e-05, "step": 16522, "training_step_time": 0.10262656211853027 }, { "epoch": 2.521209716796875e-05, "model_forward_time": 0.02601146697998047, "step": 16523 }, { "epoch": 2.521209716796875e-05, "step": 16523, "training_step_time": 0.10913658142089844 }, { "epoch": 2.5213623046875e-05, "model_forward_time": 0.02550530433654785, "step": 16524 }, { "epoch": 2.5213623046875e-05, "step": 16524, "training_step_time": 0.10615849494934082 }, { "epoch": 2.521514892578125e-05, "model_forward_time": 0.0251462459564209, "step": 16525 }, { "epoch": 2.521514892578125e-05, "step": 16525, "training_step_time": 0.1041104793548584 }, { "epoch": 2.52166748046875e-05, "model_forward_time": 0.02687358856201172, "step": 16526 }, { "epoch": 2.52166748046875e-05, "step": 16526, "training_step_time": 0.1058659553527832 }, { "epoch": 2.521820068359375e-05, "model_forward_time": 0.024223804473876953, "step": 16527 }, { "epoch": 2.521820068359375e-05, "step": 16527, "training_step_time": 0.8029687404632568 }, { "epoch": 2.52197265625e-05, "model_forward_time": 0.022718429565429688, "step": 16528 }, { "epoch": 2.52197265625e-05, "step": 16528, "training_step_time": 0.09745955467224121 }, { "epoch": 2.522125244140625e-05, "model_forward_time": 0.02447056770324707, "step": 16529 }, { "epoch": 2.522125244140625e-05, "step": 16529, "training_step_time": 0.1030728816986084 }, { "epoch": 2.52227783203125e-05, "grad_norm": 0.4050087034702301, "learning_rate": 4.570627336558915e-05, "loss": 0.0135, "step": 16530 }, { "epoch": 2.52227783203125e-05, "model_forward_time": 0.025234699249267578, "step": 16530 }, { "epoch": 2.52227783203125e-05, "step": 16530, "training_step_time": 0.10991477966308594 }, { "epoch": 2.522430419921875e-05, "model_forward_time": 0.025871753692626953, "step": 16531 }, { "epoch": 2.522430419921875e-05, "step": 16531, "training_step_time": 0.11064910888671875 }, { "epoch": 2.5225830078125e-05, "model_forward_time": 0.025597572326660156, "step": 16532 }, { "epoch": 2.5225830078125e-05, "step": 16532, "training_step_time": 0.10509490966796875 }, { "epoch": 2.522735595703125e-05, "model_forward_time": 0.025438308715820312, "step": 16533 }, { "epoch": 2.522735595703125e-05, "step": 16533, "training_step_time": 0.10590982437133789 }, { "epoch": 2.52288818359375e-05, "model_forward_time": 0.025182485580444336, "step": 16534 }, { "epoch": 2.52288818359375e-05, "step": 16534, "training_step_time": 0.13657116889953613 }, { "epoch": 2.523040771484375e-05, "model_forward_time": 0.0258944034576416, "step": 16535 }, { "epoch": 2.523040771484375e-05, "step": 16535, "training_step_time": 0.1272275447845459 }, { "epoch": 2.523193359375e-05, "model_forward_time": 0.025060415267944336, "step": 16536 }, { "epoch": 2.523193359375e-05, "step": 16536, "training_step_time": 0.1841285228729248 }, { "epoch": 2.523345947265625e-05, "model_forward_time": 0.02487468719482422, "step": 16537 }, { "epoch": 2.523345947265625e-05, "step": 16537, "training_step_time": 0.18292737007141113 }, { "epoch": 2.52349853515625e-05, "model_forward_time": 0.024478912353515625, "step": 16538 }, { "epoch": 2.52349853515625e-05, "step": 16538, "training_step_time": 0.1664283275604248 }, { "epoch": 2.523651123046875e-05, "model_forward_time": 0.02479100227355957, "step": 16539 }, { "epoch": 2.523651123046875e-05, "step": 16539, "training_step_time": 0.13039326667785645 }, { "epoch": 2.5238037109375e-05, "grad_norm": 0.5051366090774536, "learning_rate": 4.565136392425247e-05, "loss": 0.0114, "step": 16540 }, { "epoch": 2.5238037109375e-05, "model_forward_time": 0.02457594871520996, "step": 16540 }, { "epoch": 2.5238037109375e-05, "step": 16540, "training_step_time": 0.12413620948791504 }, { "epoch": 2.523956298828125e-05, "model_forward_time": 0.02474689483642578, "step": 16541 }, { "epoch": 2.523956298828125e-05, "step": 16541, "training_step_time": 0.1603560447692871 }, { "epoch": 2.52410888671875e-05, "model_forward_time": 0.024825334548950195, "step": 16542 }, { "epoch": 2.52410888671875e-05, "step": 16542, "training_step_time": 0.10730910301208496 }, { "epoch": 2.524261474609375e-05, "model_forward_time": 0.024782419204711914, "step": 16543 }, { "epoch": 2.524261474609375e-05, "step": 16543, "training_step_time": 0.10634422302246094 }, { "epoch": 2.5244140625e-05, "model_forward_time": 0.024883031845092773, "step": 16544 }, { "epoch": 2.5244140625e-05, "step": 16544, "training_step_time": 0.10681390762329102 }, { "epoch": 2.524566650390625e-05, "model_forward_time": 0.026651620864868164, "step": 16545 }, { "epoch": 2.524566650390625e-05, "step": 16545, "training_step_time": 0.14073514938354492 }, { "epoch": 2.52471923828125e-05, "model_forward_time": 0.02414870262145996, "step": 16546 }, { "epoch": 2.52471923828125e-05, "step": 16546, "training_step_time": 0.14812803268432617 }, { "epoch": 2.524871826171875e-05, "model_forward_time": 0.024050235748291016, "step": 16547 }, { "epoch": 2.524871826171875e-05, "step": 16547, "training_step_time": 0.140455961227417 }, { "epoch": 2.5250244140625e-05, "model_forward_time": 0.023543834686279297, "step": 16548 }, { "epoch": 2.5250244140625e-05, "step": 16548, "training_step_time": 0.13102316856384277 }, { "epoch": 2.525177001953125e-05, "model_forward_time": 0.02336430549621582, "step": 16549 }, { "epoch": 2.525177001953125e-05, "step": 16549, "training_step_time": 0.1204683780670166 }, { "epoch": 2.52532958984375e-05, "grad_norm": 0.3015366494655609, "learning_rate": 4.559645976691868e-05, "loss": 0.0118, "step": 16550 }, { "epoch": 2.52532958984375e-05, "model_forward_time": 0.024013757705688477, "step": 16550 }, { "epoch": 2.52532958984375e-05, "step": 16550, "training_step_time": 0.12308359146118164 }, { "epoch": 2.525482177734375e-05, "model_forward_time": 0.02369999885559082, "step": 16551 }, { "epoch": 2.525482177734375e-05, "step": 16551, "training_step_time": 0.12003612518310547 }, { "epoch": 2.525634765625e-05, "model_forward_time": 0.024049043655395508, "step": 16552 }, { "epoch": 2.525634765625e-05, "step": 16552, "training_step_time": 0.1148366928100586 }, { "epoch": 2.525787353515625e-05, "model_forward_time": 0.024448871612548828, "step": 16553 }, { "epoch": 2.525787353515625e-05, "step": 16553, "training_step_time": 0.11223649978637695 }, { "epoch": 2.52593994140625e-05, "model_forward_time": 0.024054765701293945, "step": 16554 }, { "epoch": 2.52593994140625e-05, "step": 16554, "training_step_time": 0.10934281349182129 }, { "epoch": 2.526092529296875e-05, "model_forward_time": 0.0249936580657959, "step": 16555 }, { "epoch": 2.526092529296875e-05, "step": 16555, "training_step_time": 0.11856794357299805 }, { "epoch": 2.5262451171875e-05, "model_forward_time": 0.024978160858154297, "step": 16556 }, { "epoch": 2.5262451171875e-05, "step": 16556, "training_step_time": 0.12755894660949707 }, { "epoch": 2.526397705078125e-05, "model_forward_time": 0.025196552276611328, "step": 16557 }, { "epoch": 2.526397705078125e-05, "step": 16557, "training_step_time": 0.10776114463806152 }, { "epoch": 2.52655029296875e-05, "model_forward_time": 0.02548074722290039, "step": 16558 }, { "epoch": 2.52655029296875e-05, "step": 16558, "training_step_time": 0.11733388900756836 }, { "epoch": 2.526702880859375e-05, "model_forward_time": 0.024979352951049805, "step": 16559 }, { "epoch": 2.526702880859375e-05, "step": 16559, "training_step_time": 0.11196684837341309 }, { "epoch": 2.52685546875e-05, "grad_norm": 0.28567254543304443, "learning_rate": 4.554156096030149e-05, "loss": 0.0161, "step": 16560 }, { "epoch": 2.52685546875e-05, "model_forward_time": 0.025266647338867188, "step": 16560 }, { "epoch": 2.52685546875e-05, "step": 16560, "training_step_time": 0.10566043853759766 }, { "epoch": 2.527008056640625e-05, "model_forward_time": 0.025098323822021484, "step": 16561 }, { "epoch": 2.527008056640625e-05, "step": 16561, "training_step_time": 0.19541358947753906 }, { "epoch": 2.52716064453125e-05, "model_forward_time": 0.024399995803833008, "step": 16562 }, { "epoch": 2.52716064453125e-05, "step": 16562, "training_step_time": 0.10361933708190918 }, { "epoch": 2.527313232421875e-05, "model_forward_time": 0.024165868759155273, "step": 16563 }, { "epoch": 2.527313232421875e-05, "step": 16563, "training_step_time": 0.10366702079772949 }, { "epoch": 2.5274658203125e-05, "model_forward_time": 0.02518010139465332, "step": 16564 }, { "epoch": 2.5274658203125e-05, "step": 16564, "training_step_time": 0.10549187660217285 }, { "epoch": 2.527618408203125e-05, "model_forward_time": 0.026192903518676758, "step": 16565 }, { "epoch": 2.527618408203125e-05, "step": 16565, "training_step_time": 0.10967206954956055 }, { "epoch": 2.52777099609375e-05, "model_forward_time": 0.02512669563293457, "step": 16566 }, { "epoch": 2.52777099609375e-05, "step": 16566, "training_step_time": 0.10818696022033691 }, { "epoch": 2.527923583984375e-05, "model_forward_time": 0.028029680252075195, "step": 16567 }, { "epoch": 2.527923583984375e-05, "step": 16567, "training_step_time": 0.10882425308227539 }, { "epoch": 2.528076171875e-05, "model_forward_time": 0.026189088821411133, "step": 16568 }, { "epoch": 2.528076171875e-05, "step": 16568, "training_step_time": 0.10634517669677734 }, { "epoch": 2.528228759765625e-05, "model_forward_time": 0.025264501571655273, "step": 16569 }, { "epoch": 2.528228759765625e-05, "step": 16569, "training_step_time": 0.10529017448425293 }, { "epoch": 2.52838134765625e-05, "grad_norm": 0.1987486630678177, "learning_rate": 4.548666757110812e-05, "loss": 0.013, "step": 16570 }, { "epoch": 2.52838134765625e-05, "model_forward_time": 0.02504134178161621, "step": 16570 }, { "epoch": 2.52838134765625e-05, "step": 16570, "training_step_time": 0.10690069198608398 }, { "epoch": 2.528533935546875e-05, "model_forward_time": 0.024866104125976562, "step": 16571 }, { "epoch": 2.528533935546875e-05, "step": 16571, "training_step_time": 0.11589813232421875 }, { "epoch": 2.5286865234375e-05, "model_forward_time": 0.024870634078979492, "step": 16572 }, { "epoch": 2.5286865234375e-05, "step": 16572, "training_step_time": 0.1053609848022461 }, { "epoch": 2.528839111328125e-05, "model_forward_time": 0.02532672882080078, "step": 16573 }, { "epoch": 2.528839111328125e-05, "step": 16573, "training_step_time": 0.10715961456298828 }, { "epoch": 2.52899169921875e-05, "model_forward_time": 0.027630090713500977, "step": 16574 }, { "epoch": 2.52899169921875e-05, "step": 16574, "training_step_time": 0.10832548141479492 }, { "epoch": 2.529144287109375e-05, "model_forward_time": 0.024944543838500977, "step": 16575 }, { "epoch": 2.529144287109375e-05, "step": 16575, "training_step_time": 0.10603857040405273 }, { "epoch": 2.529296875e-05, "model_forward_time": 0.025362253189086914, "step": 16576 }, { "epoch": 2.529296875e-05, "step": 16576, "training_step_time": 0.10519742965698242 }, { "epoch": 2.529449462890625e-05, "model_forward_time": 0.024924755096435547, "step": 16577 }, { "epoch": 2.529449462890625e-05, "step": 16577, "training_step_time": 0.10332179069519043 }, { "epoch": 2.52960205078125e-05, "model_forward_time": 0.024797677993774414, "step": 16578 }, { "epoch": 2.52960205078125e-05, "step": 16578, "training_step_time": 0.1048271656036377 }, { "epoch": 2.529754638671875e-05, "model_forward_time": 0.02509617805480957, "step": 16579 }, { "epoch": 2.529754638671875e-05, "step": 16579, "training_step_time": 0.1047818660736084 }, { "epoch": 2.5299072265625e-05, "grad_norm": 0.1858363002538681, "learning_rate": 4.543177966603925e-05, "loss": 0.0188, "step": 16580 }, { "epoch": 2.5299072265625e-05, "model_forward_time": 0.02523326873779297, "step": 16580 }, { "epoch": 2.5299072265625e-05, "step": 16580, "training_step_time": 0.10466265678405762 }, { "epoch": 2.530059814453125e-05, "model_forward_time": 0.02450704574584961, "step": 16581 }, { "epoch": 2.530059814453125e-05, "step": 16581, "training_step_time": 0.11696004867553711 }, { "epoch": 2.53021240234375e-05, "model_forward_time": 0.02724623680114746, "step": 16582 }, { "epoch": 2.53021240234375e-05, "step": 16582, "training_step_time": 0.12034058570861816 }, { "epoch": 2.530364990234375e-05, "model_forward_time": 0.025170087814331055, "step": 16583 }, { "epoch": 2.530364990234375e-05, "step": 16583, "training_step_time": 0.14411425590515137 }, { "epoch": 2.530517578125e-05, "model_forward_time": 0.024528026580810547, "step": 16584 }, { "epoch": 2.530517578125e-05, "step": 16584, "training_step_time": 0.2190229892730713 }, { "epoch": 2.530670166015625e-05, "model_forward_time": 0.02458977699279785, "step": 16585 }, { "epoch": 2.530670166015625e-05, "step": 16585, "training_step_time": 0.13709235191345215 }, { "epoch": 2.53082275390625e-05, "model_forward_time": 0.024309873580932617, "step": 16586 }, { "epoch": 2.53082275390625e-05, "step": 16586, "training_step_time": 0.11889171600341797 }, { "epoch": 2.530975341796875e-05, "model_forward_time": 0.024812698364257812, "step": 16587 }, { "epoch": 2.530975341796875e-05, "step": 16587, "training_step_time": 0.12322020530700684 }, { "epoch": 2.5311279296875e-05, "model_forward_time": 0.02521991729736328, "step": 16588 }, { "epoch": 2.5311279296875e-05, "step": 16588, "training_step_time": 0.11711835861206055 }, { "epoch": 2.531280517578125e-05, "model_forward_time": 0.025865554809570312, "step": 16589 }, { "epoch": 2.531280517578125e-05, "step": 16589, "training_step_time": 0.10886645317077637 }, { "epoch": 2.53143310546875e-05, "grad_norm": 0.2656418979167938, "learning_rate": 4.537689731178883e-05, "loss": 0.0079, "step": 16590 }, { "epoch": 2.53143310546875e-05, "model_forward_time": 0.024789094924926758, "step": 16590 }, { "epoch": 2.53143310546875e-05, "step": 16590, "training_step_time": 0.10387253761291504 }, { "epoch": 2.531585693359375e-05, "model_forward_time": 0.02501368522644043, "step": 16591 }, { "epoch": 2.531585693359375e-05, "step": 16591, "training_step_time": 0.10611248016357422 }, { "epoch": 2.53173828125e-05, "model_forward_time": 0.0245816707611084, "step": 16592 }, { "epoch": 2.53173828125e-05, "step": 16592, "training_step_time": 0.1059575080871582 }, { "epoch": 2.531890869140625e-05, "model_forward_time": 0.02487659454345703, "step": 16593 }, { "epoch": 2.531890869140625e-05, "step": 16593, "training_step_time": 0.10468363761901855 }, { "epoch": 2.53204345703125e-05, "model_forward_time": 0.025085926055908203, "step": 16594 }, { "epoch": 2.53204345703125e-05, "step": 16594, "training_step_time": 0.1050114631652832 }, { "epoch": 2.532196044921875e-05, "model_forward_time": 0.02528071403503418, "step": 16595 }, { "epoch": 2.532196044921875e-05, "step": 16595, "training_step_time": 0.10451173782348633 }, { "epoch": 2.5323486328125e-05, "model_forward_time": 0.025014400482177734, "step": 16596 }, { "epoch": 2.5323486328125e-05, "step": 16596, "training_step_time": 0.10512495040893555 }, { "epoch": 2.532501220703125e-05, "model_forward_time": 0.025255203247070312, "step": 16597 }, { "epoch": 2.532501220703125e-05, "step": 16597, "training_step_time": 0.11398696899414062 }, { "epoch": 2.53265380859375e-05, "model_forward_time": 0.025079965591430664, "step": 16598 }, { "epoch": 2.53265380859375e-05, "step": 16598, "training_step_time": 0.10764050483703613 }, { "epoch": 2.532806396484375e-05, "model_forward_time": 0.025055646896362305, "step": 16599 }, { "epoch": 2.532806396484375e-05, "step": 16599, "training_step_time": 0.10536742210388184 }, { "epoch": 2.532958984375e-05, "grad_norm": 0.1985049694776535, "learning_rate": 4.5322020575044114e-05, "loss": 0.0076, "step": 16600 }, { "epoch": 2.532958984375e-05, "model_forward_time": 0.025246858596801758, "step": 16600 }, { "epoch": 2.532958984375e-05, "step": 16600, "training_step_time": 0.1058351993560791 }, { "epoch": 2.533111572265625e-05, "model_forward_time": 0.02504873275756836, "step": 16601 }, { "epoch": 2.533111572265625e-05, "step": 16601, "training_step_time": 0.10551166534423828 }, { "epoch": 2.53326416015625e-05, "model_forward_time": 0.024591922760009766, "step": 16602 }, { "epoch": 2.53326416015625e-05, "step": 16602, "training_step_time": 0.7987291812896729 }, { "epoch": 2.533416748046875e-05, "model_forward_time": 0.022065401077270508, "step": 16603 }, { "epoch": 2.533416748046875e-05, "step": 16603, "training_step_time": 0.1444566249847412 }, { "epoch": 2.5335693359375e-05, "model_forward_time": 0.02356696128845215, "step": 16604 }, { "epoch": 2.5335693359375e-05, "step": 16604, "training_step_time": 0.13671088218688965 }, { "epoch": 2.533721923828125e-05, "model_forward_time": 0.024448633193969727, "step": 16605 }, { "epoch": 2.533721923828125e-05, "step": 16605, "training_step_time": 0.10616540908813477 }, { "epoch": 2.53387451171875e-05, "model_forward_time": 0.025038719177246094, "step": 16606 }, { "epoch": 2.53387451171875e-05, "step": 16606, "training_step_time": 0.10336637496948242 }, { "epoch": 2.534027099609375e-05, "model_forward_time": 0.02539539337158203, "step": 16607 }, { "epoch": 2.534027099609375e-05, "step": 16607, "training_step_time": 0.1045985221862793 }, { "epoch": 2.5341796875e-05, "model_forward_time": 0.024793386459350586, "step": 16608 }, { "epoch": 2.5341796875e-05, "step": 16608, "training_step_time": 0.1066904067993164 }, { "epoch": 2.534332275390625e-05, "model_forward_time": 0.02488231658935547, "step": 16609 }, { "epoch": 2.534332275390625e-05, "step": 16609, "training_step_time": 0.11014199256896973 }, { "epoch": 2.53448486328125e-05, "grad_norm": 0.1672280728816986, "learning_rate": 4.526714952248551e-05, "loss": 0.007, "step": 16610 }, { "epoch": 2.53448486328125e-05, "model_forward_time": 0.025342941284179688, "step": 16610 }, { "epoch": 2.53448486328125e-05, "step": 16610, "training_step_time": 0.10714030265808105 }, { "epoch": 2.534637451171875e-05, "model_forward_time": 0.02525615692138672, "step": 16611 }, { "epoch": 2.534637451171875e-05, "step": 16611, "training_step_time": 0.10456204414367676 }, { "epoch": 2.5347900390625e-05, "model_forward_time": 0.025119543075561523, "step": 16612 }, { "epoch": 2.5347900390625e-05, "step": 16612, "training_step_time": 0.10472249984741211 }, { "epoch": 2.534942626953125e-05, "model_forward_time": 0.025095701217651367, "step": 16613 }, { "epoch": 2.534942626953125e-05, "step": 16613, "training_step_time": 0.10397100448608398 }, { "epoch": 2.53509521484375e-05, "model_forward_time": 0.025292158126831055, "step": 16614 }, { "epoch": 2.53509521484375e-05, "step": 16614, "training_step_time": 0.1049032211303711 }, { "epoch": 2.535247802734375e-05, "model_forward_time": 0.025177478790283203, "step": 16615 }, { "epoch": 2.535247802734375e-05, "step": 16615, "training_step_time": 0.10416483879089355 }, { "epoch": 2.535400390625e-05, "model_forward_time": 0.02516317367553711, "step": 16616 }, { "epoch": 2.535400390625e-05, "step": 16616, "training_step_time": 0.10529756546020508 }, { "epoch": 2.535552978515625e-05, "model_forward_time": 0.02542901039123535, "step": 16617 }, { "epoch": 2.535552978515625e-05, "step": 16617, "training_step_time": 0.10552763938903809 }, { "epoch": 2.53570556640625e-05, "model_forward_time": 0.025108814239501953, "step": 16618 }, { "epoch": 2.53570556640625e-05, "step": 16618, "training_step_time": 0.1042478084564209 }, { "epoch": 2.535858154296875e-05, "model_forward_time": 0.02516007423400879, "step": 16619 }, { "epoch": 2.535858154296875e-05, "step": 16619, "training_step_time": 0.1058199405670166 }, { "epoch": 2.5360107421875e-05, "grad_norm": 0.18666355311870575, "learning_rate": 4.5212284220786494e-05, "loss": 0.0069, "step": 16620 }, { "epoch": 2.5360107421875e-05, "model_forward_time": 0.025111913681030273, "step": 16620 }, { "epoch": 2.5360107421875e-05, "step": 16620, "training_step_time": 0.10625576972961426 }, { "epoch": 2.536163330078125e-05, "model_forward_time": 0.025203466415405273, "step": 16621 }, { "epoch": 2.536163330078125e-05, "step": 16621, "training_step_time": 0.10848021507263184 }, { "epoch": 2.53631591796875e-05, "model_forward_time": 0.025542020797729492, "step": 16622 }, { "epoch": 2.53631591796875e-05, "step": 16622, "training_step_time": 0.11608672142028809 }, { "epoch": 2.536468505859375e-05, "model_forward_time": 0.025019168853759766, "step": 16623 }, { "epoch": 2.536468505859375e-05, "step": 16623, "training_step_time": 0.10539507865905762 }, { "epoch": 2.53662109375e-05, "model_forward_time": 0.02507162094116211, "step": 16624 }, { "epoch": 2.53662109375e-05, "step": 16624, "training_step_time": 0.10774445533752441 }, { "epoch": 2.536773681640625e-05, "model_forward_time": 0.026443958282470703, "step": 16625 }, { "epoch": 2.536773681640625e-05, "step": 16625, "training_step_time": 0.17640352249145508 }, { "epoch": 2.53692626953125e-05, "model_forward_time": 0.02417755126953125, "step": 16626 }, { "epoch": 2.53692626953125e-05, "step": 16626, "training_step_time": 0.17200732231140137 }, { "epoch": 2.537078857421875e-05, "model_forward_time": 0.024953842163085938, "step": 16627 }, { "epoch": 2.537078857421875e-05, "step": 16627, "training_step_time": 0.20723891258239746 }, { "epoch": 2.5372314453125e-05, "model_forward_time": 0.024498939514160156, "step": 16628 }, { "epoch": 2.5372314453125e-05, "step": 16628, "training_step_time": 0.18797826766967773 }, { "epoch": 2.537384033203125e-05, "model_forward_time": 0.02456212043762207, "step": 16629 }, { "epoch": 2.537384033203125e-05, "step": 16629, "training_step_time": 0.16893768310546875 }, { "epoch": 2.53753662109375e-05, "grad_norm": 0.13135157525539398, "learning_rate": 4.515742473661362e-05, "loss": 0.0068, "step": 16630 }, { "epoch": 2.53753662109375e-05, "model_forward_time": 0.02428412437438965, "step": 16630 }, { "epoch": 2.53753662109375e-05, "step": 16630, "training_step_time": 0.18810272216796875 }, { "epoch": 2.537689208984375e-05, "model_forward_time": 0.024073123931884766, "step": 16631 }, { "epoch": 2.537689208984375e-05, "step": 16631, "training_step_time": 0.11037850379943848 }, { "epoch": 2.537841796875e-05, "model_forward_time": 0.024218320846557617, "step": 16632 }, { "epoch": 2.537841796875e-05, "step": 16632, "training_step_time": 0.10922694206237793 }, { "epoch": 2.537994384765625e-05, "model_forward_time": 0.025294780731201172, "step": 16633 }, { "epoch": 2.537994384765625e-05, "step": 16633, "training_step_time": 0.11012959480285645 }, { "epoch": 2.53814697265625e-05, "model_forward_time": 0.024784088134765625, "step": 16634 }, { "epoch": 2.53814697265625e-05, "step": 16634, "training_step_time": 0.10961174964904785 }, { "epoch": 2.538299560546875e-05, "model_forward_time": 0.02500772476196289, "step": 16635 }, { "epoch": 2.538299560546875e-05, "step": 16635, "training_step_time": 0.1098935604095459 }, { "epoch": 2.5384521484375e-05, "model_forward_time": 0.02524709701538086, "step": 16636 }, { "epoch": 2.5384521484375e-05, "step": 16636, "training_step_time": 0.10840702056884766 }, { "epoch": 2.538604736328125e-05, "model_forward_time": 0.025710582733154297, "step": 16637 }, { "epoch": 2.538604736328125e-05, "step": 16637, "training_step_time": 0.10905075073242188 }, { "epoch": 2.53875732421875e-05, "model_forward_time": 0.027691364288330078, "step": 16638 }, { "epoch": 2.53875732421875e-05, "step": 16638, "training_step_time": 0.11060190200805664 }, { "epoch": 2.538909912109375e-05, "model_forward_time": 0.025199413299560547, "step": 16639 }, { "epoch": 2.538909912109375e-05, "step": 16639, "training_step_time": 0.10738778114318848 }, { "epoch": 2.5390625e-05, "grad_norm": 0.49643006920814514, "learning_rate": 4.510257113662632e-05, "loss": 0.0185, "step": 16640 }, { "epoch": 2.5390625e-05, "model_forward_time": 0.02658867835998535, "step": 16640 }, { "epoch": 2.5390625e-05, "step": 16640, "training_step_time": 0.10846638679504395 }, { "epoch": 2.539215087890625e-05, "model_forward_time": 0.025012969970703125, "step": 16641 }, { "epoch": 2.539215087890625e-05, "step": 16641, "training_step_time": 0.10785222053527832 }, { "epoch": 2.53936767578125e-05, "model_forward_time": 0.02572345733642578, "step": 16642 }, { "epoch": 2.53936767578125e-05, "step": 16642, "training_step_time": 0.1070244312286377 }, { "epoch": 2.539520263671875e-05, "model_forward_time": 0.025281190872192383, "step": 16643 }, { "epoch": 2.539520263671875e-05, "step": 16643, "training_step_time": 0.8571782112121582 }, { "epoch": 2.5396728515625e-05, "model_forward_time": 0.023311138153076172, "step": 16644 }, { "epoch": 2.5396728515625e-05, "step": 16644, "training_step_time": 0.1541895866394043 }, { "epoch": 2.539825439453125e-05, "model_forward_time": 0.024463176727294922, "step": 16645 }, { "epoch": 2.539825439453125e-05, "step": 16645, "training_step_time": 0.10650444030761719 }, { "epoch": 2.53997802734375e-05, "model_forward_time": 0.02795863151550293, "step": 16646 }, { "epoch": 2.53997802734375e-05, "step": 16646, "training_step_time": 0.10680603981018066 }, { "epoch": 2.540130615234375e-05, "model_forward_time": 0.025423765182495117, "step": 16647 }, { "epoch": 2.540130615234375e-05, "step": 16647, "training_step_time": 0.10442256927490234 }, { "epoch": 2.540283203125e-05, "model_forward_time": 0.025372028350830078, "step": 16648 }, { "epoch": 2.540283203125e-05, "step": 16648, "training_step_time": 0.10989189147949219 }, { "epoch": 2.540435791015625e-05, "model_forward_time": 0.025456666946411133, "step": 16649 }, { "epoch": 2.540435791015625e-05, "step": 16649, "training_step_time": 0.10805225372314453 }, { "epoch": 2.54058837890625e-05, "grad_norm": 0.19242271780967712, "learning_rate": 4.504772348747687e-05, "loss": 0.0134, "step": 16650 }, { "epoch": 2.54058837890625e-05, "model_forward_time": 0.025990724563598633, "step": 16650 }, { "epoch": 2.54058837890625e-05, "step": 16650, "training_step_time": 0.10658097267150879 }, { "epoch": 2.540740966796875e-05, "model_forward_time": 0.025502443313598633, "step": 16651 }, { "epoch": 2.540740966796875e-05, "step": 16651, "training_step_time": 0.11491274833679199 }, { "epoch": 2.5408935546875e-05, "model_forward_time": 0.02398061752319336, "step": 16652 }, { "epoch": 2.5408935546875e-05, "step": 16652, "training_step_time": 0.13004589080810547 }, { "epoch": 2.541046142578125e-05, "model_forward_time": 0.0239565372467041, "step": 16653 }, { "epoch": 2.541046142578125e-05, "step": 16653, "training_step_time": 0.1210334300994873 }, { "epoch": 2.54119873046875e-05, "model_forward_time": 0.02406620979309082, "step": 16654 }, { "epoch": 2.54119873046875e-05, "step": 16654, "training_step_time": 0.12010574340820312 }, { "epoch": 2.541351318359375e-05, "model_forward_time": 0.024120330810546875, "step": 16655 }, { "epoch": 2.541351318359375e-05, "step": 16655, "training_step_time": 0.116180419921875 }, { "epoch": 2.54150390625e-05, "model_forward_time": 0.024177074432373047, "step": 16656 }, { "epoch": 2.54150390625e-05, "step": 16656, "training_step_time": 0.11922383308410645 }, { "epoch": 2.541656494140625e-05, "model_forward_time": 0.023957252502441406, "step": 16657 }, { "epoch": 2.541656494140625e-05, "step": 16657, "training_step_time": 0.11320328712463379 }, { "epoch": 2.54180908203125e-05, "model_forward_time": 0.025034427642822266, "step": 16658 }, { "epoch": 2.54180908203125e-05, "step": 16658, "training_step_time": 0.10765457153320312 }, { "epoch": 2.541961669921875e-05, "model_forward_time": 0.0250089168548584, "step": 16659 }, { "epoch": 2.541961669921875e-05, "step": 16659, "training_step_time": 0.10962605476379395 }, { "epoch": 2.5421142578125e-05, "grad_norm": 0.22955352067947388, "learning_rate": 4.4992881855810366e-05, "loss": 0.0098, "step": 16660 }, { "epoch": 2.5421142578125e-05, "model_forward_time": 0.02432107925415039, "step": 16660 }, { "epoch": 2.5421142578125e-05, "step": 16660, "training_step_time": 0.10808014869689941 }, { "epoch": 2.542266845703125e-05, "model_forward_time": 0.025298357009887695, "step": 16661 }, { "epoch": 2.542266845703125e-05, "step": 16661, "training_step_time": 0.10692834854125977 }, { "epoch": 2.54241943359375e-05, "model_forward_time": 0.02498173713684082, "step": 16662 }, { "epoch": 2.54241943359375e-05, "step": 16662, "training_step_time": 0.10931515693664551 }, { "epoch": 2.542572021484375e-05, "model_forward_time": 0.0238497257232666, "step": 16663 }, { "epoch": 2.542572021484375e-05, "step": 16663, "training_step_time": 0.10422039031982422 }, { "epoch": 2.542724609375e-05, "model_forward_time": 0.028241634368896484, "step": 16664 }, { "epoch": 2.542724609375e-05, "step": 16664, "training_step_time": 0.10813021659851074 }, { "epoch": 2.542877197265625e-05, "model_forward_time": 0.02412104606628418, "step": 16665 }, { "epoch": 2.542877197265625e-05, "step": 16665, "training_step_time": 0.1409130096435547 }, { "epoch": 2.54302978515625e-05, "model_forward_time": 0.024274587631225586, "step": 16666 }, { "epoch": 2.54302978515625e-05, "step": 16666, "training_step_time": 0.14400815963745117 }, { "epoch": 2.543182373046875e-05, "model_forward_time": 0.024796247482299805, "step": 16667 }, { "epoch": 2.543182373046875e-05, "step": 16667, "training_step_time": 0.15766596794128418 }, { "epoch": 2.5433349609375e-05, "model_forward_time": 0.024019718170166016, "step": 16668 }, { "epoch": 2.5433349609375e-05, "step": 16668, "training_step_time": 0.16984105110168457 }, { "epoch": 2.543487548828125e-05, "model_forward_time": 0.02421879768371582, "step": 16669 }, { "epoch": 2.543487548828125e-05, "step": 16669, "training_step_time": 0.15294861793518066 }, { "epoch": 2.54364013671875e-05, "grad_norm": 0.20682382583618164, "learning_rate": 4.4938046308264544e-05, "loss": 0.0252, "step": 16670 }, { "epoch": 2.54364013671875e-05, "model_forward_time": 0.024389982223510742, "step": 16670 }, { "epoch": 2.54364013671875e-05, "step": 16670, "training_step_time": 0.1876511573791504 }, { "epoch": 2.543792724609375e-05, "model_forward_time": 0.024072647094726562, "step": 16671 }, { "epoch": 2.543792724609375e-05, "step": 16671, "training_step_time": 0.14053845405578613 }, { "epoch": 2.5439453125e-05, "model_forward_time": 0.024406909942626953, "step": 16672 }, { "epoch": 2.5439453125e-05, "step": 16672, "training_step_time": 0.11157536506652832 }, { "epoch": 2.544097900390625e-05, "model_forward_time": 0.02478647232055664, "step": 16673 }, { "epoch": 2.544097900390625e-05, "step": 16673, "training_step_time": 0.1026923656463623 }, { "epoch": 2.54425048828125e-05, "model_forward_time": 0.025321006774902344, "step": 16674 }, { "epoch": 2.54425048828125e-05, "step": 16674, "training_step_time": 0.10439133644104004 }, { "epoch": 2.544403076171875e-05, "model_forward_time": 0.02544236183166504, "step": 16675 }, { "epoch": 2.544403076171875e-05, "step": 16675, "training_step_time": 0.10453557968139648 }, { "epoch": 2.5445556640625e-05, "model_forward_time": 0.025281667709350586, "step": 16676 }, { "epoch": 2.5445556640625e-05, "step": 16676, "training_step_time": 0.10698318481445312 }, { "epoch": 2.544708251953125e-05, "model_forward_time": 0.025316238403320312, "step": 16677 }, { "epoch": 2.544708251953125e-05, "step": 16677, "training_step_time": 0.10499024391174316 }, { "epoch": 2.54486083984375e-05, "model_forward_time": 0.02532792091369629, "step": 16678 }, { "epoch": 2.54486083984375e-05, "step": 16678, "training_step_time": 0.1079416275024414 }, { "epoch": 2.545013427734375e-05, "model_forward_time": 0.025020360946655273, "step": 16679 }, { "epoch": 2.545013427734375e-05, "step": 16679, "training_step_time": 0.11034703254699707 }, { "epoch": 2.545166015625e-05, "grad_norm": 0.25903502106666565, "learning_rate": 4.488321691146975e-05, "loss": 0.011, "step": 16680 }, { "epoch": 2.545166015625e-05, "model_forward_time": 0.025185585021972656, "step": 16680 }, { "epoch": 2.545166015625e-05, "step": 16680, "training_step_time": 0.10507750511169434 }, { "epoch": 2.545318603515625e-05, "model_forward_time": 0.025228023529052734, "step": 16681 }, { "epoch": 2.545318603515625e-05, "step": 16681, "training_step_time": 0.10497832298278809 }, { "epoch": 2.54547119140625e-05, "model_forward_time": 0.025407075881958008, "step": 16682 }, { "epoch": 2.54547119140625e-05, "step": 16682, "training_step_time": 0.10824084281921387 }, { "epoch": 2.545623779296875e-05, "model_forward_time": 0.025044918060302734, "step": 16683 }, { "epoch": 2.545623779296875e-05, "step": 16683, "training_step_time": 0.10697674751281738 }, { "epoch": 2.5457763671875e-05, "model_forward_time": 0.024970293045043945, "step": 16684 }, { "epoch": 2.5457763671875e-05, "step": 16684, "training_step_time": 0.10669207572937012 }, { "epoch": 2.545928955078125e-05, "model_forward_time": 0.025036096572875977, "step": 16685 }, { "epoch": 2.545928955078125e-05, "step": 16685, "training_step_time": 0.10999512672424316 }, { "epoch": 2.54608154296875e-05, "model_forward_time": 0.02453756332397461, "step": 16686 }, { "epoch": 2.54608154296875e-05, "step": 16686, "training_step_time": 0.13719463348388672 }, { "epoch": 2.546234130859375e-05, "model_forward_time": 0.025812864303588867, "step": 16687 }, { "epoch": 2.546234130859375e-05, "step": 16687, "training_step_time": 0.131516695022583 }, { "epoch": 2.54638671875e-05, "model_forward_time": 0.024624109268188477, "step": 16688 }, { "epoch": 2.54638671875e-05, "step": 16688, "training_step_time": 0.11099481582641602 }, { "epoch": 2.546539306640625e-05, "model_forward_time": 0.024971961975097656, "step": 16689 }, { "epoch": 2.546539306640625e-05, "step": 16689, "training_step_time": 0.1086118221282959 }, { "epoch": 2.54669189453125e-05, "grad_norm": 0.5208256244659424, "learning_rate": 4.482839373204891e-05, "loss": 0.0121, "step": 16690 }, { "epoch": 2.54669189453125e-05, "model_forward_time": 0.02492523193359375, "step": 16690 }, { "epoch": 2.54669189453125e-05, "step": 16690, "training_step_time": 0.11162877082824707 }, { "epoch": 2.546844482421875e-05, "model_forward_time": 0.02521800994873047, "step": 16691 }, { "epoch": 2.546844482421875e-05, "step": 16691, "training_step_time": 0.10720562934875488 }, { "epoch": 2.5469970703125e-05, "model_forward_time": 0.024992704391479492, "step": 16692 }, { "epoch": 2.5469970703125e-05, "step": 16692, "training_step_time": 0.19167208671569824 }, { "epoch": 2.547149658203125e-05, "model_forward_time": 0.024267196655273438, "step": 16693 }, { "epoch": 2.547149658203125e-05, "step": 16693, "training_step_time": 0.10340046882629395 }, { "epoch": 2.54730224609375e-05, "model_forward_time": 0.025059938430786133, "step": 16694 }, { "epoch": 2.54730224609375e-05, "step": 16694, "training_step_time": 0.10753440856933594 }, { "epoch": 2.547454833984375e-05, "model_forward_time": 0.025241851806640625, "step": 16695 }, { "epoch": 2.547454833984375e-05, "step": 16695, "training_step_time": 0.1053619384765625 }, { "epoch": 2.547607421875e-05, "model_forward_time": 0.025176525115966797, "step": 16696 }, { "epoch": 2.547607421875e-05, "step": 16696, "training_step_time": 0.10573458671569824 }, { "epoch": 2.547760009765625e-05, "model_forward_time": 0.025429725646972656, "step": 16697 }, { "epoch": 2.547760009765625e-05, "step": 16697, "training_step_time": 0.10465621948242188 }, { "epoch": 2.54791259765625e-05, "model_forward_time": 0.024989604949951172, "step": 16698 }, { "epoch": 2.54791259765625e-05, "step": 16698, "training_step_time": 0.10613012313842773 }, { "epoch": 2.548065185546875e-05, "model_forward_time": 0.025126934051513672, "step": 16699 }, { "epoch": 2.548065185546875e-05, "step": 16699, "training_step_time": 0.11264276504516602 }, { "epoch": 2.5482177734375e-05, "grad_norm": 0.2976929545402527, "learning_rate": 4.477357683661734e-05, "loss": 0.0096, "step": 16700 }, { "epoch": 2.5482177734375e-05, "model_forward_time": 0.024694204330444336, "step": 16700 }, { "epoch": 2.5482177734375e-05, "step": 16700, "training_step_time": 0.11166071891784668 }, { "epoch": 2.548370361328125e-05, "model_forward_time": 0.024953126907348633, "step": 16701 }, { "epoch": 2.548370361328125e-05, "step": 16701, "training_step_time": 0.11517596244812012 }, { "epoch": 2.54852294921875e-05, "model_forward_time": 0.023789167404174805, "step": 16702 }, { "epoch": 2.54852294921875e-05, "step": 16702, "training_step_time": 0.11387372016906738 }, { "epoch": 2.548675537109375e-05, "model_forward_time": 0.024001121520996094, "step": 16703 }, { "epoch": 2.548675537109375e-05, "step": 16703, "training_step_time": 0.1112363338470459 }, { "epoch": 2.548828125e-05, "model_forward_time": 0.024921417236328125, "step": 16704 }, { "epoch": 2.548828125e-05, "step": 16704, "training_step_time": 0.10770654678344727 }, { "epoch": 2.548980712890625e-05, "model_forward_time": 0.025012493133544922, "step": 16705 }, { "epoch": 2.548980712890625e-05, "step": 16705, "training_step_time": 0.10656070709228516 }, { "epoch": 2.54913330078125e-05, "model_forward_time": 0.024748563766479492, "step": 16706 }, { "epoch": 2.54913330078125e-05, "step": 16706, "training_step_time": 0.10578417778015137 }, { "epoch": 2.549285888671875e-05, "model_forward_time": 0.024987220764160156, "step": 16707 }, { "epoch": 2.549285888671875e-05, "step": 16707, "training_step_time": 0.10602951049804688 }, { "epoch": 2.5494384765625e-05, "model_forward_time": 0.024960041046142578, "step": 16708 }, { "epoch": 2.5494384765625e-05, "step": 16708, "training_step_time": 0.10867547988891602 }, { "epoch": 2.549591064453125e-05, "model_forward_time": 0.02485942840576172, "step": 16709 }, { "epoch": 2.549591064453125e-05, "step": 16709, "training_step_time": 0.10514163970947266 }, { "epoch": 2.54974365234375e-05, "grad_norm": 0.15842688083648682, "learning_rate": 4.471876629178273e-05, "loss": 0.0137, "step": 16710 }, { "epoch": 2.54974365234375e-05, "model_forward_time": 0.02489638328552246, "step": 16710 }, { "epoch": 2.54974365234375e-05, "step": 16710, "training_step_time": 0.10535836219787598 }, { "epoch": 2.549896240234375e-05, "model_forward_time": 0.02777552604675293, "step": 16711 }, { "epoch": 2.549896240234375e-05, "step": 16711, "training_step_time": 0.10854816436767578 }, { "epoch": 2.550048828125e-05, "model_forward_time": 0.0268707275390625, "step": 16712 }, { "epoch": 2.550048828125e-05, "step": 16712, "training_step_time": 0.10781383514404297 }, { "epoch": 2.550201416015625e-05, "model_forward_time": 0.025024890899658203, "step": 16713 }, { "epoch": 2.550201416015625e-05, "step": 16713, "training_step_time": 0.14537405967712402 }, { "epoch": 2.55035400390625e-05, "model_forward_time": 0.0251924991607666, "step": 16714 }, { "epoch": 2.55035400390625e-05, "step": 16714, "training_step_time": 0.10884881019592285 }, { "epoch": 2.550506591796875e-05, "model_forward_time": 0.025264501571655273, "step": 16715 }, { "epoch": 2.550506591796875e-05, "step": 16715, "training_step_time": 0.1516261100769043 }, { "epoch": 2.5506591796875e-05, "model_forward_time": 0.02481532096862793, "step": 16716 }, { "epoch": 2.5506591796875e-05, "step": 16716, "training_step_time": 0.18784236907958984 }, { "epoch": 2.550811767578125e-05, "model_forward_time": 0.02432084083557129, "step": 16717 }, { "epoch": 2.550811767578125e-05, "step": 16717, "training_step_time": 0.17497849464416504 }, { "epoch": 2.55096435546875e-05, "model_forward_time": 0.024656057357788086, "step": 16718 }, { "epoch": 2.55096435546875e-05, "step": 16718, "training_step_time": 0.1910874843597412 }, { "epoch": 2.551116943359375e-05, "model_forward_time": 0.024064302444458008, "step": 16719 }, { "epoch": 2.551116943359375e-05, "step": 16719, "training_step_time": 0.1134650707244873 }, { "epoch": 2.55126953125e-05, "grad_norm": 0.4978160262107849, "learning_rate": 4.4663962164145045e-05, "loss": 0.0222, "step": 16720 }, { "epoch": 2.55126953125e-05, "model_forward_time": 0.02452683448791504, "step": 16720 }, { "epoch": 2.55126953125e-05, "step": 16720, "training_step_time": 0.10638642311096191 }, { "epoch": 2.551422119140625e-05, "model_forward_time": 0.02485203742980957, "step": 16721 }, { "epoch": 2.551422119140625e-05, "step": 16721, "training_step_time": 0.10462522506713867 }, { "epoch": 2.55157470703125e-05, "model_forward_time": 0.025048017501831055, "step": 16722 }, { "epoch": 2.55157470703125e-05, "step": 16722, "training_step_time": 0.10448956489562988 }, { "epoch": 2.551727294921875e-05, "model_forward_time": 0.025154590606689453, "step": 16723 }, { "epoch": 2.551727294921875e-05, "step": 16723, "training_step_time": 0.10414600372314453 }, { "epoch": 2.5518798828125e-05, "model_forward_time": 0.024892330169677734, "step": 16724 }, { "epoch": 2.5518798828125e-05, "step": 16724, "training_step_time": 0.10428643226623535 }, { "epoch": 2.552032470703125e-05, "model_forward_time": 0.025784969329833984, "step": 16725 }, { "epoch": 2.552032470703125e-05, "step": 16725, "training_step_time": 0.11121273040771484 }, { "epoch": 2.55218505859375e-05, "model_forward_time": 0.02483391761779785, "step": 16726 }, { "epoch": 2.55218505859375e-05, "step": 16726, "training_step_time": 0.10900378227233887 }, { "epoch": 2.552337646484375e-05, "model_forward_time": 0.025098562240600586, "step": 16727 }, { "epoch": 2.552337646484375e-05, "step": 16727, "training_step_time": 0.10476136207580566 }, { "epoch": 2.552490234375e-05, "model_forward_time": 0.025157928466796875, "step": 16728 }, { "epoch": 2.552490234375e-05, "step": 16728, "training_step_time": 0.1058509349822998 }, { "epoch": 2.552642822265625e-05, "model_forward_time": 0.024791955947875977, "step": 16729 }, { "epoch": 2.552642822265625e-05, "step": 16729, "training_step_time": 0.10350918769836426 }, { "epoch": 2.55279541015625e-05, "grad_norm": 0.4895699918270111, "learning_rate": 4.46091645202965e-05, "loss": 0.02, "step": 16730 }, { "epoch": 2.55279541015625e-05, "model_forward_time": 0.027889013290405273, "step": 16730 }, { "epoch": 2.55279541015625e-05, "step": 16730, "training_step_time": 0.10820364952087402 }, { "epoch": 2.552947998046875e-05, "model_forward_time": 0.024933338165283203, "step": 16731 }, { "epoch": 2.552947998046875e-05, "step": 16731, "training_step_time": 0.10579538345336914 }, { "epoch": 2.5531005859375e-05, "model_forward_time": 0.024780750274658203, "step": 16732 }, { "epoch": 2.5531005859375e-05, "step": 16732, "training_step_time": 0.10418391227722168 }, { "epoch": 2.553253173828125e-05, "model_forward_time": 0.024895668029785156, "step": 16733 }, { "epoch": 2.553253173828125e-05, "step": 16733, "training_step_time": 0.10424613952636719 }, { "epoch": 2.55340576171875e-05, "model_forward_time": 0.02785658836364746, "step": 16734 }, { "epoch": 2.55340576171875e-05, "step": 16734, "training_step_time": 0.16634130477905273 }, { "epoch": 2.553558349609375e-05, "model_forward_time": 0.024422168731689453, "step": 16735 }, { "epoch": 2.553558349609375e-05, "step": 16735, "training_step_time": 0.13601946830749512 }, { "epoch": 2.5537109375e-05, "model_forward_time": 0.024595975875854492, "step": 16736 }, { "epoch": 2.5537109375e-05, "step": 16736, "training_step_time": 0.10818719863891602 }, { "epoch": 2.553863525390625e-05, "model_forward_time": 0.024896860122680664, "step": 16737 }, { "epoch": 2.553863525390625e-05, "step": 16737, "training_step_time": 0.11409831047058105 }, { "epoch": 2.55401611328125e-05, "model_forward_time": 0.024658679962158203, "step": 16738 }, { "epoch": 2.55401611328125e-05, "step": 16738, "training_step_time": 0.10797262191772461 }, { "epoch": 2.554168701171875e-05, "model_forward_time": 0.025061607360839844, "step": 16739 }, { "epoch": 2.554168701171875e-05, "step": 16739, "training_step_time": 0.11366653442382812 }, { "epoch": 2.5543212890625e-05, "grad_norm": 0.18995165824890137, "learning_rate": 4.4554373426821374e-05, "loss": 0.0112, "step": 16740 }, { "epoch": 2.5543212890625e-05, "model_forward_time": 0.024971961975097656, "step": 16740 }, { "epoch": 2.5543212890625e-05, "step": 16740, "training_step_time": 0.1929788589477539 }, { "epoch": 2.554473876953125e-05, "model_forward_time": 0.02646493911743164, "step": 16741 }, { "epoch": 2.554473876953125e-05, "step": 16741, "training_step_time": 0.10651755332946777 }, { "epoch": 2.55462646484375e-05, "model_forward_time": 0.024684429168701172, "step": 16742 }, { "epoch": 2.55462646484375e-05, "step": 16742, "training_step_time": 0.10277819633483887 }, { "epoch": 2.554779052734375e-05, "model_forward_time": 0.024974346160888672, "step": 16743 }, { "epoch": 2.554779052734375e-05, "step": 16743, "training_step_time": 0.10351777076721191 }, { "epoch": 2.554931640625e-05, "model_forward_time": 0.025186538696289062, "step": 16744 }, { "epoch": 2.554931640625e-05, "step": 16744, "training_step_time": 0.10380268096923828 }, { "epoch": 2.555084228515625e-05, "model_forward_time": 0.025336265563964844, "step": 16745 }, { "epoch": 2.555084228515625e-05, "step": 16745, "training_step_time": 0.10793137550354004 }, { "epoch": 2.55523681640625e-05, "model_forward_time": 0.02574014663696289, "step": 16746 }, { "epoch": 2.55523681640625e-05, "step": 16746, "training_step_time": 0.10466647148132324 }, { "epoch": 2.555389404296875e-05, "model_forward_time": 0.025153636932373047, "step": 16747 }, { "epoch": 2.555389404296875e-05, "step": 16747, "training_step_time": 0.1082465648651123 }, { "epoch": 2.5555419921875e-05, "model_forward_time": 0.0247189998626709, "step": 16748 }, { "epoch": 2.5555419921875e-05, "step": 16748, "training_step_time": 0.10575699806213379 }, { "epoch": 2.555694580078125e-05, "model_forward_time": 0.025058269500732422, "step": 16749 }, { "epoch": 2.555694580078125e-05, "step": 16749, "training_step_time": 0.1042623519897461 }, { "epoch": 2.55584716796875e-05, "grad_norm": 0.31712692975997925, "learning_rate": 4.449958895029604e-05, "loss": 0.0208, "step": 16750 }, { "epoch": 2.55584716796875e-05, "model_forward_time": 0.025702714920043945, "step": 16750 }, { "epoch": 2.55584716796875e-05, "step": 16750, "training_step_time": 0.10800743103027344 }, { "epoch": 2.555999755859375e-05, "model_forward_time": 0.025292396545410156, "step": 16751 }, { "epoch": 2.555999755859375e-05, "step": 16751, "training_step_time": 0.10869860649108887 }, { "epoch": 2.55615234375e-05, "model_forward_time": 0.025087594985961914, "step": 16752 }, { "epoch": 2.55615234375e-05, "step": 16752, "training_step_time": 0.10884904861450195 }, { "epoch": 2.556304931640625e-05, "model_forward_time": 0.025507688522338867, "step": 16753 }, { "epoch": 2.556304931640625e-05, "step": 16753, "training_step_time": 0.10695886611938477 }, { "epoch": 2.55645751953125e-05, "model_forward_time": 0.025018930435180664, "step": 16754 }, { "epoch": 2.55645751953125e-05, "step": 16754, "training_step_time": 0.10542798042297363 }, { "epoch": 2.556610107421875e-05, "model_forward_time": 0.025371313095092773, "step": 16755 }, { "epoch": 2.556610107421875e-05, "step": 16755, "training_step_time": 0.10761356353759766 }, { "epoch": 2.5567626953125e-05, "model_forward_time": 0.025255918502807617, "step": 16756 }, { "epoch": 2.5567626953125e-05, "step": 16756, "training_step_time": 0.10564112663269043 }, { "epoch": 2.556915283203125e-05, "model_forward_time": 0.02525806427001953, "step": 16757 }, { "epoch": 2.556915283203125e-05, "step": 16757, "training_step_time": 0.10609173774719238 }, { "epoch": 2.55706787109375e-05, "model_forward_time": 0.025043249130249023, "step": 16758 }, { "epoch": 2.55706787109375e-05, "step": 16758, "training_step_time": 0.10986900329589844 }, { "epoch": 2.557220458984375e-05, "model_forward_time": 0.025300979614257812, "step": 16759 }, { "epoch": 2.557220458984375e-05, "step": 16759, "training_step_time": 0.10769391059875488 }, { "epoch": 2.557373046875e-05, "grad_norm": 0.3258390724658966, "learning_rate": 4.444481115728878e-05, "loss": 0.0099, "step": 16760 }, { "epoch": 2.557373046875e-05, "model_forward_time": 0.025603771209716797, "step": 16760 }, { "epoch": 2.557373046875e-05, "step": 16760, "training_step_time": 0.10313796997070312 }, { "epoch": 2.557525634765625e-05, "model_forward_time": 0.02408742904663086, "step": 16761 }, { "epoch": 2.557525634765625e-05, "step": 16761, "training_step_time": 0.15250277519226074 }, { "epoch": 2.55767822265625e-05, "model_forward_time": 0.024526119232177734, "step": 16762 }, { "epoch": 2.55767822265625e-05, "step": 16762, "training_step_time": 0.15346312522888184 }, { "epoch": 2.557830810546875e-05, "model_forward_time": 0.024487972259521484, "step": 16763 }, { "epoch": 2.557830810546875e-05, "step": 16763, "training_step_time": 0.20626115798950195 }, { "epoch": 2.5579833984375e-05, "model_forward_time": 0.027460813522338867, "step": 16764 }, { "epoch": 2.5579833984375e-05, "step": 16764, "training_step_time": 0.17421436309814453 }, { "epoch": 2.558135986328125e-05, "model_forward_time": 0.02458643913269043, "step": 16765 }, { "epoch": 2.558135986328125e-05, "step": 16765, "training_step_time": 0.19320344924926758 }, { "epoch": 2.55828857421875e-05, "model_forward_time": 0.02387380599975586, "step": 16766 }, { "epoch": 2.55828857421875e-05, "step": 16766, "training_step_time": 0.17415118217468262 }, { "epoch": 2.558441162109375e-05, "model_forward_time": 0.024008750915527344, "step": 16767 }, { "epoch": 2.558441162109375e-05, "step": 16767, "training_step_time": 0.10105514526367188 }, { "epoch": 2.55859375e-05, "model_forward_time": 0.02463078498840332, "step": 16768 }, { "epoch": 2.55859375e-05, "step": 16768, "training_step_time": 0.10674691200256348 }, { "epoch": 2.558746337890625e-05, "model_forward_time": 0.025688886642456055, "step": 16769 }, { "epoch": 2.558746337890625e-05, "step": 16769, "training_step_time": 0.10628247261047363 }, { "epoch": 2.55889892578125e-05, "grad_norm": 0.17523738741874695, "learning_rate": 4.439004011435979e-05, "loss": 0.0096, "step": 16770 }, { "epoch": 2.55889892578125e-05, "model_forward_time": 0.025551795959472656, "step": 16770 }, { "epoch": 2.55889892578125e-05, "step": 16770, "training_step_time": 0.10815978050231934 }, { "epoch": 2.559051513671875e-05, "model_forward_time": 0.025446414947509766, "step": 16771 }, { "epoch": 2.559051513671875e-05, "step": 16771, "training_step_time": 0.10514068603515625 }, { "epoch": 2.5592041015625e-05, "model_forward_time": 0.025654077529907227, "step": 16772 }, { "epoch": 2.5592041015625e-05, "step": 16772, "training_step_time": 0.10353231430053711 }, { "epoch": 2.559356689453125e-05, "model_forward_time": 0.024960756301879883, "step": 16773 }, { "epoch": 2.559356689453125e-05, "step": 16773, "training_step_time": 0.10429120063781738 }, { "epoch": 2.55950927734375e-05, "model_forward_time": 0.025243759155273438, "step": 16774 }, { "epoch": 2.55950927734375e-05, "step": 16774, "training_step_time": 0.1047215461730957 }, { "epoch": 2.559661865234375e-05, "model_forward_time": 0.025696754455566406, "step": 16775 }, { "epoch": 2.559661865234375e-05, "step": 16775, "training_step_time": 0.10526227951049805 }, { "epoch": 2.559814453125e-05, "model_forward_time": 0.025246143341064453, "step": 16776 }, { "epoch": 2.559814453125e-05, "step": 16776, "training_step_time": 0.10478663444519043 }, { "epoch": 2.559967041015625e-05, "model_forward_time": 0.025341272354125977, "step": 16777 }, { "epoch": 2.559967041015625e-05, "step": 16777, "training_step_time": 0.11000680923461914 }, { "epoch": 2.56011962890625e-05, "model_forward_time": 0.02501654624938965, "step": 16778 }, { "epoch": 2.56011962890625e-05, "step": 16778, "training_step_time": 0.10725045204162598 }, { "epoch": 2.560272216796875e-05, "model_forward_time": 0.024740219116210938, "step": 16779 }, { "epoch": 2.560272216796875e-05, "step": 16779, "training_step_time": 0.11274027824401855 }, { "epoch": 2.5604248046875e-05, "grad_norm": 0.2779456377029419, "learning_rate": 4.433527588806103e-05, "loss": 0.0132, "step": 16780 }, { "epoch": 2.5604248046875e-05, "model_forward_time": 0.025249719619750977, "step": 16780 }, { "epoch": 2.5604248046875e-05, "step": 16780, "training_step_time": 0.10565924644470215 }, { "epoch": 2.560577392578125e-05, "model_forward_time": 0.025516271591186523, "step": 16781 }, { "epoch": 2.560577392578125e-05, "step": 16781, "training_step_time": 0.12335634231567383 }, { "epoch": 2.56072998046875e-05, "model_forward_time": 0.025021791458129883, "step": 16782 }, { "epoch": 2.56072998046875e-05, "step": 16782, "training_step_time": 0.14078235626220703 }, { "epoch": 2.560882568359375e-05, "model_forward_time": 0.025133609771728516, "step": 16783 }, { "epoch": 2.560882568359375e-05, "step": 16783, "training_step_time": 0.10714030265808105 }, { "epoch": 2.56103515625e-05, "model_forward_time": 0.025081872940063477, "step": 16784 }, { "epoch": 2.56103515625e-05, "step": 16784, "training_step_time": 0.1062471866607666 }, { "epoch": 2.561187744140625e-05, "model_forward_time": 0.02557229995727539, "step": 16785 }, { "epoch": 2.561187744140625e-05, "step": 16785, "training_step_time": 0.12314176559448242 }, { "epoch": 2.56134033203125e-05, "model_forward_time": 0.02576589584350586, "step": 16786 }, { "epoch": 2.56134033203125e-05, "step": 16786, "training_step_time": 0.11900877952575684 }, { "epoch": 2.561492919921875e-05, "model_forward_time": 0.02558732032775879, "step": 16787 }, { "epoch": 2.561492919921875e-05, "step": 16787, "training_step_time": 0.18416404724121094 }, { "epoch": 2.5616455078125e-05, "model_forward_time": 0.024999380111694336, "step": 16788 }, { "epoch": 2.5616455078125e-05, "step": 16788, "training_step_time": 0.14604640007019043 }, { "epoch": 2.561798095703125e-05, "model_forward_time": 0.024517536163330078, "step": 16789 }, { "epoch": 2.561798095703125e-05, "step": 16789, "training_step_time": 0.12738370895385742 }, { "epoch": 2.56195068359375e-05, "grad_norm": 0.21013395488262177, "learning_rate": 4.428051854493623e-05, "loss": 0.0092, "step": 16790 }, { "epoch": 2.56195068359375e-05, "model_forward_time": 0.025165796279907227, "step": 16790 }, { "epoch": 2.56195068359375e-05, "step": 16790, "training_step_time": 0.11907148361206055 }, { "epoch": 2.562103271484375e-05, "model_forward_time": 0.025232553482055664, "step": 16791 }, { "epoch": 2.562103271484375e-05, "step": 16791, "training_step_time": 0.11736059188842773 }, { "epoch": 2.562255859375e-05, "model_forward_time": 0.025210857391357422, "step": 16792 }, { "epoch": 2.562255859375e-05, "step": 16792, "training_step_time": 0.1143956184387207 }, { "epoch": 2.562408447265625e-05, "model_forward_time": 0.02561187744140625, "step": 16793 }, { "epoch": 2.562408447265625e-05, "step": 16793, "training_step_time": 0.11107659339904785 }, { "epoch": 2.56256103515625e-05, "model_forward_time": 0.025137901306152344, "step": 16794 }, { "epoch": 2.56256103515625e-05, "step": 16794, "training_step_time": 0.11233687400817871 }, { "epoch": 2.562713623046875e-05, "model_forward_time": 0.024496078491210938, "step": 16795 }, { "epoch": 2.562713623046875e-05, "step": 16795, "training_step_time": 0.11037945747375488 }, { "epoch": 2.5628662109375e-05, "model_forward_time": 0.02522110939025879, "step": 16796 }, { "epoch": 2.5628662109375e-05, "step": 16796, "training_step_time": 0.10970425605773926 }, { "epoch": 2.563018798828125e-05, "model_forward_time": 0.024808168411254883, "step": 16797 }, { "epoch": 2.563018798828125e-05, "step": 16797, "training_step_time": 0.10735297203063965 }, { "epoch": 2.56317138671875e-05, "model_forward_time": 0.025754928588867188, "step": 16798 }, { "epoch": 2.56317138671875e-05, "step": 16798, "training_step_time": 0.10755300521850586 }, { "epoch": 2.563323974609375e-05, "model_forward_time": 0.025974273681640625, "step": 16799 }, { "epoch": 2.563323974609375e-05, "step": 16799, "training_step_time": 0.10742354393005371 }, { "epoch": 2.5634765625e-05, "grad_norm": 0.3088785409927368, "learning_rate": 4.4225768151520694e-05, "loss": 0.0108, "step": 16800 }, { "epoch": 2.5634765625e-05, "model_forward_time": 0.025625944137573242, "step": 16800 }, { "epoch": 2.5634765625e-05, "step": 16800, "training_step_time": 0.10926580429077148 }, { "epoch": 2.563629150390625e-05, "model_forward_time": 0.02532219886779785, "step": 16801 }, { "epoch": 2.563629150390625e-05, "step": 16801, "training_step_time": 0.10619139671325684 }, { "epoch": 2.56378173828125e-05, "model_forward_time": 0.02483844757080078, "step": 16802 }, { "epoch": 2.56378173828125e-05, "step": 16802, "training_step_time": 0.10608315467834473 }, { "epoch": 2.563934326171875e-05, "model_forward_time": 0.026016950607299805, "step": 16803 }, { "epoch": 2.563934326171875e-05, "step": 16803, "training_step_time": 0.11148309707641602 }, { "epoch": 2.5640869140625e-05, "model_forward_time": 0.02501225471496582, "step": 16804 }, { "epoch": 2.5640869140625e-05, "step": 16804, "training_step_time": 0.10559415817260742 }, { "epoch": 2.564239501953125e-05, "model_forward_time": 0.025614261627197266, "step": 16805 }, { "epoch": 2.564239501953125e-05, "step": 16805, "training_step_time": 0.10633063316345215 }, { "epoch": 2.56439208984375e-05, "model_forward_time": 0.02532792091369629, "step": 16806 }, { "epoch": 2.56439208984375e-05, "step": 16806, "training_step_time": 0.10275888442993164 }, { "epoch": 2.564544677734375e-05, "model_forward_time": 0.024401426315307617, "step": 16807 }, { "epoch": 2.564544677734375e-05, "step": 16807, "training_step_time": 0.15028643608093262 }, { "epoch": 2.564697265625e-05, "model_forward_time": 0.024496078491210938, "step": 16808 }, { "epoch": 2.564697265625e-05, "step": 16808, "training_step_time": 0.16051149368286133 }, { "epoch": 2.564849853515625e-05, "model_forward_time": 0.02483391761779785, "step": 16809 }, { "epoch": 2.564849853515625e-05, "step": 16809, "training_step_time": 0.171830415725708 }, { "epoch": 2.56500244140625e-05, "grad_norm": 0.32032281160354614, "learning_rate": 4.4171024774341346e-05, "loss": 0.009, "step": 16810 }, { "epoch": 2.56500244140625e-05, "model_forward_time": 0.024470090866088867, "step": 16810 }, { "epoch": 2.56500244140625e-05, "step": 16810, "training_step_time": 0.20560145378112793 }, { "epoch": 2.565155029296875e-05, "model_forward_time": 0.024690866470336914, "step": 16811 }, { "epoch": 2.565155029296875e-05, "step": 16811, "training_step_time": 0.1791832447052002 }, { "epoch": 2.5653076171875e-05, "model_forward_time": 0.0249636173248291, "step": 16812 }, { "epoch": 2.5653076171875e-05, "step": 16812, "training_step_time": 0.19231772422790527 }, { "epoch": 2.565460205078125e-05, "model_forward_time": 0.025011301040649414, "step": 16813 }, { "epoch": 2.565460205078125e-05, "step": 16813, "training_step_time": 0.10303616523742676 }, { "epoch": 2.56561279296875e-05, "model_forward_time": 0.0244295597076416, "step": 16814 }, { "epoch": 2.56561279296875e-05, "step": 16814, "training_step_time": 0.10301685333251953 }, { "epoch": 2.565765380859375e-05, "model_forward_time": 0.02506875991821289, "step": 16815 }, { "epoch": 2.565765380859375e-05, "step": 16815, "training_step_time": 0.10490775108337402 }, { "epoch": 2.56591796875e-05, "model_forward_time": 0.026404619216918945, "step": 16816 }, { "epoch": 2.56591796875e-05, "step": 16816, "training_step_time": 0.1056966781616211 }, { "epoch": 2.566070556640625e-05, "model_forward_time": 0.02535080909729004, "step": 16817 }, { "epoch": 2.566070556640625e-05, "step": 16817, "training_step_time": 0.1038215160369873 }, { "epoch": 2.56622314453125e-05, "model_forward_time": 0.0249941349029541, "step": 16818 }, { "epoch": 2.56622314453125e-05, "step": 16818, "training_step_time": 0.10544657707214355 }, { "epoch": 2.566375732421875e-05, "model_forward_time": 0.025719165802001953, "step": 16819 }, { "epoch": 2.566375732421875e-05, "step": 16819, "training_step_time": 0.10309267044067383 }, { "epoch": 2.5665283203125e-05, "grad_norm": 0.26390019059181213, "learning_rate": 4.411628847991653e-05, "loss": 0.0087, "step": 16820 }, { "epoch": 2.5665283203125e-05, "model_forward_time": 0.0249483585357666, "step": 16820 }, { "epoch": 2.5665283203125e-05, "step": 16820, "training_step_time": 0.10575437545776367 }, { "epoch": 2.566680908203125e-05, "model_forward_time": 0.025409460067749023, "step": 16821 }, { "epoch": 2.566680908203125e-05, "step": 16821, "training_step_time": 0.10502314567565918 }, { "epoch": 2.56683349609375e-05, "model_forward_time": 0.025376319885253906, "step": 16822 }, { "epoch": 2.56683349609375e-05, "step": 16822, "training_step_time": 0.10579991340637207 }, { "epoch": 2.566986083984375e-05, "model_forward_time": 0.02564239501953125, "step": 16823 }, { "epoch": 2.566986083984375e-05, "step": 16823, "training_step_time": 0.10613274574279785 }, { "epoch": 2.567138671875e-05, "model_forward_time": 0.025617361068725586, "step": 16824 }, { "epoch": 2.567138671875e-05, "step": 16824, "training_step_time": 0.1105353832244873 }, { "epoch": 2.567291259765625e-05, "model_forward_time": 0.02543783187866211, "step": 16825 }, { "epoch": 2.567291259765625e-05, "step": 16825, "training_step_time": 0.11483478546142578 }, { "epoch": 2.56744384765625e-05, "model_forward_time": 0.027030467987060547, "step": 16826 }, { "epoch": 2.56744384765625e-05, "step": 16826, "training_step_time": 0.1994800567626953 }, { "epoch": 2.567596435546875e-05, "model_forward_time": 0.025115966796875, "step": 16827 }, { "epoch": 2.567596435546875e-05, "step": 16827, "training_step_time": 0.1392652988433838 }, { "epoch": 2.5677490234375e-05, "model_forward_time": 0.025485754013061523, "step": 16828 }, { "epoch": 2.5677490234375e-05, "step": 16828, "training_step_time": 0.12481284141540527 }, { "epoch": 2.567901611328125e-05, "model_forward_time": 0.024344682693481445, "step": 16829 }, { "epoch": 2.567901611328125e-05, "step": 16829, "training_step_time": 0.2037820816040039 }, { "epoch": 2.56805419921875e-05, "grad_norm": 0.14889225363731384, "learning_rate": 4.406155933475599e-05, "loss": 0.0111, "step": 16830 }, { "epoch": 2.56805419921875e-05, "model_forward_time": 0.025420427322387695, "step": 16830 }, { "epoch": 2.56805419921875e-05, "step": 16830, "training_step_time": 0.1215512752532959 }, { "epoch": 2.568206787109375e-05, "model_forward_time": 0.026274442672729492, "step": 16831 }, { "epoch": 2.568206787109375e-05, "step": 16831, "training_step_time": 0.18198037147521973 }, { "epoch": 2.568359375e-05, "model_forward_time": 0.024476051330566406, "step": 16832 }, { "epoch": 2.568359375e-05, "step": 16832, "training_step_time": 0.11098074913024902 }, { "epoch": 2.568511962890625e-05, "model_forward_time": 0.0240631103515625, "step": 16833 }, { "epoch": 2.568511962890625e-05, "step": 16833, "training_step_time": 0.1103518009185791 }, { "epoch": 2.56866455078125e-05, "model_forward_time": 0.024391651153564453, "step": 16834 }, { "epoch": 2.56866455078125e-05, "step": 16834, "training_step_time": 0.11121153831481934 }, { "epoch": 2.568817138671875e-05, "model_forward_time": 0.025082826614379883, "step": 16835 }, { "epoch": 2.568817138671875e-05, "step": 16835, "training_step_time": 0.10668015480041504 }, { "epoch": 2.5689697265625e-05, "model_forward_time": 0.026810169219970703, "step": 16836 }, { "epoch": 2.5689697265625e-05, "step": 16836, "training_step_time": 0.10784220695495605 }, { "epoch": 2.569122314453125e-05, "model_forward_time": 0.02538895606994629, "step": 16837 }, { "epoch": 2.569122314453125e-05, "step": 16837, "training_step_time": 0.10799598693847656 }, { "epoch": 2.56927490234375e-05, "model_forward_time": 0.02486896514892578, "step": 16838 }, { "epoch": 2.56927490234375e-05, "step": 16838, "training_step_time": 0.10961604118347168 }, { "epoch": 2.569427490234375e-05, "model_forward_time": 0.02542877197265625, "step": 16839 }, { "epoch": 2.569427490234375e-05, "step": 16839, "training_step_time": 0.10716891288757324 }, { "epoch": 2.569580078125e-05, "grad_norm": 0.17882491648197174, "learning_rate": 4.400683740536083e-05, "loss": 0.0074, "step": 16840 }, { "epoch": 2.569580078125e-05, "model_forward_time": 0.025153636932373047, "step": 16840 }, { "epoch": 2.569580078125e-05, "step": 16840, "training_step_time": 0.10790657997131348 }, { "epoch": 2.569732666015625e-05, "model_forward_time": 0.025177955627441406, "step": 16841 }, { "epoch": 2.569732666015625e-05, "step": 16841, "training_step_time": 0.1056067943572998 }, { "epoch": 2.56988525390625e-05, "model_forward_time": 0.0254209041595459, "step": 16842 }, { "epoch": 2.56988525390625e-05, "step": 16842, "training_step_time": 0.10584473609924316 }, { "epoch": 2.570037841796875e-05, "model_forward_time": 0.0254209041595459, "step": 16843 }, { "epoch": 2.570037841796875e-05, "step": 16843, "training_step_time": 0.10577678680419922 }, { "epoch": 2.5701904296875e-05, "model_forward_time": 0.025554656982421875, "step": 16844 }, { "epoch": 2.5701904296875e-05, "step": 16844, "training_step_time": 0.10765242576599121 }, { "epoch": 2.570343017578125e-05, "model_forward_time": 0.02558302879333496, "step": 16845 }, { "epoch": 2.570343017578125e-05, "step": 16845, "training_step_time": 0.10473942756652832 }, { "epoch": 2.57049560546875e-05, "model_forward_time": 0.025362253189086914, "step": 16846 }, { "epoch": 2.57049560546875e-05, "step": 16846, "training_step_time": 0.10598874092102051 }, { "epoch": 2.570648193359375e-05, "model_forward_time": 0.025374174118041992, "step": 16847 }, { "epoch": 2.570648193359375e-05, "step": 16847, "training_step_time": 0.11011266708374023 }, { "epoch": 2.57080078125e-05, "model_forward_time": 0.025328636169433594, "step": 16848 }, { "epoch": 2.57080078125e-05, "step": 16848, "training_step_time": 0.10754799842834473 }, { "epoch": 2.570953369140625e-05, "model_forward_time": 0.02509307861328125, "step": 16849 }, { "epoch": 2.570953369140625e-05, "step": 16849, "training_step_time": 0.10747814178466797 }, { "epoch": 2.57110595703125e-05, "grad_norm": 0.22404731810092926, "learning_rate": 4.3952122758223354e-05, "loss": 0.0092, "step": 16850 }, { "epoch": 2.57110595703125e-05, "model_forward_time": 0.025411605834960938, "step": 16850 }, { "epoch": 2.57110595703125e-05, "step": 16850, "training_step_time": 0.10464000701904297 }, { "epoch": 2.571258544921875e-05, "model_forward_time": 0.025107145309448242, "step": 16851 }, { "epoch": 2.571258544921875e-05, "step": 16851, "training_step_time": 0.10290241241455078 }, { "epoch": 2.5714111328125e-05, "model_forward_time": 0.024219036102294922, "step": 16852 }, { "epoch": 2.5714111328125e-05, "step": 16852, "training_step_time": 0.19141316413879395 }, { "epoch": 2.571563720703125e-05, "model_forward_time": 0.02443861961364746, "step": 16853 }, { "epoch": 2.571563720703125e-05, "step": 16853, "training_step_time": 0.17130398750305176 }, { "epoch": 2.57171630859375e-05, "model_forward_time": 0.02448868751525879, "step": 16854 }, { "epoch": 2.57171630859375e-05, "step": 16854, "training_step_time": 0.20472288131713867 }, { "epoch": 2.571868896484375e-05, "model_forward_time": 0.02407526969909668, "step": 16855 }, { "epoch": 2.571868896484375e-05, "step": 16855, "training_step_time": 0.2225816249847412 }, { "epoch": 2.572021484375e-05, "model_forward_time": 0.024912357330322266, "step": 16856 }, { "epoch": 2.572021484375e-05, "step": 16856, "training_step_time": 0.1585521697998047 }, { "epoch": 2.572174072265625e-05, "model_forward_time": 0.024732351303100586, "step": 16857 }, { "epoch": 2.572174072265625e-05, "step": 16857, "training_step_time": 0.1115865707397461 }, { "epoch": 2.57232666015625e-05, "model_forward_time": 0.024787425994873047, "step": 16858 }, { "epoch": 2.57232666015625e-05, "step": 16858, "training_step_time": 0.10498762130737305 }, { "epoch": 2.572479248046875e-05, "model_forward_time": 0.025476932525634766, "step": 16859 }, { "epoch": 2.572479248046875e-05, "step": 16859, "training_step_time": 0.10504508018493652 }, { "epoch": 2.5726318359375e-05, "grad_norm": 0.17102010548114777, "learning_rate": 4.3897415459827e-05, "loss": 0.015, "step": 16860 }, { "epoch": 2.5726318359375e-05, "model_forward_time": 0.029054880142211914, "step": 16860 }, { "epoch": 2.5726318359375e-05, "step": 16860, "training_step_time": 0.10898709297180176 }, { "epoch": 2.572784423828125e-05, "model_forward_time": 0.025323867797851562, "step": 16861 }, { "epoch": 2.572784423828125e-05, "step": 16861, "training_step_time": 0.10597634315490723 }, { "epoch": 2.57293701171875e-05, "model_forward_time": 0.025467395782470703, "step": 16862 }, { "epoch": 2.57293701171875e-05, "step": 16862, "training_step_time": 0.10738611221313477 }, { "epoch": 2.573089599609375e-05, "model_forward_time": 0.025229454040527344, "step": 16863 }, { "epoch": 2.573089599609375e-05, "step": 16863, "training_step_time": 0.10597920417785645 }, { "epoch": 2.5732421875e-05, "model_forward_time": 0.02506422996520996, "step": 16864 }, { "epoch": 2.5732421875e-05, "step": 16864, "training_step_time": 0.10720109939575195 }, { "epoch": 2.573394775390625e-05, "model_forward_time": 0.02538013458251953, "step": 16865 }, { "epoch": 2.573394775390625e-05, "step": 16865, "training_step_time": 0.10530662536621094 }, { "epoch": 2.57354736328125e-05, "model_forward_time": 0.025450468063354492, "step": 16866 }, { "epoch": 2.57354736328125e-05, "step": 16866, "training_step_time": 0.10777139663696289 }, { "epoch": 2.573699951171875e-05, "model_forward_time": 0.025695323944091797, "step": 16867 }, { "epoch": 2.573699951171875e-05, "step": 16867, "training_step_time": 0.10548758506774902 }, { "epoch": 2.5738525390625e-05, "model_forward_time": 0.025517940521240234, "step": 16868 }, { "epoch": 2.5738525390625e-05, "step": 16868, "training_step_time": 0.1103827953338623 }, { "epoch": 2.574005126953125e-05, "model_forward_time": 0.02513861656188965, "step": 16869 }, { "epoch": 2.574005126953125e-05, "step": 16869, "training_step_time": 0.10711956024169922 }, { "epoch": 2.57415771484375e-05, "grad_norm": 0.34216833114624023, "learning_rate": 4.384271557664628e-05, "loss": 0.0076, "step": 16870 }, { "epoch": 2.57415771484375e-05, "model_forward_time": 0.02598404884338379, "step": 16870 }, { "epoch": 2.57415771484375e-05, "step": 16870, "training_step_time": 0.10640430450439453 }, { "epoch": 2.574310302734375e-05, "model_forward_time": 0.025947093963623047, "step": 16871 }, { "epoch": 2.574310302734375e-05, "step": 16871, "training_step_time": 0.10649633407592773 }, { "epoch": 2.574462890625e-05, "model_forward_time": 0.025693893432617188, "step": 16872 }, { "epoch": 2.574462890625e-05, "step": 16872, "training_step_time": 0.10899686813354492 }, { "epoch": 2.574615478515625e-05, "model_forward_time": 0.025373458862304688, "step": 16873 }, { "epoch": 2.574615478515625e-05, "step": 16873, "training_step_time": 0.12107419967651367 }, { "epoch": 2.57476806640625e-05, "model_forward_time": 0.025385618209838867, "step": 16874 }, { "epoch": 2.57476806640625e-05, "step": 16874, "training_step_time": 0.11342954635620117 }, { "epoch": 2.574920654296875e-05, "model_forward_time": 0.025710105895996094, "step": 16875 }, { "epoch": 2.574920654296875e-05, "step": 16875, "training_step_time": 0.10808491706848145 }, { "epoch": 2.5750732421875e-05, "model_forward_time": 0.02534198760986328, "step": 16876 }, { "epoch": 2.5750732421875e-05, "step": 16876, "training_step_time": 0.1106865406036377 }, { "epoch": 2.575225830078125e-05, "model_forward_time": 0.02613544464111328, "step": 16877 }, { "epoch": 2.575225830078125e-05, "step": 16877, "training_step_time": 0.10642719268798828 }, { "epoch": 2.57537841796875e-05, "model_forward_time": 0.024759769439697266, "step": 16878 }, { "epoch": 2.57537841796875e-05, "step": 16878, "training_step_time": 0.19239282608032227 }, { "epoch": 2.575531005859375e-05, "model_forward_time": 0.02441883087158203, "step": 16879 }, { "epoch": 2.575531005859375e-05, "step": 16879, "training_step_time": 0.10502099990844727 }, { "epoch": 2.57568359375e-05, "grad_norm": 0.20437732338905334, "learning_rate": 4.3788023175146747e-05, "loss": 0.0069, "step": 16880 }, { "epoch": 2.57568359375e-05, "model_forward_time": 0.024904251098632812, "step": 16880 }, { "epoch": 2.57568359375e-05, "step": 16880, "training_step_time": 0.10310006141662598 }, { "epoch": 2.575836181640625e-05, "model_forward_time": 0.025289297103881836, "step": 16881 }, { "epoch": 2.575836181640625e-05, "step": 16881, "training_step_time": 0.10827350616455078 }, { "epoch": 2.57598876953125e-05, "model_forward_time": 0.025571823120117188, "step": 16882 }, { "epoch": 2.57598876953125e-05, "step": 16882, "training_step_time": 0.11071014404296875 }, { "epoch": 2.576141357421875e-05, "model_forward_time": 0.025173664093017578, "step": 16883 }, { "epoch": 2.576141357421875e-05, "step": 16883, "training_step_time": 0.10628128051757812 }, { "epoch": 2.5762939453125e-05, "model_forward_time": 0.025313854217529297, "step": 16884 }, { "epoch": 2.5762939453125e-05, "step": 16884, "training_step_time": 0.10785484313964844 }, { "epoch": 2.576446533203125e-05, "model_forward_time": 0.025681495666503906, "step": 16885 }, { "epoch": 2.576446533203125e-05, "step": 16885, "training_step_time": 0.10761904716491699 }, { "epoch": 2.57659912109375e-05, "model_forward_time": 0.025356054306030273, "step": 16886 }, { "epoch": 2.57659912109375e-05, "step": 16886, "training_step_time": 0.10830402374267578 }, { "epoch": 2.576751708984375e-05, "model_forward_time": 0.025353431701660156, "step": 16887 }, { "epoch": 2.576751708984375e-05, "step": 16887, "training_step_time": 0.10603022575378418 }, { "epoch": 2.576904296875e-05, "model_forward_time": 0.025418519973754883, "step": 16888 }, { "epoch": 2.576904296875e-05, "step": 16888, "training_step_time": 0.10626578330993652 }, { "epoch": 2.577056884765625e-05, "model_forward_time": 0.024901866912841797, "step": 16889 }, { "epoch": 2.577056884765625e-05, "step": 16889, "training_step_time": 0.10535168647766113 }, { "epoch": 2.57720947265625e-05, "grad_norm": 0.31352540850639343, "learning_rate": 4.373333832178478e-05, "loss": 0.0139, "step": 16890 }, { "epoch": 2.57720947265625e-05, "model_forward_time": 0.025788068771362305, "step": 16890 }, { "epoch": 2.57720947265625e-05, "step": 16890, "training_step_time": 0.1047823429107666 }, { "epoch": 2.577362060546875e-05, "model_forward_time": 0.025628089904785156, "step": 16891 }, { "epoch": 2.577362060546875e-05, "step": 16891, "training_step_time": 0.10533618927001953 }, { "epoch": 2.5775146484375e-05, "model_forward_time": 0.024789094924926758, "step": 16892 }, { "epoch": 2.5775146484375e-05, "step": 16892, "training_step_time": 0.10709953308105469 }, { "epoch": 2.577667236328125e-05, "model_forward_time": 0.025305747985839844, "step": 16893 }, { "epoch": 2.577667236328125e-05, "step": 16893, "training_step_time": 0.1086430549621582 }, { "epoch": 2.57781982421875e-05, "model_forward_time": 0.024613380432128906, "step": 16894 }, { "epoch": 2.57781982421875e-05, "step": 16894, "training_step_time": 0.10937094688415527 }, { "epoch": 2.577972412109375e-05, "model_forward_time": 0.025641679763793945, "step": 16895 }, { "epoch": 2.577972412109375e-05, "step": 16895, "training_step_time": 0.10694503784179688 }, { "epoch": 2.578125e-05, "model_forward_time": 0.025772571563720703, "step": 16896 }, { "epoch": 2.578125e-05, "step": 16896, "training_step_time": 0.107269287109375 }, { "epoch": 2.578277587890625e-05, "model_forward_time": 0.027095556259155273, "step": 16897 }, { "epoch": 2.578277587890625e-05, "step": 16897, "training_step_time": 0.10834360122680664 }, { "epoch": 2.57843017578125e-05, "model_forward_time": 0.025116682052612305, "step": 16898 }, { "epoch": 2.57843017578125e-05, "step": 16898, "training_step_time": 0.10919857025146484 }, { "epoch": 2.578582763671875e-05, "model_forward_time": 0.023845672607421875, "step": 16899 }, { "epoch": 2.578582763671875e-05, "step": 16899, "training_step_time": 0.19077229499816895 }, { "epoch": 2.5787353515625e-05, "grad_norm": 0.48642513155937195, "learning_rate": 4.367866108300769e-05, "loss": 0.0085, "step": 16900 }, { "epoch": 2.5787353515625e-05, "model_forward_time": 0.024561405181884766, "step": 16900 }, { "epoch": 2.5787353515625e-05, "step": 16900, "training_step_time": 0.20948386192321777 }, { "epoch": 2.578887939453125e-05, "model_forward_time": 0.02608203887939453, "step": 16901 }, { "epoch": 2.578887939453125e-05, "step": 16901, "training_step_time": 0.1550281047821045 }, { "epoch": 2.57904052734375e-05, "model_forward_time": 0.024625539779663086, "step": 16902 }, { "epoch": 2.57904052734375e-05, "step": 16902, "training_step_time": 0.20520448684692383 }, { "epoch": 2.579193115234375e-05, "model_forward_time": 0.02440619468688965, "step": 16903 }, { "epoch": 2.579193115234375e-05, "step": 16903, "training_step_time": 0.19631075859069824 }, { "epoch": 2.579345703125e-05, "model_forward_time": 0.024335145950317383, "step": 16904 }, { "epoch": 2.579345703125e-05, "step": 16904, "training_step_time": 0.16623950004577637 }, { "epoch": 2.579498291015625e-05, "model_forward_time": 0.024112701416015625, "step": 16905 }, { "epoch": 2.579498291015625e-05, "step": 16905, "training_step_time": 0.10695457458496094 }, { "epoch": 2.57965087890625e-05, "model_forward_time": 0.02521514892578125, "step": 16906 }, { "epoch": 2.57965087890625e-05, "step": 16906, "training_step_time": 0.11034321784973145 }, { "epoch": 2.579803466796875e-05, "model_forward_time": 0.025048494338989258, "step": 16907 }, { "epoch": 2.579803466796875e-05, "step": 16907, "training_step_time": 0.11042356491088867 }, { "epoch": 2.5799560546875e-05, "model_forward_time": 0.025214433670043945, "step": 16908 }, { "epoch": 2.5799560546875e-05, "step": 16908, "training_step_time": 0.10883235931396484 }, { "epoch": 2.580108642578125e-05, "model_forward_time": 0.024890422821044922, "step": 16909 }, { "epoch": 2.580108642578125e-05, "step": 16909, "training_step_time": 0.10615134239196777 }, { "epoch": 2.58026123046875e-05, "grad_norm": 0.2852439880371094, "learning_rate": 4.362399152525344e-05, "loss": 0.0081, "step": 16910 }, { "epoch": 2.58026123046875e-05, "model_forward_time": 0.024887561798095703, "step": 16910 }, { "epoch": 2.58026123046875e-05, "step": 16910, "training_step_time": 0.10703063011169434 }, { "epoch": 2.580413818359375e-05, "model_forward_time": 0.02524590492248535, "step": 16911 }, { "epoch": 2.580413818359375e-05, "step": 16911, "training_step_time": 0.11104702949523926 }, { "epoch": 2.58056640625e-05, "model_forward_time": 0.025271892547607422, "step": 16912 }, { "epoch": 2.58056640625e-05, "step": 16912, "training_step_time": 0.10751628875732422 }, { "epoch": 2.580718994140625e-05, "model_forward_time": 0.02498602867126465, "step": 16913 }, { "epoch": 2.580718994140625e-05, "step": 16913, "training_step_time": 0.1089017391204834 }, { "epoch": 2.58087158203125e-05, "model_forward_time": 0.028953075408935547, "step": 16914 }, { "epoch": 2.58087158203125e-05, "step": 16914, "training_step_time": 0.11086463928222656 }, { "epoch": 2.581024169921875e-05, "model_forward_time": 0.025261640548706055, "step": 16915 }, { "epoch": 2.581024169921875e-05, "step": 16915, "training_step_time": 0.11073899269104004 }, { "epoch": 2.5811767578125e-05, "model_forward_time": 0.025379419326782227, "step": 16916 }, { "epoch": 2.5811767578125e-05, "step": 16916, "training_step_time": 0.1063237190246582 }, { "epoch": 2.581329345703125e-05, "model_forward_time": 0.02490067481994629, "step": 16917 }, { "epoch": 2.581329345703125e-05, "step": 16917, "training_step_time": 0.10724067687988281 }, { "epoch": 2.58148193359375e-05, "model_forward_time": 0.02509164810180664, "step": 16918 }, { "epoch": 2.58148193359375e-05, "step": 16918, "training_step_time": 0.14635658264160156 }, { "epoch": 2.581634521484375e-05, "model_forward_time": 0.025643587112426758, "step": 16919 }, { "epoch": 2.581634521484375e-05, "step": 16919, "training_step_time": 0.14476513862609863 }, { "epoch": 2.581787109375e-05, "grad_norm": 0.2205013483762741, "learning_rate": 4.3569329714950704e-05, "loss": 0.0113, "step": 16920 }, { "epoch": 2.581787109375e-05, "model_forward_time": 0.02438068389892578, "step": 16920 }, { "epoch": 2.581787109375e-05, "step": 16920, "training_step_time": 0.1072843074798584 }, { "epoch": 2.581939697265625e-05, "model_forward_time": 0.025136232376098633, "step": 16921 }, { "epoch": 2.581939697265625e-05, "step": 16921, "training_step_time": 0.10743451118469238 }, { "epoch": 2.58209228515625e-05, "model_forward_time": 0.0253903865814209, "step": 16922 }, { "epoch": 2.58209228515625e-05, "step": 16922, "training_step_time": 0.11026215553283691 }, { "epoch": 2.582244873046875e-05, "model_forward_time": 0.024866819381713867, "step": 16923 }, { "epoch": 2.582244873046875e-05, "step": 16923, "training_step_time": 0.10651254653930664 }, { "epoch": 2.5823974609375e-05, "model_forward_time": 0.025058507919311523, "step": 16924 }, { "epoch": 2.5823974609375e-05, "step": 16924, "training_step_time": 0.19185423851013184 }, { "epoch": 2.582550048828125e-05, "model_forward_time": 0.024497032165527344, "step": 16925 }, { "epoch": 2.582550048828125e-05, "step": 16925, "training_step_time": 0.10343146324157715 }, { "epoch": 2.58270263671875e-05, "model_forward_time": 0.024136066436767578, "step": 16926 }, { "epoch": 2.58270263671875e-05, "step": 16926, "training_step_time": 0.10203266143798828 }, { "epoch": 2.582855224609375e-05, "model_forward_time": 0.024825572967529297, "step": 16927 }, { "epoch": 2.582855224609375e-05, "step": 16927, "training_step_time": 0.10566568374633789 }, { "epoch": 2.5830078125e-05, "model_forward_time": 0.025715112686157227, "step": 16928 }, { "epoch": 2.5830078125e-05, "step": 16928, "training_step_time": 0.10661125183105469 }, { "epoch": 2.583160400390625e-05, "model_forward_time": 0.02545952796936035, "step": 16929 }, { "epoch": 2.583160400390625e-05, "step": 16929, "training_step_time": 0.10692834854125977 }, { "epoch": 2.58331298828125e-05, "grad_norm": 0.42118752002716064, "learning_rate": 4.3514675718518734e-05, "loss": 0.0126, "step": 16930 }, { "epoch": 2.58331298828125e-05, "model_forward_time": 0.02561044692993164, "step": 16930 }, { "epoch": 2.58331298828125e-05, "step": 16930, "training_step_time": 0.10451579093933105 }, { "epoch": 2.583465576171875e-05, "model_forward_time": 0.024625778198242188, "step": 16931 }, { "epoch": 2.583465576171875e-05, "step": 16931, "training_step_time": 0.10590362548828125 }, { "epoch": 2.5836181640625e-05, "model_forward_time": 0.02568197250366211, "step": 16932 }, { "epoch": 2.5836181640625e-05, "step": 16932, "training_step_time": 0.11085319519042969 }, { "epoch": 2.583770751953125e-05, "model_forward_time": 0.025183677673339844, "step": 16933 }, { "epoch": 2.583770751953125e-05, "step": 16933, "training_step_time": 0.1221308708190918 }, { "epoch": 2.58392333984375e-05, "model_forward_time": 0.02544546127319336, "step": 16934 }, { "epoch": 2.58392333984375e-05, "step": 16934, "training_step_time": 0.12078332901000977 }, { "epoch": 2.584075927734375e-05, "model_forward_time": 0.025042295455932617, "step": 16935 }, { "epoch": 2.584075927734375e-05, "step": 16935, "training_step_time": 0.11515021324157715 }, { "epoch": 2.584228515625e-05, "model_forward_time": 0.025109291076660156, "step": 16936 }, { "epoch": 2.584228515625e-05, "step": 16936, "training_step_time": 0.11169004440307617 }, { "epoch": 2.584381103515625e-05, "model_forward_time": 0.025378704071044922, "step": 16937 }, { "epoch": 2.584381103515625e-05, "step": 16937, "training_step_time": 0.10795879364013672 }, { "epoch": 2.58453369140625e-05, "model_forward_time": 0.028805017471313477, "step": 16938 }, { "epoch": 2.58453369140625e-05, "step": 16938, "training_step_time": 0.11074328422546387 }, { "epoch": 2.584686279296875e-05, "model_forward_time": 0.0253753662109375, "step": 16939 }, { "epoch": 2.584686279296875e-05, "step": 16939, "training_step_time": 0.10857772827148438 }, { "epoch": 2.5848388671875e-05, "grad_norm": 0.2935320734977722, "learning_rate": 4.3460029602367284e-05, "loss": 0.0095, "step": 16940 }, { "epoch": 2.5848388671875e-05, "model_forward_time": 0.025522232055664062, "step": 16940 }, { "epoch": 2.5848388671875e-05, "step": 16940, "training_step_time": 0.10840439796447754 }, { "epoch": 2.584991455078125e-05, "model_forward_time": 0.02521991729736328, "step": 16941 }, { "epoch": 2.584991455078125e-05, "step": 16941, "training_step_time": 0.10709714889526367 }, { "epoch": 2.58514404296875e-05, "model_forward_time": 0.025694847106933594, "step": 16942 }, { "epoch": 2.58514404296875e-05, "step": 16942, "training_step_time": 0.10622668266296387 }, { "epoch": 2.585296630859375e-05, "model_forward_time": 0.025577545166015625, "step": 16943 }, { "epoch": 2.585296630859375e-05, "step": 16943, "training_step_time": 0.10670995712280273 }, { "epoch": 2.58544921875e-05, "model_forward_time": 0.02601337432861328, "step": 16944 }, { "epoch": 2.58544921875e-05, "step": 16944, "training_step_time": 0.1082158088684082 }, { "epoch": 2.585601806640625e-05, "model_forward_time": 0.024785518646240234, "step": 16945 }, { "epoch": 2.585601806640625e-05, "step": 16945, "training_step_time": 0.19185543060302734 }, { "epoch": 2.58575439453125e-05, "model_forward_time": 0.02455615997314453, "step": 16946 }, { "epoch": 2.58575439453125e-05, "step": 16946, "training_step_time": 0.11673593521118164 }, { "epoch": 2.585906982421875e-05, "model_forward_time": 0.024769067764282227, "step": 16947 }, { "epoch": 2.585906982421875e-05, "step": 16947, "training_step_time": 0.12357568740844727 }, { "epoch": 2.5860595703125e-05, "model_forward_time": 0.026067733764648438, "step": 16948 }, { "epoch": 2.5860595703125e-05, "step": 16948, "training_step_time": 0.14186930656433105 }, { "epoch": 2.586212158203125e-05, "model_forward_time": 0.02537703514099121, "step": 16949 }, { "epoch": 2.586212158203125e-05, "step": 16949, "training_step_time": 0.19973063468933105 }, { "epoch": 2.58636474609375e-05, "grad_norm": 0.219862699508667, "learning_rate": 4.3405391432896555e-05, "loss": 0.0183, "step": 16950 }, { "epoch": 2.58636474609375e-05, "model_forward_time": 0.02424764633178711, "step": 16950 }, { "epoch": 2.58636474609375e-05, "step": 16950, "training_step_time": 0.15396332740783691 }, { "epoch": 2.586517333984375e-05, "model_forward_time": 0.024554967880249023, "step": 16951 }, { "epoch": 2.586517333984375e-05, "step": 16951, "training_step_time": 0.19883465766906738 }, { "epoch": 2.586669921875e-05, "model_forward_time": 0.02415609359741211, "step": 16952 }, { "epoch": 2.586669921875e-05, "step": 16952, "training_step_time": 0.10562729835510254 }, { "epoch": 2.586822509765625e-05, "model_forward_time": 0.024715185165405273, "step": 16953 }, { "epoch": 2.586822509765625e-05, "step": 16953, "training_step_time": 0.10382080078125 }, { "epoch": 2.58697509765625e-05, "model_forward_time": 0.02546858787536621, "step": 16954 }, { "epoch": 2.58697509765625e-05, "step": 16954, "training_step_time": 0.10567259788513184 }, { "epoch": 2.587127685546875e-05, "model_forward_time": 0.025244712829589844, "step": 16955 }, { "epoch": 2.587127685546875e-05, "step": 16955, "training_step_time": 0.10644268989562988 }, { "epoch": 2.5872802734375e-05, "model_forward_time": 0.025655746459960938, "step": 16956 }, { "epoch": 2.5872802734375e-05, "step": 16956, "training_step_time": 0.10638284683227539 }, { "epoch": 2.587432861328125e-05, "model_forward_time": 0.02520132064819336, "step": 16957 }, { "epoch": 2.587432861328125e-05, "step": 16957, "training_step_time": 0.1074066162109375 }, { "epoch": 2.58758544921875e-05, "model_forward_time": 0.025557279586791992, "step": 16958 }, { "epoch": 2.58758544921875e-05, "step": 16958, "training_step_time": 0.10603880882263184 }, { "epoch": 2.587738037109375e-05, "model_forward_time": 0.025171756744384766, "step": 16959 }, { "epoch": 2.587738037109375e-05, "step": 16959, "training_step_time": 0.10498642921447754 }, { "epoch": 2.587890625e-05, "grad_norm": 0.4881453514099121, "learning_rate": 4.335076127649707e-05, "loss": 0.0119, "step": 16960 }, { "epoch": 2.587890625e-05, "model_forward_time": 0.028659343719482422, "step": 16960 }, { "epoch": 2.587890625e-05, "step": 16960, "training_step_time": 0.10964417457580566 }, { "epoch": 2.588043212890625e-05, "model_forward_time": 0.025478839874267578, "step": 16961 }, { "epoch": 2.588043212890625e-05, "step": 16961, "training_step_time": 0.10509943962097168 }, { "epoch": 2.58819580078125e-05, "model_forward_time": 0.025113582611083984, "step": 16962 }, { "epoch": 2.58819580078125e-05, "step": 16962, "training_step_time": 0.103851318359375 }, { "epoch": 2.588348388671875e-05, "model_forward_time": 0.024210691452026367, "step": 16963 }, { "epoch": 2.588348388671875e-05, "step": 16963, "training_step_time": 0.10747623443603516 }, { "epoch": 2.5885009765625e-05, "model_forward_time": 0.025072574615478516, "step": 16964 }, { "epoch": 2.5885009765625e-05, "step": 16964, "training_step_time": 0.10664606094360352 }, { "epoch": 2.588653564453125e-05, "model_forward_time": 0.02512669563293457, "step": 16965 }, { "epoch": 2.588653564453125e-05, "step": 16965, "training_step_time": 0.10632658004760742 }, { "epoch": 2.58880615234375e-05, "model_forward_time": 0.02564406394958496, "step": 16966 }, { "epoch": 2.58880615234375e-05, "step": 16966, "training_step_time": 0.11219525337219238 }, { "epoch": 2.588958740234375e-05, "model_forward_time": 0.02495574951171875, "step": 16967 }, { "epoch": 2.588958740234375e-05, "step": 16967, "training_step_time": 0.14197850227355957 }, { "epoch": 2.589111328125e-05, "model_forward_time": 0.025455713272094727, "step": 16968 }, { "epoch": 2.589111328125e-05, "step": 16968, "training_step_time": 0.11001920700073242 }, { "epoch": 2.589263916015625e-05, "model_forward_time": 0.025901317596435547, "step": 16969 }, { "epoch": 2.589263916015625e-05, "step": 16969, "training_step_time": 0.1117708683013916 }, { "epoch": 2.58941650390625e-05, "grad_norm": 0.4694342315196991, "learning_rate": 4.329613919954962e-05, "loss": 0.0118, "step": 16970 }, { "epoch": 2.58941650390625e-05, "model_forward_time": 0.025183677673339844, "step": 16970 }, { "epoch": 2.58941650390625e-05, "step": 16970, "training_step_time": 0.10701608657836914 }, { "epoch": 2.589569091796875e-05, "model_forward_time": 0.02750706672668457, "step": 16971 }, { "epoch": 2.589569091796875e-05, "step": 16971, "training_step_time": 0.11022233963012695 }, { "epoch": 2.5897216796875e-05, "model_forward_time": 0.025002479553222656, "step": 16972 }, { "epoch": 2.5897216796875e-05, "step": 16972, "training_step_time": 0.19635295867919922 }, { "epoch": 2.589874267578125e-05, "model_forward_time": 0.024892807006835938, "step": 16973 }, { "epoch": 2.589874267578125e-05, "step": 16973, "training_step_time": 0.10893607139587402 }, { "epoch": 2.59002685546875e-05, "model_forward_time": 0.025014162063598633, "step": 16974 }, { "epoch": 2.59002685546875e-05, "step": 16974, "training_step_time": 0.10539746284484863 }, { "epoch": 2.590179443359375e-05, "model_forward_time": 0.025304317474365234, "step": 16975 }, { "epoch": 2.590179443359375e-05, "step": 16975, "training_step_time": 0.10684466361999512 }, { "epoch": 2.59033203125e-05, "model_forward_time": 0.028641700744628906, "step": 16976 }, { "epoch": 2.59033203125e-05, "step": 16976, "training_step_time": 0.1104745864868164 }, { "epoch": 2.590484619140625e-05, "model_forward_time": 0.024919509887695312, "step": 16977 }, { "epoch": 2.590484619140625e-05, "step": 16977, "training_step_time": 0.11068964004516602 }, { "epoch": 2.59063720703125e-05, "model_forward_time": 0.025005578994750977, "step": 16978 }, { "epoch": 2.59063720703125e-05, "step": 16978, "training_step_time": 0.10700702667236328 }, { "epoch": 2.590789794921875e-05, "model_forward_time": 0.025437116622924805, "step": 16979 }, { "epoch": 2.590789794921875e-05, "step": 16979, "training_step_time": 0.10615348815917969 }, { "epoch": 2.5909423828125e-05, "grad_norm": 0.17927812039852142, "learning_rate": 4.324152526842517e-05, "loss": 0.0117, "step": 16980 }, { "epoch": 2.5909423828125e-05, "model_forward_time": 0.024857521057128906, "step": 16980 }, { "epoch": 2.5909423828125e-05, "step": 16980, "training_step_time": 0.10616397857666016 }, { "epoch": 2.591094970703125e-05, "model_forward_time": 0.025354623794555664, "step": 16981 }, { "epoch": 2.591094970703125e-05, "step": 16981, "training_step_time": 0.10742712020874023 }, { "epoch": 2.59124755859375e-05, "model_forward_time": 0.024940967559814453, "step": 16982 }, { "epoch": 2.59124755859375e-05, "step": 16982, "training_step_time": 0.10494518280029297 }, { "epoch": 2.591400146484375e-05, "model_forward_time": 0.025066852569580078, "step": 16983 }, { "epoch": 2.591400146484375e-05, "step": 16983, "training_step_time": 0.10450482368469238 }, { "epoch": 2.591552734375e-05, "model_forward_time": 0.02487349510192871, "step": 16984 }, { "epoch": 2.591552734375e-05, "step": 16984, "training_step_time": 0.10953736305236816 }, { "epoch": 2.591705322265625e-05, "model_forward_time": 0.02508854866027832, "step": 16985 }, { "epoch": 2.591705322265625e-05, "step": 16985, "training_step_time": 0.10444498062133789 }, { "epoch": 2.59185791015625e-05, "model_forward_time": 0.025126934051513672, "step": 16986 }, { "epoch": 2.59185791015625e-05, "step": 16986, "training_step_time": 0.10359525680541992 }, { "epoch": 2.592010498046875e-05, "model_forward_time": 0.025371074676513672, "step": 16987 }, { "epoch": 2.592010498046875e-05, "step": 16987, "training_step_time": 0.1058967113494873 }, { "epoch": 2.5921630859375e-05, "model_forward_time": 0.025093793869018555, "step": 16988 }, { "epoch": 2.5921630859375e-05, "step": 16988, "training_step_time": 0.10768795013427734 }, { "epoch": 2.592315673828125e-05, "model_forward_time": 0.025876522064208984, "step": 16989 }, { "epoch": 2.592315673828125e-05, "step": 16989, "training_step_time": 0.10626888275146484 }, { "epoch": 2.59246826171875e-05, "grad_norm": 0.2882422208786011, "learning_rate": 4.3186919549484784e-05, "loss": 0.0156, "step": 16990 }, { "epoch": 2.59246826171875e-05, "model_forward_time": 0.025634765625, "step": 16990 }, { "epoch": 2.59246826171875e-05, "step": 16990, "training_step_time": 0.11052346229553223 }, { "epoch": 2.592620849609375e-05, "model_forward_time": 0.024799108505249023, "step": 16991 }, { "epoch": 2.592620849609375e-05, "step": 16991, "training_step_time": 0.11473727226257324 }, { "epoch": 2.5927734375e-05, "model_forward_time": 0.02532362937927246, "step": 16992 }, { "epoch": 2.5927734375e-05, "step": 16992, "training_step_time": 0.1107175350189209 }, { "epoch": 2.592926025390625e-05, "model_forward_time": 0.02476334571838379, "step": 16993 }, { "epoch": 2.592926025390625e-05, "step": 16993, "training_step_time": 0.14792537689208984 }, { "epoch": 2.59307861328125e-05, "model_forward_time": 0.02482295036315918, "step": 16994 }, { "epoch": 2.59307861328125e-05, "step": 16994, "training_step_time": 0.16503334045410156 }, { "epoch": 2.593231201171875e-05, "model_forward_time": 0.024509429931640625, "step": 16995 }, { "epoch": 2.593231201171875e-05, "step": 16995, "training_step_time": 0.16500091552734375 }, { "epoch": 2.5933837890625e-05, "model_forward_time": 0.02477717399597168, "step": 16996 }, { "epoch": 2.5933837890625e-05, "step": 16996, "training_step_time": 0.18506884574890137 }, { "epoch": 2.593536376953125e-05, "model_forward_time": 0.02429819107055664, "step": 16997 }, { "epoch": 2.593536376953125e-05, "step": 16997, "training_step_time": 0.12148761749267578 }, { "epoch": 2.59368896484375e-05, "model_forward_time": 0.024514436721801758, "step": 16998 }, { "epoch": 2.59368896484375e-05, "step": 16998, "training_step_time": 0.11691617965698242 }, { "epoch": 2.593841552734375e-05, "model_forward_time": 0.026449203491210938, "step": 16999 }, { "epoch": 2.593841552734375e-05, "step": 16999, "training_step_time": 0.1191549301147461 }, { "epoch": 2.593994140625e-05, "grad_norm": 0.7259067893028259, "learning_rate": 4.3132322109079596e-05, "loss": 0.0136, "step": 17000 }, { "epoch": 2.593994140625e-05, "model_forward_time": 0.02618098258972168, "step": 17000 }, { "epoch": 2.593994140625e-05, "step": 17000, "training_step_time": 0.10682129859924316 }, { "epoch": 2.594146728515625e-05, "model_forward_time": 0.025656700134277344, "step": 17001 }, { "epoch": 2.594146728515625e-05, "step": 17001, "training_step_time": 0.1811974048614502 }, { "epoch": 2.59429931640625e-05, "model_forward_time": 0.02442169189453125, "step": 17002 }, { "epoch": 2.59429931640625e-05, "step": 17002, "training_step_time": 0.15425825119018555 }, { "epoch": 2.594451904296875e-05, "model_forward_time": 0.024133920669555664, "step": 17003 }, { "epoch": 2.594451904296875e-05, "step": 17003, "training_step_time": 0.14968180656433105 }, { "epoch": 2.5946044921875e-05, "model_forward_time": 0.023937702178955078, "step": 17004 }, { "epoch": 2.5946044921875e-05, "step": 17004, "training_step_time": 0.2172856330871582 }, { "epoch": 2.594757080078125e-05, "model_forward_time": 0.024451732635498047, "step": 17005 }, { "epoch": 2.594757080078125e-05, "step": 17005, "training_step_time": 0.12508940696716309 }, { "epoch": 2.59490966796875e-05, "model_forward_time": 0.02489018440246582, "step": 17006 }, { "epoch": 2.59490966796875e-05, "step": 17006, "training_step_time": 0.12331604957580566 }, { "epoch": 2.595062255859375e-05, "model_forward_time": 0.025366783142089844, "step": 17007 }, { "epoch": 2.595062255859375e-05, "step": 17007, "training_step_time": 0.11742329597473145 }, { "epoch": 2.59521484375e-05, "model_forward_time": 0.02476358413696289, "step": 17008 }, { "epoch": 2.59521484375e-05, "step": 17008, "training_step_time": 0.11369204521179199 }, { "epoch": 2.595367431640625e-05, "model_forward_time": 0.025826215744018555, "step": 17009 }, { "epoch": 2.595367431640625e-05, "step": 17009, "training_step_time": 0.11291623115539551 }, { "epoch": 2.59552001953125e-05, "grad_norm": 0.31492379307746887, "learning_rate": 4.307773301355062e-05, "loss": 0.0184, "step": 17010 }, { "epoch": 2.59552001953125e-05, "model_forward_time": 0.025204896926879883, "step": 17010 }, { "epoch": 2.59552001953125e-05, "step": 17010, "training_step_time": 0.1060037612915039 }, { "epoch": 2.595672607421875e-05, "model_forward_time": 0.025438547134399414, "step": 17011 }, { "epoch": 2.595672607421875e-05, "step": 17011, "training_step_time": 0.10681343078613281 }, { "epoch": 2.5958251953125e-05, "model_forward_time": 0.025011301040649414, "step": 17012 }, { "epoch": 2.5958251953125e-05, "step": 17012, "training_step_time": 0.10706305503845215 }, { "epoch": 2.595977783203125e-05, "model_forward_time": 0.025531291961669922, "step": 17013 }, { "epoch": 2.595977783203125e-05, "step": 17013, "training_step_time": 0.10780835151672363 }, { "epoch": 2.59613037109375e-05, "model_forward_time": 0.025274991989135742, "step": 17014 }, { "epoch": 2.59613037109375e-05, "step": 17014, "training_step_time": 0.10739445686340332 }, { "epoch": 2.596282958984375e-05, "model_forward_time": 0.024921417236328125, "step": 17015 }, { "epoch": 2.596282958984375e-05, "step": 17015, "training_step_time": 0.10861754417419434 }, { "epoch": 2.596435546875e-05, "model_forward_time": 0.024893522262573242, "step": 17016 }, { "epoch": 2.596435546875e-05, "step": 17016, "training_step_time": 0.16959142684936523 }, { "epoch": 2.596588134765625e-05, "model_forward_time": 0.02426600456237793, "step": 17017 }, { "epoch": 2.596588134765625e-05, "step": 17017, "training_step_time": 0.1753695011138916 }, { "epoch": 2.59674072265625e-05, "model_forward_time": 0.024460315704345703, "step": 17018 }, { "epoch": 2.59674072265625e-05, "step": 17018, "training_step_time": 0.16143298149108887 }, { "epoch": 2.596893310546875e-05, "model_forward_time": 0.023349285125732422, "step": 17019 }, { "epoch": 2.596893310546875e-05, "step": 17019, "training_step_time": 0.1622481346130371 }, { "epoch": 2.5970458984375e-05, "grad_norm": 0.27651742100715637, "learning_rate": 4.302315232922876e-05, "loss": 0.0201, "step": 17020 }, { "epoch": 2.5970458984375e-05, "model_forward_time": 0.02457284927368164, "step": 17020 }, { "epoch": 2.5970458984375e-05, "step": 17020, "training_step_time": 0.14145970344543457 }, { "epoch": 2.597198486328125e-05, "model_forward_time": 0.0241701602935791, "step": 17021 }, { "epoch": 2.597198486328125e-05, "step": 17021, "training_step_time": 0.2047266960144043 }, { "epoch": 2.59735107421875e-05, "model_forward_time": 0.024259328842163086, "step": 17022 }, { "epoch": 2.59735107421875e-05, "step": 17022, "training_step_time": 0.1355588436126709 }, { "epoch": 2.597503662109375e-05, "model_forward_time": 0.02413344383239746, "step": 17023 }, { "epoch": 2.597503662109375e-05, "step": 17023, "training_step_time": 0.18563175201416016 }, { "epoch": 2.59765625e-05, "model_forward_time": 0.023995161056518555, "step": 17024 }, { "epoch": 2.59765625e-05, "step": 17024, "training_step_time": 0.10613703727722168 }, { "epoch": 2.597808837890625e-05, "model_forward_time": 0.024899959564208984, "step": 17025 }, { "epoch": 2.597808837890625e-05, "step": 17025, "training_step_time": 0.10330462455749512 }, { "epoch": 2.59796142578125e-05, "model_forward_time": 0.025627851486206055, "step": 17026 }, { "epoch": 2.59796142578125e-05, "step": 17026, "training_step_time": 0.10488390922546387 }, { "epoch": 2.598114013671875e-05, "model_forward_time": 0.029017210006713867, "step": 17027 }, { "epoch": 2.598114013671875e-05, "step": 17027, "training_step_time": 0.10715436935424805 }, { "epoch": 2.5982666015625e-05, "model_forward_time": 0.02545762062072754, "step": 17028 }, { "epoch": 2.5982666015625e-05, "step": 17028, "training_step_time": 0.10448384284973145 }, { "epoch": 2.598419189453125e-05, "model_forward_time": 0.025335311889648438, "step": 17029 }, { "epoch": 2.598419189453125e-05, "step": 17029, "training_step_time": 0.1055140495300293 }, { "epoch": 2.59857177734375e-05, "grad_norm": 0.14272859692573547, "learning_rate": 4.29685801224347e-05, "loss": 0.0099, "step": 17030 }, { "epoch": 2.59857177734375e-05, "model_forward_time": 0.02500295639038086, "step": 17030 }, { "epoch": 2.59857177734375e-05, "step": 17030, "training_step_time": 0.10641121864318848 }, { "epoch": 2.598724365234375e-05, "model_forward_time": 0.02498340606689453, "step": 17031 }, { "epoch": 2.598724365234375e-05, "step": 17031, "training_step_time": 0.1062626838684082 }, { "epoch": 2.598876953125e-05, "model_forward_time": 0.024964570999145508, "step": 17032 }, { "epoch": 2.598876953125e-05, "step": 17032, "training_step_time": 0.1054835319519043 }, { "epoch": 2.599029541015625e-05, "model_forward_time": 0.025264263153076172, "step": 17033 }, { "epoch": 2.599029541015625e-05, "step": 17033, "training_step_time": 0.1057441234588623 }, { "epoch": 2.59918212890625e-05, "model_forward_time": 0.024937152862548828, "step": 17034 }, { "epoch": 2.59918212890625e-05, "step": 17034, "training_step_time": 0.10555672645568848 }, { "epoch": 2.599334716796875e-05, "model_forward_time": 0.02401423454284668, "step": 17035 }, { "epoch": 2.599334716796875e-05, "step": 17035, "training_step_time": 0.10944104194641113 }, { "epoch": 2.5994873046875e-05, "model_forward_time": 0.024460792541503906, "step": 17036 }, { "epoch": 2.5994873046875e-05, "step": 17036, "training_step_time": 0.10684871673583984 }, { "epoch": 2.599639892578125e-05, "model_forward_time": 0.025495529174804688, "step": 17037 }, { "epoch": 2.599639892578125e-05, "step": 17037, "training_step_time": 0.10866308212280273 }, { "epoch": 2.59979248046875e-05, "model_forward_time": 0.024892091751098633, "step": 17038 }, { "epoch": 2.59979248046875e-05, "step": 17038, "training_step_time": 0.10711336135864258 }, { "epoch": 2.599945068359375e-05, "model_forward_time": 0.02543044090270996, "step": 17039 }, { "epoch": 2.599945068359375e-05, "step": 17039, "training_step_time": 0.10552167892456055 }, { "epoch": 2.60009765625e-05, "grad_norm": 0.29485321044921875, "learning_rate": 4.291401645947879e-05, "loss": 0.0092, "step": 17040 }, { "epoch": 2.60009765625e-05, "model_forward_time": 0.024939537048339844, "step": 17040 }, { "epoch": 2.60009765625e-05, "step": 17040, "training_step_time": 0.1069190502166748 }, { "epoch": 2.600250244140625e-05, "model_forward_time": 0.023682832717895508, "step": 17041 }, { "epoch": 2.600250244140625e-05, "step": 17041, "training_step_time": 0.10683345794677734 }, { "epoch": 2.60040283203125e-05, "model_forward_time": 0.025570392608642578, "step": 17042 }, { "epoch": 2.60040283203125e-05, "step": 17042, "training_step_time": 0.10709357261657715 }, { "epoch": 2.600555419921875e-05, "model_forward_time": 0.025325298309326172, "step": 17043 }, { "epoch": 2.600555419921875e-05, "step": 17043, "training_step_time": 0.10787248611450195 }, { "epoch": 2.6007080078125e-05, "model_forward_time": 0.0250396728515625, "step": 17044 }, { "epoch": 2.6007080078125e-05, "step": 17044, "training_step_time": 0.10573530197143555 }, { "epoch": 2.600860595703125e-05, "model_forward_time": 0.024651288986206055, "step": 17045 }, { "epoch": 2.600860595703125e-05, "step": 17045, "training_step_time": 0.14682364463806152 }, { "epoch": 2.60101318359375e-05, "model_forward_time": 0.024847984313964844, "step": 17046 }, { "epoch": 2.60101318359375e-05, "step": 17046, "training_step_time": 0.18950343132019043 }, { "epoch": 2.601165771484375e-05, "model_forward_time": 0.024460792541503906, "step": 17047 }, { "epoch": 2.601165771484375e-05, "step": 17047, "training_step_time": 0.15612173080444336 }, { "epoch": 2.601318359375e-05, "model_forward_time": 0.024820566177368164, "step": 17048 }, { "epoch": 2.601318359375e-05, "step": 17048, "training_step_time": 0.2116234302520752 }, { "epoch": 2.601470947265625e-05, "model_forward_time": 0.024631261825561523, "step": 17049 }, { "epoch": 2.601470947265625e-05, "step": 17049, "training_step_time": 0.19156265258789062 }, { "epoch": 2.60162353515625e-05, "grad_norm": 0.29122012853622437, "learning_rate": 4.2859461406661065e-05, "loss": 0.0116, "step": 17050 }, { "epoch": 2.60162353515625e-05, "model_forward_time": 0.024192333221435547, "step": 17050 }, { "epoch": 2.60162353515625e-05, "step": 17050, "training_step_time": 0.16969633102416992 }, { "epoch": 2.601776123046875e-05, "model_forward_time": 0.02439403533935547, "step": 17051 }, { "epoch": 2.601776123046875e-05, "step": 17051, "training_step_time": 0.11534762382507324 }, { "epoch": 2.6019287109375e-05, "model_forward_time": 0.02419900894165039, "step": 17052 }, { "epoch": 2.6019287109375e-05, "step": 17052, "training_step_time": 0.115814208984375 }, { "epoch": 2.602081298828125e-05, "model_forward_time": 0.02513885498046875, "step": 17053 }, { "epoch": 2.602081298828125e-05, "step": 17053, "training_step_time": 0.11325907707214355 }, { "epoch": 2.60223388671875e-05, "model_forward_time": 0.025537967681884766, "step": 17054 }, { "epoch": 2.60223388671875e-05, "step": 17054, "training_step_time": 0.11498880386352539 }, { "epoch": 2.602386474609375e-05, "model_forward_time": 0.026131153106689453, "step": 17055 }, { "epoch": 2.602386474609375e-05, "step": 17055, "training_step_time": 0.11494755744934082 }, { "epoch": 2.6025390625e-05, "model_forward_time": 0.02545785903930664, "step": 17056 }, { "epoch": 2.6025390625e-05, "step": 17056, "training_step_time": 0.1131281852722168 }, { "epoch": 2.602691650390625e-05, "model_forward_time": 0.025496244430541992, "step": 17057 }, { "epoch": 2.602691650390625e-05, "step": 17057, "training_step_time": 0.11297392845153809 }, { "epoch": 2.60284423828125e-05, "model_forward_time": 0.025649070739746094, "step": 17058 }, { "epoch": 2.60284423828125e-05, "step": 17058, "training_step_time": 0.1132206916809082 }, { "epoch": 2.602996826171875e-05, "model_forward_time": 0.02504134178161621, "step": 17059 }, { "epoch": 2.602996826171875e-05, "step": 17059, "training_step_time": 0.11168670654296875 }, { "epoch": 2.6031494140625e-05, "grad_norm": 0.5060628652572632, "learning_rate": 4.280491503027104e-05, "loss": 0.021, "step": 17060 }, { "epoch": 2.6031494140625e-05, "model_forward_time": 0.024856090545654297, "step": 17060 }, { "epoch": 2.6031494140625e-05, "step": 17060, "training_step_time": 0.10833024978637695 }, { "epoch": 2.603302001953125e-05, "model_forward_time": 0.02521204948425293, "step": 17061 }, { "epoch": 2.603302001953125e-05, "step": 17061, "training_step_time": 0.11145901679992676 }, { "epoch": 2.60345458984375e-05, "model_forward_time": 0.025153160095214844, "step": 17062 }, { "epoch": 2.60345458984375e-05, "step": 17062, "training_step_time": 0.10854840278625488 }, { "epoch": 2.603607177734375e-05, "model_forward_time": 0.02508068084716797, "step": 17063 }, { "epoch": 2.603607177734375e-05, "step": 17063, "training_step_time": 0.10778498649597168 }, { "epoch": 2.603759765625e-05, "model_forward_time": 0.024805784225463867, "step": 17064 }, { "epoch": 2.603759765625e-05, "step": 17064, "training_step_time": 0.18959355354309082 }, { "epoch": 2.603912353515625e-05, "model_forward_time": 0.02463221549987793, "step": 17065 }, { "epoch": 2.603912353515625e-05, "step": 17065, "training_step_time": 0.1422865390777588 }, { "epoch": 2.60406494140625e-05, "model_forward_time": 0.02444744110107422, "step": 17066 }, { "epoch": 2.60406494140625e-05, "step": 17066, "training_step_time": 0.10944843292236328 }, { "epoch": 2.604217529296875e-05, "model_forward_time": 0.0251462459564209, "step": 17067 }, { "epoch": 2.604217529296875e-05, "step": 17067, "training_step_time": 0.11240243911743164 }, { "epoch": 2.6043701171875e-05, "model_forward_time": 0.02491617202758789, "step": 17068 }, { "epoch": 2.6043701171875e-05, "step": 17068, "training_step_time": 0.11077141761779785 }, { "epoch": 2.604522705078125e-05, "model_forward_time": 0.025319814682006836, "step": 17069 }, { "epoch": 2.604522705078125e-05, "step": 17069, "training_step_time": 0.10977935791015625 }, { "epoch": 2.60467529296875e-05, "grad_norm": 0.29499542713165283, "learning_rate": 4.275037739658771e-05, "loss": 0.0102, "step": 17070 }, { "epoch": 2.60467529296875e-05, "model_forward_time": 0.0249178409576416, "step": 17070 }, { "epoch": 2.60467529296875e-05, "step": 17070, "training_step_time": 0.19310593605041504 }, { "epoch": 2.604827880859375e-05, "model_forward_time": 0.02432537078857422, "step": 17071 }, { "epoch": 2.604827880859375e-05, "step": 17071, "training_step_time": 0.10997390747070312 }, { "epoch": 2.60498046875e-05, "model_forward_time": 0.024555683135986328, "step": 17072 }, { "epoch": 2.60498046875e-05, "step": 17072, "training_step_time": 0.1048116683959961 }, { "epoch": 2.605133056640625e-05, "model_forward_time": 0.025147438049316406, "step": 17073 }, { "epoch": 2.605133056640625e-05, "step": 17073, "training_step_time": 0.10991859436035156 }, { "epoch": 2.60528564453125e-05, "model_forward_time": 0.02544546127319336, "step": 17074 }, { "epoch": 2.60528564453125e-05, "step": 17074, "training_step_time": 0.1059560775756836 }, { "epoch": 2.605438232421875e-05, "model_forward_time": 0.025191783905029297, "step": 17075 }, { "epoch": 2.605438232421875e-05, "step": 17075, "training_step_time": 0.10484504699707031 }, { "epoch": 2.6055908203125e-05, "model_forward_time": 0.025140762329101562, "step": 17076 }, { "epoch": 2.6055908203125e-05, "step": 17076, "training_step_time": 0.10912537574768066 }, { "epoch": 2.605743408203125e-05, "model_forward_time": 0.02520585060119629, "step": 17077 }, { "epoch": 2.605743408203125e-05, "step": 17077, "training_step_time": 0.10785150527954102 }, { "epoch": 2.60589599609375e-05, "model_forward_time": 0.02561354637145996, "step": 17078 }, { "epoch": 2.60589599609375e-05, "step": 17078, "training_step_time": 0.10725808143615723 }, { "epoch": 2.606048583984375e-05, "model_forward_time": 0.02504873275756836, "step": 17079 }, { "epoch": 2.606048583984375e-05, "step": 17079, "training_step_time": 0.10757589340209961 }, { "epoch": 2.606201171875e-05, "grad_norm": 0.4745246171951294, "learning_rate": 4.269584857187943e-05, "loss": 0.0109, "step": 17080 }, { "epoch": 2.606201171875e-05, "model_forward_time": 0.028053760528564453, "step": 17080 }, { "epoch": 2.606201171875e-05, "step": 17080, "training_step_time": 0.11024069786071777 }, { "epoch": 2.606353759765625e-05, "model_forward_time": 0.024960041046142578, "step": 17081 }, { "epoch": 2.606353759765625e-05, "step": 17081, "training_step_time": 0.10504364967346191 }, { "epoch": 2.60650634765625e-05, "model_forward_time": 0.024954557418823242, "step": 17082 }, { "epoch": 2.60650634765625e-05, "step": 17082, "training_step_time": 0.10534954071044922 }, { "epoch": 2.606658935546875e-05, "model_forward_time": 0.025187015533447266, "step": 17083 }, { "epoch": 2.606658935546875e-05, "step": 17083, "training_step_time": 0.10299038887023926 }, { "epoch": 2.6068115234375e-05, "model_forward_time": 0.024805545806884766, "step": 17084 }, { "epoch": 2.6068115234375e-05, "step": 17084, "training_step_time": 0.10322833061218262 }, { "epoch": 2.606964111328125e-05, "model_forward_time": 0.024718046188354492, "step": 17085 }, { "epoch": 2.606964111328125e-05, "step": 17085, "training_step_time": 0.10423946380615234 }, { "epoch": 2.60711669921875e-05, "model_forward_time": 0.0252835750579834, "step": 17086 }, { "epoch": 2.60711669921875e-05, "step": 17086, "training_step_time": 0.10544347763061523 }, { "epoch": 2.607269287109375e-05, "model_forward_time": 0.025025367736816406, "step": 17087 }, { "epoch": 2.607269287109375e-05, "step": 17087, "training_step_time": 0.10693883895874023 }, { "epoch": 2.607421875e-05, "model_forward_time": 0.02555561065673828, "step": 17088 }, { "epoch": 2.607421875e-05, "step": 17088, "training_step_time": 0.10808420181274414 }, { "epoch": 2.607574462890625e-05, "model_forward_time": 0.025806903839111328, "step": 17089 }, { "epoch": 2.607574462890625e-05, "step": 17089, "training_step_time": 0.10807204246520996 }, { "epoch": 2.60772705078125e-05, "grad_norm": 0.1551487147808075, "learning_rate": 4.264132862240387e-05, "loss": 0.0121, "step": 17090 }, { "epoch": 2.60772705078125e-05, "model_forward_time": 0.025624990463256836, "step": 17090 }, { "epoch": 2.60772705078125e-05, "step": 17090, "training_step_time": 0.10882806777954102 }, { "epoch": 2.607879638671875e-05, "model_forward_time": 0.024482250213623047, "step": 17091 }, { "epoch": 2.607879638671875e-05, "step": 17091, "training_step_time": 0.14906954765319824 }, { "epoch": 2.6080322265625e-05, "model_forward_time": 0.023906230926513672, "step": 17092 }, { "epoch": 2.6080322265625e-05, "step": 17092, "training_step_time": 0.18044233322143555 }, { "epoch": 2.608184814453125e-05, "model_forward_time": 0.024450302124023438, "step": 17093 }, { "epoch": 2.608184814453125e-05, "step": 17093, "training_step_time": 0.1486985683441162 }, { "epoch": 2.60833740234375e-05, "model_forward_time": 0.024481534957885742, "step": 17094 }, { "epoch": 2.60833740234375e-05, "step": 17094, "training_step_time": 0.15880489349365234 }, { "epoch": 2.608489990234375e-05, "model_forward_time": 0.024695396423339844, "step": 17095 }, { "epoch": 2.608489990234375e-05, "step": 17095, "training_step_time": 0.17287373542785645 }, { "epoch": 2.608642578125e-05, "model_forward_time": 0.02428436279296875, "step": 17096 }, { "epoch": 2.608642578125e-05, "step": 17096, "training_step_time": 0.11421704292297363 }, { "epoch": 2.608795166015625e-05, "model_forward_time": 0.02487945556640625, "step": 17097 }, { "epoch": 2.608795166015625e-05, "step": 17097, "training_step_time": 0.12466311454772949 }, { "epoch": 2.60894775390625e-05, "model_forward_time": 0.025197505950927734, "step": 17098 }, { "epoch": 2.60894775390625e-05, "step": 17098, "training_step_time": 0.10833597183227539 }, { "epoch": 2.609100341796875e-05, "model_forward_time": 0.02648019790649414, "step": 17099 }, { "epoch": 2.609100341796875e-05, "step": 17099, "training_step_time": 0.11718201637268066 }, { "epoch": 2.6092529296875e-05, "grad_norm": 0.32109713554382324, "learning_rate": 4.2586817614407895e-05, "loss": 0.0093, "step": 17100 }, { "epoch": 2.6092529296875e-05, "model_forward_time": 0.0250091552734375, "step": 17100 }, { "epoch": 2.6092529296875e-05, "step": 17100, "training_step_time": 0.10696840286254883 }, { "epoch": 2.609405517578125e-05, "model_forward_time": 0.025254249572753906, "step": 17101 }, { "epoch": 2.609405517578125e-05, "step": 17101, "training_step_time": 0.10769987106323242 }, { "epoch": 2.60955810546875e-05, "model_forward_time": 0.025107383728027344, "step": 17102 }, { "epoch": 2.60955810546875e-05, "step": 17102, "training_step_time": 0.10953664779663086 }, { "epoch": 2.609710693359375e-05, "model_forward_time": 0.025088787078857422, "step": 17103 }, { "epoch": 2.609710693359375e-05, "step": 17103, "training_step_time": 0.10822534561157227 }, { "epoch": 2.60986328125e-05, "model_forward_time": 0.025412321090698242, "step": 17104 }, { "epoch": 2.60986328125e-05, "step": 17104, "training_step_time": 0.10643887519836426 }, { "epoch": 2.610015869140625e-05, "model_forward_time": 0.02579641342163086, "step": 17105 }, { "epoch": 2.610015869140625e-05, "step": 17105, "training_step_time": 0.10853886604309082 }, { "epoch": 2.61016845703125e-05, "model_forward_time": 0.02515721321105957, "step": 17106 }, { "epoch": 2.61016845703125e-05, "step": 17106, "training_step_time": 0.10728812217712402 }, { "epoch": 2.610321044921875e-05, "model_forward_time": 0.025631427764892578, "step": 17107 }, { "epoch": 2.610321044921875e-05, "step": 17107, "training_step_time": 0.10686254501342773 }, { "epoch": 2.6104736328125e-05, "model_forward_time": 0.025186538696289062, "step": 17108 }, { "epoch": 2.6104736328125e-05, "step": 17108, "training_step_time": 0.10636210441589355 }, { "epoch": 2.610626220703125e-05, "model_forward_time": 0.025334835052490234, "step": 17109 }, { "epoch": 2.610626220703125e-05, "step": 17109, "training_step_time": 0.10912346839904785 }, { "epoch": 2.61077880859375e-05, "grad_norm": 0.19505758583545685, "learning_rate": 4.253231561412756e-05, "loss": 0.0125, "step": 17110 }, { "epoch": 2.61077880859375e-05, "model_forward_time": 0.025179147720336914, "step": 17110 }, { "epoch": 2.61077880859375e-05, "step": 17110, "training_step_time": 0.10516214370727539 }, { "epoch": 2.610931396484375e-05, "model_forward_time": 0.028001785278320312, "step": 17111 }, { "epoch": 2.610931396484375e-05, "step": 17111, "training_step_time": 0.10783267021179199 }, { "epoch": 2.611083984375e-05, "model_forward_time": 0.02522754669189453, "step": 17112 }, { "epoch": 2.611083984375e-05, "step": 17112, "training_step_time": 0.11644577980041504 }, { "epoch": 2.611236572265625e-05, "model_forward_time": 0.024864912033081055, "step": 17113 }, { "epoch": 2.611236572265625e-05, "step": 17113, "training_step_time": 0.14072036743164062 }, { "epoch": 2.61138916015625e-05, "model_forward_time": 0.025297164916992188, "step": 17114 }, { "epoch": 2.61138916015625e-05, "step": 17114, "training_step_time": 0.11302638053894043 }, { "epoch": 2.611541748046875e-05, "model_forward_time": 0.02515578269958496, "step": 17115 }, { "epoch": 2.611541748046875e-05, "step": 17115, "training_step_time": 0.11042451858520508 }, { "epoch": 2.6116943359375e-05, "model_forward_time": 0.025621414184570312, "step": 17116 }, { "epoch": 2.6116943359375e-05, "step": 17116, "training_step_time": 0.11751747131347656 }, { "epoch": 2.611846923828125e-05, "model_forward_time": 0.025569677352905273, "step": 17117 }, { "epoch": 2.611846923828125e-05, "step": 17117, "training_step_time": 0.11192536354064941 }, { "epoch": 2.61199951171875e-05, "model_forward_time": 0.024882078170776367, "step": 17118 }, { "epoch": 2.61199951171875e-05, "step": 17118, "training_step_time": 0.1949782371520996 }, { "epoch": 2.612152099609375e-05, "model_forward_time": 0.025637149810791016, "step": 17119 }, { "epoch": 2.612152099609375e-05, "step": 17119, "training_step_time": 0.11349797248840332 }, { "epoch": 2.6123046875e-05, "grad_norm": 0.18414703011512756, "learning_rate": 4.247782268778791e-05, "loss": 0.0094, "step": 17120 }, { "epoch": 2.6123046875e-05, "model_forward_time": 0.02391839027404785, "step": 17120 }, { "epoch": 2.6123046875e-05, "step": 17120, "training_step_time": 0.1106119155883789 }, { "epoch": 2.612457275390625e-05, "model_forward_time": 0.02509927749633789, "step": 17121 }, { "epoch": 2.612457275390625e-05, "step": 17121, "training_step_time": 0.11249518394470215 }, { "epoch": 2.61260986328125e-05, "model_forward_time": 0.025298118591308594, "step": 17122 }, { "epoch": 2.61260986328125e-05, "step": 17122, "training_step_time": 0.10798478126525879 }, { "epoch": 2.612762451171875e-05, "model_forward_time": 0.02499985694885254, "step": 17123 }, { "epoch": 2.612762451171875e-05, "step": 17123, "training_step_time": 0.11208581924438477 }, { "epoch": 2.6129150390625e-05, "model_forward_time": 0.025351762771606445, "step": 17124 }, { "epoch": 2.6129150390625e-05, "step": 17124, "training_step_time": 0.10893487930297852 }, { "epoch": 2.613067626953125e-05, "model_forward_time": 0.025028467178344727, "step": 17125 }, { "epoch": 2.613067626953125e-05, "step": 17125, "training_step_time": 0.10668301582336426 }, { "epoch": 2.61322021484375e-05, "model_forward_time": 0.025165319442749023, "step": 17126 }, { "epoch": 2.61322021484375e-05, "step": 17126, "training_step_time": 0.11016440391540527 }, { "epoch": 2.613372802734375e-05, "model_forward_time": 0.025391101837158203, "step": 17127 }, { "epoch": 2.613372802734375e-05, "step": 17127, "training_step_time": 0.10554647445678711 }, { "epoch": 2.613525390625e-05, "model_forward_time": 0.025171279907226562, "step": 17128 }, { "epoch": 2.613525390625e-05, "step": 17128, "training_step_time": 0.10634589195251465 }, { "epoch": 2.613677978515625e-05, "model_forward_time": 0.025005817413330078, "step": 17129 }, { "epoch": 2.613677978515625e-05, "step": 17129, "training_step_time": 0.10880637168884277 }, { "epoch": 2.61383056640625e-05, "grad_norm": 0.3356666564941406, "learning_rate": 4.2423338901602985e-05, "loss": 0.0087, "step": 17130 }, { "epoch": 2.61383056640625e-05, "model_forward_time": 0.025043249130249023, "step": 17130 }, { "epoch": 2.61383056640625e-05, "step": 17130, "training_step_time": 0.10920572280883789 }, { "epoch": 2.613983154296875e-05, "model_forward_time": 0.025278091430664062, "step": 17131 }, { "epoch": 2.613983154296875e-05, "step": 17131, "training_step_time": 0.11083436012268066 }, { "epoch": 2.6141357421875e-05, "model_forward_time": 0.02502608299255371, "step": 17132 }, { "epoch": 2.6141357421875e-05, "step": 17132, "training_step_time": 0.10721898078918457 }, { "epoch": 2.614288330078125e-05, "model_forward_time": 0.025105953216552734, "step": 17133 }, { "epoch": 2.614288330078125e-05, "step": 17133, "training_step_time": 0.11062979698181152 }, { "epoch": 2.61444091796875e-05, "model_forward_time": 0.0243682861328125, "step": 17134 }, { "epoch": 2.61444091796875e-05, "step": 17134, "training_step_time": 0.1041722297668457 }, { "epoch": 2.614593505859375e-05, "model_forward_time": 0.0250701904296875, "step": 17135 }, { "epoch": 2.614593505859375e-05, "step": 17135, "training_step_time": 0.1082925796508789 }, { "epoch": 2.61474609375e-05, "model_forward_time": 0.02537679672241211, "step": 17136 }, { "epoch": 2.61474609375e-05, "step": 17136, "training_step_time": 0.10610103607177734 }, { "epoch": 2.614898681640625e-05, "model_forward_time": 0.02535557746887207, "step": 17137 }, { "epoch": 2.614898681640625e-05, "step": 17137, "training_step_time": 0.10697245597839355 }, { "epoch": 2.61505126953125e-05, "model_forward_time": 0.025920867919921875, "step": 17138 }, { "epoch": 2.61505126953125e-05, "step": 17138, "training_step_time": 0.1043233871459961 }, { "epoch": 2.615203857421875e-05, "model_forward_time": 0.024859905242919922, "step": 17139 }, { "epoch": 2.615203857421875e-05, "step": 17139, "training_step_time": 0.18228983879089355 }, { "epoch": 2.6153564453125e-05, "grad_norm": 0.12060805410146713, "learning_rate": 4.236886432177572e-05, "loss": 0.0059, "step": 17140 }, { "epoch": 2.6153564453125e-05, "model_forward_time": 0.024861812591552734, "step": 17140 }, { "epoch": 2.6153564453125e-05, "step": 17140, "training_step_time": 0.17465496063232422 }, { "epoch": 2.615509033203125e-05, "model_forward_time": 0.024479389190673828, "step": 17141 }, { "epoch": 2.615509033203125e-05, "step": 17141, "training_step_time": 0.18379712104797363 }, { "epoch": 2.61566162109375e-05, "model_forward_time": 0.025355100631713867, "step": 17142 }, { "epoch": 2.61566162109375e-05, "step": 17142, "training_step_time": 0.18802142143249512 }, { "epoch": 2.615814208984375e-05, "model_forward_time": 0.024550437927246094, "step": 17143 }, { "epoch": 2.615814208984375e-05, "step": 17143, "training_step_time": 0.15912532806396484 }, { "epoch": 2.615966796875e-05, "model_forward_time": 0.024535179138183594, "step": 17144 }, { "epoch": 2.615966796875e-05, "step": 17144, "training_step_time": 0.1309065818786621 }, { "epoch": 2.616119384765625e-05, "model_forward_time": 0.02463817596435547, "step": 17145 }, { "epoch": 2.616119384765625e-05, "step": 17145, "training_step_time": 0.10896062850952148 }, { "epoch": 2.61627197265625e-05, "model_forward_time": 0.025447845458984375, "step": 17146 }, { "epoch": 2.61627197265625e-05, "step": 17146, "training_step_time": 0.1152653694152832 }, { "epoch": 2.616424560546875e-05, "model_forward_time": 0.025068998336791992, "step": 17147 }, { "epoch": 2.616424560546875e-05, "step": 17147, "training_step_time": 0.10412740707397461 }, { "epoch": 2.6165771484375e-05, "model_forward_time": 0.02513599395751953, "step": 17148 }, { "epoch": 2.6165771484375e-05, "step": 17148, "training_step_time": 0.10889887809753418 }, { "epoch": 2.616729736328125e-05, "model_forward_time": 0.025618314743041992, "step": 17149 }, { "epoch": 2.616729736328125e-05, "step": 17149, "training_step_time": 0.10521221160888672 }, { "epoch": 2.61688232421875e-05, "grad_norm": 0.1229744404554367, "learning_rate": 4.231439901449788e-05, "loss": 0.0109, "step": 17150 }, { "epoch": 2.61688232421875e-05, "model_forward_time": 0.025228500366210938, "step": 17150 }, { "epoch": 2.61688232421875e-05, "step": 17150, "training_step_time": 0.11307668685913086 }, { "epoch": 2.617034912109375e-05, "model_forward_time": 0.025208711624145508, "step": 17151 }, { "epoch": 2.617034912109375e-05, "step": 17151, "training_step_time": 0.11640739440917969 }, { "epoch": 2.6171875e-05, "model_forward_time": 0.025891780853271484, "step": 17152 }, { "epoch": 2.6171875e-05, "step": 17152, "training_step_time": 0.1880357265472412 }, { "epoch": 2.617340087890625e-05, "model_forward_time": 0.024780988693237305, "step": 17153 }, { "epoch": 2.617340087890625e-05, "step": 17153, "training_step_time": 0.2084496021270752 }, { "epoch": 2.61749267578125e-05, "model_forward_time": 0.024461030960083008, "step": 17154 }, { "epoch": 2.61749267578125e-05, "step": 17154, "training_step_time": 0.20163917541503906 }, { "epoch": 2.617645263671875e-05, "model_forward_time": 0.023973703384399414, "step": 17155 }, { "epoch": 2.617645263671875e-05, "step": 17155, "training_step_time": 0.2014768123626709 }, { "epoch": 2.6177978515625e-05, "model_forward_time": 0.024760961532592773, "step": 17156 }, { "epoch": 2.6177978515625e-05, "step": 17156, "training_step_time": 0.22733163833618164 }, { "epoch": 2.617950439453125e-05, "model_forward_time": 0.02423095703125, "step": 17157 }, { "epoch": 2.617950439453125e-05, "step": 17157, "training_step_time": 0.2110605239868164 }, { "epoch": 2.61810302734375e-05, "model_forward_time": 0.024020910263061523, "step": 17158 }, { "epoch": 2.61810302734375e-05, "step": 17158, "training_step_time": 0.17118430137634277 }, { "epoch": 2.618255615234375e-05, "model_forward_time": 0.024384260177612305, "step": 17159 }, { "epoch": 2.618255615234375e-05, "step": 17159, "training_step_time": 0.12720775604248047 }, { "epoch": 2.618408203125e-05, "grad_norm": 0.2088848203420639, "learning_rate": 4.2259943045949934e-05, "loss": 0.0122, "step": 17160 }, { "epoch": 2.618408203125e-05, "model_forward_time": 0.024971723556518555, "step": 17160 }, { "epoch": 2.618408203125e-05, "step": 17160, "training_step_time": 0.10787773132324219 }, { "epoch": 2.618560791015625e-05, "model_forward_time": 0.026466846466064453, "step": 17161 }, { "epoch": 2.618560791015625e-05, "step": 17161, "training_step_time": 0.11113119125366211 }, { "epoch": 2.61871337890625e-05, "model_forward_time": 0.024970054626464844, "step": 17162 }, { "epoch": 2.61871337890625e-05, "step": 17162, "training_step_time": 0.10395479202270508 }, { "epoch": 2.618865966796875e-05, "model_forward_time": 0.025197505950927734, "step": 17163 }, { "epoch": 2.618865966796875e-05, "step": 17163, "training_step_time": 0.10346865653991699 }, { "epoch": 2.6190185546875e-05, "model_forward_time": 0.025367021560668945, "step": 17164 }, { "epoch": 2.6190185546875e-05, "step": 17164, "training_step_time": 0.10303568840026855 }, { "epoch": 2.619171142578125e-05, "model_forward_time": 0.025267362594604492, "step": 17165 }, { "epoch": 2.619171142578125e-05, "step": 17165, "training_step_time": 0.10638952255249023 }, { "epoch": 2.61932373046875e-05, "model_forward_time": 0.02488112449645996, "step": 17166 }, { "epoch": 2.61932373046875e-05, "step": 17166, "training_step_time": 0.10659241676330566 }, { "epoch": 2.619476318359375e-05, "model_forward_time": 0.02528238296508789, "step": 17167 }, { "epoch": 2.619476318359375e-05, "step": 17167, "training_step_time": 0.18051862716674805 }, { "epoch": 2.61962890625e-05, "model_forward_time": 0.02492523193359375, "step": 17168 }, { "epoch": 2.61962890625e-05, "step": 17168, "training_step_time": 0.20075368881225586 }, { "epoch": 2.619781494140625e-05, "model_forward_time": 0.02717447280883789, "step": 17169 }, { "epoch": 2.619781494140625e-05, "step": 17169, "training_step_time": 0.1989452838897705 }, { "epoch": 2.61993408203125e-05, "grad_norm": 0.15796822309494019, "learning_rate": 4.220549648230104e-05, "loss": 0.0132, "step": 17170 }, { "epoch": 2.61993408203125e-05, "model_forward_time": 0.024607181549072266, "step": 17170 }, { "epoch": 2.61993408203125e-05, "step": 17170, "training_step_time": 0.17736172676086426 }, { "epoch": 2.620086669921875e-05, "model_forward_time": 0.025049209594726562, "step": 17171 }, { "epoch": 2.620086669921875e-05, "step": 17171, "training_step_time": 0.17417073249816895 }, { "epoch": 2.6202392578125e-05, "model_forward_time": 0.024482250213623047, "step": 17172 }, { "epoch": 2.6202392578125e-05, "step": 17172, "training_step_time": 0.1578364372253418 }, { "epoch": 2.620391845703125e-05, "model_forward_time": 0.02760601043701172, "step": 17173 }, { "epoch": 2.620391845703125e-05, "step": 17173, "training_step_time": 0.15186643600463867 }, { "epoch": 2.62054443359375e-05, "model_forward_time": 0.024909019470214844, "step": 17174 }, { "epoch": 2.62054443359375e-05, "step": 17174, "training_step_time": 0.13660550117492676 }, { "epoch": 2.620697021484375e-05, "model_forward_time": 0.024498939514160156, "step": 17175 }, { "epoch": 2.620697021484375e-05, "step": 17175, "training_step_time": 0.10116291046142578 }, { "epoch": 2.620849609375e-05, "model_forward_time": 0.02588677406311035, "step": 17176 }, { "epoch": 2.620849609375e-05, "step": 17176, "training_step_time": 0.10361266136169434 }, { "epoch": 2.621002197265625e-05, "model_forward_time": 0.025633811950683594, "step": 17177 }, { "epoch": 2.621002197265625e-05, "step": 17177, "training_step_time": 0.10399675369262695 }, { "epoch": 2.62115478515625e-05, "model_forward_time": 0.024599790573120117, "step": 17178 }, { "epoch": 2.62115478515625e-05, "step": 17178, "training_step_time": 0.21236872673034668 }, { "epoch": 2.621307373046875e-05, "model_forward_time": 0.02465343475341797, "step": 17179 }, { "epoch": 2.621307373046875e-05, "step": 17179, "training_step_time": 0.14782953262329102 }, { "epoch": 2.6214599609375e-05, "grad_norm": 0.2959529459476471, "learning_rate": 4.215105938970889e-05, "loss": 0.0092, "step": 17180 }, { "epoch": 2.6214599609375e-05, "model_forward_time": 0.025079965591430664, "step": 17180 }, { "epoch": 2.6214599609375e-05, "step": 17180, "training_step_time": 0.18485260009765625 }, { "epoch": 2.621612548828125e-05, "model_forward_time": 0.02491450309753418, "step": 17181 }, { "epoch": 2.621612548828125e-05, "step": 17181, "training_step_time": 0.15768003463745117 }, { "epoch": 2.62176513671875e-05, "model_forward_time": 0.02473282814025879, "step": 17182 }, { "epoch": 2.62176513671875e-05, "step": 17182, "training_step_time": 0.18913817405700684 }, { "epoch": 2.621917724609375e-05, "model_forward_time": 0.024394512176513672, "step": 17183 }, { "epoch": 2.621917724609375e-05, "step": 17183, "training_step_time": 0.1275479793548584 }, { "epoch": 2.6220703125e-05, "model_forward_time": 0.024399757385253906, "step": 17184 }, { "epoch": 2.6220703125e-05, "step": 17184, "training_step_time": 0.11643266677856445 }, { "epoch": 2.622222900390625e-05, "model_forward_time": 0.025268077850341797, "step": 17185 }, { "epoch": 2.622222900390625e-05, "step": 17185, "training_step_time": 0.12129044532775879 }, { "epoch": 2.62237548828125e-05, "model_forward_time": 0.025583267211914062, "step": 17186 }, { "epoch": 2.62237548828125e-05, "step": 17186, "training_step_time": 0.10653162002563477 }, { "epoch": 2.622528076171875e-05, "model_forward_time": 0.025265932083129883, "step": 17187 }, { "epoch": 2.622528076171875e-05, "step": 17187, "training_step_time": 0.10411548614501953 }, { "epoch": 2.6226806640625e-05, "model_forward_time": 0.024854421615600586, "step": 17188 }, { "epoch": 2.6226806640625e-05, "step": 17188, "training_step_time": 0.11610984802246094 }, { "epoch": 2.622833251953125e-05, "model_forward_time": 0.025444984436035156, "step": 17189 }, { "epoch": 2.622833251953125e-05, "step": 17189, "training_step_time": 0.1246798038482666 }, { "epoch": 2.62298583984375e-05, "grad_norm": 0.2057543843984604, "learning_rate": 4.209663183431969e-05, "loss": 0.0075, "step": 17190 }, { "epoch": 2.62298583984375e-05, "model_forward_time": 0.02504754066467285, "step": 17190 }, { "epoch": 2.62298583984375e-05, "step": 17190, "training_step_time": 0.12670564651489258 }, { "epoch": 2.623138427734375e-05, "model_forward_time": 0.0247342586517334, "step": 17191 }, { "epoch": 2.623138427734375e-05, "step": 17191, "training_step_time": 0.12469840049743652 }, { "epoch": 2.623291015625e-05, "model_forward_time": 0.02462458610534668, "step": 17192 }, { "epoch": 2.623291015625e-05, "step": 17192, "training_step_time": 0.12204670906066895 }, { "epoch": 2.623443603515625e-05, "model_forward_time": 0.025412797927856445, "step": 17193 }, { "epoch": 2.623443603515625e-05, "step": 17193, "training_step_time": 0.11913418769836426 }, { "epoch": 2.62359619140625e-05, "model_forward_time": 0.025601863861083984, "step": 17194 }, { "epoch": 2.62359619140625e-05, "step": 17194, "training_step_time": 0.11663269996643066 }, { "epoch": 2.623748779296875e-05, "model_forward_time": 0.02789759635925293, "step": 17195 }, { "epoch": 2.623748779296875e-05, "step": 17195, "training_step_time": 0.11406993865966797 }, { "epoch": 2.6239013671875e-05, "model_forward_time": 0.024938344955444336, "step": 17196 }, { "epoch": 2.6239013671875e-05, "step": 17196, "training_step_time": 0.13062238693237305 }, { "epoch": 2.624053955078125e-05, "model_forward_time": 0.025087833404541016, "step": 17197 }, { "epoch": 2.624053955078125e-05, "step": 17197, "training_step_time": 0.14064550399780273 }, { "epoch": 2.62420654296875e-05, "model_forward_time": 0.025380373001098633, "step": 17198 }, { "epoch": 2.62420654296875e-05, "step": 17198, "training_step_time": 0.11208939552307129 }, { "epoch": 2.624359130859375e-05, "model_forward_time": 0.02482748031616211, "step": 17199 }, { "epoch": 2.624359130859375e-05, "step": 17199, "training_step_time": 0.1139683723449707 }, { "epoch": 2.62451171875e-05, "grad_norm": 0.32496362924575806, "learning_rate": 4.2042213882268025e-05, "loss": 0.0164, "step": 17200 }, { "epoch": 2.62451171875e-05, "model_forward_time": 0.025487661361694336, "step": 17200 }, { "epoch": 2.62451171875e-05, "step": 17200, "training_step_time": 0.10844635963439941 }, { "epoch": 2.624664306640625e-05, "model_forward_time": 0.0249481201171875, "step": 17201 }, { "epoch": 2.624664306640625e-05, "step": 17201, "training_step_time": 0.11020445823669434 }, { "epoch": 2.62481689453125e-05, "model_forward_time": 0.02522110939025879, "step": 17202 }, { "epoch": 2.62481689453125e-05, "step": 17202, "training_step_time": 0.19670391082763672 }, { "epoch": 2.624969482421875e-05, "model_forward_time": 0.02395153045654297, "step": 17203 }, { "epoch": 2.624969482421875e-05, "step": 17203, "training_step_time": 0.10595870018005371 }, { "epoch": 2.6251220703125e-05, "model_forward_time": 0.024007558822631836, "step": 17204 }, { "epoch": 2.6251220703125e-05, "step": 17204, "training_step_time": 0.10351872444152832 }, { "epoch": 2.625274658203125e-05, "model_forward_time": 0.02525615692138672, "step": 17205 }, { "epoch": 2.625274658203125e-05, "step": 17205, "training_step_time": 0.10487174987792969 }, { "epoch": 2.62542724609375e-05, "model_forward_time": 0.025624513626098633, "step": 17206 }, { "epoch": 2.62542724609375e-05, "step": 17206, "training_step_time": 0.10958075523376465 }, { "epoch": 2.625579833984375e-05, "model_forward_time": 0.025081157684326172, "step": 17207 }, { "epoch": 2.625579833984375e-05, "step": 17207, "training_step_time": 0.10593056678771973 }, { "epoch": 2.625732421875e-05, "model_forward_time": 0.02512669563293457, "step": 17208 }, { "epoch": 2.625732421875e-05, "step": 17208, "training_step_time": 0.10435104370117188 }, { "epoch": 2.625885009765625e-05, "model_forward_time": 0.025334835052490234, "step": 17209 }, { "epoch": 2.625885009765625e-05, "step": 17209, "training_step_time": 0.10810708999633789 }, { "epoch": 2.62603759765625e-05, "grad_norm": 0.3234359622001648, "learning_rate": 4.1987805599676896e-05, "loss": 0.0088, "step": 17210 }, { "epoch": 2.62603759765625e-05, "model_forward_time": 0.025055408477783203, "step": 17210 }, { "epoch": 2.62603759765625e-05, "step": 17210, "training_step_time": 0.10582232475280762 }, { "epoch": 2.626190185546875e-05, "model_forward_time": 0.02558588981628418, "step": 17211 }, { "epoch": 2.626190185546875e-05, "step": 17211, "training_step_time": 0.10567283630371094 }, { "epoch": 2.6263427734375e-05, "model_forward_time": 0.025130033493041992, "step": 17212 }, { "epoch": 2.6263427734375e-05, "step": 17212, "training_step_time": 0.10471200942993164 }, { "epoch": 2.626495361328125e-05, "model_forward_time": 0.025229215621948242, "step": 17213 }, { "epoch": 2.626495361328125e-05, "step": 17213, "training_step_time": 0.10382223129272461 }, { "epoch": 2.62664794921875e-05, "model_forward_time": 0.023931503295898438, "step": 17214 }, { "epoch": 2.62664794921875e-05, "step": 17214, "training_step_time": 0.10650253295898438 }, { "epoch": 2.626800537109375e-05, "model_forward_time": 0.025163650512695312, "step": 17215 }, { "epoch": 2.626800537109375e-05, "step": 17215, "training_step_time": 0.10566401481628418 }, { "epoch": 2.626953125e-05, "model_forward_time": 0.025220394134521484, "step": 17216 }, { "epoch": 2.626953125e-05, "step": 17216, "training_step_time": 0.1061241626739502 }, { "epoch": 2.627105712890625e-05, "model_forward_time": 0.025054931640625, "step": 17217 }, { "epoch": 2.627105712890625e-05, "step": 17217, "training_step_time": 0.1074988842010498 }, { "epoch": 2.62725830078125e-05, "model_forward_time": 0.02532815933227539, "step": 17218 }, { "epoch": 2.62725830078125e-05, "step": 17218, "training_step_time": 0.10403299331665039 }, { "epoch": 2.627410888671875e-05, "model_forward_time": 0.025821685791015625, "step": 17219 }, { "epoch": 2.627410888671875e-05, "step": 17219, "training_step_time": 0.1048579216003418 }, { "epoch": 2.6275634765625e-05, "grad_norm": 0.1794009804725647, "learning_rate": 4.1933407052657456e-05, "loss": 0.0118, "step": 17220 }, { "epoch": 2.6275634765625e-05, "model_forward_time": 0.024385929107666016, "step": 17220 }, { "epoch": 2.6275634765625e-05, "step": 17220, "training_step_time": 0.1023712158203125 }, { "epoch": 2.627716064453125e-05, "model_forward_time": 0.0245819091796875, "step": 17221 }, { "epoch": 2.627716064453125e-05, "step": 17221, "training_step_time": 0.10754084587097168 }, { "epoch": 2.62786865234375e-05, "model_forward_time": 0.025638103485107422, "step": 17222 }, { "epoch": 2.62786865234375e-05, "step": 17222, "training_step_time": 0.10706353187561035 }, { "epoch": 2.628021240234375e-05, "model_forward_time": 0.025616168975830078, "step": 17223 }, { "epoch": 2.628021240234375e-05, "step": 17223, "training_step_time": 0.1905684471130371 }, { "epoch": 2.628173828125e-05, "model_forward_time": 0.024653196334838867, "step": 17224 }, { "epoch": 2.628173828125e-05, "step": 17224, "training_step_time": 0.22989368438720703 }, { "epoch": 2.628326416015625e-05, "model_forward_time": 0.024637699127197266, "step": 17225 }, { "epoch": 2.628326416015625e-05, "step": 17225, "training_step_time": 0.11776041984558105 }, { "epoch": 2.62847900390625e-05, "model_forward_time": 0.024598121643066406, "step": 17226 }, { "epoch": 2.62847900390625e-05, "step": 17226, "training_step_time": 0.1938154697418213 }, { "epoch": 2.628631591796875e-05, "model_forward_time": 0.024374008178710938, "step": 17227 }, { "epoch": 2.628631591796875e-05, "step": 17227, "training_step_time": 0.1888718605041504 }, { "epoch": 2.6287841796875e-05, "model_forward_time": 0.023993492126464844, "step": 17228 }, { "epoch": 2.6287841796875e-05, "step": 17228, "training_step_time": 0.21954083442687988 }, { "epoch": 2.628936767578125e-05, "model_forward_time": 0.024465084075927734, "step": 17229 }, { "epoch": 2.628936767578125e-05, "step": 17229, "training_step_time": 0.12374091148376465 }, { "epoch": 2.62908935546875e-05, "grad_norm": 0.3404396176338196, "learning_rate": 4.187901830730906e-05, "loss": 0.0082, "step": 17230 }, { "epoch": 2.62908935546875e-05, "model_forward_time": 0.02412557601928711, "step": 17230 }, { "epoch": 2.62908935546875e-05, "step": 17230, "training_step_time": 0.11684441566467285 }, { "epoch": 2.629241943359375e-05, "model_forward_time": 0.02497076988220215, "step": 17231 }, { "epoch": 2.629241943359375e-05, "step": 17231, "training_step_time": 0.10600471496582031 }, { "epoch": 2.62939453125e-05, "model_forward_time": 0.025120258331298828, "step": 17232 }, { "epoch": 2.62939453125e-05, "step": 17232, "training_step_time": 0.11123085021972656 }, { "epoch": 2.629547119140625e-05, "model_forward_time": 0.025255203247070312, "step": 17233 }, { "epoch": 2.629547119140625e-05, "step": 17233, "training_step_time": 0.10569357872009277 }, { "epoch": 2.62969970703125e-05, "model_forward_time": 0.025356531143188477, "step": 17234 }, { "epoch": 2.62969970703125e-05, "step": 17234, "training_step_time": 0.10668516159057617 }, { "epoch": 2.629852294921875e-05, "model_forward_time": 0.02522897720336914, "step": 17235 }, { "epoch": 2.629852294921875e-05, "step": 17235, "training_step_time": 0.10543513298034668 }, { "epoch": 2.6300048828125e-05, "model_forward_time": 0.025376319885253906, "step": 17236 }, { "epoch": 2.6300048828125e-05, "step": 17236, "training_step_time": 0.10745954513549805 }, { "epoch": 2.630157470703125e-05, "model_forward_time": 0.025453567504882812, "step": 17237 }, { "epoch": 2.630157470703125e-05, "step": 17237, "training_step_time": 0.10630512237548828 }, { "epoch": 2.63031005859375e-05, "model_forward_time": 0.025068998336791992, "step": 17238 }, { "epoch": 2.63031005859375e-05, "step": 17238, "training_step_time": 0.10552573204040527 }, { "epoch": 2.630462646484375e-05, "model_forward_time": 0.025550365447998047, "step": 17239 }, { "epoch": 2.630462646484375e-05, "step": 17239, "training_step_time": 0.10872364044189453 }, { "epoch": 2.630615234375e-05, "grad_norm": 0.176010400056839, "learning_rate": 4.18246394297192e-05, "loss": 0.0089, "step": 17240 }, { "epoch": 2.630615234375e-05, "model_forward_time": 0.025264978408813477, "step": 17240 }, { "epoch": 2.630615234375e-05, "step": 17240, "training_step_time": 0.10586357116699219 }, { "epoch": 2.630767822265625e-05, "model_forward_time": 0.025884628295898438, "step": 17241 }, { "epoch": 2.630767822265625e-05, "step": 17241, "training_step_time": 0.10614347457885742 }, { "epoch": 2.63092041015625e-05, "model_forward_time": 0.02504706382751465, "step": 17242 }, { "epoch": 2.63092041015625e-05, "step": 17242, "training_step_time": 0.10645174980163574 }, { "epoch": 2.631072998046875e-05, "model_forward_time": 0.025099992752075195, "step": 17243 }, { "epoch": 2.631072998046875e-05, "step": 17243, "training_step_time": 0.10491204261779785 }, { "epoch": 2.6312255859375e-05, "model_forward_time": 0.025243043899536133, "step": 17244 }, { "epoch": 2.6312255859375e-05, "step": 17244, "training_step_time": 0.13492488861083984 }, { "epoch": 2.631378173828125e-05, "model_forward_time": 0.025098085403442383, "step": 17245 }, { "epoch": 2.631378173828125e-05, "step": 17245, "training_step_time": 0.11101078987121582 }, { "epoch": 2.63153076171875e-05, "model_forward_time": 0.025069713592529297, "step": 17246 }, { "epoch": 2.63153076171875e-05, "step": 17246, "training_step_time": 0.11706113815307617 }, { "epoch": 2.631683349609375e-05, "model_forward_time": 0.02504277229309082, "step": 17247 }, { "epoch": 2.631683349609375e-05, "step": 17247, "training_step_time": 0.1148676872253418 }, { "epoch": 2.6318359375e-05, "model_forward_time": 0.02492976188659668, "step": 17248 }, { "epoch": 2.6318359375e-05, "step": 17248, "training_step_time": 0.10640382766723633 }, { "epoch": 2.631988525390625e-05, "model_forward_time": 0.026098251342773438, "step": 17249 }, { "epoch": 2.631988525390625e-05, "step": 17249, "training_step_time": 0.19114971160888672 }, { "epoch": 2.63214111328125e-05, "grad_norm": 0.21381649374961853, "learning_rate": 4.17702704859633e-05, "loss": 0.022, "step": 17250 }, { "epoch": 2.63214111328125e-05, "model_forward_time": 0.024113178253173828, "step": 17250 }, { "epoch": 2.63214111328125e-05, "step": 17250, "training_step_time": 0.10228466987609863 }, { "epoch": 2.632293701171875e-05, "model_forward_time": 0.024683713912963867, "step": 17251 }, { "epoch": 2.632293701171875e-05, "step": 17251, "training_step_time": 0.10090756416320801 }, { "epoch": 2.6324462890625e-05, "model_forward_time": 0.025188684463500977, "step": 17252 }, { "epoch": 2.6324462890625e-05, "step": 17252, "training_step_time": 0.10788750648498535 }, { "epoch": 2.632598876953125e-05, "model_forward_time": 0.025177478790283203, "step": 17253 }, { "epoch": 2.632598876953125e-05, "step": 17253, "training_step_time": 0.10860800743103027 }, { "epoch": 2.63275146484375e-05, "model_forward_time": 0.024621248245239258, "step": 17254 }, { "epoch": 2.63275146484375e-05, "step": 17254, "training_step_time": 0.1089925765991211 }, { "epoch": 2.632904052734375e-05, "model_forward_time": 0.02517533302307129, "step": 17255 }, { "epoch": 2.632904052734375e-05, "step": 17255, "training_step_time": 0.11105942726135254 }, { "epoch": 2.633056640625e-05, "model_forward_time": 0.025098085403442383, "step": 17256 }, { "epoch": 2.633056640625e-05, "step": 17256, "training_step_time": 0.10741615295410156 }, { "epoch": 2.633209228515625e-05, "model_forward_time": 0.025020360946655273, "step": 17257 }, { "epoch": 2.633209228515625e-05, "step": 17257, "training_step_time": 0.10759210586547852 }, { "epoch": 2.63336181640625e-05, "model_forward_time": 0.0252225399017334, "step": 17258 }, { "epoch": 2.63336181640625e-05, "step": 17258, "training_step_time": 0.10917949676513672 }, { "epoch": 2.633514404296875e-05, "model_forward_time": 0.024944782257080078, "step": 17259 }, { "epoch": 2.633514404296875e-05, "step": 17259, "training_step_time": 0.10735750198364258 }, { "epoch": 2.6336669921875e-05, "grad_norm": 0.22420473396778107, "learning_rate": 4.171591154210479e-05, "loss": 0.01, "step": 17260 }, { "epoch": 2.6336669921875e-05, "model_forward_time": 0.025457382202148438, "step": 17260 }, { "epoch": 2.6336669921875e-05, "step": 17260, "training_step_time": 0.10855531692504883 }, { "epoch": 2.633819580078125e-05, "model_forward_time": 0.025414705276489258, "step": 17261 }, { "epoch": 2.633819580078125e-05, "step": 17261, "training_step_time": 0.10737800598144531 }, { "epoch": 2.63397216796875e-05, "model_forward_time": 0.02512073516845703, "step": 17262 }, { "epoch": 2.63397216796875e-05, "step": 17262, "training_step_time": 0.10809922218322754 }, { "epoch": 2.634124755859375e-05, "model_forward_time": 0.02538752555847168, "step": 17263 }, { "epoch": 2.634124755859375e-05, "step": 17263, "training_step_time": 0.11210894584655762 }, { "epoch": 2.63427734375e-05, "model_forward_time": 0.025348663330078125, "step": 17264 }, { "epoch": 2.63427734375e-05, "step": 17264, "training_step_time": 0.10833573341369629 }, { "epoch": 2.634429931640625e-05, "model_forward_time": 0.024996519088745117, "step": 17265 }, { "epoch": 2.634429931640625e-05, "step": 17265, "training_step_time": 0.10490894317626953 }, { "epoch": 2.63458251953125e-05, "model_forward_time": 0.025429248809814453, "step": 17266 }, { "epoch": 2.63458251953125e-05, "step": 17266, "training_step_time": 0.10659646987915039 }, { "epoch": 2.634735107421875e-05, "model_forward_time": 0.02536773681640625, "step": 17267 }, { "epoch": 2.634735107421875e-05, "step": 17267, "training_step_time": 0.10606241226196289 }, { "epoch": 2.6348876953125e-05, "model_forward_time": 0.02514171600341797, "step": 17268 }, { "epoch": 2.6348876953125e-05, "step": 17268, "training_step_time": 0.10623812675476074 }, { "epoch": 2.635040283203125e-05, "model_forward_time": 0.025213241577148438, "step": 17269 }, { "epoch": 2.635040283203125e-05, "step": 17269, "training_step_time": 0.10567092895507812 }, { "epoch": 2.63519287109375e-05, "grad_norm": 0.19210898876190186, "learning_rate": 4.166156266419489e-05, "loss": 0.0091, "step": 17270 }, { "epoch": 2.63519287109375e-05, "model_forward_time": 0.02423381805419922, "step": 17270 }, { "epoch": 2.63519287109375e-05, "step": 17270, "training_step_time": 0.14628362655639648 }, { "epoch": 2.635345458984375e-05, "model_forward_time": 0.025501728057861328, "step": 17271 }, { "epoch": 2.635345458984375e-05, "step": 17271, "training_step_time": 0.11100983619689941 }, { "epoch": 2.635498046875e-05, "model_forward_time": 0.0246124267578125, "step": 17272 }, { "epoch": 2.635498046875e-05, "step": 17272, "training_step_time": 0.17659521102905273 }, { "epoch": 2.635650634765625e-05, "model_forward_time": 0.02464580535888672, "step": 17273 }, { "epoch": 2.635650634765625e-05, "step": 17273, "training_step_time": 0.16070008277893066 }, { "epoch": 2.63580322265625e-05, "model_forward_time": 0.023994922637939453, "step": 17274 }, { "epoch": 2.63580322265625e-05, "step": 17274, "training_step_time": 0.2162027359008789 }, { "epoch": 2.635955810546875e-05, "model_forward_time": 0.02430438995361328, "step": 17275 }, { "epoch": 2.635955810546875e-05, "step": 17275, "training_step_time": 0.19339776039123535 }, { "epoch": 2.6361083984375e-05, "model_forward_time": 0.024966955184936523, "step": 17276 }, { "epoch": 2.6361083984375e-05, "step": 17276, "training_step_time": 0.16322994232177734 }, { "epoch": 2.636260986328125e-05, "model_forward_time": 0.02600860595703125, "step": 17277 }, { "epoch": 2.636260986328125e-05, "step": 17277, "training_step_time": 0.1701974868774414 }, { "epoch": 2.63641357421875e-05, "model_forward_time": 0.024682998657226562, "step": 17278 }, { "epoch": 2.63641357421875e-05, "step": 17278, "training_step_time": 0.12079191207885742 }, { "epoch": 2.636566162109375e-05, "model_forward_time": 0.02451467514038086, "step": 17279 }, { "epoch": 2.636566162109375e-05, "step": 17279, "training_step_time": 0.10463428497314453 }, { "epoch": 2.63671875e-05, "grad_norm": 0.1971217542886734, "learning_rate": 4.160722391827262e-05, "loss": 0.0072, "step": 17280 }, { "epoch": 2.63671875e-05, "model_forward_time": 0.026851654052734375, "step": 17280 }, { "epoch": 2.63671875e-05, "step": 17280, "training_step_time": 0.10940814018249512 }, { "epoch": 2.636871337890625e-05, "model_forward_time": 0.025453805923461914, "step": 17281 }, { "epoch": 2.636871337890625e-05, "step": 17281, "training_step_time": 0.1073918342590332 }, { "epoch": 2.63702392578125e-05, "model_forward_time": 0.025534868240356445, "step": 17282 }, { "epoch": 2.63702392578125e-05, "step": 17282, "training_step_time": 0.10864138603210449 }, { "epoch": 2.637176513671875e-05, "model_forward_time": 0.024863243103027344, "step": 17283 }, { "epoch": 2.637176513671875e-05, "step": 17283, "training_step_time": 0.10479903221130371 }, { "epoch": 2.6373291015625e-05, "model_forward_time": 0.02559185028076172, "step": 17284 }, { "epoch": 2.6373291015625e-05, "step": 17284, "training_step_time": 0.10883474349975586 }, { "epoch": 2.637481689453125e-05, "model_forward_time": 0.02559351921081543, "step": 17285 }, { "epoch": 2.637481689453125e-05, "step": 17285, "training_step_time": 0.10543560981750488 }, { "epoch": 2.63763427734375e-05, "model_forward_time": 0.025194168090820312, "step": 17286 }, { "epoch": 2.63763427734375e-05, "step": 17286, "training_step_time": 0.10472941398620605 }, { "epoch": 2.637786865234375e-05, "model_forward_time": 0.02605891227722168, "step": 17287 }, { "epoch": 2.637786865234375e-05, "step": 17287, "training_step_time": 0.10611367225646973 }, { "epoch": 2.637939453125e-05, "model_forward_time": 0.0252530574798584, "step": 17288 }, { "epoch": 2.637939453125e-05, "step": 17288, "training_step_time": 0.10325503349304199 }, { "epoch": 2.638092041015625e-05, "model_forward_time": 0.02496504783630371, "step": 17289 }, { "epoch": 2.638092041015625e-05, "step": 17289, "training_step_time": 0.1527245044708252 }, { "epoch": 2.63824462890625e-05, "grad_norm": 0.11559871584177017, "learning_rate": 4.155289537036466e-05, "loss": 0.0118, "step": 17290 }, { "epoch": 2.63824462890625e-05, "model_forward_time": 0.024950742721557617, "step": 17290 }, { "epoch": 2.63824462890625e-05, "step": 17290, "training_step_time": 0.14170336723327637 }, { "epoch": 2.638397216796875e-05, "model_forward_time": 0.024463891983032227, "step": 17291 }, { "epoch": 2.638397216796875e-05, "step": 17291, "training_step_time": 0.10897707939147949 }, { "epoch": 2.6385498046875e-05, "model_forward_time": 0.025054216384887695, "step": 17292 }, { "epoch": 2.6385498046875e-05, "step": 17292, "training_step_time": 0.12053084373474121 }, { "epoch": 2.638702392578125e-05, "model_forward_time": 0.025673627853393555, "step": 17293 }, { "epoch": 2.638702392578125e-05, "step": 17293, "training_step_time": 0.10564637184143066 }, { "epoch": 2.63885498046875e-05, "model_forward_time": 0.02535533905029297, "step": 17294 }, { "epoch": 2.63885498046875e-05, "step": 17294, "training_step_time": 0.10338950157165527 }, { "epoch": 2.639007568359375e-05, "model_forward_time": 0.025218486785888672, "step": 17295 }, { "epoch": 2.639007568359375e-05, "step": 17295, "training_step_time": 0.16329336166381836 }, { "epoch": 2.63916015625e-05, "model_forward_time": 0.025038957595825195, "step": 17296 }, { "epoch": 2.63916015625e-05, "step": 17296, "training_step_time": 0.10536670684814453 }, { "epoch": 2.639312744140625e-05, "model_forward_time": 0.02482295036315918, "step": 17297 }, { "epoch": 2.639312744140625e-05, "step": 17297, "training_step_time": 0.10226035118103027 }, { "epoch": 2.63946533203125e-05, "model_forward_time": 0.026494741439819336, "step": 17298 }, { "epoch": 2.63946533203125e-05, "step": 17298, "training_step_time": 0.14969491958618164 }, { "epoch": 2.639617919921875e-05, "model_forward_time": 0.02373647689819336, "step": 17299 }, { "epoch": 2.639617919921875e-05, "step": 17299, "training_step_time": 0.18056702613830566 }, { "epoch": 2.6397705078125e-05, "grad_norm": 0.22532442212104797, "learning_rate": 4.149857708648536e-05, "loss": 0.009, "step": 17300 }, { "epoch": 2.6397705078125e-05, "model_forward_time": 0.02420639991760254, "step": 17300 }, { "epoch": 2.6397705078125e-05, "step": 17300, "training_step_time": 0.1705455780029297 }, { "epoch": 2.639923095703125e-05, "model_forward_time": 0.023001909255981445, "step": 17301 }, { "epoch": 2.639923095703125e-05, "step": 17301, "training_step_time": 0.15152192115783691 }, { "epoch": 2.64007568359375e-05, "model_forward_time": 0.023016691207885742, "step": 17302 }, { "epoch": 2.64007568359375e-05, "step": 17302, "training_step_time": 0.1493997573852539 }, { "epoch": 2.640228271484375e-05, "model_forward_time": 0.023234844207763672, "step": 17303 }, { "epoch": 2.640228271484375e-05, "step": 17303, "training_step_time": 0.1351921558380127 }, { "epoch": 2.640380859375e-05, "model_forward_time": 0.023465871810913086, "step": 17304 }, { "epoch": 2.640380859375e-05, "step": 17304, "training_step_time": 0.13282036781311035 }, { "epoch": 2.640533447265625e-05, "model_forward_time": 0.022979021072387695, "step": 17305 }, { "epoch": 2.640533447265625e-05, "step": 17305, "training_step_time": 0.1275489330291748 }, { "epoch": 2.64068603515625e-05, "model_forward_time": 0.023621082305908203, "step": 17306 }, { "epoch": 2.64068603515625e-05, "step": 17306, "training_step_time": 0.12679553031921387 }, { "epoch": 2.640838623046875e-05, "model_forward_time": 0.024034500122070312, "step": 17307 }, { "epoch": 2.640838623046875e-05, "step": 17307, "training_step_time": 0.12052083015441895 }, { "epoch": 2.6409912109375e-05, "model_forward_time": 0.024214744567871094, "step": 17308 }, { "epoch": 2.6409912109375e-05, "step": 17308, "training_step_time": 0.1169443130493164 }, { "epoch": 2.641143798828125e-05, "model_forward_time": 0.024995088577270508, "step": 17309 }, { "epoch": 2.641143798828125e-05, "step": 17309, "training_step_time": 0.11229920387268066 }, { "epoch": 2.64129638671875e-05, "grad_norm": 0.28290852904319763, "learning_rate": 4.14442691326365e-05, "loss": 0.0112, "step": 17310 }, { "epoch": 2.64129638671875e-05, "model_forward_time": 0.02481698989868164, "step": 17310 }, { "epoch": 2.64129638671875e-05, "step": 17310, "training_step_time": 0.11238884925842285 }, { "epoch": 2.641448974609375e-05, "model_forward_time": 0.025311946868896484, "step": 17311 }, { "epoch": 2.641448974609375e-05, "step": 17311, "training_step_time": 0.11097502708435059 }, { "epoch": 2.6416015625e-05, "model_forward_time": 0.024950027465820312, "step": 17312 }, { "epoch": 2.6416015625e-05, "step": 17312, "training_step_time": 0.10665178298950195 }, { "epoch": 2.641754150390625e-05, "model_forward_time": 0.025180816650390625, "step": 17313 }, { "epoch": 2.641754150390625e-05, "step": 17313, "training_step_time": 0.15871763229370117 }, { "epoch": 2.64190673828125e-05, "model_forward_time": 0.024593353271484375, "step": 17314 }, { "epoch": 2.64190673828125e-05, "step": 17314, "training_step_time": 0.10336470603942871 }, { "epoch": 2.642059326171875e-05, "model_forward_time": 0.024129629135131836, "step": 17315 }, { "epoch": 2.642059326171875e-05, "step": 17315, "training_step_time": 0.14912867546081543 }, { "epoch": 2.6422119140625e-05, "model_forward_time": 0.02430438995361328, "step": 17316 }, { "epoch": 2.6422119140625e-05, "step": 17316, "training_step_time": 0.1629195213317871 }, { "epoch": 2.642364501953125e-05, "model_forward_time": 0.024187088012695312, "step": 17317 }, { "epoch": 2.642364501953125e-05, "step": 17317, "training_step_time": 0.19014239311218262 }, { "epoch": 2.64251708984375e-05, "model_forward_time": 0.024280548095703125, "step": 17318 }, { "epoch": 2.64251708984375e-05, "step": 17318, "training_step_time": 0.16835427284240723 }, { "epoch": 2.642669677734375e-05, "model_forward_time": 0.0245969295501709, "step": 17319 }, { "epoch": 2.642669677734375e-05, "step": 17319, "training_step_time": 0.13764142990112305 }, { "epoch": 2.642822265625e-05, "grad_norm": 0.1506081372499466, "learning_rate": 4.1389971574807416e-05, "loss": 0.0057, "step": 17320 }, { "epoch": 2.642822265625e-05, "model_forward_time": 0.02449178695678711, "step": 17320 }, { "epoch": 2.642822265625e-05, "step": 17320, "training_step_time": 0.12434720993041992 }, { "epoch": 2.642974853515625e-05, "model_forward_time": 0.02458930015563965, "step": 17321 }, { "epoch": 2.642974853515625e-05, "step": 17321, "training_step_time": 0.1180570125579834 }, { "epoch": 2.64312744140625e-05, "model_forward_time": 0.025118350982666016, "step": 17322 }, { "epoch": 2.64312744140625e-05, "step": 17322, "training_step_time": 0.11302328109741211 }, { "epoch": 2.643280029296875e-05, "model_forward_time": 0.02489018440246582, "step": 17323 }, { "epoch": 2.643280029296875e-05, "step": 17323, "training_step_time": 0.10639643669128418 }, { "epoch": 2.6434326171875e-05, "model_forward_time": 0.025298595428466797, "step": 17324 }, { "epoch": 2.6434326171875e-05, "step": 17324, "training_step_time": 0.10550618171691895 }, { "epoch": 2.643585205078125e-05, "model_forward_time": 0.02501964569091797, "step": 17325 }, { "epoch": 2.643585205078125e-05, "step": 17325, "training_step_time": 0.1074678897857666 }, { "epoch": 2.64373779296875e-05, "model_forward_time": 0.025665760040283203, "step": 17326 }, { "epoch": 2.64373779296875e-05, "step": 17326, "training_step_time": 0.10670995712280273 }, { "epoch": 2.643890380859375e-05, "model_forward_time": 0.02523183822631836, "step": 17327 }, { "epoch": 2.643890380859375e-05, "step": 17327, "training_step_time": 0.11145305633544922 }, { "epoch": 2.64404296875e-05, "model_forward_time": 0.025435447692871094, "step": 17328 }, { "epoch": 2.64404296875e-05, "step": 17328, "training_step_time": 0.1056814193725586 }, { "epoch": 2.644195556640625e-05, "model_forward_time": 0.02504420280456543, "step": 17329 }, { "epoch": 2.644195556640625e-05, "step": 17329, "training_step_time": 0.10418152809143066 }, { "epoch": 2.64434814453125e-05, "grad_norm": 0.15126276016235352, "learning_rate": 4.1335684478974744e-05, "loss": 0.0078, "step": 17330 }, { "epoch": 2.64434814453125e-05, "model_forward_time": 0.025588274002075195, "step": 17330 }, { "epoch": 2.64434814453125e-05, "step": 17330, "training_step_time": 0.10495567321777344 }, { "epoch": 2.644500732421875e-05, "model_forward_time": 0.025192737579345703, "step": 17331 }, { "epoch": 2.644500732421875e-05, "step": 17331, "training_step_time": 0.10581445693969727 }, { "epoch": 2.6446533203125e-05, "model_forward_time": 0.02521204948425293, "step": 17332 }, { "epoch": 2.6446533203125e-05, "step": 17332, "training_step_time": 0.11215448379516602 }, { "epoch": 2.644805908203125e-05, "model_forward_time": 0.025271892547607422, "step": 17333 }, { "epoch": 2.644805908203125e-05, "step": 17333, "training_step_time": 0.11980390548706055 }, { "epoch": 2.64495849609375e-05, "model_forward_time": 0.02512073516845703, "step": 17334 }, { "epoch": 2.64495849609375e-05, "step": 17334, "training_step_time": 0.13007402420043945 }, { "epoch": 2.645111083984375e-05, "model_forward_time": 0.02453303337097168, "step": 17335 }, { "epoch": 2.645111083984375e-05, "step": 17335, "training_step_time": 0.1090550422668457 }, { "epoch": 2.645263671875e-05, "model_forward_time": 0.025823593139648438, "step": 17336 }, { "epoch": 2.645263671875e-05, "step": 17336, "training_step_time": 0.12313580513000488 }, { "epoch": 2.645416259765625e-05, "model_forward_time": 0.025315046310424805, "step": 17337 }, { "epoch": 2.645416259765625e-05, "step": 17337, "training_step_time": 0.1257326602935791 }, { "epoch": 2.64556884765625e-05, "model_forward_time": 0.025159835815429688, "step": 17338 }, { "epoch": 2.64556884765625e-05, "step": 17338, "training_step_time": 0.18605780601501465 }, { "epoch": 2.645721435546875e-05, "model_forward_time": 0.02439141273498535, "step": 17339 }, { "epoch": 2.645721435546875e-05, "step": 17339, "training_step_time": 0.11363887786865234 }, { "epoch": 2.6458740234375e-05, "grad_norm": 0.13624484837055206, "learning_rate": 4.1281407911102425e-05, "loss": 0.0113, "step": 17340 }, { "epoch": 2.6458740234375e-05, "model_forward_time": 0.023279190063476562, "step": 17340 }, { "epoch": 2.6458740234375e-05, "step": 17340, "training_step_time": 0.11270380020141602 }, { "epoch": 2.646026611328125e-05, "model_forward_time": 0.024838685989379883, "step": 17341 }, { "epoch": 2.646026611328125e-05, "step": 17341, "training_step_time": 0.1092836856842041 }, { "epoch": 2.64617919921875e-05, "model_forward_time": 0.025051593780517578, "step": 17342 }, { "epoch": 2.64617919921875e-05, "step": 17342, "training_step_time": 0.11126255989074707 }, { "epoch": 2.646331787109375e-05, "model_forward_time": 0.025032520294189453, "step": 17343 }, { "epoch": 2.646331787109375e-05, "step": 17343, "training_step_time": 0.10983967781066895 }, { "epoch": 2.646484375e-05, "model_forward_time": 0.024884700775146484, "step": 17344 }, { "epoch": 2.646484375e-05, "step": 17344, "training_step_time": 0.10742068290710449 }, { "epoch": 2.646636962890625e-05, "model_forward_time": 0.025624990463256836, "step": 17345 }, { "epoch": 2.646636962890625e-05, "step": 17345, "training_step_time": 0.10619139671325684 }, { "epoch": 2.64678955078125e-05, "model_forward_time": 0.024969816207885742, "step": 17346 }, { "epoch": 2.64678955078125e-05, "step": 17346, "training_step_time": 0.10596013069152832 }, { "epoch": 2.646942138671875e-05, "model_forward_time": 0.025330543518066406, "step": 17347 }, { "epoch": 2.646942138671875e-05, "step": 17347, "training_step_time": 0.10667634010314941 }, { "epoch": 2.6470947265625e-05, "model_forward_time": 0.025158405303955078, "step": 17348 }, { "epoch": 2.6470947265625e-05, "step": 17348, "training_step_time": 0.1053316593170166 }, { "epoch": 2.647247314453125e-05, "model_forward_time": 0.024869918823242188, "step": 17349 }, { "epoch": 2.647247314453125e-05, "step": 17349, "training_step_time": 0.10649418830871582 }, { "epoch": 2.64739990234375e-05, "grad_norm": 0.09851908683776855, "learning_rate": 4.12271419371416e-05, "loss": 0.0072, "step": 17350 }, { "epoch": 2.64739990234375e-05, "model_forward_time": 0.02512669563293457, "step": 17350 }, { "epoch": 2.64739990234375e-05, "step": 17350, "training_step_time": 0.10622787475585938 }, { "epoch": 2.647552490234375e-05, "model_forward_time": 0.025164127349853516, "step": 17351 }, { "epoch": 2.647552490234375e-05, "step": 17351, "training_step_time": 0.1058645248413086 }, { "epoch": 2.647705078125e-05, "model_forward_time": 0.026790380477905273, "step": 17352 }, { "epoch": 2.647705078125e-05, "step": 17352, "training_step_time": 0.10963869094848633 }, { "epoch": 2.647857666015625e-05, "model_forward_time": 0.025164365768432617, "step": 17353 }, { "epoch": 2.647857666015625e-05, "step": 17353, "training_step_time": 0.1059424877166748 }, { "epoch": 2.64801025390625e-05, "model_forward_time": 0.024867534637451172, "step": 17354 }, { "epoch": 2.64801025390625e-05, "step": 17354, "training_step_time": 0.1049659252166748 }, { "epoch": 2.648162841796875e-05, "model_forward_time": 0.0250091552734375, "step": 17355 }, { "epoch": 2.648162841796875e-05, "step": 17355, "training_step_time": 0.10384726524353027 }, { "epoch": 2.6483154296875e-05, "model_forward_time": 0.0248873233795166, "step": 17356 }, { "epoch": 2.6483154296875e-05, "step": 17356, "training_step_time": 0.10447549819946289 }, { "epoch": 2.648468017578125e-05, "model_forward_time": 0.02530360221862793, "step": 17357 }, { "epoch": 2.648468017578125e-05, "step": 17357, "training_step_time": 0.10425162315368652 }, { "epoch": 2.64862060546875e-05, "model_forward_time": 0.02587294578552246, "step": 17358 }, { "epoch": 2.64862060546875e-05, "step": 17358, "training_step_time": 0.10603785514831543 }, { "epoch": 2.648773193359375e-05, "model_forward_time": 0.02524566650390625, "step": 17359 }, { "epoch": 2.648773193359375e-05, "step": 17359, "training_step_time": 0.10524725914001465 }, { "epoch": 2.64892578125e-05, "grad_norm": 0.2906000018119812, "learning_rate": 4.1172886623030526e-05, "loss": 0.0146, "step": 17360 }, { "epoch": 2.64892578125e-05, "model_forward_time": 0.0249478816986084, "step": 17360 }, { "epoch": 2.64892578125e-05, "step": 17360, "training_step_time": 0.10855603218078613 }, { "epoch": 2.649078369140625e-05, "model_forward_time": 0.0253143310546875, "step": 17361 }, { "epoch": 2.649078369140625e-05, "step": 17361, "training_step_time": 0.16248512268066406 }, { "epoch": 2.64923095703125e-05, "model_forward_time": 0.02475714683532715, "step": 17362 }, { "epoch": 2.64923095703125e-05, "step": 17362, "training_step_time": 0.11358499526977539 }, { "epoch": 2.649383544921875e-05, "model_forward_time": 0.024197816848754883, "step": 17363 }, { "epoch": 2.649383544921875e-05, "step": 17363, "training_step_time": 0.13588547706604004 }, { "epoch": 2.6495361328125e-05, "model_forward_time": 0.024999141693115234, "step": 17364 }, { "epoch": 2.6495361328125e-05, "step": 17364, "training_step_time": 0.1621096134185791 }, { "epoch": 2.649688720703125e-05, "model_forward_time": 0.025062084197998047, "step": 17365 }, { "epoch": 2.649688720703125e-05, "step": 17365, "training_step_time": 0.21131491661071777 }, { "epoch": 2.64984130859375e-05, "model_forward_time": 0.024469375610351562, "step": 17366 }, { "epoch": 2.64984130859375e-05, "step": 17366, "training_step_time": 0.1831972599029541 }, { "epoch": 2.649993896484375e-05, "model_forward_time": 0.0245363712310791, "step": 17367 }, { "epoch": 2.649993896484375e-05, "step": 17367, "training_step_time": 0.1284778118133545 }, { "epoch": 2.650146484375e-05, "model_forward_time": 0.024142742156982422, "step": 17368 }, { "epoch": 2.650146484375e-05, "step": 17368, "training_step_time": 0.13205671310424805 }, { "epoch": 2.650299072265625e-05, "model_forward_time": 0.02471470832824707, "step": 17369 }, { "epoch": 2.650299072265625e-05, "step": 17369, "training_step_time": 0.10786938667297363 }, { "epoch": 2.65045166015625e-05, "grad_norm": 0.21624208986759186, "learning_rate": 4.111864203469457e-05, "loss": 0.0095, "step": 17370 }, { "epoch": 2.65045166015625e-05, "model_forward_time": 0.024713993072509766, "step": 17370 }, { "epoch": 2.65045166015625e-05, "step": 17370, "training_step_time": 0.11575579643249512 }, { "epoch": 2.650604248046875e-05, "model_forward_time": 0.02503657341003418, "step": 17371 }, { "epoch": 2.650604248046875e-05, "step": 17371, "training_step_time": 0.10616540908813477 }, { "epoch": 2.6507568359375e-05, "model_forward_time": 0.024625062942504883, "step": 17372 }, { "epoch": 2.6507568359375e-05, "step": 17372, "training_step_time": 0.10445523262023926 }, { "epoch": 2.650909423828125e-05, "model_forward_time": 0.025371074676513672, "step": 17373 }, { "epoch": 2.650909423828125e-05, "step": 17373, "training_step_time": 0.10759639739990234 }, { "epoch": 2.65106201171875e-05, "model_forward_time": 0.024988174438476562, "step": 17374 }, { "epoch": 2.65106201171875e-05, "step": 17374, "training_step_time": 0.12104511260986328 }, { "epoch": 2.651214599609375e-05, "model_forward_time": 0.0249483585357666, "step": 17375 }, { "epoch": 2.651214599609375e-05, "step": 17375, "training_step_time": 0.11311626434326172 }, { "epoch": 2.6513671875e-05, "model_forward_time": 0.02491021156311035, "step": 17376 }, { "epoch": 2.6513671875e-05, "step": 17376, "training_step_time": 0.11319541931152344 }, { "epoch": 2.651519775390625e-05, "model_forward_time": 0.02414679527282715, "step": 17377 }, { "epoch": 2.651519775390625e-05, "step": 17377, "training_step_time": 0.11836695671081543 }, { "epoch": 2.65167236328125e-05, "model_forward_time": 0.02394843101501465, "step": 17378 }, { "epoch": 2.65167236328125e-05, "step": 17378, "training_step_time": 0.11460113525390625 }, { "epoch": 2.651824951171875e-05, "model_forward_time": 0.023104190826416016, "step": 17379 }, { "epoch": 2.651824951171875e-05, "step": 17379, "training_step_time": 0.12064480781555176 }, { "epoch": 2.6519775390625e-05, "grad_norm": 0.22850698232650757, "learning_rate": 4.1064408238045994e-05, "loss": 0.0082, "step": 17380 }, { "epoch": 2.6519775390625e-05, "model_forward_time": 0.023877859115600586, "step": 17380 }, { "epoch": 2.6519775390625e-05, "step": 17380, "training_step_time": 0.13541173934936523 }, { "epoch": 2.652130126953125e-05, "model_forward_time": 0.024523496627807617, "step": 17381 }, { "epoch": 2.652130126953125e-05, "step": 17381, "training_step_time": 0.1353771686553955 }, { "epoch": 2.65228271484375e-05, "model_forward_time": 0.027658462524414062, "step": 17382 }, { "epoch": 2.65228271484375e-05, "step": 17382, "training_step_time": 0.11253714561462402 }, { "epoch": 2.652435302734375e-05, "model_forward_time": 0.024641036987304688, "step": 17383 }, { "epoch": 2.652435302734375e-05, "step": 17383, "training_step_time": 0.11119627952575684 }, { "epoch": 2.652587890625e-05, "model_forward_time": 0.024518728256225586, "step": 17384 }, { "epoch": 2.652587890625e-05, "step": 17384, "training_step_time": 0.11068272590637207 }, { "epoch": 2.652740478515625e-05, "model_forward_time": 0.024587154388427734, "step": 17385 }, { "epoch": 2.652740478515625e-05, "step": 17385, "training_step_time": 0.10877060890197754 }, { "epoch": 2.65289306640625e-05, "model_forward_time": 0.024904727935791016, "step": 17386 }, { "epoch": 2.65289306640625e-05, "step": 17386, "training_step_time": 0.19142818450927734 }, { "epoch": 2.653045654296875e-05, "model_forward_time": 0.02515101432800293, "step": 17387 }, { "epoch": 2.653045654296875e-05, "step": 17387, "training_step_time": 0.1065223217010498 }, { "epoch": 2.6531982421875e-05, "model_forward_time": 0.024884462356567383, "step": 17388 }, { "epoch": 2.6531982421875e-05, "step": 17388, "training_step_time": 0.1046895980834961 }, { "epoch": 2.653350830078125e-05, "model_forward_time": 0.024915695190429688, "step": 17389 }, { "epoch": 2.653350830078125e-05, "step": 17389, "training_step_time": 0.10771393775939941 }, { "epoch": 2.65350341796875e-05, "grad_norm": 0.11718787997961044, "learning_rate": 4.1010185298983984e-05, "loss": 0.0083, "step": 17390 }, { "epoch": 2.65350341796875e-05, "model_forward_time": 0.02546548843383789, "step": 17390 }, { "epoch": 2.65350341796875e-05, "step": 17390, "training_step_time": 0.10936498641967773 }, { "epoch": 2.653656005859375e-05, "model_forward_time": 0.024692773818969727, "step": 17391 }, { "epoch": 2.653656005859375e-05, "step": 17391, "training_step_time": 0.10518097877502441 }, { "epoch": 2.65380859375e-05, "model_forward_time": 0.02512192726135254, "step": 17392 }, { "epoch": 2.65380859375e-05, "step": 17392, "training_step_time": 0.10402536392211914 }, { "epoch": 2.653961181640625e-05, "model_forward_time": 0.02496051788330078, "step": 17393 }, { "epoch": 2.653961181640625e-05, "step": 17393, "training_step_time": 0.10575103759765625 }, { "epoch": 2.65411376953125e-05, "model_forward_time": 0.024969816207885742, "step": 17394 }, { "epoch": 2.65411376953125e-05, "step": 17394, "training_step_time": 0.10711193084716797 }, { "epoch": 2.654266357421875e-05, "model_forward_time": 0.025255918502807617, "step": 17395 }, { "epoch": 2.654266357421875e-05, "step": 17395, "training_step_time": 0.10687065124511719 }, { "epoch": 2.6544189453125e-05, "model_forward_time": 0.02545475959777832, "step": 17396 }, { "epoch": 2.6544189453125e-05, "step": 17396, "training_step_time": 0.10602188110351562 }, { "epoch": 2.654571533203125e-05, "model_forward_time": 0.024350404739379883, "step": 17397 }, { "epoch": 2.654571533203125e-05, "step": 17397, "training_step_time": 0.10664248466491699 }, { "epoch": 2.65472412109375e-05, "model_forward_time": 0.024203062057495117, "step": 17398 }, { "epoch": 2.65472412109375e-05, "step": 17398, "training_step_time": 0.11047029495239258 }, { "epoch": 2.654876708984375e-05, "model_forward_time": 0.024906635284423828, "step": 17399 }, { "epoch": 2.654876708984375e-05, "step": 17399, "training_step_time": 0.10651612281799316 }, { "epoch": 2.655029296875e-05, "grad_norm": 0.17764317989349365, "learning_rate": 4.095597328339452e-05, "loss": 0.0124, "step": 17400 }, { "epoch": 2.655029296875e-05, "model_forward_time": 0.024923324584960938, "step": 17400 }, { "epoch": 2.655029296875e-05, "step": 17400, "training_step_time": 0.10562252998352051 }, { "epoch": 2.655181884765625e-05, "model_forward_time": 0.024651765823364258, "step": 17401 }, { "epoch": 2.655181884765625e-05, "step": 17401, "training_step_time": 0.10401678085327148 }, { "epoch": 2.65533447265625e-05, "model_forward_time": 0.02510690689086914, "step": 17402 }, { "epoch": 2.65533447265625e-05, "step": 17402, "training_step_time": 0.10438370704650879 }, { "epoch": 2.655487060546875e-05, "model_forward_time": 0.02412271499633789, "step": 17403 }, { "epoch": 2.655487060546875e-05, "step": 17403, "training_step_time": 0.10577702522277832 }, { "epoch": 2.6556396484375e-05, "model_forward_time": 0.02506232261657715, "step": 17404 }, { "epoch": 2.6556396484375e-05, "step": 17404, "training_step_time": 0.10733413696289062 }, { "epoch": 2.655792236328125e-05, "model_forward_time": 0.024585247039794922, "step": 17405 }, { "epoch": 2.655792236328125e-05, "step": 17405, "training_step_time": 0.1061396598815918 }, { "epoch": 2.65594482421875e-05, "model_forward_time": 0.02439737319946289, "step": 17406 }, { "epoch": 2.65594482421875e-05, "step": 17406, "training_step_time": 0.10524320602416992 }, { "epoch": 2.656097412109375e-05, "model_forward_time": 0.025557994842529297, "step": 17407 }, { "epoch": 2.656097412109375e-05, "step": 17407, "training_step_time": 0.10546684265136719 }, { "epoch": 2.65625e-05, "model_forward_time": 0.02512073516845703, "step": 17408 }, { "epoch": 2.65625e-05, "step": 17408, "training_step_time": 0.10961747169494629 }, { "epoch": 2.656402587890625e-05, "model_forward_time": 0.025422334671020508, "step": 17409 }, { "epoch": 2.656402587890625e-05, "step": 17409, "training_step_time": 0.11741209030151367 }, { "epoch": 2.65655517578125e-05, "grad_norm": 0.2733633816242218, "learning_rate": 4.09017722571503e-05, "loss": 0.0054, "step": 17410 }, { "epoch": 2.65655517578125e-05, "model_forward_time": 0.024267196655273438, "step": 17410 }, { "epoch": 2.65655517578125e-05, "step": 17410, "training_step_time": 0.12797117233276367 }, { "epoch": 2.656707763671875e-05, "model_forward_time": 0.024196624755859375, "step": 17411 }, { "epoch": 2.656707763671875e-05, "step": 17411, "training_step_time": 0.16608357429504395 }, { "epoch": 2.6568603515625e-05, "model_forward_time": 0.024645090103149414, "step": 17412 }, { "epoch": 2.6568603515625e-05, "step": 17412, "training_step_time": 0.12074518203735352 }, { "epoch": 2.657012939453125e-05, "model_forward_time": 0.024386882781982422, "step": 17413 }, { "epoch": 2.657012939453125e-05, "step": 17413, "training_step_time": 0.2061324119567871 }, { "epoch": 2.65716552734375e-05, "model_forward_time": 0.0244748592376709, "step": 17414 }, { "epoch": 2.65716552734375e-05, "step": 17414, "training_step_time": 0.13633227348327637 }, { "epoch": 2.657318115234375e-05, "model_forward_time": 0.0245974063873291, "step": 17415 }, { "epoch": 2.657318115234375e-05, "step": 17415, "training_step_time": 0.12576532363891602 }, { "epoch": 2.657470703125e-05, "model_forward_time": 0.024877548217773438, "step": 17416 }, { "epoch": 2.657470703125e-05, "step": 17416, "training_step_time": 0.12431168556213379 }, { "epoch": 2.657623291015625e-05, "model_forward_time": 0.025235652923583984, "step": 17417 }, { "epoch": 2.657623291015625e-05, "step": 17417, "training_step_time": 0.11294889450073242 }, { "epoch": 2.65777587890625e-05, "model_forward_time": 0.025366783142089844, "step": 17418 }, { "epoch": 2.65777587890625e-05, "step": 17418, "training_step_time": 0.1235969066619873 }, { "epoch": 2.657928466796875e-05, "model_forward_time": 0.025411367416381836, "step": 17419 }, { "epoch": 2.657928466796875e-05, "step": 17419, "training_step_time": 0.1071782112121582 }, { "epoch": 2.6580810546875e-05, "grad_norm": 0.29529955983161926, "learning_rate": 4.08475822861107e-05, "loss": 0.0113, "step": 17420 }, { "epoch": 2.6580810546875e-05, "model_forward_time": 0.025249242782592773, "step": 17420 }, { "epoch": 2.6580810546875e-05, "step": 17420, "training_step_time": 0.10512804985046387 }, { "epoch": 2.658233642578125e-05, "model_forward_time": 0.025555849075317383, "step": 17421 }, { "epoch": 2.658233642578125e-05, "step": 17421, "training_step_time": 0.10470390319824219 }, { "epoch": 2.65838623046875e-05, "model_forward_time": 0.02538752555847168, "step": 17422 }, { "epoch": 2.65838623046875e-05, "step": 17422, "training_step_time": 0.10553383827209473 }, { "epoch": 2.658538818359375e-05, "model_forward_time": 0.02519059181213379, "step": 17423 }, { "epoch": 2.658538818359375e-05, "step": 17423, "training_step_time": 0.10537862777709961 }, { "epoch": 2.65869140625e-05, "model_forward_time": 0.025046110153198242, "step": 17424 }, { "epoch": 2.65869140625e-05, "step": 17424, "training_step_time": 0.10690665245056152 }, { "epoch": 2.658843994140625e-05, "model_forward_time": 0.025130510330200195, "step": 17425 }, { "epoch": 2.658843994140625e-05, "step": 17425, "training_step_time": 0.11019396781921387 }, { "epoch": 2.65899658203125e-05, "model_forward_time": 0.02534627914428711, "step": 17426 }, { "epoch": 2.65899658203125e-05, "step": 17426, "training_step_time": 0.10869908332824707 }, { "epoch": 2.659149169921875e-05, "model_forward_time": 0.025396108627319336, "step": 17427 }, { "epoch": 2.659149169921875e-05, "step": 17427, "training_step_time": 0.1078641414642334 }, { "epoch": 2.6593017578125e-05, "model_forward_time": 0.02506875991821289, "step": 17428 }, { "epoch": 2.6593017578125e-05, "step": 17428, "training_step_time": 0.1121985912322998 }, { "epoch": 2.659454345703125e-05, "model_forward_time": 0.025459766387939453, "step": 17429 }, { "epoch": 2.659454345703125e-05, "step": 17429, "training_step_time": 0.13109111785888672 }, { "epoch": 2.65960693359375e-05, "grad_norm": 0.20403897762298584, "learning_rate": 4.079340343612165e-05, "loss": 0.0067, "step": 17430 }, { "epoch": 2.65960693359375e-05, "model_forward_time": 0.024564743041992188, "step": 17430 }, { "epoch": 2.65960693359375e-05, "step": 17430, "training_step_time": 0.10887956619262695 }, { "epoch": 2.659759521484375e-05, "model_forward_time": 0.025285005569458008, "step": 17431 }, { "epoch": 2.659759521484375e-05, "step": 17431, "training_step_time": 0.1164555549621582 }, { "epoch": 2.659912109375e-05, "model_forward_time": 0.025060415267944336, "step": 17432 }, { "epoch": 2.659912109375e-05, "step": 17432, "training_step_time": 0.11455202102661133 }, { "epoch": 2.660064697265625e-05, "model_forward_time": 0.025921106338500977, "step": 17433 }, { "epoch": 2.660064697265625e-05, "step": 17433, "training_step_time": 0.11010456085205078 }, { "epoch": 2.66021728515625e-05, "model_forward_time": 0.025133132934570312, "step": 17434 }, { "epoch": 2.66021728515625e-05, "step": 17434, "training_step_time": 0.15939688682556152 }, { "epoch": 2.660369873046875e-05, "model_forward_time": 0.02482461929321289, "step": 17435 }, { "epoch": 2.660369873046875e-05, "step": 17435, "training_step_time": 0.10695528984069824 }, { "epoch": 2.6605224609375e-05, "model_forward_time": 0.024995088577270508, "step": 17436 }, { "epoch": 2.6605224609375e-05, "step": 17436, "training_step_time": 0.10377931594848633 }, { "epoch": 2.660675048828125e-05, "model_forward_time": 0.025388479232788086, "step": 17437 }, { "epoch": 2.660675048828125e-05, "step": 17437, "training_step_time": 0.10732412338256836 }, { "epoch": 2.66082763671875e-05, "model_forward_time": 0.025522708892822266, "step": 17438 }, { "epoch": 2.66082763671875e-05, "step": 17438, "training_step_time": 0.11180329322814941 }, { "epoch": 2.660980224609375e-05, "model_forward_time": 0.02534031867980957, "step": 17439 }, { "epoch": 2.660980224609375e-05, "step": 17439, "training_step_time": 0.10809898376464844 }, { "epoch": 2.6611328125e-05, "grad_norm": 0.23942086100578308, "learning_rate": 4.0739235773015536e-05, "loss": 0.0121, "step": 17440 }, { "epoch": 2.6611328125e-05, "model_forward_time": 0.02501082420349121, "step": 17440 }, { "epoch": 2.6611328125e-05, "step": 17440, "training_step_time": 0.10644102096557617 }, { "epoch": 2.661285400390625e-05, "model_forward_time": 0.02586650848388672, "step": 17441 }, { "epoch": 2.661285400390625e-05, "step": 17441, "training_step_time": 0.10570383071899414 }, { "epoch": 2.66143798828125e-05, "model_forward_time": 0.025304794311523438, "step": 17442 }, { "epoch": 2.66143798828125e-05, "step": 17442, "training_step_time": 0.10619282722473145 }, { "epoch": 2.661590576171875e-05, "model_forward_time": 0.02513742446899414, "step": 17443 }, { "epoch": 2.661590576171875e-05, "step": 17443, "training_step_time": 0.1076653003692627 }, { "epoch": 2.6617431640625e-05, "model_forward_time": 0.025265216827392578, "step": 17444 }, { "epoch": 2.6617431640625e-05, "step": 17444, "training_step_time": 0.1077871322631836 }, { "epoch": 2.661895751953125e-05, "model_forward_time": 0.025287151336669922, "step": 17445 }, { "epoch": 2.661895751953125e-05, "step": 17445, "training_step_time": 0.1093447208404541 }, { "epoch": 2.66204833984375e-05, "model_forward_time": 0.025706052780151367, "step": 17446 }, { "epoch": 2.66204833984375e-05, "step": 17446, "training_step_time": 0.10851263999938965 }, { "epoch": 2.662200927734375e-05, "model_forward_time": 0.02530837059020996, "step": 17447 }, { "epoch": 2.662200927734375e-05, "step": 17447, "training_step_time": 0.10523533821105957 }, { "epoch": 2.662353515625e-05, "model_forward_time": 0.025644779205322266, "step": 17448 }, { "epoch": 2.662353515625e-05, "step": 17448, "training_step_time": 0.10475492477416992 }, { "epoch": 2.662506103515625e-05, "model_forward_time": 0.025245189666748047, "step": 17449 }, { "epoch": 2.662506103515625e-05, "step": 17449, "training_step_time": 0.1042928695678711 }, { "epoch": 2.66265869140625e-05, "grad_norm": 0.2432943880558014, "learning_rate": 4.0685079362611204e-05, "loss": 0.0105, "step": 17450 }, { "epoch": 2.66265869140625e-05, "model_forward_time": 0.02544236183166504, "step": 17450 }, { "epoch": 2.66265869140625e-05, "step": 17450, "training_step_time": 0.10440278053283691 }, { "epoch": 2.662811279296875e-05, "model_forward_time": 0.02541828155517578, "step": 17451 }, { "epoch": 2.662811279296875e-05, "step": 17451, "training_step_time": 0.1040029525756836 }, { "epoch": 2.6629638671875e-05, "model_forward_time": 0.02515244483947754, "step": 17452 }, { "epoch": 2.6629638671875e-05, "step": 17452, "training_step_time": 0.10409808158874512 }, { "epoch": 2.663116455078125e-05, "model_forward_time": 0.025072574615478516, "step": 17453 }, { "epoch": 2.663116455078125e-05, "step": 17453, "training_step_time": 0.10724830627441406 }, { "epoch": 2.66326904296875e-05, "model_forward_time": 0.02482318878173828, "step": 17454 }, { "epoch": 2.66326904296875e-05, "step": 17454, "training_step_time": 0.10903453826904297 }, { "epoch": 2.663421630859375e-05, "model_forward_time": 0.02501225471496582, "step": 17455 }, { "epoch": 2.663421630859375e-05, "step": 17455, "training_step_time": 0.10596799850463867 }, { "epoch": 2.66357421875e-05, "model_forward_time": 0.025432109832763672, "step": 17456 }, { "epoch": 2.66357421875e-05, "step": 17456, "training_step_time": 0.10452413558959961 }, { "epoch": 2.663726806640625e-05, "model_forward_time": 0.024520397186279297, "step": 17457 }, { "epoch": 2.663726806640625e-05, "step": 17457, "training_step_time": 0.1013803482055664 }, { "epoch": 2.66387939453125e-05, "model_forward_time": 0.0249025821685791, "step": 17458 }, { "epoch": 2.66387939453125e-05, "step": 17458, "training_step_time": 0.1785118579864502 }, { "epoch": 2.664031982421875e-05, "model_forward_time": 0.024979591369628906, "step": 17459 }, { "epoch": 2.664031982421875e-05, "step": 17459, "training_step_time": 0.13813138008117676 }, { "epoch": 2.6641845703125e-05, "grad_norm": 0.22207190096378326, "learning_rate": 4.063093427071376e-05, "loss": 0.008, "step": 17460 }, { "epoch": 2.6641845703125e-05, "model_forward_time": 0.02448582649230957, "step": 17460 }, { "epoch": 2.6641845703125e-05, "step": 17460, "training_step_time": 0.2078540325164795 }, { "epoch": 2.664337158203125e-05, "model_forward_time": 0.024660587310791016, "step": 17461 }, { "epoch": 2.664337158203125e-05, "step": 17461, "training_step_time": 0.18691492080688477 }, { "epoch": 2.66448974609375e-05, "model_forward_time": 0.02429938316345215, "step": 17462 }, { "epoch": 2.66448974609375e-05, "step": 17462, "training_step_time": 0.11313176155090332 }, { "epoch": 2.664642333984375e-05, "model_forward_time": 0.024513959884643555, "step": 17463 }, { "epoch": 2.664642333984375e-05, "step": 17463, "training_step_time": 0.1282343864440918 }, { "epoch": 2.664794921875e-05, "model_forward_time": 0.02527642250061035, "step": 17464 }, { "epoch": 2.664794921875e-05, "step": 17464, "training_step_time": 0.13495659828186035 }, { "epoch": 2.664947509765625e-05, "model_forward_time": 0.024925947189331055, "step": 17465 }, { "epoch": 2.664947509765625e-05, "step": 17465, "training_step_time": 0.11701369285583496 }, { "epoch": 2.66510009765625e-05, "model_forward_time": 0.024776458740234375, "step": 17466 }, { "epoch": 2.66510009765625e-05, "step": 17466, "training_step_time": 0.116668701171875 }, { "epoch": 2.665252685546875e-05, "model_forward_time": 0.02521371841430664, "step": 17467 }, { "epoch": 2.665252685546875e-05, "step": 17467, "training_step_time": 0.11075782775878906 }, { "epoch": 2.6654052734375e-05, "model_forward_time": 0.02500748634338379, "step": 17468 }, { "epoch": 2.6654052734375e-05, "step": 17468, "training_step_time": 0.10521435737609863 }, { "epoch": 2.665557861328125e-05, "model_forward_time": 0.024941205978393555, "step": 17469 }, { "epoch": 2.665557861328125e-05, "step": 17469, "training_step_time": 0.1032874584197998 }, { "epoch": 2.66571044921875e-05, "grad_norm": 0.15203389525413513, "learning_rate": 4.0576800563114646e-05, "loss": 0.0078, "step": 17470 }, { "epoch": 2.66571044921875e-05, "model_forward_time": 0.025072574615478516, "step": 17470 }, { "epoch": 2.66571044921875e-05, "step": 17470, "training_step_time": 0.10580253601074219 }, { "epoch": 2.665863037109375e-05, "model_forward_time": 0.025432586669921875, "step": 17471 }, { "epoch": 2.665863037109375e-05, "step": 17471, "training_step_time": 0.10408854484558105 }, { "epoch": 2.666015625e-05, "model_forward_time": 0.025377273559570312, "step": 17472 }, { "epoch": 2.666015625e-05, "step": 17472, "training_step_time": 0.10570120811462402 }, { "epoch": 2.666168212890625e-05, "model_forward_time": 0.025342464447021484, "step": 17473 }, { "epoch": 2.666168212890625e-05, "step": 17473, "training_step_time": 0.11157059669494629 }, { "epoch": 2.66632080078125e-05, "model_forward_time": 0.025072574615478516, "step": 17474 }, { "epoch": 2.66632080078125e-05, "step": 17474, "training_step_time": 0.11541461944580078 }, { "epoch": 2.666473388671875e-05, "model_forward_time": 0.02510356903076172, "step": 17475 }, { "epoch": 2.666473388671875e-05, "step": 17475, "training_step_time": 0.1733393669128418 }, { "epoch": 2.6666259765625e-05, "model_forward_time": 0.024592876434326172, "step": 17476 }, { "epoch": 2.6666259765625e-05, "step": 17476, "training_step_time": 0.140547513961792 }, { "epoch": 2.666778564453125e-05, "model_forward_time": 0.02441239356994629, "step": 17477 }, { "epoch": 2.666778564453125e-05, "step": 17477, "training_step_time": 0.10975837707519531 }, { "epoch": 2.66693115234375e-05, "model_forward_time": 0.025066852569580078, "step": 17478 }, { "epoch": 2.66693115234375e-05, "step": 17478, "training_step_time": 0.12679553031921387 }, { "epoch": 2.667083740234375e-05, "model_forward_time": 0.025180339813232422, "step": 17479 }, { "epoch": 2.667083740234375e-05, "step": 17479, "training_step_time": 0.11168718338012695 }, { "epoch": 2.667236328125e-05, "grad_norm": 0.22342152893543243, "learning_rate": 4.05226783055914e-05, "loss": 0.0106, "step": 17480 }, { "epoch": 2.667236328125e-05, "model_forward_time": 0.025020837783813477, "step": 17480 }, { "epoch": 2.667236328125e-05, "step": 17480, "training_step_time": 0.10591530799865723 }, { "epoch": 2.667388916015625e-05, "model_forward_time": 0.024991989135742188, "step": 17481 }, { "epoch": 2.667388916015625e-05, "step": 17481, "training_step_time": 0.13350939750671387 }, { "epoch": 2.66754150390625e-05, "model_forward_time": 0.02670145034790039, "step": 17482 }, { "epoch": 2.66754150390625e-05, "step": 17482, "training_step_time": 0.11898279190063477 }, { "epoch": 2.667694091796875e-05, "model_forward_time": 0.027080059051513672, "step": 17483 }, { "epoch": 2.667694091796875e-05, "step": 17483, "training_step_time": 0.10708332061767578 }, { "epoch": 2.6678466796875e-05, "model_forward_time": 0.024899959564208984, "step": 17484 }, { "epoch": 2.6678466796875e-05, "step": 17484, "training_step_time": 0.10762834548950195 }, { "epoch": 2.667999267578125e-05, "model_forward_time": 0.02585768699645996, "step": 17485 }, { "epoch": 2.667999267578125e-05, "step": 17485, "training_step_time": 0.10704922676086426 }, { "epoch": 2.66815185546875e-05, "model_forward_time": 0.027258872985839844, "step": 17486 }, { "epoch": 2.66815185546875e-05, "step": 17486, "training_step_time": 0.10779976844787598 }, { "epoch": 2.668304443359375e-05, "model_forward_time": 0.02631545066833496, "step": 17487 }, { "epoch": 2.668304443359375e-05, "step": 17487, "training_step_time": 0.10582447052001953 }, { "epoch": 2.66845703125e-05, "model_forward_time": 0.026203393936157227, "step": 17488 }, { "epoch": 2.66845703125e-05, "step": 17488, "training_step_time": 0.10605001449584961 }, { "epoch": 2.668609619140625e-05, "model_forward_time": 0.025286436080932617, "step": 17489 }, { "epoch": 2.668609619140625e-05, "step": 17489, "training_step_time": 0.10620832443237305 }, { "epoch": 2.66876220703125e-05, "grad_norm": 0.24360807240009308, "learning_rate": 4.046856756390767e-05, "loss": 0.0087, "step": 17490 }, { "epoch": 2.66876220703125e-05, "model_forward_time": 0.025455713272094727, "step": 17490 }, { "epoch": 2.66876220703125e-05, "step": 17490, "training_step_time": 0.10508847236633301 }, { "epoch": 2.668914794921875e-05, "model_forward_time": 0.02489781379699707, "step": 17491 }, { "epoch": 2.668914794921875e-05, "step": 17491, "training_step_time": 0.10788583755493164 }, { "epoch": 2.6690673828125e-05, "model_forward_time": 0.025566577911376953, "step": 17492 }, { "epoch": 2.6690673828125e-05, "step": 17492, "training_step_time": 0.10794830322265625 }, { "epoch": 2.669219970703125e-05, "model_forward_time": 0.025432348251342773, "step": 17493 }, { "epoch": 2.669219970703125e-05, "step": 17493, "training_step_time": 0.10476803779602051 }, { "epoch": 2.66937255859375e-05, "model_forward_time": 0.025760173797607422, "step": 17494 }, { "epoch": 2.66937255859375e-05, "step": 17494, "training_step_time": 0.10517549514770508 }, { "epoch": 2.669525146484375e-05, "model_forward_time": 0.025211095809936523, "step": 17495 }, { "epoch": 2.669525146484375e-05, "step": 17495, "training_step_time": 0.1053609848022461 }, { "epoch": 2.669677734375e-05, "model_forward_time": 0.025086164474487305, "step": 17496 }, { "epoch": 2.669677734375e-05, "step": 17496, "training_step_time": 0.1063849925994873 }, { "epoch": 2.669830322265625e-05, "model_forward_time": 0.025098800659179688, "step": 17497 }, { "epoch": 2.669830322265625e-05, "step": 17497, "training_step_time": 0.10855412483215332 }, { "epoch": 2.66998291015625e-05, "model_forward_time": 0.02516770362854004, "step": 17498 }, { "epoch": 2.66998291015625e-05, "step": 17498, "training_step_time": 0.11717343330383301 }, { "epoch": 2.670135498046875e-05, "model_forward_time": 0.025408267974853516, "step": 17499 }, { "epoch": 2.670135498046875e-05, "step": 17499, "training_step_time": 0.19134831428527832 }, { "epoch": 2.6702880859375e-05, "grad_norm": 0.20986664295196533, "learning_rate": 4.0414468403813095e-05, "loss": 0.0069, "step": 17500 }, { "epoch": 2.6702880859375e-05, "model_forward_time": 0.024262428283691406, "step": 17500 }, { "epoch": 2.6702880859375e-05, "step": 17500, "training_step_time": 0.196730375289917 }, { "epoch": 2.670440673828125e-05, "model_forward_time": 0.023336410522460938, "step": 17501 }, { "epoch": 2.670440673828125e-05, "step": 17501, "training_step_time": 0.19736576080322266 }, { "epoch": 2.67059326171875e-05, "model_forward_time": 0.02443408966064453, "step": 17502 }, { "epoch": 2.67059326171875e-05, "step": 17502, "training_step_time": 0.1837158203125 }, { "epoch": 2.670745849609375e-05, "model_forward_time": 0.024596691131591797, "step": 17503 }, { "epoch": 2.670745849609375e-05, "step": 17503, "training_step_time": 0.20628118515014648 }, { "epoch": 2.6708984375e-05, "model_forward_time": 0.02451777458190918, "step": 17504 }, { "epoch": 2.6708984375e-05, "step": 17504, "training_step_time": 0.1834108829498291 }, { "epoch": 2.671051025390625e-05, "model_forward_time": 0.0247647762298584, "step": 17505 }, { "epoch": 2.671051025390625e-05, "step": 17505, "training_step_time": 0.22809576988220215 }, { "epoch": 2.67120361328125e-05, "model_forward_time": 0.024686813354492188, "step": 17506 }, { "epoch": 2.67120361328125e-05, "step": 17506, "training_step_time": 0.2069075107574463 }, { "epoch": 2.671356201171875e-05, "model_forward_time": 0.024651765823364258, "step": 17507 }, { "epoch": 2.671356201171875e-05, "step": 17507, "training_step_time": 0.17825555801391602 }, { "epoch": 2.6715087890625e-05, "model_forward_time": 0.02429962158203125, "step": 17508 }, { "epoch": 2.6715087890625e-05, "step": 17508, "training_step_time": 0.12929630279541016 }, { "epoch": 2.671661376953125e-05, "model_forward_time": 0.026206016540527344, "step": 17509 }, { "epoch": 2.671661376953125e-05, "step": 17509, "training_step_time": 0.11239290237426758 }, { "epoch": 2.67181396484375e-05, "grad_norm": 0.1403159350156784, "learning_rate": 4.036038089104326e-05, "loss": 0.0118, "step": 17510 }, { "epoch": 2.67181396484375e-05, "model_forward_time": 0.024848222732543945, "step": 17510 }, { "epoch": 2.67181396484375e-05, "step": 17510, "training_step_time": 0.12391138076782227 }, { "epoch": 2.671966552734375e-05, "model_forward_time": 0.024845600128173828, "step": 17511 }, { "epoch": 2.671966552734375e-05, "step": 17511, "training_step_time": 0.10621500015258789 }, { "epoch": 2.672119140625e-05, "model_forward_time": 0.025402069091796875, "step": 17512 }, { "epoch": 2.672119140625e-05, "step": 17512, "training_step_time": 0.10650277137756348 }, { "epoch": 2.672271728515625e-05, "model_forward_time": 0.02516031265258789, "step": 17513 }, { "epoch": 2.672271728515625e-05, "step": 17513, "training_step_time": 0.10428094863891602 }, { "epoch": 2.67242431640625e-05, "model_forward_time": 0.025116682052612305, "step": 17514 }, { "epoch": 2.67242431640625e-05, "step": 17514, "training_step_time": 0.10645437240600586 }, { "epoch": 2.672576904296875e-05, "model_forward_time": 0.0245821475982666, "step": 17515 }, { "epoch": 2.672576904296875e-05, "step": 17515, "training_step_time": 0.10923647880554199 }, { "epoch": 2.6727294921875e-05, "model_forward_time": 0.025254249572753906, "step": 17516 }, { "epoch": 2.6727294921875e-05, "step": 17516, "training_step_time": 0.11212587356567383 }, { "epoch": 2.672882080078125e-05, "model_forward_time": 0.027120590209960938, "step": 17517 }, { "epoch": 2.672882080078125e-05, "step": 17517, "training_step_time": 0.11363744735717773 }, { "epoch": 2.67303466796875e-05, "model_forward_time": 0.024886131286621094, "step": 17518 }, { "epoch": 2.67303466796875e-05, "step": 17518, "training_step_time": 0.11769413948059082 }, { "epoch": 2.673187255859375e-05, "model_forward_time": 0.025722265243530273, "step": 17519 }, { "epoch": 2.673187255859375e-05, "step": 17519, "training_step_time": 0.11069297790527344 }, { "epoch": 2.67333984375e-05, "grad_norm": 0.1784925013780594, "learning_rate": 4.0306305091319595e-05, "loss": 0.0101, "step": 17520 }, { "epoch": 2.67333984375e-05, "model_forward_time": 0.025279998779296875, "step": 17520 }, { "epoch": 2.67333984375e-05, "step": 17520, "training_step_time": 0.11763215065002441 }, { "epoch": 2.673492431640625e-05, "model_forward_time": 0.02506279945373535, "step": 17521 }, { "epoch": 2.673492431640625e-05, "step": 17521, "training_step_time": 0.11384463310241699 }, { "epoch": 2.67364501953125e-05, "model_forward_time": 0.02523350715637207, "step": 17522 }, { "epoch": 2.67364501953125e-05, "step": 17522, "training_step_time": 0.10673141479492188 }, { "epoch": 2.673797607421875e-05, "model_forward_time": 0.02489018440246582, "step": 17523 }, { "epoch": 2.673797607421875e-05, "step": 17523, "training_step_time": 0.10979938507080078 }, { "epoch": 2.6739501953125e-05, "model_forward_time": 0.025228023529052734, "step": 17524 }, { "epoch": 2.6739501953125e-05, "step": 17524, "training_step_time": 0.1249995231628418 }, { "epoch": 2.674102783203125e-05, "model_forward_time": 0.025093555450439453, "step": 17525 }, { "epoch": 2.674102783203125e-05, "step": 17525, "training_step_time": 0.13387060165405273 }, { "epoch": 2.67425537109375e-05, "model_forward_time": 0.024849653244018555, "step": 17526 }, { "epoch": 2.67425537109375e-05, "step": 17526, "training_step_time": 0.11285543441772461 }, { "epoch": 2.674407958984375e-05, "model_forward_time": 0.024337053298950195, "step": 17527 }, { "epoch": 2.674407958984375e-05, "step": 17527, "training_step_time": 0.11037778854370117 }, { "epoch": 2.674560546875e-05, "model_forward_time": 0.024193286895751953, "step": 17528 }, { "epoch": 2.674560546875e-05, "step": 17528, "training_step_time": 0.10362815856933594 }, { "epoch": 2.674713134765625e-05, "model_forward_time": 0.024202823638916016, "step": 17529 }, { "epoch": 2.674713134765625e-05, "step": 17529, "training_step_time": 0.10526299476623535 }, { "epoch": 2.67486572265625e-05, "grad_norm": 0.17740324139595032, "learning_rate": 4.0252241070349304e-05, "loss": 0.0093, "step": 17530 }, { "epoch": 2.67486572265625e-05, "model_forward_time": 0.025275230407714844, "step": 17530 }, { "epoch": 2.67486572265625e-05, "step": 17530, "training_step_time": 0.10555815696716309 }, { "epoch": 2.675018310546875e-05, "model_forward_time": 0.024858474731445312, "step": 17531 }, { "epoch": 2.675018310546875e-05, "step": 17531, "training_step_time": 0.1067655086517334 }, { "epoch": 2.6751708984375e-05, "model_forward_time": 0.025211572647094727, "step": 17532 }, { "epoch": 2.6751708984375e-05, "step": 17532, "training_step_time": 0.10576438903808594 }, { "epoch": 2.675323486328125e-05, "model_forward_time": 0.02503800392150879, "step": 17533 }, { "epoch": 2.675323486328125e-05, "step": 17533, "training_step_time": 0.10522794723510742 }, { "epoch": 2.67547607421875e-05, "model_forward_time": 0.024678945541381836, "step": 17534 }, { "epoch": 2.67547607421875e-05, "step": 17534, "training_step_time": 0.10391926765441895 }, { "epoch": 2.675628662109375e-05, "model_forward_time": 0.027218103408813477, "step": 17535 }, { "epoch": 2.675628662109375e-05, "step": 17535, "training_step_time": 0.10737776756286621 }, { "epoch": 2.67578125e-05, "model_forward_time": 0.025104761123657227, "step": 17536 }, { "epoch": 2.67578125e-05, "step": 17536, "training_step_time": 0.10677170753479004 }, { "epoch": 2.675933837890625e-05, "model_forward_time": 0.02547287940979004, "step": 17537 }, { "epoch": 2.675933837890625e-05, "step": 17537, "training_step_time": 0.10755491256713867 }, { "epoch": 2.67608642578125e-05, "model_forward_time": 0.025130033493041992, "step": 17538 }, { "epoch": 2.67608642578125e-05, "step": 17538, "training_step_time": 0.10477066040039062 }, { "epoch": 2.676239013671875e-05, "model_forward_time": 0.025388479232788086, "step": 17539 }, { "epoch": 2.676239013671875e-05, "step": 17539, "training_step_time": 0.10525965690612793 }, { "epoch": 2.6763916015625e-05, "grad_norm": 0.21201591193675995, "learning_rate": 4.019818889382528e-05, "loss": 0.0169, "step": 17540 }, { "epoch": 2.6763916015625e-05, "model_forward_time": 0.02529597282409668, "step": 17540 }, { "epoch": 2.6763916015625e-05, "step": 17540, "training_step_time": 0.1049807071685791 }, { "epoch": 2.676544189453125e-05, "model_forward_time": 0.025216341018676758, "step": 17541 }, { "epoch": 2.676544189453125e-05, "step": 17541, "training_step_time": 0.10723400115966797 }, { "epoch": 2.67669677734375e-05, "model_forward_time": 0.02614307403564453, "step": 17542 }, { "epoch": 2.67669677734375e-05, "step": 17542, "training_step_time": 0.10492801666259766 }, { "epoch": 2.676849365234375e-05, "model_forward_time": 0.025014638900756836, "step": 17543 }, { "epoch": 2.676849365234375e-05, "step": 17543, "training_step_time": 0.1089332103729248 }, { "epoch": 2.677001953125e-05, "model_forward_time": 0.025524616241455078, "step": 17544 }, { "epoch": 2.677001953125e-05, "step": 17544, "training_step_time": 0.10503506660461426 }, { "epoch": 2.677154541015625e-05, "model_forward_time": 0.025252342224121094, "step": 17545 }, { "epoch": 2.677154541015625e-05, "step": 17545, "training_step_time": 0.10491299629211426 }, { "epoch": 2.67730712890625e-05, "model_forward_time": 0.025172948837280273, "step": 17546 }, { "epoch": 2.67730712890625e-05, "step": 17546, "training_step_time": 0.10589790344238281 }, { "epoch": 2.677459716796875e-05, "model_forward_time": 0.02561783790588379, "step": 17547 }, { "epoch": 2.677459716796875e-05, "step": 17547, "training_step_time": 0.10440325736999512 }, { "epoch": 2.6776123046875e-05, "model_forward_time": 0.02538609504699707, "step": 17548 }, { "epoch": 2.6776123046875e-05, "step": 17548, "training_step_time": 0.16729021072387695 }, { "epoch": 2.677764892578125e-05, "model_forward_time": 0.024501323699951172, "step": 17549 }, { "epoch": 2.677764892578125e-05, "step": 17549, "training_step_time": 0.10737895965576172 }, { "epoch": 2.67791748046875e-05, "grad_norm": 0.24493327736854553, "learning_rate": 4.0144148627425993e-05, "loss": 0.0097, "step": 17550 }, { "epoch": 2.67791748046875e-05, "model_forward_time": 0.025326013565063477, "step": 17550 }, { "epoch": 2.67791748046875e-05, "step": 17550, "training_step_time": 0.10544729232788086 }, { "epoch": 2.678070068359375e-05, "model_forward_time": 0.024802207946777344, "step": 17551 }, { "epoch": 2.678070068359375e-05, "step": 17551, "training_step_time": 0.17994952201843262 }, { "epoch": 2.67822265625e-05, "model_forward_time": 0.02500462532043457, "step": 17552 }, { "epoch": 2.67822265625e-05, "step": 17552, "training_step_time": 0.1630702018737793 }, { "epoch": 2.678375244140625e-05, "model_forward_time": 0.024752140045166016, "step": 17553 }, { "epoch": 2.678375244140625e-05, "step": 17553, "training_step_time": 0.20124530792236328 }, { "epoch": 2.67852783203125e-05, "model_forward_time": 0.024921178817749023, "step": 17554 }, { "epoch": 2.67852783203125e-05, "step": 17554, "training_step_time": 0.12817096710205078 }, { "epoch": 2.678680419921875e-05, "model_forward_time": 0.02462625503540039, "step": 17555 }, { "epoch": 2.678680419921875e-05, "step": 17555, "training_step_time": 0.12283825874328613 }, { "epoch": 2.6788330078125e-05, "model_forward_time": 0.02425384521484375, "step": 17556 }, { "epoch": 2.6788330078125e-05, "step": 17556, "training_step_time": 0.2152097225189209 }, { "epoch": 2.678985595703125e-05, "model_forward_time": 0.02482128143310547, "step": 17557 }, { "epoch": 2.678985595703125e-05, "step": 17557, "training_step_time": 0.12040305137634277 }, { "epoch": 2.67913818359375e-05, "model_forward_time": 0.02370452880859375, "step": 17558 }, { "epoch": 2.67913818359375e-05, "step": 17558, "training_step_time": 0.11918973922729492 }, { "epoch": 2.679290771484375e-05, "model_forward_time": 0.02402472496032715, "step": 17559 }, { "epoch": 2.679290771484375e-05, "step": 17559, "training_step_time": 0.11201024055480957 }, { "epoch": 2.679443359375e-05, "grad_norm": 0.31486862897872925, "learning_rate": 4.0090120336815474e-05, "loss": 0.0214, "step": 17560 }, { "epoch": 2.679443359375e-05, "model_forward_time": 0.024424314498901367, "step": 17560 }, { "epoch": 2.679443359375e-05, "step": 17560, "training_step_time": 0.10986781120300293 }, { "epoch": 2.679595947265625e-05, "model_forward_time": 0.024964332580566406, "step": 17561 }, { "epoch": 2.679595947265625e-05, "step": 17561, "training_step_time": 0.10563397407531738 }, { "epoch": 2.67974853515625e-05, "model_forward_time": 0.025179386138916016, "step": 17562 }, { "epoch": 2.67974853515625e-05, "step": 17562, "training_step_time": 0.10660910606384277 }, { "epoch": 2.679901123046875e-05, "model_forward_time": 0.024835586547851562, "step": 17563 }, { "epoch": 2.679901123046875e-05, "step": 17563, "training_step_time": 0.1060636043548584 }, { "epoch": 2.6800537109375e-05, "model_forward_time": 0.024786949157714844, "step": 17564 }, { "epoch": 2.6800537109375e-05, "step": 17564, "training_step_time": 0.10484719276428223 }, { "epoch": 2.680206298828125e-05, "model_forward_time": 0.024945735931396484, "step": 17565 }, { "epoch": 2.680206298828125e-05, "step": 17565, "training_step_time": 0.11204814910888672 }, { "epoch": 2.68035888671875e-05, "model_forward_time": 0.025630712509155273, "step": 17566 }, { "epoch": 2.68035888671875e-05, "step": 17566, "training_step_time": 0.10729765892028809 }, { "epoch": 2.680511474609375e-05, "model_forward_time": 0.026267290115356445, "step": 17567 }, { "epoch": 2.680511474609375e-05, "step": 17567, "training_step_time": 0.10755467414855957 }, { "epoch": 2.6806640625e-05, "model_forward_time": 0.02503061294555664, "step": 17568 }, { "epoch": 2.6806640625e-05, "step": 17568, "training_step_time": 0.13031625747680664 }, { "epoch": 2.680816650390625e-05, "model_forward_time": 0.025171518325805664, "step": 17569 }, { "epoch": 2.680816650390625e-05, "step": 17569, "training_step_time": 0.1303567886352539 }, { "epoch": 2.68096923828125e-05, "grad_norm": 0.24160060286521912, "learning_rate": 4.003610408764317e-05, "loss": 0.0177, "step": 17570 }, { "epoch": 2.68096923828125e-05, "model_forward_time": 0.024194955825805664, "step": 17570 }, { "epoch": 2.68096923828125e-05, "step": 17570, "training_step_time": 0.10577630996704102 }, { "epoch": 2.681121826171875e-05, "model_forward_time": 0.02499222755432129, "step": 17571 }, { "epoch": 2.681121826171875e-05, "step": 17571, "training_step_time": 0.1332862377166748 }, { "epoch": 2.6812744140625e-05, "model_forward_time": 0.02595996856689453, "step": 17572 }, { "epoch": 2.6812744140625e-05, "step": 17572, "training_step_time": 0.11815023422241211 }, { "epoch": 2.681427001953125e-05, "model_forward_time": 0.025684118270874023, "step": 17573 }, { "epoch": 2.681427001953125e-05, "step": 17573, "training_step_time": 0.1052243709564209 }, { "epoch": 2.68157958984375e-05, "model_forward_time": 0.02522444725036621, "step": 17574 }, { "epoch": 2.68157958984375e-05, "step": 17574, "training_step_time": 0.12038278579711914 }, { "epoch": 2.681732177734375e-05, "model_forward_time": 0.025427579879760742, "step": 17575 }, { "epoch": 2.681732177734375e-05, "step": 17575, "training_step_time": 0.1071779727935791 }, { "epoch": 2.681884765625e-05, "model_forward_time": 0.024938106536865234, "step": 17576 }, { "epoch": 2.681884765625e-05, "step": 17576, "training_step_time": 0.10592961311340332 }, { "epoch": 2.682037353515625e-05, "model_forward_time": 0.02560138702392578, "step": 17577 }, { "epoch": 2.682037353515625e-05, "step": 17577, "training_step_time": 0.10559773445129395 }, { "epoch": 2.68218994140625e-05, "model_forward_time": 0.02500319480895996, "step": 17578 }, { "epoch": 2.68218994140625e-05, "step": 17578, "training_step_time": 0.10771727561950684 }, { "epoch": 2.682342529296875e-05, "model_forward_time": 0.024972200393676758, "step": 17579 }, { "epoch": 2.682342529296875e-05, "step": 17579, "training_step_time": 0.10482573509216309 }, { "epoch": 2.6824951171875e-05, "grad_norm": 0.4323936700820923, "learning_rate": 3.9982099945543945e-05, "loss": 0.0127, "step": 17580 }, { "epoch": 2.6824951171875e-05, "model_forward_time": 0.025233745574951172, "step": 17580 }, { "epoch": 2.6824951171875e-05, "step": 17580, "training_step_time": 0.10559391975402832 }, { "epoch": 2.682647705078125e-05, "model_forward_time": 0.025292396545410156, "step": 17581 }, { "epoch": 2.682647705078125e-05, "step": 17581, "training_step_time": 0.11092042922973633 }, { "epoch": 2.68280029296875e-05, "model_forward_time": 0.025029659271240234, "step": 17582 }, { "epoch": 2.68280029296875e-05, "step": 17582, "training_step_time": 0.11712217330932617 }, { "epoch": 2.682952880859375e-05, "model_forward_time": 0.025607824325561523, "step": 17583 }, { "epoch": 2.682952880859375e-05, "step": 17583, "training_step_time": 0.13639211654663086 }, { "epoch": 2.68310546875e-05, "model_forward_time": 0.0238802433013916, "step": 17584 }, { "epoch": 2.68310546875e-05, "step": 17584, "training_step_time": 0.13219332695007324 }, { "epoch": 2.683258056640625e-05, "model_forward_time": 0.0234832763671875, "step": 17585 }, { "epoch": 2.683258056640625e-05, "step": 17585, "training_step_time": 0.12552380561828613 }, { "epoch": 2.68341064453125e-05, "model_forward_time": 0.023898601531982422, "step": 17586 }, { "epoch": 2.68341064453125e-05, "step": 17586, "training_step_time": 0.11919856071472168 }, { "epoch": 2.683563232421875e-05, "model_forward_time": 0.025143861770629883, "step": 17587 }, { "epoch": 2.683563232421875e-05, "step": 17587, "training_step_time": 0.11458563804626465 }, { "epoch": 2.6837158203125e-05, "model_forward_time": 0.025059938430786133, "step": 17588 }, { "epoch": 2.6837158203125e-05, "step": 17588, "training_step_time": 0.11882805824279785 }, { "epoch": 2.683868408203125e-05, "model_forward_time": 0.028535842895507812, "step": 17589 }, { "epoch": 2.683868408203125e-05, "step": 17589, "training_step_time": 0.11333632469177246 }, { "epoch": 2.68402099609375e-05, "grad_norm": 0.16022445261478424, "learning_rate": 3.9928107976137906e-05, "loss": 0.0159, "step": 17590 }, { "epoch": 2.68402099609375e-05, "model_forward_time": 0.02550792694091797, "step": 17590 }, { "epoch": 2.68402099609375e-05, "step": 17590, "training_step_time": 0.11345887184143066 }, { "epoch": 2.684173583984375e-05, "model_forward_time": 0.025355100631713867, "step": 17591 }, { "epoch": 2.684173583984375e-05, "step": 17591, "training_step_time": 0.10816287994384766 }, { "epoch": 2.684326171875e-05, "model_forward_time": 0.02538919448852539, "step": 17592 }, { "epoch": 2.684326171875e-05, "step": 17592, "training_step_time": 0.10787701606750488 }, { "epoch": 2.684478759765625e-05, "model_forward_time": 0.024888038635253906, "step": 17593 }, { "epoch": 2.684478759765625e-05, "step": 17593, "training_step_time": 0.10723018646240234 }, { "epoch": 2.68463134765625e-05, "model_forward_time": 0.025054216384887695, "step": 17594 }, { "epoch": 2.68463134765625e-05, "step": 17594, "training_step_time": 0.10564684867858887 }, { "epoch": 2.684783935546875e-05, "model_forward_time": 0.025013208389282227, "step": 17595 }, { "epoch": 2.684783935546875e-05, "step": 17595, "training_step_time": 0.13254308700561523 }, { "epoch": 2.6849365234375e-05, "model_forward_time": 0.025025129318237305, "step": 17596 }, { "epoch": 2.6849365234375e-05, "step": 17596, "training_step_time": 0.10689663887023926 }, { "epoch": 2.685089111328125e-05, "model_forward_time": 0.02489495277404785, "step": 17597 }, { "epoch": 2.685089111328125e-05, "step": 17597, "training_step_time": 0.17582941055297852 }, { "epoch": 2.68524169921875e-05, "model_forward_time": 0.02502274513244629, "step": 17598 }, { "epoch": 2.68524169921875e-05, "step": 17598, "training_step_time": 0.1739051342010498 }, { "epoch": 2.685394287109375e-05, "model_forward_time": 0.024285078048706055, "step": 17599 }, { "epoch": 2.685394287109375e-05, "step": 17599, "training_step_time": 0.13638877868652344 }, { "epoch": 2.685546875e-05, "grad_norm": 0.11669151484966278, "learning_rate": 3.9874128245030404e-05, "loss": 0.0214, "step": 17600 }, { "epoch": 2.685546875e-05, "model_forward_time": 0.024765968322753906, "step": 17600 }, { "epoch": 2.685546875e-05, "step": 17600, "training_step_time": 0.17300677299499512 }, { "epoch": 2.685699462890625e-05, "model_forward_time": 0.024624347686767578, "step": 17601 }, { "epoch": 2.685699462890625e-05, "step": 17601, "training_step_time": 0.15225958824157715 }, { "epoch": 2.68585205078125e-05, "model_forward_time": 0.023837566375732422, "step": 17602 }, { "epoch": 2.68585205078125e-05, "step": 17602, "training_step_time": 0.13386917114257812 }, { "epoch": 2.686004638671875e-05, "model_forward_time": 0.024768829345703125, "step": 17603 }, { "epoch": 2.686004638671875e-05, "step": 17603, "training_step_time": 0.13197803497314453 }, { "epoch": 2.6861572265625e-05, "model_forward_time": 0.024423599243164062, "step": 17604 }, { "epoch": 2.6861572265625e-05, "step": 17604, "training_step_time": 0.10570359230041504 }, { "epoch": 2.686309814453125e-05, "model_forward_time": 0.025017976760864258, "step": 17605 }, { "epoch": 2.686309814453125e-05, "step": 17605, "training_step_time": 0.12499380111694336 }, { "epoch": 2.68646240234375e-05, "model_forward_time": 0.02516913414001465, "step": 17606 }, { "epoch": 2.68646240234375e-05, "step": 17606, "training_step_time": 0.10401558876037598 }, { "epoch": 2.686614990234375e-05, "model_forward_time": 0.025025129318237305, "step": 17607 }, { "epoch": 2.686614990234375e-05, "step": 17607, "training_step_time": 0.1042790412902832 }, { "epoch": 2.686767578125e-05, "model_forward_time": 0.025037288665771484, "step": 17608 }, { "epoch": 2.686767578125e-05, "step": 17608, "training_step_time": 0.1080327033996582 }, { "epoch": 2.686920166015625e-05, "model_forward_time": 0.02513432502746582, "step": 17609 }, { "epoch": 2.686920166015625e-05, "step": 17609, "training_step_time": 0.10373783111572266 }, { "epoch": 2.68707275390625e-05, "grad_norm": 0.3164677321910858, "learning_rate": 3.982016081781189e-05, "loss": 0.016, "step": 17610 }, { "epoch": 2.68707275390625e-05, "model_forward_time": 0.02566242218017578, "step": 17610 }, { "epoch": 2.68707275390625e-05, "step": 17610, "training_step_time": 0.10534548759460449 }, { "epoch": 2.687225341796875e-05, "model_forward_time": 0.026174545288085938, "step": 17611 }, { "epoch": 2.687225341796875e-05, "step": 17611, "training_step_time": 0.1085824966430664 }, { "epoch": 2.6873779296875e-05, "model_forward_time": 0.0253293514251709, "step": 17612 }, { "epoch": 2.6873779296875e-05, "step": 17612, "training_step_time": 0.10844278335571289 }, { "epoch": 2.687530517578125e-05, "model_forward_time": 0.025023937225341797, "step": 17613 }, { "epoch": 2.687530517578125e-05, "step": 17613, "training_step_time": 0.1492776870727539 }, { "epoch": 2.68768310546875e-05, "model_forward_time": 0.024797439575195312, "step": 17614 }, { "epoch": 2.68768310546875e-05, "step": 17614, "training_step_time": 0.1757051944732666 }, { "epoch": 2.687835693359375e-05, "model_forward_time": 0.02590465545654297, "step": 17615 }, { "epoch": 2.687835693359375e-05, "step": 17615, "training_step_time": 0.20859956741333008 }, { "epoch": 2.68798828125e-05, "model_forward_time": 0.024591922760009766, "step": 17616 }, { "epoch": 2.68798828125e-05, "step": 17616, "training_step_time": 0.15752792358398438 }, { "epoch": 2.688140869140625e-05, "model_forward_time": 0.024396419525146484, "step": 17617 }, { "epoch": 2.688140869140625e-05, "step": 17617, "training_step_time": 0.19089508056640625 }, { "epoch": 2.68829345703125e-05, "model_forward_time": 0.02430891990661621, "step": 17618 }, { "epoch": 2.68829345703125e-05, "step": 17618, "training_step_time": 0.14585423469543457 }, { "epoch": 2.688446044921875e-05, "model_forward_time": 0.025299787521362305, "step": 17619 }, { "epoch": 2.688446044921875e-05, "step": 17619, "training_step_time": 0.17435526847839355 }, { "epoch": 2.6885986328125e-05, "grad_norm": 0.3168177604675293, "learning_rate": 3.976620576005786e-05, "loss": 0.0117, "step": 17620 }, { "epoch": 2.6885986328125e-05, "model_forward_time": 0.024061203002929688, "step": 17620 }, { "epoch": 2.6885986328125e-05, "step": 17620, "training_step_time": 0.12474346160888672 }, { "epoch": 2.688751220703125e-05, "model_forward_time": 0.02381610870361328, "step": 17621 }, { "epoch": 2.688751220703125e-05, "step": 17621, "training_step_time": 0.12219595909118652 }, { "epoch": 2.68890380859375e-05, "model_forward_time": 0.024809837341308594, "step": 17622 }, { "epoch": 2.68890380859375e-05, "step": 17622, "training_step_time": 0.11995744705200195 }, { "epoch": 2.689056396484375e-05, "model_forward_time": 0.02580857276916504, "step": 17623 }, { "epoch": 2.689056396484375e-05, "step": 17623, "training_step_time": 0.11118197441101074 }, { "epoch": 2.689208984375e-05, "model_forward_time": 0.025321006774902344, "step": 17624 }, { "epoch": 2.689208984375e-05, "step": 17624, "training_step_time": 0.11187386512756348 }, { "epoch": 2.689361572265625e-05, "model_forward_time": 0.02521681785583496, "step": 17625 }, { "epoch": 2.689361572265625e-05, "step": 17625, "training_step_time": 0.11209273338317871 }, { "epoch": 2.68951416015625e-05, "model_forward_time": 0.025374650955200195, "step": 17626 }, { "epoch": 2.68951416015625e-05, "step": 17626, "training_step_time": 0.10788846015930176 }, { "epoch": 2.689666748046875e-05, "model_forward_time": 0.025298118591308594, "step": 17627 }, { "epoch": 2.689666748046875e-05, "step": 17627, "training_step_time": 0.10836362838745117 }, { "epoch": 2.6898193359375e-05, "model_forward_time": 0.024960041046142578, "step": 17628 }, { "epoch": 2.6898193359375e-05, "step": 17628, "training_step_time": 0.10666871070861816 }, { "epoch": 2.689971923828125e-05, "model_forward_time": 0.024715185165405273, "step": 17629 }, { "epoch": 2.689971923828125e-05, "step": 17629, "training_step_time": 0.10511326789855957 }, { "epoch": 2.69012451171875e-05, "grad_norm": 0.24846415221691132, "learning_rate": 3.9712263137328836e-05, "loss": 0.0076, "step": 17630 }, { "epoch": 2.69012451171875e-05, "model_forward_time": 0.024354219436645508, "step": 17630 }, { "epoch": 2.69012451171875e-05, "step": 17630, "training_step_time": 0.1078329086303711 }, { "epoch": 2.690277099609375e-05, "model_forward_time": 0.02511739730834961, "step": 17631 }, { "epoch": 2.690277099609375e-05, "step": 17631, "training_step_time": 0.10584855079650879 }, { "epoch": 2.6904296875e-05, "model_forward_time": 0.02543473243713379, "step": 17632 }, { "epoch": 2.6904296875e-05, "step": 17632, "training_step_time": 0.11030459403991699 }, { "epoch": 2.690582275390625e-05, "model_forward_time": 0.024874448776245117, "step": 17633 }, { "epoch": 2.690582275390625e-05, "step": 17633, "training_step_time": 0.11121892929077148 }, { "epoch": 2.69073486328125e-05, "model_forward_time": 0.02493429183959961, "step": 17634 }, { "epoch": 2.69073486328125e-05, "step": 17634, "training_step_time": 0.11022543907165527 }, { "epoch": 2.690887451171875e-05, "model_forward_time": 0.024924039840698242, "step": 17635 }, { "epoch": 2.690887451171875e-05, "step": 17635, "training_step_time": 0.10805249214172363 }, { "epoch": 2.6910400390625e-05, "model_forward_time": 0.024998188018798828, "step": 17636 }, { "epoch": 2.6910400390625e-05, "step": 17636, "training_step_time": 0.10616612434387207 }, { "epoch": 2.691192626953125e-05, "model_forward_time": 0.024995803833007812, "step": 17637 }, { "epoch": 2.691192626953125e-05, "step": 17637, "training_step_time": 0.10666322708129883 }, { "epoch": 2.69134521484375e-05, "model_forward_time": 0.02477860450744629, "step": 17638 }, { "epoch": 2.69134521484375e-05, "step": 17638, "training_step_time": 0.1084434986114502 }, { "epoch": 2.691497802734375e-05, "model_forward_time": 0.02492380142211914, "step": 17639 }, { "epoch": 2.691497802734375e-05, "step": 17639, "training_step_time": 0.17279887199401855 }, { "epoch": 2.691650390625e-05, "grad_norm": 0.42609038949012756, "learning_rate": 3.965833301517017e-05, "loss": 0.0119, "step": 17640 }, { "epoch": 2.691650390625e-05, "model_forward_time": 0.024018526077270508, "step": 17640 }, { "epoch": 2.691650390625e-05, "step": 17640, "training_step_time": 0.10555362701416016 }, { "epoch": 2.691802978515625e-05, "model_forward_time": 0.024334430694580078, "step": 17641 }, { "epoch": 2.691802978515625e-05, "step": 17641, "training_step_time": 0.1655597686767578 }, { "epoch": 2.69195556640625e-05, "model_forward_time": 0.024773836135864258, "step": 17642 }, { "epoch": 2.69195556640625e-05, "step": 17642, "training_step_time": 0.15639066696166992 }, { "epoch": 2.692108154296875e-05, "model_forward_time": 0.02418041229248047, "step": 17643 }, { "epoch": 2.692108154296875e-05, "step": 17643, "training_step_time": 0.1178121566772461 }, { "epoch": 2.6922607421875e-05, "model_forward_time": 0.02393507957458496, "step": 17644 }, { "epoch": 2.6922607421875e-05, "step": 17644, "training_step_time": 0.16220307350158691 }, { "epoch": 2.692413330078125e-05, "model_forward_time": 0.02425241470336914, "step": 17645 }, { "epoch": 2.692413330078125e-05, "step": 17645, "training_step_time": 0.19728708267211914 }, { "epoch": 2.69256591796875e-05, "model_forward_time": 0.02452826499938965, "step": 17646 }, { "epoch": 2.69256591796875e-05, "step": 17646, "training_step_time": 0.15880775451660156 }, { "epoch": 2.692718505859375e-05, "model_forward_time": 0.0243685245513916, "step": 17647 }, { "epoch": 2.692718505859375e-05, "step": 17647, "training_step_time": 0.11892008781433105 }, { "epoch": 2.69287109375e-05, "model_forward_time": 0.024597644805908203, "step": 17648 }, { "epoch": 2.69287109375e-05, "step": 17648, "training_step_time": 0.11197018623352051 }, { "epoch": 2.693023681640625e-05, "model_forward_time": 0.025578737258911133, "step": 17649 }, { "epoch": 2.693023681640625e-05, "step": 17649, "training_step_time": 0.1174924373626709 }, { "epoch": 2.69317626953125e-05, "grad_norm": 0.19126154482364655, "learning_rate": 3.960441545911204e-05, "loss": 0.01, "step": 17650 }, { "epoch": 2.69317626953125e-05, "model_forward_time": 0.024829387664794922, "step": 17650 }, { "epoch": 2.69317626953125e-05, "step": 17650, "training_step_time": 0.10478663444519043 }, { "epoch": 2.693328857421875e-05, "model_forward_time": 0.024965763092041016, "step": 17651 }, { "epoch": 2.693328857421875e-05, "step": 17651, "training_step_time": 0.10884952545166016 }, { "epoch": 2.6934814453125e-05, "model_forward_time": 0.024044275283813477, "step": 17652 }, { "epoch": 2.6934814453125e-05, "step": 17652, "training_step_time": 0.10444879531860352 }, { "epoch": 2.693634033203125e-05, "model_forward_time": 0.024767160415649414, "step": 17653 }, { "epoch": 2.693634033203125e-05, "step": 17653, "training_step_time": 0.10766768455505371 }, { "epoch": 2.69378662109375e-05, "model_forward_time": 0.025311708450317383, "step": 17654 }, { "epoch": 2.69378662109375e-05, "step": 17654, "training_step_time": 0.10631537437438965 }, { "epoch": 2.693939208984375e-05, "model_forward_time": 0.024583101272583008, "step": 17655 }, { "epoch": 2.693939208984375e-05, "step": 17655, "training_step_time": 0.10692811012268066 }, { "epoch": 2.694091796875e-05, "model_forward_time": 0.025423526763916016, "step": 17656 }, { "epoch": 2.694091796875e-05, "step": 17656, "training_step_time": 0.10620355606079102 }, { "epoch": 2.694244384765625e-05, "model_forward_time": 0.02510690689086914, "step": 17657 }, { "epoch": 2.694244384765625e-05, "step": 17657, "training_step_time": 0.10471224784851074 }, { "epoch": 2.69439697265625e-05, "model_forward_time": 0.0250704288482666, "step": 17658 }, { "epoch": 2.69439697265625e-05, "step": 17658, "training_step_time": 0.10493111610412598 }, { "epoch": 2.694549560546875e-05, "model_forward_time": 0.02513742446899414, "step": 17659 }, { "epoch": 2.694549560546875e-05, "step": 17659, "training_step_time": 0.10521602630615234 }, { "epoch": 2.6947021484375e-05, "grad_norm": 0.34858188033103943, "learning_rate": 3.955051053466937e-05, "loss": 0.0183, "step": 17660 }, { "epoch": 2.6947021484375e-05, "model_forward_time": 0.024333715438842773, "step": 17660 }, { "epoch": 2.6947021484375e-05, "step": 17660, "training_step_time": 0.15219402313232422 }, { "epoch": 2.694854736328125e-05, "model_forward_time": 0.02514195442199707, "step": 17661 }, { "epoch": 2.694854736328125e-05, "step": 17661, "training_step_time": 0.11740517616271973 }, { "epoch": 2.69500732421875e-05, "model_forward_time": 0.025423526763916016, "step": 17662 }, { "epoch": 2.69500732421875e-05, "step": 17662, "training_step_time": 0.11199712753295898 }, { "epoch": 2.695159912109375e-05, "model_forward_time": 0.02520585060119629, "step": 17663 }, { "epoch": 2.695159912109375e-05, "step": 17663, "training_step_time": 0.11115360260009766 }, { "epoch": 2.6953125e-05, "model_forward_time": 0.025011301040649414, "step": 17664 }, { "epoch": 2.6953125e-05, "step": 17664, "training_step_time": 0.12707781791687012 }, { "epoch": 2.695465087890625e-05, "model_forward_time": 0.0248873233795166, "step": 17665 }, { "epoch": 2.695465087890625e-05, "step": 17665, "training_step_time": 0.1087038516998291 }, { "epoch": 2.69561767578125e-05, "model_forward_time": 0.02496504783630371, "step": 17666 }, { "epoch": 2.69561767578125e-05, "step": 17666, "training_step_time": 0.1276235580444336 }, { "epoch": 2.695770263671875e-05, "model_forward_time": 0.024932146072387695, "step": 17667 }, { "epoch": 2.695770263671875e-05, "step": 17667, "training_step_time": 0.129652738571167 }, { "epoch": 2.6959228515625e-05, "model_forward_time": 0.024729013442993164, "step": 17668 }, { "epoch": 2.6959228515625e-05, "step": 17668, "training_step_time": 0.10419940948486328 }, { "epoch": 2.696075439453125e-05, "model_forward_time": 0.02474212646484375, "step": 17669 }, { "epoch": 2.696075439453125e-05, "step": 17669, "training_step_time": 0.10564637184143066 }, { "epoch": 2.69622802734375e-05, "grad_norm": 0.2636212408542633, "learning_rate": 3.949661830734172e-05, "loss": 0.011, "step": 17670 }, { "epoch": 2.69622802734375e-05, "model_forward_time": 0.02762746810913086, "step": 17670 }, { "epoch": 2.69622802734375e-05, "step": 17670, "training_step_time": 0.10918784141540527 }, { "epoch": 2.696380615234375e-05, "model_forward_time": 0.025259971618652344, "step": 17671 }, { "epoch": 2.696380615234375e-05, "step": 17671, "training_step_time": 0.10972213745117188 }, { "epoch": 2.696533203125e-05, "model_forward_time": 0.025129079818725586, "step": 17672 }, { "epoch": 2.696533203125e-05, "step": 17672, "training_step_time": 0.11216926574707031 }, { "epoch": 2.696685791015625e-05, "model_forward_time": 0.025035858154296875, "step": 17673 }, { "epoch": 2.696685791015625e-05, "step": 17673, "training_step_time": 0.10884857177734375 }, { "epoch": 2.69683837890625e-05, "model_forward_time": 0.02518630027770996, "step": 17674 }, { "epoch": 2.69683837890625e-05, "step": 17674, "training_step_time": 0.10832715034484863 }, { "epoch": 2.696990966796875e-05, "model_forward_time": 0.02406167984008789, "step": 17675 }, { "epoch": 2.696990966796875e-05, "step": 17675, "training_step_time": 0.1084146499633789 }, { "epoch": 2.6971435546875e-05, "model_forward_time": 0.02490520477294922, "step": 17676 }, { "epoch": 2.6971435546875e-05, "step": 17676, "training_step_time": 0.11028790473937988 }, { "epoch": 2.697296142578125e-05, "model_forward_time": 0.02487778663635254, "step": 17677 }, { "epoch": 2.697296142578125e-05, "step": 17677, "training_step_time": 0.10698342323303223 }, { "epoch": 2.69744873046875e-05, "model_forward_time": 0.024646759033203125, "step": 17678 }, { "epoch": 2.69744873046875e-05, "step": 17678, "training_step_time": 0.1171114444732666 }, { "epoch": 2.697601318359375e-05, "model_forward_time": 0.02503347396850586, "step": 17679 }, { "epoch": 2.697601318359375e-05, "step": 17679, "training_step_time": 0.10998749732971191 }, { "epoch": 2.69775390625e-05, "grad_norm": 0.3377482295036316, "learning_rate": 3.944273884261322e-05, "loss": 0.0127, "step": 17680 }, { "epoch": 2.69775390625e-05, "model_forward_time": 0.02502274513244629, "step": 17680 }, { "epoch": 2.69775390625e-05, "step": 17680, "training_step_time": 0.10909867286682129 }, { "epoch": 2.697906494140625e-05, "model_forward_time": 0.02521204948425293, "step": 17681 }, { "epoch": 2.697906494140625e-05, "step": 17681, "training_step_time": 0.10574984550476074 }, { "epoch": 2.69805908203125e-05, "model_forward_time": 0.024303913116455078, "step": 17682 }, { "epoch": 2.69805908203125e-05, "step": 17682, "training_step_time": 0.10669422149658203 }, { "epoch": 2.698211669921875e-05, "model_forward_time": 0.02496647834777832, "step": 17683 }, { "epoch": 2.698211669921875e-05, "step": 17683, "training_step_time": 0.10692262649536133 }, { "epoch": 2.6983642578125e-05, "model_forward_time": 0.025257587432861328, "step": 17684 }, { "epoch": 2.6983642578125e-05, "step": 17684, "training_step_time": 0.10901212692260742 }, { "epoch": 2.698516845703125e-05, "model_forward_time": 0.02487325668334961, "step": 17685 }, { "epoch": 2.698516845703125e-05, "step": 17685, "training_step_time": 0.15722346305847168 }, { "epoch": 2.69866943359375e-05, "model_forward_time": 0.024527311325073242, "step": 17686 }, { "epoch": 2.69866943359375e-05, "step": 17686, "training_step_time": 0.2013704776763916 }, { "epoch": 2.698822021484375e-05, "model_forward_time": 0.025080204010009766, "step": 17687 }, { "epoch": 2.698822021484375e-05, "step": 17687, "training_step_time": 0.10350990295410156 }, { "epoch": 2.698974609375e-05, "model_forward_time": 0.02432560920715332, "step": 17688 }, { "epoch": 2.698974609375e-05, "step": 17688, "training_step_time": 0.13787198066711426 }, { "epoch": 2.699127197265625e-05, "model_forward_time": 0.02454090118408203, "step": 17689 }, { "epoch": 2.699127197265625e-05, "step": 17689, "training_step_time": 0.1922752857208252 }, { "epoch": 2.69927978515625e-05, "grad_norm": 0.2020740509033203, "learning_rate": 3.9388872205952526e-05, "loss": 0.0094, "step": 17690 }, { "epoch": 2.69927978515625e-05, "model_forward_time": 0.024262189865112305, "step": 17690 }, { "epoch": 2.69927978515625e-05, "step": 17690, "training_step_time": 0.13552045822143555 }, { "epoch": 2.699432373046875e-05, "model_forward_time": 0.024175643920898438, "step": 17691 }, { "epoch": 2.699432373046875e-05, "step": 17691, "training_step_time": 0.14896655082702637 }, { "epoch": 2.6995849609375e-05, "model_forward_time": 0.024967193603515625, "step": 17692 }, { "epoch": 2.6995849609375e-05, "step": 17692, "training_step_time": 0.19589471817016602 }, { "epoch": 2.699737548828125e-05, "model_forward_time": 0.024436473846435547, "step": 17693 }, { "epoch": 2.699737548828125e-05, "step": 17693, "training_step_time": 0.16414737701416016 }, { "epoch": 2.69989013671875e-05, "model_forward_time": 0.02432417869567871, "step": 17694 }, { "epoch": 2.69989013671875e-05, "step": 17694, "training_step_time": 0.1172029972076416 }, { "epoch": 2.700042724609375e-05, "model_forward_time": 0.024344682693481445, "step": 17695 }, { "epoch": 2.700042724609375e-05, "step": 17695, "training_step_time": 0.11496686935424805 }, { "epoch": 2.7001953125e-05, "model_forward_time": 0.02486276626586914, "step": 17696 }, { "epoch": 2.7001953125e-05, "step": 17696, "training_step_time": 0.11989808082580566 }, { "epoch": 2.700347900390625e-05, "model_forward_time": 0.02474045753479004, "step": 17697 }, { "epoch": 2.700347900390625e-05, "step": 17697, "training_step_time": 0.10661792755126953 }, { "epoch": 2.70050048828125e-05, "model_forward_time": 0.024944305419921875, "step": 17698 }, { "epoch": 2.70050048828125e-05, "step": 17698, "training_step_time": 0.10542559623718262 }, { "epoch": 2.700653076171875e-05, "model_forward_time": 0.02500319480895996, "step": 17699 }, { "epoch": 2.700653076171875e-05, "step": 17699, "training_step_time": 0.11017203330993652 }, { "epoch": 2.7008056640625e-05, "grad_norm": 0.11523959785699844, "learning_rate": 3.933501846281267e-05, "loss": 0.0118, "step": 17700 }, { "epoch": 2.7008056640625e-05, "model_forward_time": 0.02520465850830078, "step": 17700 }, { "epoch": 2.7008056640625e-05, "step": 17700, "training_step_time": 0.10685610771179199 }, { "epoch": 2.700958251953125e-05, "model_forward_time": 0.025278806686401367, "step": 17701 }, { "epoch": 2.700958251953125e-05, "step": 17701, "training_step_time": 0.10702848434448242 }, { "epoch": 2.70111083984375e-05, "model_forward_time": 0.024937868118286133, "step": 17702 }, { "epoch": 2.70111083984375e-05, "step": 17702, "training_step_time": 0.10634708404541016 }, { "epoch": 2.701263427734375e-05, "model_forward_time": 0.024875164031982422, "step": 17703 }, { "epoch": 2.701263427734375e-05, "step": 17703, "training_step_time": 0.10957956314086914 }, { "epoch": 2.701416015625e-05, "model_forward_time": 0.024683475494384766, "step": 17704 }, { "epoch": 2.701416015625e-05, "step": 17704, "training_step_time": 0.1058650016784668 }, { "epoch": 2.701568603515625e-05, "model_forward_time": 0.025029420852661133, "step": 17705 }, { "epoch": 2.701568603515625e-05, "step": 17705, "training_step_time": 0.10803079605102539 }, { "epoch": 2.70172119140625e-05, "model_forward_time": 0.025226354598999023, "step": 17706 }, { "epoch": 2.70172119140625e-05, "step": 17706, "training_step_time": 0.10757231712341309 }, { "epoch": 2.701873779296875e-05, "model_forward_time": 0.025002241134643555, "step": 17707 }, { "epoch": 2.701873779296875e-05, "step": 17707, "training_step_time": 0.14447689056396484 }, { "epoch": 2.7020263671875e-05, "model_forward_time": 0.02555251121520996, "step": 17708 }, { "epoch": 2.7020263671875e-05, "step": 17708, "training_step_time": 0.14852571487426758 }, { "epoch": 2.702178955078125e-05, "model_forward_time": 0.0244905948638916, "step": 17709 }, { "epoch": 2.702178955078125e-05, "step": 17709, "training_step_time": 0.11041784286499023 }, { "epoch": 2.70233154296875e-05, "grad_norm": 0.47804808616638184, "learning_rate": 3.928117767863102e-05, "loss": 0.0148, "step": 17710 }, { "epoch": 2.70233154296875e-05, "model_forward_time": 0.02489328384399414, "step": 17710 }, { "epoch": 2.70233154296875e-05, "step": 17710, "training_step_time": 0.1114044189453125 }, { "epoch": 2.702484130859375e-05, "model_forward_time": 0.025493860244750977, "step": 17711 }, { "epoch": 2.702484130859375e-05, "step": 17711, "training_step_time": 0.15456080436706543 }, { "epoch": 2.70263671875e-05, "model_forward_time": 0.024722814559936523, "step": 17712 }, { "epoch": 2.70263671875e-05, "step": 17712, "training_step_time": 0.20668625831604004 }, { "epoch": 2.702789306640625e-05, "model_forward_time": 0.02420210838317871, "step": 17713 }, { "epoch": 2.702789306640625e-05, "step": 17713, "training_step_time": 0.15131807327270508 }, { "epoch": 2.70294189453125e-05, "model_forward_time": 0.024515151977539062, "step": 17714 }, { "epoch": 2.70294189453125e-05, "step": 17714, "training_step_time": 0.13583779335021973 }, { "epoch": 2.703094482421875e-05, "model_forward_time": 0.02474689483642578, "step": 17715 }, { "epoch": 2.703094482421875e-05, "step": 17715, "training_step_time": 0.13012433052062988 }, { "epoch": 2.7032470703125e-05, "model_forward_time": 0.024486541748046875, "step": 17716 }, { "epoch": 2.7032470703125e-05, "step": 17716, "training_step_time": 0.12494301795959473 }, { "epoch": 2.703399658203125e-05, "model_forward_time": 0.024567604064941406, "step": 17717 }, { "epoch": 2.703399658203125e-05, "step": 17717, "training_step_time": 0.11674308776855469 }, { "epoch": 2.70355224609375e-05, "model_forward_time": 0.02482295036315918, "step": 17718 }, { "epoch": 2.70355224609375e-05, "step": 17718, "training_step_time": 0.11928224563598633 }, { "epoch": 2.703704833984375e-05, "model_forward_time": 0.024878263473510742, "step": 17719 }, { "epoch": 2.703704833984375e-05, "step": 17719, "training_step_time": 0.1118168830871582 }, { "epoch": 2.703857421875e-05, "grad_norm": 0.16659250855445862, "learning_rate": 3.92273499188292e-05, "loss": 0.0105, "step": 17720 }, { "epoch": 2.703857421875e-05, "model_forward_time": 0.02500009536743164, "step": 17720 }, { "epoch": 2.703857421875e-05, "step": 17720, "training_step_time": 0.11332273483276367 }, { "epoch": 2.704010009765625e-05, "model_forward_time": 0.02514481544494629, "step": 17721 }, { "epoch": 2.704010009765625e-05, "step": 17721, "training_step_time": 0.10699892044067383 }, { "epoch": 2.70416259765625e-05, "model_forward_time": 0.025344371795654297, "step": 17722 }, { "epoch": 2.70416259765625e-05, "step": 17722, "training_step_time": 0.1105661392211914 }, { "epoch": 2.704315185546875e-05, "model_forward_time": 0.025389671325683594, "step": 17723 }, { "epoch": 2.704315185546875e-05, "step": 17723, "training_step_time": 0.1089777946472168 }, { "epoch": 2.7044677734375e-05, "model_forward_time": 0.025086641311645508, "step": 17724 }, { "epoch": 2.7044677734375e-05, "step": 17724, "training_step_time": 0.10904264450073242 }, { "epoch": 2.704620361328125e-05, "model_forward_time": 0.025250673294067383, "step": 17725 }, { "epoch": 2.704620361328125e-05, "step": 17725, "training_step_time": 0.10663652420043945 }, { "epoch": 2.70477294921875e-05, "model_forward_time": 0.02503347396850586, "step": 17726 }, { "epoch": 2.70477294921875e-05, "step": 17726, "training_step_time": 0.10947728157043457 }, { "epoch": 2.704925537109375e-05, "model_forward_time": 0.025022506713867188, "step": 17727 }, { "epoch": 2.704925537109375e-05, "step": 17727, "training_step_time": 0.1046450138092041 }, { "epoch": 2.705078125e-05, "model_forward_time": 0.024769067764282227, "step": 17728 }, { "epoch": 2.705078125e-05, "step": 17728, "training_step_time": 0.10725212097167969 }, { "epoch": 2.705230712890625e-05, "model_forward_time": 0.025760889053344727, "step": 17729 }, { "epoch": 2.705230712890625e-05, "step": 17729, "training_step_time": 0.10475873947143555 }, { "epoch": 2.70538330078125e-05, "grad_norm": 0.1870919018983841, "learning_rate": 3.917353524881302e-05, "loss": 0.0082, "step": 17730 }, { "epoch": 2.70538330078125e-05, "model_forward_time": 0.024820566177368164, "step": 17730 }, { "epoch": 2.70538330078125e-05, "step": 17730, "training_step_time": 0.15718626976013184 }, { "epoch": 2.705535888671875e-05, "model_forward_time": 0.02448749542236328, "step": 17731 }, { "epoch": 2.705535888671875e-05, "step": 17731, "training_step_time": 0.12178659439086914 }, { "epoch": 2.7056884765625e-05, "model_forward_time": 0.024935007095336914, "step": 17732 }, { "epoch": 2.7056884765625e-05, "step": 17732, "training_step_time": 0.10374808311462402 }, { "epoch": 2.705841064453125e-05, "model_forward_time": 0.024270057678222656, "step": 17733 }, { "epoch": 2.705841064453125e-05, "step": 17733, "training_step_time": 0.10608386993408203 }, { "epoch": 2.70599365234375e-05, "model_forward_time": 0.025130271911621094, "step": 17734 }, { "epoch": 2.70599365234375e-05, "step": 17734, "training_step_time": 0.21529817581176758 }, { "epoch": 2.706146240234375e-05, "model_forward_time": 0.02480459213256836, "step": 17735 }, { "epoch": 2.706146240234375e-05, "step": 17735, "training_step_time": 0.1272883415222168 }, { "epoch": 2.706298828125e-05, "model_forward_time": 0.02445054054260254, "step": 17736 }, { "epoch": 2.706298828125e-05, "step": 17736, "training_step_time": 0.19802546501159668 }, { "epoch": 2.706451416015625e-05, "model_forward_time": 0.02454686164855957, "step": 17737 }, { "epoch": 2.706451416015625e-05, "step": 17737, "training_step_time": 0.14284801483154297 }, { "epoch": 2.70660400390625e-05, "model_forward_time": 0.024601221084594727, "step": 17738 }, { "epoch": 2.70660400390625e-05, "step": 17738, "training_step_time": 0.2062370777130127 }, { "epoch": 2.706756591796875e-05, "model_forward_time": 0.02434372901916504, "step": 17739 }, { "epoch": 2.706756591796875e-05, "step": 17739, "training_step_time": 0.1223289966583252 }, { "epoch": 2.7069091796875e-05, "grad_norm": 0.19186779856681824, "learning_rate": 3.9119733733972387e-05, "loss": 0.0155, "step": 17740 }, { "epoch": 2.7069091796875e-05, "model_forward_time": 0.026275157928466797, "step": 17740 }, { "epoch": 2.7069091796875e-05, "step": 17740, "training_step_time": 0.11357760429382324 }, { "epoch": 2.707061767578125e-05, "model_forward_time": 0.025209903717041016, "step": 17741 }, { "epoch": 2.707061767578125e-05, "step": 17741, "training_step_time": 0.11390233039855957 }, { "epoch": 2.70721435546875e-05, "model_forward_time": 0.024872541427612305, "step": 17742 }, { "epoch": 2.70721435546875e-05, "step": 17742, "training_step_time": 0.11147785186767578 }, { "epoch": 2.707366943359375e-05, "model_forward_time": 0.02546548843383789, "step": 17743 }, { "epoch": 2.707366943359375e-05, "step": 17743, "training_step_time": 0.10614657402038574 }, { "epoch": 2.70751953125e-05, "model_forward_time": 0.02489018440246582, "step": 17744 }, { "epoch": 2.70751953125e-05, "step": 17744, "training_step_time": 0.10436511039733887 }, { "epoch": 2.707672119140625e-05, "model_forward_time": 0.024971485137939453, "step": 17745 }, { "epoch": 2.707672119140625e-05, "step": 17745, "training_step_time": 0.10857510566711426 }, { "epoch": 2.70782470703125e-05, "model_forward_time": 0.026225805282592773, "step": 17746 }, { "epoch": 2.70782470703125e-05, "step": 17746, "training_step_time": 0.1093130111694336 }, { "epoch": 2.707977294921875e-05, "model_forward_time": 0.024330854415893555, "step": 17747 }, { "epoch": 2.707977294921875e-05, "step": 17747, "training_step_time": 0.10790705680847168 }, { "epoch": 2.7081298828125e-05, "model_forward_time": 0.02520155906677246, "step": 17748 }, { "epoch": 2.7081298828125e-05, "step": 17748, "training_step_time": 0.10710811614990234 }, { "epoch": 2.708282470703125e-05, "model_forward_time": 0.02506852149963379, "step": 17749 }, { "epoch": 2.708282470703125e-05, "step": 17749, "training_step_time": 0.10506844520568848 }, { "epoch": 2.70843505859375e-05, "grad_norm": 0.29710689187049866, "learning_rate": 3.9065945439681214e-05, "loss": 0.021, "step": 17750 }, { "epoch": 2.70843505859375e-05, "model_forward_time": 0.024570226669311523, "step": 17750 }, { "epoch": 2.70843505859375e-05, "step": 17750, "training_step_time": 0.10391879081726074 }, { "epoch": 2.708587646484375e-05, "model_forward_time": 0.024657726287841797, "step": 17751 }, { "epoch": 2.708587646484375e-05, "step": 17751, "training_step_time": 0.10632586479187012 }, { "epoch": 2.708740234375e-05, "model_forward_time": 0.02540111541748047, "step": 17752 }, { "epoch": 2.708740234375e-05, "step": 17752, "training_step_time": 0.14304161071777344 }, { "epoch": 2.708892822265625e-05, "model_forward_time": 0.025876522064208984, "step": 17753 }, { "epoch": 2.708892822265625e-05, "step": 17753, "training_step_time": 0.11693501472473145 }, { "epoch": 2.70904541015625e-05, "model_forward_time": 0.0242159366607666, "step": 17754 }, { "epoch": 2.70904541015625e-05, "step": 17754, "training_step_time": 0.10764336585998535 }, { "epoch": 2.709197998046875e-05, "model_forward_time": 0.025173425674438477, "step": 17755 }, { "epoch": 2.709197998046875e-05, "step": 17755, "training_step_time": 0.1087043285369873 }, { "epoch": 2.7093505859375e-05, "model_forward_time": 0.025537967681884766, "step": 17756 }, { "epoch": 2.7093505859375e-05, "step": 17756, "training_step_time": 0.12853407859802246 }, { "epoch": 2.709503173828125e-05, "model_forward_time": 0.025127172470092773, "step": 17757 }, { "epoch": 2.709503173828125e-05, "step": 17757, "training_step_time": 0.10955452919006348 }, { "epoch": 2.70965576171875e-05, "model_forward_time": 0.025132179260253906, "step": 17758 }, { "epoch": 2.70965576171875e-05, "step": 17758, "training_step_time": 0.13810229301452637 }, { "epoch": 2.709808349609375e-05, "model_forward_time": 0.025025606155395508, "step": 17759 }, { "epoch": 2.709808349609375e-05, "step": 17759, "training_step_time": 0.11642932891845703 }, { "epoch": 2.7099609375e-05, "grad_norm": 0.3531988561153412, "learning_rate": 3.901217043129735e-05, "loss": 0.0161, "step": 17760 }, { "epoch": 2.7099609375e-05, "model_forward_time": 0.02491617202758789, "step": 17760 }, { "epoch": 2.7099609375e-05, "step": 17760, "training_step_time": 0.10485625267028809 }, { "epoch": 2.710113525390625e-05, "model_forward_time": 0.025086402893066406, "step": 17761 }, { "epoch": 2.710113525390625e-05, "step": 17761, "training_step_time": 0.11249613761901855 }, { "epoch": 2.71026611328125e-05, "model_forward_time": 0.02499222755432129, "step": 17762 }, { "epoch": 2.71026611328125e-05, "step": 17762, "training_step_time": 0.1263437271118164 }, { "epoch": 2.710418701171875e-05, "model_forward_time": 0.02515888214111328, "step": 17763 }, { "epoch": 2.710418701171875e-05, "step": 17763, "training_step_time": 0.12273168563842773 }, { "epoch": 2.7105712890625e-05, "model_forward_time": 0.02496027946472168, "step": 17764 }, { "epoch": 2.7105712890625e-05, "step": 17764, "training_step_time": 0.12393403053283691 }, { "epoch": 2.710723876953125e-05, "model_forward_time": 0.025132417678833008, "step": 17765 }, { "epoch": 2.710723876953125e-05, "step": 17765, "training_step_time": 0.11769413948059082 }, { "epoch": 2.71087646484375e-05, "model_forward_time": 0.02494192123413086, "step": 17766 }, { "epoch": 2.71087646484375e-05, "step": 17766, "training_step_time": 0.11533880233764648 }, { "epoch": 2.711029052734375e-05, "model_forward_time": 0.025026798248291016, "step": 17767 }, { "epoch": 2.711029052734375e-05, "step": 17767, "training_step_time": 0.11738038063049316 }, { "epoch": 2.711181640625e-05, "model_forward_time": 0.024738073348999023, "step": 17768 }, { "epoch": 2.711181640625e-05, "step": 17768, "training_step_time": 0.11084151268005371 }, { "epoch": 2.711334228515625e-05, "model_forward_time": 0.02548384666442871, "step": 17769 }, { "epoch": 2.711334228515625e-05, "step": 17769, "training_step_time": 0.11447405815124512 }, { "epoch": 2.71148681640625e-05, "grad_norm": 0.2782493233680725, "learning_rate": 3.895840877416249e-05, "loss": 0.0077, "step": 17770 }, { "epoch": 2.71148681640625e-05, "model_forward_time": 0.02552175521850586, "step": 17770 }, { "epoch": 2.71148681640625e-05, "step": 17770, "training_step_time": 0.10943126678466797 }, { "epoch": 2.711639404296875e-05, "model_forward_time": 0.025345563888549805, "step": 17771 }, { "epoch": 2.711639404296875e-05, "step": 17771, "training_step_time": 0.10887742042541504 }, { "epoch": 2.7117919921875e-05, "model_forward_time": 0.025590896606445312, "step": 17772 }, { "epoch": 2.7117919921875e-05, "step": 17772, "training_step_time": 0.10801267623901367 }, { "epoch": 2.711944580078125e-05, "model_forward_time": 0.026002168655395508, "step": 17773 }, { "epoch": 2.711944580078125e-05, "step": 17773, "training_step_time": 0.10735845565795898 }, { "epoch": 2.71209716796875e-05, "model_forward_time": 0.0259702205657959, "step": 17774 }, { "epoch": 2.71209716796875e-05, "step": 17774, "training_step_time": 0.10898089408874512 }, { "epoch": 2.712249755859375e-05, "model_forward_time": 0.02728748321533203, "step": 17775 }, { "epoch": 2.712249755859375e-05, "step": 17775, "training_step_time": 0.11421632766723633 }, { "epoch": 2.71240234375e-05, "model_forward_time": 0.0254514217376709, "step": 17776 }, { "epoch": 2.71240234375e-05, "step": 17776, "training_step_time": 0.214951753616333 }, { "epoch": 2.712554931640625e-05, "model_forward_time": 0.02418375015258789, "step": 17777 }, { "epoch": 2.712554931640625e-05, "step": 17777, "training_step_time": 0.12428712844848633 }, { "epoch": 2.71270751953125e-05, "model_forward_time": 0.026113510131835938, "step": 17778 }, { "epoch": 2.71270751953125e-05, "step": 17778, "training_step_time": 0.10728883743286133 }, { "epoch": 2.712860107421875e-05, "model_forward_time": 0.024712085723876953, "step": 17779 }, { "epoch": 2.712860107421875e-05, "step": 17779, "training_step_time": 0.12960124015808105 }, { "epoch": 2.7130126953125e-05, "grad_norm": 0.24737589061260223, "learning_rate": 3.890466053360211e-05, "loss": 0.0187, "step": 17780 }, { "epoch": 2.7130126953125e-05, "model_forward_time": 0.025352954864501953, "step": 17780 }, { "epoch": 2.7130126953125e-05, "step": 17780, "training_step_time": 0.22339248657226562 }, { "epoch": 2.713165283203125e-05, "model_forward_time": 0.024074077606201172, "step": 17781 }, { "epoch": 2.713165283203125e-05, "step": 17781, "training_step_time": 0.22021794319152832 }, { "epoch": 2.71331787109375e-05, "model_forward_time": 0.02434086799621582, "step": 17782 }, { "epoch": 2.71331787109375e-05, "step": 17782, "training_step_time": 0.1247413158416748 }, { "epoch": 2.713470458984375e-05, "model_forward_time": 0.024853944778442383, "step": 17783 }, { "epoch": 2.713470458984375e-05, "step": 17783, "training_step_time": 0.11188817024230957 }, { "epoch": 2.713623046875e-05, "model_forward_time": 0.024930953979492188, "step": 17784 }, { "epoch": 2.713623046875e-05, "step": 17784, "training_step_time": 0.10723018646240234 }, { "epoch": 2.713775634765625e-05, "model_forward_time": 0.024888992309570312, "step": 17785 }, { "epoch": 2.713775634765625e-05, "step": 17785, "training_step_time": 0.12799930572509766 }, { "epoch": 2.71392822265625e-05, "model_forward_time": 0.024920940399169922, "step": 17786 }, { "epoch": 2.71392822265625e-05, "step": 17786, "training_step_time": 0.12634873390197754 }, { "epoch": 2.714080810546875e-05, "model_forward_time": 0.024768829345703125, "step": 17787 }, { "epoch": 2.714080810546875e-05, "step": 17787, "training_step_time": 0.10676336288452148 }, { "epoch": 2.7142333984375e-05, "model_forward_time": 0.025170326232910156, "step": 17788 }, { "epoch": 2.7142333984375e-05, "step": 17788, "training_step_time": 0.12009906768798828 }, { "epoch": 2.714385986328125e-05, "model_forward_time": 0.025107145309448242, "step": 17789 }, { "epoch": 2.714385986328125e-05, "step": 17789, "training_step_time": 0.11100149154663086 }, { "epoch": 2.71453857421875e-05, "grad_norm": 0.14849424362182617, "learning_rate": 3.8850925774925425e-05, "loss": 0.0044, "step": 17790 }, { "epoch": 2.71453857421875e-05, "model_forward_time": 0.025336265563964844, "step": 17790 }, { "epoch": 2.71453857421875e-05, "step": 17790, "training_step_time": 0.10687541961669922 }, { "epoch": 2.714691162109375e-05, "model_forward_time": 0.025002241134643555, "step": 17791 }, { "epoch": 2.714691162109375e-05, "step": 17791, "training_step_time": 0.10997629165649414 }, { "epoch": 2.71484375e-05, "model_forward_time": 0.02515125274658203, "step": 17792 }, { "epoch": 2.71484375e-05, "step": 17792, "training_step_time": 0.14942479133605957 }, { "epoch": 2.714996337890625e-05, "model_forward_time": 0.02477264404296875, "step": 17793 }, { "epoch": 2.714996337890625e-05, "step": 17793, "training_step_time": 0.17796111106872559 }, { "epoch": 2.71514892578125e-05, "model_forward_time": 0.024335622787475586, "step": 17794 }, { "epoch": 2.71514892578125e-05, "step": 17794, "training_step_time": 0.18273639678955078 }, { "epoch": 2.715301513671875e-05, "model_forward_time": 0.024575233459472656, "step": 17795 }, { "epoch": 2.715301513671875e-05, "step": 17795, "training_step_time": 0.1756458282470703 }, { "epoch": 2.7154541015625e-05, "model_forward_time": 0.024271011352539062, "step": 17796 }, { "epoch": 2.7154541015625e-05, "step": 17796, "training_step_time": 0.19151997566223145 }, { "epoch": 2.715606689453125e-05, "model_forward_time": 0.02467179298400879, "step": 17797 }, { "epoch": 2.715606689453125e-05, "step": 17797, "training_step_time": 0.16283822059631348 }, { "epoch": 2.71575927734375e-05, "model_forward_time": 0.02411174774169922, "step": 17798 }, { "epoch": 2.71575927734375e-05, "step": 17798, "training_step_time": 0.16323304176330566 }, { "epoch": 2.715911865234375e-05, "model_forward_time": 0.024342775344848633, "step": 17799 }, { "epoch": 2.715911865234375e-05, "step": 17799, "training_step_time": 0.13154125213623047 }, { "epoch": 2.716064453125e-05, "grad_norm": 0.29433611035346985, "learning_rate": 3.879720456342521e-05, "loss": 0.0079, "step": 17800 }, { "epoch": 2.716064453125e-05, "model_forward_time": 0.023529767990112305, "step": 17800 }, { "epoch": 2.716064453125e-05, "step": 17800, "training_step_time": 0.16872954368591309 }, { "epoch": 2.716217041015625e-05, "model_forward_time": 0.024827003479003906, "step": 17801 }, { "epoch": 2.716217041015625e-05, "step": 17801, "training_step_time": 0.160963773727417 }, { "epoch": 2.71636962890625e-05, "model_forward_time": 0.02395153045654297, "step": 17802 }, { "epoch": 2.71636962890625e-05, "step": 17802, "training_step_time": 0.0999443531036377 }, { "epoch": 2.716522216796875e-05, "model_forward_time": 0.024326324462890625, "step": 17803 }, { "epoch": 2.716522216796875e-05, "step": 17803, "training_step_time": 0.10318398475646973 }, { "epoch": 2.7166748046875e-05, "model_forward_time": 0.025301218032836914, "step": 17804 }, { "epoch": 2.7166748046875e-05, "step": 17804, "training_step_time": 0.10341000556945801 }, { "epoch": 2.716827392578125e-05, "model_forward_time": 0.02529311180114746, "step": 17805 }, { "epoch": 2.716827392578125e-05, "step": 17805, "training_step_time": 0.10476231575012207 }, { "epoch": 2.71697998046875e-05, "model_forward_time": 0.025449514389038086, "step": 17806 }, { "epoch": 2.71697998046875e-05, "step": 17806, "training_step_time": 0.10488677024841309 }, { "epoch": 2.717132568359375e-05, "model_forward_time": 0.025334835052490234, "step": 17807 }, { "epoch": 2.717132568359375e-05, "step": 17807, "training_step_time": 0.1038503646850586 }, { "epoch": 2.71728515625e-05, "model_forward_time": 0.025269269943237305, "step": 17808 }, { "epoch": 2.71728515625e-05, "step": 17808, "training_step_time": 0.10480117797851562 }, { "epoch": 2.717437744140625e-05, "model_forward_time": 0.025156259536743164, "step": 17809 }, { "epoch": 2.717437744140625e-05, "step": 17809, "training_step_time": 0.10785365104675293 }, { "epoch": 2.71759033203125e-05, "grad_norm": 0.30822667479515076, "learning_rate": 3.87434969643778e-05, "loss": 0.0146, "step": 17810 }, { "epoch": 2.71759033203125e-05, "model_forward_time": 0.025263309478759766, "step": 17810 }, { "epoch": 2.71759033203125e-05, "step": 17810, "training_step_time": 0.10577630996704102 }, { "epoch": 2.717742919921875e-05, "model_forward_time": 0.025304317474365234, "step": 17811 }, { "epoch": 2.717742919921875e-05, "step": 17811, "training_step_time": 0.10428953170776367 }, { "epoch": 2.7178955078125e-05, "model_forward_time": 0.025110960006713867, "step": 17812 }, { "epoch": 2.7178955078125e-05, "step": 17812, "training_step_time": 0.10439157485961914 }, { "epoch": 2.718048095703125e-05, "model_forward_time": 0.02563309669494629, "step": 17813 }, { "epoch": 2.718048095703125e-05, "step": 17813, "training_step_time": 0.10702228546142578 }, { "epoch": 2.71820068359375e-05, "model_forward_time": 0.024785280227661133, "step": 17814 }, { "epoch": 2.71820068359375e-05, "step": 17814, "training_step_time": 0.10537457466125488 }, { "epoch": 2.718353271484375e-05, "model_forward_time": 0.025554418563842773, "step": 17815 }, { "epoch": 2.718353271484375e-05, "step": 17815, "training_step_time": 0.11116719245910645 }, { "epoch": 2.718505859375e-05, "model_forward_time": 0.025223970413208008, "step": 17816 }, { "epoch": 2.718505859375e-05, "step": 17816, "training_step_time": 0.10426211357116699 }, { "epoch": 2.718658447265625e-05, "model_forward_time": 0.024869441986083984, "step": 17817 }, { "epoch": 2.718658447265625e-05, "step": 17817, "training_step_time": 0.10783696174621582 }, { "epoch": 2.71881103515625e-05, "model_forward_time": 0.025443553924560547, "step": 17818 }, { "epoch": 2.71881103515625e-05, "step": 17818, "training_step_time": 0.11128664016723633 }, { "epoch": 2.718963623046875e-05, "model_forward_time": 0.023201704025268555, "step": 17819 }, { "epoch": 2.718963623046875e-05, "step": 17819, "training_step_time": 0.20087242126464844 }, { "epoch": 2.7191162109375e-05, "grad_norm": 0.2797226011753082, "learning_rate": 3.8689803043043e-05, "loss": 0.013, "step": 17820 }, { "epoch": 2.7191162109375e-05, "model_forward_time": 0.02434086799621582, "step": 17820 }, { "epoch": 2.7191162109375e-05, "step": 17820, "training_step_time": 0.11786365509033203 }, { "epoch": 2.719268798828125e-05, "model_forward_time": 0.025154829025268555, "step": 17821 }, { "epoch": 2.719268798828125e-05, "step": 17821, "training_step_time": 0.10412883758544922 }, { "epoch": 2.71942138671875e-05, "model_forward_time": 0.02414560317993164, "step": 17822 }, { "epoch": 2.71942138671875e-05, "step": 17822, "training_step_time": 0.14868927001953125 }, { "epoch": 2.719573974609375e-05, "model_forward_time": 0.025008201599121094, "step": 17823 }, { "epoch": 2.719573974609375e-05, "step": 17823, "training_step_time": 0.20080280303955078 }, { "epoch": 2.7197265625e-05, "model_forward_time": 0.024900436401367188, "step": 17824 }, { "epoch": 2.7197265625e-05, "step": 17824, "training_step_time": 0.17074918746948242 }, { "epoch": 2.719879150390625e-05, "model_forward_time": 0.024929285049438477, "step": 17825 }, { "epoch": 2.719879150390625e-05, "step": 17825, "training_step_time": 0.2047901153564453 }, { "epoch": 2.72003173828125e-05, "model_forward_time": 0.0244443416595459, "step": 17826 }, { "epoch": 2.72003173828125e-05, "step": 17826, "training_step_time": 0.10326552391052246 }, { "epoch": 2.720184326171875e-05, "model_forward_time": 0.024632930755615234, "step": 17827 }, { "epoch": 2.720184326171875e-05, "step": 17827, "training_step_time": 0.11418461799621582 }, { "epoch": 2.7203369140625e-05, "model_forward_time": 0.024976253509521484, "step": 17828 }, { "epoch": 2.7203369140625e-05, "step": 17828, "training_step_time": 0.12517619132995605 }, { "epoch": 2.720489501953125e-05, "model_forward_time": 0.025359392166137695, "step": 17829 }, { "epoch": 2.720489501953125e-05, "step": 17829, "training_step_time": 0.13460326194763184 }, { "epoch": 2.72064208984375e-05, "grad_norm": 0.160170778632164, "learning_rate": 3.863612286466396e-05, "loss": 0.0124, "step": 17830 }, { "epoch": 2.72064208984375e-05, "model_forward_time": 0.024671554565429688, "step": 17830 }, { "epoch": 2.72064208984375e-05, "step": 17830, "training_step_time": 0.11482381820678711 }, { "epoch": 2.720794677734375e-05, "model_forward_time": 0.025677204132080078, "step": 17831 }, { "epoch": 2.720794677734375e-05, "step": 17831, "training_step_time": 0.11292362213134766 }, { "epoch": 2.720947265625e-05, "model_forward_time": 0.025136947631835938, "step": 17832 }, { "epoch": 2.720947265625e-05, "step": 17832, "training_step_time": 0.15885281562805176 }, { "epoch": 2.721099853515625e-05, "model_forward_time": 0.024775028228759766, "step": 17833 }, { "epoch": 2.721099853515625e-05, "step": 17833, "training_step_time": 0.18265700340270996 }, { "epoch": 2.72125244140625e-05, "model_forward_time": 0.024469614028930664, "step": 17834 }, { "epoch": 2.72125244140625e-05, "step": 17834, "training_step_time": 0.16023707389831543 }, { "epoch": 2.721405029296875e-05, "model_forward_time": 0.024408340454101562, "step": 17835 }, { "epoch": 2.721405029296875e-05, "step": 17835, "training_step_time": 0.14282965660095215 }, { "epoch": 2.7215576171875e-05, "model_forward_time": 0.02514362335205078, "step": 17836 }, { "epoch": 2.7215576171875e-05, "step": 17836, "training_step_time": 0.13952112197875977 }, { "epoch": 2.721710205078125e-05, "model_forward_time": 0.024619102478027344, "step": 17837 }, { "epoch": 2.721710205078125e-05, "step": 17837, "training_step_time": 0.12865781784057617 }, { "epoch": 2.72186279296875e-05, "model_forward_time": 0.02448105812072754, "step": 17838 }, { "epoch": 2.72186279296875e-05, "step": 17838, "training_step_time": 0.12499737739562988 }, { "epoch": 2.722015380859375e-05, "model_forward_time": 0.02493143081665039, "step": 17839 }, { "epoch": 2.722015380859375e-05, "step": 17839, "training_step_time": 0.13923406600952148 }, { "epoch": 2.72216796875e-05, "grad_norm": 0.20724281668663025, "learning_rate": 3.858245649446721e-05, "loss": 0.0089, "step": 17840 }, { "epoch": 2.72216796875e-05, "model_forward_time": 0.02482128143310547, "step": 17840 }, { "epoch": 2.72216796875e-05, "step": 17840, "training_step_time": 0.13233351707458496 }, { "epoch": 2.722320556640625e-05, "model_forward_time": 0.0250244140625, "step": 17841 }, { "epoch": 2.722320556640625e-05, "step": 17841, "training_step_time": 0.11634016036987305 }, { "epoch": 2.72247314453125e-05, "model_forward_time": 0.02407383918762207, "step": 17842 }, { "epoch": 2.72247314453125e-05, "step": 17842, "training_step_time": 0.1212615966796875 }, { "epoch": 2.722625732421875e-05, "model_forward_time": 0.024248838424682617, "step": 17843 }, { "epoch": 2.722625732421875e-05, "step": 17843, "training_step_time": 0.1099236011505127 }, { "epoch": 2.7227783203125e-05, "model_forward_time": 0.029558897018432617, "step": 17844 }, { "epoch": 2.7227783203125e-05, "step": 17844, "training_step_time": 0.12109065055847168 }, { "epoch": 2.722930908203125e-05, "model_forward_time": 0.02521991729736328, "step": 17845 }, { "epoch": 2.722930908203125e-05, "step": 17845, "training_step_time": 0.18286466598510742 }, { "epoch": 2.72308349609375e-05, "model_forward_time": 0.0246889591217041, "step": 17846 }, { "epoch": 2.72308349609375e-05, "step": 17846, "training_step_time": 0.10530400276184082 }, { "epoch": 2.723236083984375e-05, "model_forward_time": 0.024507999420166016, "step": 17847 }, { "epoch": 2.723236083984375e-05, "step": 17847, "training_step_time": 0.10642695426940918 }, { "epoch": 2.723388671875e-05, "model_forward_time": 0.025201082229614258, "step": 17848 }, { "epoch": 2.723388671875e-05, "step": 17848, "training_step_time": 0.10555386543273926 }, { "epoch": 2.723541259765625e-05, "model_forward_time": 0.02525019645690918, "step": 17849 }, { "epoch": 2.723541259765625e-05, "step": 17849, "training_step_time": 0.11086893081665039 }, { "epoch": 2.72369384765625e-05, "grad_norm": 0.29175248742103577, "learning_rate": 3.852880399766243e-05, "loss": 0.0102, "step": 17850 }, { "epoch": 2.72369384765625e-05, "model_forward_time": 0.023540735244750977, "step": 17850 }, { "epoch": 2.72369384765625e-05, "step": 17850, "training_step_time": 0.1093754768371582 }, { "epoch": 2.723846435546875e-05, "model_forward_time": 0.02463531494140625, "step": 17851 }, { "epoch": 2.723846435546875e-05, "step": 17851, "training_step_time": 0.11007237434387207 }, { "epoch": 2.7239990234375e-05, "model_forward_time": 0.02586674690246582, "step": 17852 }, { "epoch": 2.7239990234375e-05, "step": 17852, "training_step_time": 0.10846757888793945 }, { "epoch": 2.724151611328125e-05, "model_forward_time": 0.0252835750579834, "step": 17853 }, { "epoch": 2.724151611328125e-05, "step": 17853, "training_step_time": 0.1046297550201416 }, { "epoch": 2.72430419921875e-05, "model_forward_time": 0.025349855422973633, "step": 17854 }, { "epoch": 2.72430419921875e-05, "step": 17854, "training_step_time": 0.10582780838012695 }, { "epoch": 2.724456787109375e-05, "model_forward_time": 0.02541637420654297, "step": 17855 }, { "epoch": 2.724456787109375e-05, "step": 17855, "training_step_time": 0.1049504280090332 }, { "epoch": 2.724609375e-05, "model_forward_time": 0.025146484375, "step": 17856 }, { "epoch": 2.724609375e-05, "step": 17856, "training_step_time": 0.1041109561920166 }, { "epoch": 2.724761962890625e-05, "model_forward_time": 0.025187015533447266, "step": 17857 }, { "epoch": 2.724761962890625e-05, "step": 17857, "training_step_time": 0.1051790714263916 }, { "epoch": 2.72491455078125e-05, "model_forward_time": 0.025150299072265625, "step": 17858 }, { "epoch": 2.72491455078125e-05, "step": 17858, "training_step_time": 0.10592508316040039 }, { "epoch": 2.725067138671875e-05, "model_forward_time": 0.026232481002807617, "step": 17859 }, { "epoch": 2.725067138671875e-05, "step": 17859, "training_step_time": 0.10644984245300293 }, { "epoch": 2.7252197265625e-05, "grad_norm": 0.2431711107492447, "learning_rate": 3.8475165439442446e-05, "loss": 0.0091, "step": 17860 }, { "epoch": 2.7252197265625e-05, "model_forward_time": 0.025551557540893555, "step": 17860 }, { "epoch": 2.7252197265625e-05, "step": 17860, "training_step_time": 0.10732293128967285 }, { "epoch": 2.725372314453125e-05, "model_forward_time": 0.026672840118408203, "step": 17861 }, { "epoch": 2.725372314453125e-05, "step": 17861, "training_step_time": 0.10648775100708008 }, { "epoch": 2.72552490234375e-05, "model_forward_time": 0.02519536018371582, "step": 17862 }, { "epoch": 2.72552490234375e-05, "step": 17862, "training_step_time": 0.10601353645324707 }, { "epoch": 2.725677490234375e-05, "model_forward_time": 0.02534031867980957, "step": 17863 }, { "epoch": 2.725677490234375e-05, "step": 17863, "training_step_time": 0.18191146850585938 }, { "epoch": 2.725830078125e-05, "model_forward_time": 0.024652481079101562, "step": 17864 }, { "epoch": 2.725830078125e-05, "step": 17864, "training_step_time": 0.12701034545898438 }, { "epoch": 2.725982666015625e-05, "model_forward_time": 0.02532815933227539, "step": 17865 }, { "epoch": 2.725982666015625e-05, "step": 17865, "training_step_time": 0.10142159461975098 }, { "epoch": 2.72613525390625e-05, "model_forward_time": 0.024637937545776367, "step": 17866 }, { "epoch": 2.72613525390625e-05, "step": 17866, "training_step_time": 0.1498427391052246 }, { "epoch": 2.726287841796875e-05, "model_forward_time": 0.02797865867614746, "step": 17867 }, { "epoch": 2.726287841796875e-05, "step": 17867, "training_step_time": 0.19466686248779297 }, { "epoch": 2.7264404296875e-05, "model_forward_time": 0.024108171463012695, "step": 17868 }, { "epoch": 2.7264404296875e-05, "step": 17868, "training_step_time": 0.21257948875427246 }, { "epoch": 2.726593017578125e-05, "model_forward_time": 0.024782657623291016, "step": 17869 }, { "epoch": 2.726593017578125e-05, "step": 17869, "training_step_time": 0.16902542114257812 }, { "epoch": 2.72674560546875e-05, "grad_norm": 0.6194291710853577, "learning_rate": 3.842154088498316e-05, "loss": 0.0118, "step": 17870 }, { "epoch": 2.72674560546875e-05, "model_forward_time": 0.024775028228759766, "step": 17870 }, { "epoch": 2.72674560546875e-05, "step": 17870, "training_step_time": 0.11042070388793945 }, { "epoch": 2.726898193359375e-05, "model_forward_time": 0.024674654006958008, "step": 17871 }, { "epoch": 2.726898193359375e-05, "step": 17871, "training_step_time": 0.1410846710205078 }, { "epoch": 2.72705078125e-05, "model_forward_time": 0.024624347686767578, "step": 17872 }, { "epoch": 2.72705078125e-05, "step": 17872, "training_step_time": 0.10656380653381348 }, { "epoch": 2.727203369140625e-05, "model_forward_time": 0.02496647834777832, "step": 17873 }, { "epoch": 2.727203369140625e-05, "step": 17873, "training_step_time": 0.12030458450317383 }, { "epoch": 2.72735595703125e-05, "model_forward_time": 0.02531147003173828, "step": 17874 }, { "epoch": 2.72735595703125e-05, "step": 17874, "training_step_time": 0.1288294792175293 }, { "epoch": 2.727508544921875e-05, "model_forward_time": 0.02532505989074707, "step": 17875 }, { "epoch": 2.727508544921875e-05, "step": 17875, "training_step_time": 0.11631083488464355 }, { "epoch": 2.7276611328125e-05, "model_forward_time": 0.025744915008544922, "step": 17876 }, { "epoch": 2.7276611328125e-05, "step": 17876, "training_step_time": 0.11756610870361328 }, { "epoch": 2.727813720703125e-05, "model_forward_time": 0.02522730827331543, "step": 17877 }, { "epoch": 2.727813720703125e-05, "step": 17877, "training_step_time": 0.10687875747680664 }, { "epoch": 2.72796630859375e-05, "model_forward_time": 0.025696277618408203, "step": 17878 }, { "epoch": 2.72796630859375e-05, "step": 17878, "training_step_time": 0.10690808296203613 }, { "epoch": 2.728118896484375e-05, "model_forward_time": 0.025755882263183594, "step": 17879 }, { "epoch": 2.728118896484375e-05, "step": 17879, "training_step_time": 0.10820436477661133 }, { "epoch": 2.728271484375e-05, "grad_norm": 0.22942398488521576, "learning_rate": 3.836793039944349e-05, "loss": 0.0097, "step": 17880 }, { "epoch": 2.728271484375e-05, "model_forward_time": 0.02470231056213379, "step": 17880 }, { "epoch": 2.728271484375e-05, "step": 17880, "training_step_time": 0.10743188858032227 }, { "epoch": 2.728424072265625e-05, "model_forward_time": 0.02498030662536621, "step": 17881 }, { "epoch": 2.728424072265625e-05, "step": 17881, "training_step_time": 0.10877490043640137 }, { "epoch": 2.72857666015625e-05, "model_forward_time": 0.025319814682006836, "step": 17882 }, { "epoch": 2.72857666015625e-05, "step": 17882, "training_step_time": 0.10659074783325195 }, { "epoch": 2.728729248046875e-05, "model_forward_time": 0.025788307189941406, "step": 17883 }, { "epoch": 2.728729248046875e-05, "step": 17883, "training_step_time": 0.10712337493896484 }, { "epoch": 2.7288818359375e-05, "model_forward_time": 0.0251615047454834, "step": 17884 }, { "epoch": 2.7288818359375e-05, "step": 17884, "training_step_time": 0.10642099380493164 }, { "epoch": 2.729034423828125e-05, "model_forward_time": 0.025527000427246094, "step": 17885 }, { "epoch": 2.729034423828125e-05, "step": 17885, "training_step_time": 0.1554703712463379 }, { "epoch": 2.72918701171875e-05, "model_forward_time": 0.02492237091064453, "step": 17886 }, { "epoch": 2.72918701171875e-05, "step": 17886, "training_step_time": 0.1442704200744629 }, { "epoch": 2.729339599609375e-05, "model_forward_time": 0.024675607681274414, "step": 17887 }, { "epoch": 2.729339599609375e-05, "step": 17887, "training_step_time": 0.10928940773010254 }, { "epoch": 2.7294921875e-05, "model_forward_time": 0.024618864059448242, "step": 17888 }, { "epoch": 2.7294921875e-05, "step": 17888, "training_step_time": 0.11086416244506836 }, { "epoch": 2.729644775390625e-05, "model_forward_time": 0.02772665023803711, "step": 17889 }, { "epoch": 2.729644775390625e-05, "step": 17889, "training_step_time": 0.10779953002929688 }, { "epoch": 2.72979736328125e-05, "grad_norm": 0.21543103456497192, "learning_rate": 3.831433404796521e-05, "loss": 0.0108, "step": 17890 }, { "epoch": 2.72979736328125e-05, "model_forward_time": 0.02519989013671875, "step": 17890 }, { "epoch": 2.72979736328125e-05, "step": 17890, "training_step_time": 0.10872244834899902 }, { "epoch": 2.729949951171875e-05, "model_forward_time": 0.02528548240661621, "step": 17891 }, { "epoch": 2.729949951171875e-05, "step": 17891, "training_step_time": 0.10813093185424805 }, { "epoch": 2.7301025390625e-05, "model_forward_time": 0.025361299514770508, "step": 17892 }, { "epoch": 2.7301025390625e-05, "step": 17892, "training_step_time": 0.11327600479125977 }, { "epoch": 2.730255126953125e-05, "model_forward_time": 0.025127887725830078, "step": 17893 }, { "epoch": 2.730255126953125e-05, "step": 17893, "training_step_time": 0.10529470443725586 }, { "epoch": 2.73040771484375e-05, "model_forward_time": 0.025068998336791992, "step": 17894 }, { "epoch": 2.73040771484375e-05, "step": 17894, "training_step_time": 0.1077580451965332 }, { "epoch": 2.730560302734375e-05, "model_forward_time": 0.025731801986694336, "step": 17895 }, { "epoch": 2.730560302734375e-05, "step": 17895, "training_step_time": 0.10904932022094727 }, { "epoch": 2.730712890625e-05, "model_forward_time": 0.025609970092773438, "step": 17896 }, { "epoch": 2.730712890625e-05, "step": 17896, "training_step_time": 0.10838103294372559 }, { "epoch": 2.730865478515625e-05, "model_forward_time": 0.02516007423400879, "step": 17897 }, { "epoch": 2.730865478515625e-05, "step": 17897, "training_step_time": 0.1088261604309082 }, { "epoch": 2.73101806640625e-05, "model_forward_time": 0.025600194931030273, "step": 17898 }, { "epoch": 2.73101806640625e-05, "step": 17898, "training_step_time": 0.10634136199951172 }, { "epoch": 2.731170654296875e-05, "model_forward_time": 0.025068044662475586, "step": 17899 }, { "epoch": 2.731170654296875e-05, "step": 17899, "training_step_time": 0.1076054573059082 }, { "epoch": 2.7313232421875e-05, "grad_norm": 0.270064115524292, "learning_rate": 3.826075189567296e-05, "loss": 0.0085, "step": 17900 }, { "epoch": 2.7313232421875e-05, "model_forward_time": 0.02731156349182129, "step": 17900 }, { "epoch": 2.7313232421875e-05, "step": 17900, "training_step_time": 0.10802221298217773 }, { "epoch": 2.731475830078125e-05, "model_forward_time": 0.02545309066772461, "step": 17901 }, { "epoch": 2.731475830078125e-05, "step": 17901, "training_step_time": 0.10477757453918457 }, { "epoch": 2.73162841796875e-05, "model_forward_time": 0.025415897369384766, "step": 17902 }, { "epoch": 2.73162841796875e-05, "step": 17902, "training_step_time": 0.1063234806060791 }, { "epoch": 2.731781005859375e-05, "model_forward_time": 0.024984121322631836, "step": 17903 }, { "epoch": 2.731781005859375e-05, "step": 17903, "training_step_time": 0.1046607494354248 }, { "epoch": 2.73193359375e-05, "model_forward_time": 0.025334596633911133, "step": 17904 }, { "epoch": 2.73193359375e-05, "step": 17904, "training_step_time": 0.10480475425720215 }, { "epoch": 2.732086181640625e-05, "model_forward_time": 0.025446176528930664, "step": 17905 }, { "epoch": 2.732086181640625e-05, "step": 17905, "training_step_time": 0.10482096672058105 }, { "epoch": 2.73223876953125e-05, "model_forward_time": 0.02720952033996582, "step": 17906 }, { "epoch": 2.73223876953125e-05, "step": 17906, "training_step_time": 0.10787391662597656 }, { "epoch": 2.732391357421875e-05, "model_forward_time": 0.025163650512695312, "step": 17907 }, { "epoch": 2.732391357421875e-05, "step": 17907, "training_step_time": 0.10352063179016113 }, { "epoch": 2.7325439453125e-05, "model_forward_time": 0.02638530731201172, "step": 17908 }, { "epoch": 2.7325439453125e-05, "step": 17908, "training_step_time": 0.10435891151428223 }, { "epoch": 2.732696533203125e-05, "model_forward_time": 0.025278329849243164, "step": 17909 }, { "epoch": 2.732696533203125e-05, "step": 17909, "training_step_time": 0.10368895530700684 }, { "epoch": 2.73284912109375e-05, "grad_norm": 0.5367772579193115, "learning_rate": 3.820718400767409e-05, "loss": 0.0128, "step": 17910 }, { "epoch": 2.73284912109375e-05, "model_forward_time": 0.02409958839416504, "step": 17910 }, { "epoch": 2.73284912109375e-05, "step": 17910, "training_step_time": 0.14976286888122559 }, { "epoch": 2.733001708984375e-05, "model_forward_time": 0.025003910064697266, "step": 17911 }, { "epoch": 2.733001708984375e-05, "step": 17911, "training_step_time": 0.11549615859985352 }, { "epoch": 2.733154296875e-05, "model_forward_time": 0.025644302368164062, "step": 17912 }, { "epoch": 2.733154296875e-05, "step": 17912, "training_step_time": 0.12738299369812012 }, { "epoch": 2.733306884765625e-05, "model_forward_time": 0.025639057159423828, "step": 17913 }, { "epoch": 2.733306884765625e-05, "step": 17913, "training_step_time": 0.1114192008972168 }, { "epoch": 2.73345947265625e-05, "model_forward_time": 0.0251767635345459, "step": 17914 }, { "epoch": 2.73345947265625e-05, "step": 17914, "training_step_time": 0.17258620262145996 }, { "epoch": 2.733612060546875e-05, "model_forward_time": 0.024928569793701172, "step": 17915 }, { "epoch": 2.733612060546875e-05, "step": 17915, "training_step_time": 0.1775527000427246 }, { "epoch": 2.7337646484375e-05, "model_forward_time": 0.02510976791381836, "step": 17916 }, { "epoch": 2.7337646484375e-05, "step": 17916, "training_step_time": 0.11106157302856445 }, { "epoch": 2.733917236328125e-05, "model_forward_time": 0.02491140365600586, "step": 17917 }, { "epoch": 2.733917236328125e-05, "step": 17917, "training_step_time": 0.11459517478942871 }, { "epoch": 2.73406982421875e-05, "model_forward_time": 0.025683879852294922, "step": 17918 }, { "epoch": 2.73406982421875e-05, "step": 17918, "training_step_time": 0.14354419708251953 }, { "epoch": 2.734222412109375e-05, "model_forward_time": 0.02560591697692871, "step": 17919 }, { "epoch": 2.734222412109375e-05, "step": 17919, "training_step_time": 0.1070866584777832 }, { "epoch": 2.734375e-05, "grad_norm": 0.33692288398742676, "learning_rate": 3.8153630449058646e-05, "loss": 0.009, "step": 17920 }, { "epoch": 2.734375e-05, "model_forward_time": 0.026272296905517578, "step": 17920 }, { "epoch": 2.734375e-05, "step": 17920, "training_step_time": 0.12343764305114746 }, { "epoch": 2.734527587890625e-05, "model_forward_time": 0.02541065216064453, "step": 17921 }, { "epoch": 2.734527587890625e-05, "step": 17921, "training_step_time": 0.1226193904876709 }, { "epoch": 2.73468017578125e-05, "model_forward_time": 0.025192975997924805, "step": 17922 }, { "epoch": 2.73468017578125e-05, "step": 17922, "training_step_time": 0.13296270370483398 }, { "epoch": 2.734832763671875e-05, "model_forward_time": 0.02533698081970215, "step": 17923 }, { "epoch": 2.734832763671875e-05, "step": 17923, "training_step_time": 0.12338542938232422 }, { "epoch": 2.7349853515625e-05, "model_forward_time": 0.024743080139160156, "step": 17924 }, { "epoch": 2.7349853515625e-05, "step": 17924, "training_step_time": 0.1304161548614502 }, { "epoch": 2.735137939453125e-05, "model_forward_time": 0.024888038635253906, "step": 17925 }, { "epoch": 2.735137939453125e-05, "step": 17925, "training_step_time": 0.10959291458129883 }, { "epoch": 2.73529052734375e-05, "model_forward_time": 0.025396347045898438, "step": 17926 }, { "epoch": 2.73529052734375e-05, "step": 17926, "training_step_time": 0.10519862174987793 }, { "epoch": 2.735443115234375e-05, "model_forward_time": 0.02593398094177246, "step": 17927 }, { "epoch": 2.735443115234375e-05, "step": 17927, "training_step_time": 0.10903263092041016 }, { "epoch": 2.735595703125e-05, "model_forward_time": 0.025213956832885742, "step": 17928 }, { "epoch": 2.735595703125e-05, "step": 17928, "training_step_time": 0.11295914649963379 }, { "epoch": 2.735748291015625e-05, "model_forward_time": 0.0255584716796875, "step": 17929 }, { "epoch": 2.735748291015625e-05, "step": 17929, "training_step_time": 0.1051168441772461 }, { "epoch": 2.73590087890625e-05, "grad_norm": 0.14932318031787872, "learning_rate": 3.810009128489925e-05, "loss": 0.0095, "step": 17930 }, { "epoch": 2.73590087890625e-05, "model_forward_time": 0.025115966796875, "step": 17930 }, { "epoch": 2.73590087890625e-05, "step": 17930, "training_step_time": 0.10474348068237305 }, { "epoch": 2.736053466796875e-05, "model_forward_time": 0.02556324005126953, "step": 17931 }, { "epoch": 2.736053466796875e-05, "step": 17931, "training_step_time": 0.10521578788757324 }, { "epoch": 2.7362060546875e-05, "model_forward_time": 0.025293827056884766, "step": 17932 }, { "epoch": 2.7362060546875e-05, "step": 17932, "training_step_time": 0.1053309440612793 }, { "epoch": 2.736358642578125e-05, "model_forward_time": 0.025243282318115234, "step": 17933 }, { "epoch": 2.736358642578125e-05, "step": 17933, "training_step_time": 0.1839134693145752 }, { "epoch": 2.73651123046875e-05, "model_forward_time": 0.024533987045288086, "step": 17934 }, { "epoch": 2.73651123046875e-05, "step": 17934, "training_step_time": 0.14489030838012695 }, { "epoch": 2.736663818359375e-05, "model_forward_time": 0.02425980567932129, "step": 17935 }, { "epoch": 2.736663818359375e-05, "step": 17935, "training_step_time": 0.1082301139831543 }, { "epoch": 2.73681640625e-05, "model_forward_time": 0.02870798110961914, "step": 17936 }, { "epoch": 2.73681640625e-05, "step": 17936, "training_step_time": 0.11541581153869629 }, { "epoch": 2.736968994140625e-05, "model_forward_time": 0.025124073028564453, "step": 17937 }, { "epoch": 2.736968994140625e-05, "step": 17937, "training_step_time": 0.10977864265441895 }, { "epoch": 2.73712158203125e-05, "model_forward_time": 0.025575876235961914, "step": 17938 }, { "epoch": 2.73712158203125e-05, "step": 17938, "training_step_time": 0.1134188175201416 }, { "epoch": 2.737274169921875e-05, "model_forward_time": 0.025717973709106445, "step": 17939 }, { "epoch": 2.737274169921875e-05, "step": 17939, "training_step_time": 0.13526415824890137 }, { "epoch": 2.7374267578125e-05, "grad_norm": 0.3307208716869354, "learning_rate": 3.8046566580251e-05, "loss": 0.0085, "step": 17940 }, { "epoch": 2.7374267578125e-05, "model_forward_time": 0.025039196014404297, "step": 17940 }, { "epoch": 2.7374267578125e-05, "step": 17940, "training_step_time": 0.1084134578704834 }, { "epoch": 2.737579345703125e-05, "model_forward_time": 0.02523946762084961, "step": 17941 }, { "epoch": 2.737579345703125e-05, "step": 17941, "training_step_time": 0.10616540908813477 }, { "epoch": 2.73773193359375e-05, "model_forward_time": 0.0255124568939209, "step": 17942 }, { "epoch": 2.73773193359375e-05, "step": 17942, "training_step_time": 0.10609555244445801 }, { "epoch": 2.737884521484375e-05, "model_forward_time": 0.025499343872070312, "step": 17943 }, { "epoch": 2.737884521484375e-05, "step": 17943, "training_step_time": 0.10842013359069824 }, { "epoch": 2.738037109375e-05, "model_forward_time": 0.02535271644592285, "step": 17944 }, { "epoch": 2.738037109375e-05, "step": 17944, "training_step_time": 0.1105194091796875 }, { "epoch": 2.738189697265625e-05, "model_forward_time": 0.025133609771728516, "step": 17945 }, { "epoch": 2.738189697265625e-05, "step": 17945, "training_step_time": 0.10741400718688965 }, { "epoch": 2.73834228515625e-05, "model_forward_time": 0.024821996688842773, "step": 17946 }, { "epoch": 2.73834228515625e-05, "step": 17946, "training_step_time": 0.1054847240447998 }, { "epoch": 2.738494873046875e-05, "model_forward_time": 0.025104999542236328, "step": 17947 }, { "epoch": 2.738494873046875e-05, "step": 17947, "training_step_time": 0.10679817199707031 }, { "epoch": 2.7386474609375e-05, "model_forward_time": 0.02493739128112793, "step": 17948 }, { "epoch": 2.7386474609375e-05, "step": 17948, "training_step_time": 0.10469698905944824 }, { "epoch": 2.738800048828125e-05, "model_forward_time": 0.02559208869934082, "step": 17949 }, { "epoch": 2.738800048828125e-05, "step": 17949, "training_step_time": 0.10902833938598633 }, { "epoch": 2.73895263671875e-05, "grad_norm": 0.2965926229953766, "learning_rate": 3.799305640015152e-05, "loss": 0.0154, "step": 17950 }, { "epoch": 2.73895263671875e-05, "model_forward_time": 0.025310754776000977, "step": 17950 }, { "epoch": 2.73895263671875e-05, "step": 17950, "training_step_time": 0.11017346382141113 }, { "epoch": 2.739105224609375e-05, "model_forward_time": 0.025224685668945312, "step": 17951 }, { "epoch": 2.739105224609375e-05, "step": 17951, "training_step_time": 0.10585355758666992 }, { "epoch": 2.7392578125e-05, "model_forward_time": 0.025029659271240234, "step": 17952 }, { "epoch": 2.7392578125e-05, "step": 17952, "training_step_time": 0.10971832275390625 }, { "epoch": 2.739410400390625e-05, "model_forward_time": 0.025522947311401367, "step": 17953 }, { "epoch": 2.739410400390625e-05, "step": 17953, "training_step_time": 0.11266779899597168 }, { "epoch": 2.73956298828125e-05, "model_forward_time": 0.025726795196533203, "step": 17954 }, { "epoch": 2.73956298828125e-05, "step": 17954, "training_step_time": 0.10637211799621582 }, { "epoch": 2.739715576171875e-05, "model_forward_time": 0.02517223358154297, "step": 17955 }, { "epoch": 2.739715576171875e-05, "step": 17955, "training_step_time": 0.10816764831542969 }, { "epoch": 2.7398681640625e-05, "model_forward_time": 0.025997400283813477, "step": 17956 }, { "epoch": 2.7398681640625e-05, "step": 17956, "training_step_time": 0.1063072681427002 }, { "epoch": 2.740020751953125e-05, "model_forward_time": 0.02524256706237793, "step": 17957 }, { "epoch": 2.740020751953125e-05, "step": 17957, "training_step_time": 0.1907942295074463 }, { "epoch": 2.74017333984375e-05, "model_forward_time": 0.02437615394592285, "step": 17958 }, { "epoch": 2.74017333984375e-05, "step": 17958, "training_step_time": 0.11754894256591797 }, { "epoch": 2.740325927734375e-05, "model_forward_time": 0.024527549743652344, "step": 17959 }, { "epoch": 2.740325927734375e-05, "step": 17959, "training_step_time": 0.12745976448059082 }, { "epoch": 2.740478515625e-05, "grad_norm": 0.2102188616991043, "learning_rate": 3.793956080962068e-05, "loss": 0.0122, "step": 17960 }, { "epoch": 2.740478515625e-05, "model_forward_time": 0.025849580764770508, "step": 17960 }, { "epoch": 2.740478515625e-05, "step": 17960, "training_step_time": 0.10631322860717773 }, { "epoch": 2.740631103515625e-05, "model_forward_time": 0.024773836135864258, "step": 17961 }, { "epoch": 2.740631103515625e-05, "step": 17961, "training_step_time": 0.2002706527709961 }, { "epoch": 2.74078369140625e-05, "model_forward_time": 0.024735450744628906, "step": 17962 }, { "epoch": 2.74078369140625e-05, "step": 17962, "training_step_time": 0.1320209503173828 }, { "epoch": 2.740936279296875e-05, "model_forward_time": 0.024775266647338867, "step": 17963 }, { "epoch": 2.740936279296875e-05, "step": 17963, "training_step_time": 0.11639237403869629 }, { "epoch": 2.7410888671875e-05, "model_forward_time": 0.023922443389892578, "step": 17964 }, { "epoch": 2.7410888671875e-05, "step": 17964, "training_step_time": 0.1317157745361328 }, { "epoch": 2.741241455078125e-05, "model_forward_time": 0.024988412857055664, "step": 17965 }, { "epoch": 2.741241455078125e-05, "step": 17965, "training_step_time": 0.10732078552246094 }, { "epoch": 2.74139404296875e-05, "model_forward_time": 0.025366783142089844, "step": 17966 }, { "epoch": 2.74139404296875e-05, "step": 17966, "training_step_time": 0.12041521072387695 }, { "epoch": 2.741546630859375e-05, "model_forward_time": 0.0282289981842041, "step": 17967 }, { "epoch": 2.741546630859375e-05, "step": 17967, "training_step_time": 0.11133360862731934 }, { "epoch": 2.74169921875e-05, "model_forward_time": 0.02516341209411621, "step": 17968 }, { "epoch": 2.74169921875e-05, "step": 17968, "training_step_time": 0.10429811477661133 }, { "epoch": 2.741851806640625e-05, "model_forward_time": 0.025118112564086914, "step": 17969 }, { "epoch": 2.741851806640625e-05, "step": 17969, "training_step_time": 0.1212007999420166 }, { "epoch": 2.74200439453125e-05, "grad_norm": 0.18284453451633453, "learning_rate": 3.788607987366069e-05, "loss": 0.0076, "step": 17970 }, { "epoch": 2.74200439453125e-05, "model_forward_time": 0.025450944900512695, "step": 17970 }, { "epoch": 2.74200439453125e-05, "step": 17970, "training_step_time": 0.12494087219238281 }, { "epoch": 2.742156982421875e-05, "model_forward_time": 0.025272130966186523, "step": 17971 }, { "epoch": 2.742156982421875e-05, "step": 17971, "training_step_time": 0.11694717407226562 }, { "epoch": 2.7423095703125e-05, "model_forward_time": 0.025257349014282227, "step": 17972 }, { "epoch": 2.7423095703125e-05, "step": 17972, "training_step_time": 0.11249542236328125 }, { "epoch": 2.742462158203125e-05, "model_forward_time": 0.02526712417602539, "step": 17973 }, { "epoch": 2.742462158203125e-05, "step": 17973, "training_step_time": 0.10704183578491211 }, { "epoch": 2.74261474609375e-05, "model_forward_time": 0.025203704833984375, "step": 17974 }, { "epoch": 2.74261474609375e-05, "step": 17974, "training_step_time": 0.11341476440429688 }, { "epoch": 2.742767333984375e-05, "model_forward_time": 0.028165340423583984, "step": 17975 }, { "epoch": 2.742767333984375e-05, "step": 17975, "training_step_time": 0.11616992950439453 }, { "epoch": 2.742919921875e-05, "model_forward_time": 0.025283336639404297, "step": 17976 }, { "epoch": 2.742919921875e-05, "step": 17976, "training_step_time": 0.1097259521484375 }, { "epoch": 2.743072509765625e-05, "model_forward_time": 0.02525615692138672, "step": 17977 }, { "epoch": 2.743072509765625e-05, "step": 17977, "training_step_time": 0.11013531684875488 }, { "epoch": 2.74322509765625e-05, "model_forward_time": 0.024924278259277344, "step": 17978 }, { "epoch": 2.74322509765625e-05, "step": 17978, "training_step_time": 0.11000370979309082 }, { "epoch": 2.743377685546875e-05, "model_forward_time": 0.024070262908935547, "step": 17979 }, { "epoch": 2.743377685546875e-05, "step": 17979, "training_step_time": 0.10774922370910645 }, { "epoch": 2.7435302734375e-05, "grad_norm": 0.27353590726852417, "learning_rate": 3.783261365725592e-05, "loss": 0.0167, "step": 17980 }, { "epoch": 2.7435302734375e-05, "model_forward_time": 0.025155305862426758, "step": 17980 }, { "epoch": 2.7435302734375e-05, "step": 17980, "training_step_time": 0.11198043823242188 }, { "epoch": 2.743682861328125e-05, "model_forward_time": 0.024308204650878906, "step": 17981 }, { "epoch": 2.743682861328125e-05, "step": 17981, "training_step_time": 0.15448594093322754 }, { "epoch": 2.74383544921875e-05, "model_forward_time": 0.0247194766998291, "step": 17982 }, { "epoch": 2.74383544921875e-05, "step": 17982, "training_step_time": 0.11015748977661133 }, { "epoch": 2.743988037109375e-05, "model_forward_time": 0.02830982208251953, "step": 17983 }, { "epoch": 2.743988037109375e-05, "step": 17983, "training_step_time": 0.11189126968383789 }, { "epoch": 2.744140625e-05, "model_forward_time": 0.025470495223999023, "step": 17984 }, { "epoch": 2.744140625e-05, "step": 17984, "training_step_time": 0.1059103012084961 }, { "epoch": 2.744293212890625e-05, "model_forward_time": 0.02527475357055664, "step": 17985 }, { "epoch": 2.744293212890625e-05, "step": 17985, "training_step_time": 0.11489653587341309 }, { "epoch": 2.74444580078125e-05, "model_forward_time": 0.025204181671142578, "step": 17986 }, { "epoch": 2.74444580078125e-05, "step": 17986, "training_step_time": 0.22503018379211426 }, { "epoch": 2.744598388671875e-05, "model_forward_time": 0.023204565048217773, "step": 17987 }, { "epoch": 2.744598388671875e-05, "step": 17987, "training_step_time": 0.11169886589050293 }, { "epoch": 2.7447509765625e-05, "model_forward_time": 0.024588346481323242, "step": 17988 }, { "epoch": 2.7447509765625e-05, "step": 17988, "training_step_time": 0.10314106941223145 }, { "epoch": 2.744903564453125e-05, "model_forward_time": 0.02552056312561035, "step": 17989 }, { "epoch": 2.744903564453125e-05, "step": 17989, "training_step_time": 0.10691571235656738 }, { "epoch": 2.74505615234375e-05, "grad_norm": 0.4213157892227173, "learning_rate": 3.777916222537285e-05, "loss": 0.0076, "step": 17990 }, { "epoch": 2.74505615234375e-05, "model_forward_time": 0.023868560791015625, "step": 17990 }, { "epoch": 2.74505615234375e-05, "step": 17990, "training_step_time": 0.1145319938659668 }, { "epoch": 2.745208740234375e-05, "model_forward_time": 0.02421736717224121, "step": 17991 }, { "epoch": 2.745208740234375e-05, "step": 17991, "training_step_time": 0.11203432083129883 }, { "epoch": 2.745361328125e-05, "model_forward_time": 0.02419567108154297, "step": 17992 }, { "epoch": 2.745361328125e-05, "step": 17992, "training_step_time": 0.10560011863708496 }, { "epoch": 2.745513916015625e-05, "model_forward_time": 0.0252225399017334, "step": 17993 }, { "epoch": 2.745513916015625e-05, "step": 17993, "training_step_time": 0.11844587326049805 }, { "epoch": 2.74566650390625e-05, "model_forward_time": 0.02525806427001953, "step": 17994 }, { "epoch": 2.74566650390625e-05, "step": 17994, "training_step_time": 0.10957527160644531 }, { "epoch": 2.745819091796875e-05, "model_forward_time": 0.025019168853759766, "step": 17995 }, { "epoch": 2.745819091796875e-05, "step": 17995, "training_step_time": 0.10733246803283691 }, { "epoch": 2.7459716796875e-05, "model_forward_time": 0.02508258819580078, "step": 17996 }, { "epoch": 2.7459716796875e-05, "step": 17996, "training_step_time": 0.11580681800842285 }, { "epoch": 2.746124267578125e-05, "model_forward_time": 0.025430917739868164, "step": 17997 }, { "epoch": 2.746124267578125e-05, "step": 17997, "training_step_time": 0.1108696460723877 }, { "epoch": 2.74627685546875e-05, "model_forward_time": 0.025517940521240234, "step": 17998 }, { "epoch": 2.74627685546875e-05, "step": 17998, "training_step_time": 0.10732054710388184 }, { "epoch": 2.746429443359375e-05, "model_forward_time": 0.025665998458862305, "step": 17999 }, { "epoch": 2.746429443359375e-05, "step": 17999, "training_step_time": 0.1076362133026123 }, { "epoch": 2.74658203125e-05, "grad_norm": 0.18084366619586945, "learning_rate": 3.772572564296005e-05, "loss": 0.014, "step": 18000 }, { "epoch": 2.74658203125e-05, "model_forward_time": 0.025009632110595703, "step": 18000 }, { "epoch": 2.74658203125e-05, "step": 18000, "training_step_time": 0.10673260688781738 }, { "epoch": 2.746734619140625e-05, "model_forward_time": 0.024454832077026367, "step": 18001 }, { "epoch": 2.746734619140625e-05, "step": 18001, "training_step_time": 0.1043856143951416 }, { "epoch": 2.74688720703125e-05, "model_forward_time": 0.02487635612487793, "step": 18002 }, { "epoch": 2.74688720703125e-05, "step": 18002, "training_step_time": 0.10207295417785645 }, { "epoch": 2.747039794921875e-05, "model_forward_time": 0.025430679321289062, "step": 18003 }, { "epoch": 2.747039794921875e-05, "step": 18003, "training_step_time": 0.10685372352600098 }, { "epoch": 2.7471923828125e-05, "model_forward_time": 0.02514052391052246, "step": 18004 }, { "epoch": 2.7471923828125e-05, "step": 18004, "training_step_time": 0.10427451133728027 }, { "epoch": 2.747344970703125e-05, "model_forward_time": 0.024594545364379883, "step": 18005 }, { "epoch": 2.747344970703125e-05, "step": 18005, "training_step_time": 0.10706639289855957 }, { "epoch": 2.74749755859375e-05, "model_forward_time": 0.025272130966186523, "step": 18006 }, { "epoch": 2.74749755859375e-05, "step": 18006, "training_step_time": 0.10577702522277832 }, { "epoch": 2.747650146484375e-05, "model_forward_time": 0.025304317474365234, "step": 18007 }, { "epoch": 2.747650146484375e-05, "step": 18007, "training_step_time": 0.1071314811706543 }, { "epoch": 2.747802734375e-05, "model_forward_time": 0.02533888816833496, "step": 18008 }, { "epoch": 2.747802734375e-05, "step": 18008, "training_step_time": 0.10979104042053223 }, { "epoch": 2.747955322265625e-05, "model_forward_time": 0.0255126953125, "step": 18009 }, { "epoch": 2.747955322265625e-05, "step": 18009, "training_step_time": 0.10956525802612305 }, { "epoch": 2.74810791015625e-05, "grad_norm": 0.2783471345901489, "learning_rate": 3.767230397494798e-05, "loss": 0.0188, "step": 18010 }, { "epoch": 2.74810791015625e-05, "model_forward_time": 0.02611517906188965, "step": 18010 }, { "epoch": 2.74810791015625e-05, "step": 18010, "training_step_time": 0.10888242721557617 }, { "epoch": 2.748260498046875e-05, "model_forward_time": 0.025205612182617188, "step": 18011 }, { "epoch": 2.748260498046875e-05, "step": 18011, "training_step_time": 0.1067039966583252 }, { "epoch": 2.7484130859375e-05, "model_forward_time": 0.025316715240478516, "step": 18012 }, { "epoch": 2.7484130859375e-05, "step": 18012, "training_step_time": 0.10681939125061035 }, { "epoch": 2.748565673828125e-05, "model_forward_time": 0.025093793869018555, "step": 18013 }, { "epoch": 2.748565673828125e-05, "step": 18013, "training_step_time": 0.10825729370117188 }, { "epoch": 2.74871826171875e-05, "model_forward_time": 0.025198936462402344, "step": 18014 }, { "epoch": 2.74871826171875e-05, "step": 18014, "training_step_time": 0.10617876052856445 }, { "epoch": 2.748870849609375e-05, "model_forward_time": 0.025570392608642578, "step": 18015 }, { "epoch": 2.748870849609375e-05, "step": 18015, "training_step_time": 0.1208188533782959 }, { "epoch": 2.7490234375e-05, "model_forward_time": 0.02491021156311035, "step": 18016 }, { "epoch": 2.7490234375e-05, "step": 18016, "training_step_time": 0.10976386070251465 }, { "epoch": 2.749176025390625e-05, "model_forward_time": 0.025936365127563477, "step": 18017 }, { "epoch": 2.749176025390625e-05, "step": 18017, "training_step_time": 0.11401891708374023 }, { "epoch": 2.74932861328125e-05, "model_forward_time": 0.025758981704711914, "step": 18018 }, { "epoch": 2.74932861328125e-05, "step": 18018, "training_step_time": 0.12434673309326172 }, { "epoch": 2.749481201171875e-05, "model_forward_time": 0.02500152587890625, "step": 18019 }, { "epoch": 2.749481201171875e-05, "step": 18019, "training_step_time": 0.1058804988861084 }, { "epoch": 2.7496337890625e-05, "grad_norm": 0.2061215192079544, "learning_rate": 3.761889728624899e-05, "loss": 0.0112, "step": 18020 }, { "epoch": 2.7496337890625e-05, "model_forward_time": 0.02477264404296875, "step": 18020 }, { "epoch": 2.7496337890625e-05, "step": 18020, "training_step_time": 0.11475586891174316 }, { "epoch": 2.749786376953125e-05, "model_forward_time": 0.02521204948425293, "step": 18021 }, { "epoch": 2.749786376953125e-05, "step": 18021, "training_step_time": 0.12122535705566406 }, { "epoch": 2.74993896484375e-05, "model_forward_time": 0.025592803955078125, "step": 18022 }, { "epoch": 2.74993896484375e-05, "step": 18022, "training_step_time": 0.11378741264343262 }, { "epoch": 2.750091552734375e-05, "model_forward_time": 0.025400876998901367, "step": 18023 }, { "epoch": 2.750091552734375e-05, "step": 18023, "training_step_time": 0.12374234199523926 }, { "epoch": 2.750244140625e-05, "model_forward_time": 0.025568008422851562, "step": 18024 }, { "epoch": 2.750244140625e-05, "step": 18024, "training_step_time": 0.1431748867034912 }, { "epoch": 2.750396728515625e-05, "model_forward_time": 0.026306867599487305, "step": 18025 }, { "epoch": 2.750396728515625e-05, "step": 18025, "training_step_time": 0.14913010597229004 }, { "epoch": 2.75054931640625e-05, "model_forward_time": 0.024827003479003906, "step": 18026 }, { "epoch": 2.75054931640625e-05, "step": 18026, "training_step_time": 0.10567975044250488 }, { "epoch": 2.750701904296875e-05, "model_forward_time": 0.025075197219848633, "step": 18027 }, { "epoch": 2.750701904296875e-05, "step": 18027, "training_step_time": 0.10854268074035645 }, { "epoch": 2.7508544921875e-05, "model_forward_time": 0.025426149368286133, "step": 18028 }, { "epoch": 2.7508544921875e-05, "step": 18028, "training_step_time": 0.13500595092773438 }, { "epoch": 2.751007080078125e-05, "model_forward_time": 0.025188922882080078, "step": 18029 }, { "epoch": 2.751007080078125e-05, "step": 18029, "training_step_time": 0.1299581527709961 }, { "epoch": 2.75115966796875e-05, "grad_norm": 0.2631520628929138, "learning_rate": 3.756550564175727e-05, "loss": 0.0193, "step": 18030 }, { "epoch": 2.75115966796875e-05, "model_forward_time": 0.0247802734375, "step": 18030 }, { "epoch": 2.75115966796875e-05, "step": 18030, "training_step_time": 0.10551571846008301 }, { "epoch": 2.751312255859375e-05, "model_forward_time": 0.025038480758666992, "step": 18031 }, { "epoch": 2.751312255859375e-05, "step": 18031, "training_step_time": 0.11161971092224121 }, { "epoch": 2.75146484375e-05, "model_forward_time": 0.025191783905029297, "step": 18032 }, { "epoch": 2.75146484375e-05, "step": 18032, "training_step_time": 0.11246919631958008 }, { "epoch": 2.751617431640625e-05, "model_forward_time": 0.025264501571655273, "step": 18033 }, { "epoch": 2.751617431640625e-05, "step": 18033, "training_step_time": 0.10839033126831055 }, { "epoch": 2.75177001953125e-05, "model_forward_time": 0.02579522132873535, "step": 18034 }, { "epoch": 2.75177001953125e-05, "step": 18034, "training_step_time": 0.10805821418762207 }, { "epoch": 2.751922607421875e-05, "model_forward_time": 0.025609493255615234, "step": 18035 }, { "epoch": 2.751922607421875e-05, "step": 18035, "training_step_time": 0.10558676719665527 }, { "epoch": 2.7520751953125e-05, "model_forward_time": 0.025491952896118164, "step": 18036 }, { "epoch": 2.7520751953125e-05, "step": 18036, "training_step_time": 0.10474371910095215 }, { "epoch": 2.752227783203125e-05, "model_forward_time": 0.02527594566345215, "step": 18037 }, { "epoch": 2.752227783203125e-05, "step": 18037, "training_step_time": 0.10887598991394043 }, { "epoch": 2.75238037109375e-05, "model_forward_time": 0.02546977996826172, "step": 18038 }, { "epoch": 2.75238037109375e-05, "step": 18038, "training_step_time": 0.1085824966430664 }, { "epoch": 2.752532958984375e-05, "model_forward_time": 0.02501201629638672, "step": 18039 }, { "epoch": 2.752532958984375e-05, "step": 18039, "training_step_time": 0.10491251945495605 }, { "epoch": 2.752685546875e-05, "grad_norm": 0.13272154331207275, "learning_rate": 3.751212910634867e-05, "loss": 0.0127, "step": 18040 }, { "epoch": 2.752685546875e-05, "model_forward_time": 0.025127410888671875, "step": 18040 }, { "epoch": 2.752685546875e-05, "step": 18040, "training_step_time": 0.11132955551147461 }, { "epoch": 2.752838134765625e-05, "model_forward_time": 0.027968168258666992, "step": 18041 }, { "epoch": 2.752838134765625e-05, "step": 18041, "training_step_time": 0.1144716739654541 }, { "epoch": 2.75299072265625e-05, "model_forward_time": 0.02566051483154297, "step": 18042 }, { "epoch": 2.75299072265625e-05, "step": 18042, "training_step_time": 0.16414642333984375 }, { "epoch": 2.753143310546875e-05, "model_forward_time": 0.024952411651611328, "step": 18043 }, { "epoch": 2.753143310546875e-05, "step": 18043, "training_step_time": 0.1578686237335205 }, { "epoch": 2.7532958984375e-05, "model_forward_time": 0.026008129119873047, "step": 18044 }, { "epoch": 2.7532958984375e-05, "step": 18044, "training_step_time": 0.11167311668395996 }, { "epoch": 2.753448486328125e-05, "model_forward_time": 0.024770021438598633, "step": 18045 }, { "epoch": 2.753448486328125e-05, "step": 18045, "training_step_time": 0.1226344108581543 }, { "epoch": 2.75360107421875e-05, "model_forward_time": 0.027828454971313477, "step": 18046 }, { "epoch": 2.75360107421875e-05, "step": 18046, "training_step_time": 0.11861753463745117 }, { "epoch": 2.753753662109375e-05, "model_forward_time": 0.025232315063476562, "step": 18047 }, { "epoch": 2.753753662109375e-05, "step": 18047, "training_step_time": 0.1080009937286377 }, { "epoch": 2.75390625e-05, "model_forward_time": 0.02543950080871582, "step": 18048 }, { "epoch": 2.75390625e-05, "step": 18048, "training_step_time": 0.10968279838562012 }, { "epoch": 2.754058837890625e-05, "model_forward_time": 0.024803876876831055, "step": 18049 }, { "epoch": 2.754058837890625e-05, "step": 18049, "training_step_time": 0.10719132423400879 }, { "epoch": 2.75421142578125e-05, "grad_norm": 0.4691051244735718, "learning_rate": 3.7458767744880765e-05, "loss": 0.0123, "step": 18050 }, { "epoch": 2.75421142578125e-05, "model_forward_time": 0.0252687931060791, "step": 18050 }, { "epoch": 2.75421142578125e-05, "step": 18050, "training_step_time": 0.10846114158630371 }, { "epoch": 2.754364013671875e-05, "model_forward_time": 0.025236129760742188, "step": 18051 }, { "epoch": 2.754364013671875e-05, "step": 18051, "training_step_time": 0.10599970817565918 }, { "epoch": 2.7545166015625e-05, "model_forward_time": 0.02538013458251953, "step": 18052 }, { "epoch": 2.7545166015625e-05, "step": 18052, "training_step_time": 0.10949397087097168 }, { "epoch": 2.754669189453125e-05, "model_forward_time": 0.02544379234313965, "step": 18053 }, { "epoch": 2.754669189453125e-05, "step": 18053, "training_step_time": 0.10610055923461914 }, { "epoch": 2.75482177734375e-05, "model_forward_time": 0.025038480758666992, "step": 18054 }, { "epoch": 2.75482177734375e-05, "step": 18054, "training_step_time": 0.10511040687561035 }, { "epoch": 2.754974365234375e-05, "model_forward_time": 0.025271892547607422, "step": 18055 }, { "epoch": 2.754974365234375e-05, "step": 18055, "training_step_time": 0.10669755935668945 }, { "epoch": 2.755126953125e-05, "model_forward_time": 0.025285959243774414, "step": 18056 }, { "epoch": 2.755126953125e-05, "step": 18056, "training_step_time": 0.1104896068572998 }, { "epoch": 2.755279541015625e-05, "model_forward_time": 0.025473594665527344, "step": 18057 }, { "epoch": 2.755279541015625e-05, "step": 18057, "training_step_time": 0.10799503326416016 }, { "epoch": 2.75543212890625e-05, "model_forward_time": 0.025156497955322266, "step": 18058 }, { "epoch": 2.75543212890625e-05, "step": 18058, "training_step_time": 0.10685944557189941 }, { "epoch": 2.755584716796875e-05, "model_forward_time": 0.02577948570251465, "step": 18059 }, { "epoch": 2.755584716796875e-05, "step": 18059, "training_step_time": 0.10591387748718262 }, { "epoch": 2.7557373046875e-05, "grad_norm": 0.4659165143966675, "learning_rate": 3.74054216221926e-05, "loss": 0.0102, "step": 18060 }, { "epoch": 2.7557373046875e-05, "model_forward_time": 0.025429964065551758, "step": 18060 }, { "epoch": 2.7557373046875e-05, "step": 18060, "training_step_time": 0.10542178153991699 }, { "epoch": 2.755889892578125e-05, "model_forward_time": 0.02537059783935547, "step": 18061 }, { "epoch": 2.755889892578125e-05, "step": 18061, "training_step_time": 0.10715532302856445 }, { "epoch": 2.75604248046875e-05, "model_forward_time": 0.02581000328063965, "step": 18062 }, { "epoch": 2.75604248046875e-05, "step": 18062, "training_step_time": 0.10545921325683594 }, { "epoch": 2.756195068359375e-05, "model_forward_time": 0.025296926498413086, "step": 18063 }, { "epoch": 2.756195068359375e-05, "step": 18063, "training_step_time": 0.15051770210266113 }, { "epoch": 2.75634765625e-05, "model_forward_time": 0.0249478816986084, "step": 18064 }, { "epoch": 2.75634765625e-05, "step": 18064, "training_step_time": 0.10940718650817871 }, { "epoch": 2.756500244140625e-05, "model_forward_time": 0.027528047561645508, "step": 18065 }, { "epoch": 2.756500244140625e-05, "step": 18065, "training_step_time": 0.13128972053527832 }, { "epoch": 2.75665283203125e-05, "model_forward_time": 0.025417089462280273, "step": 18066 }, { "epoch": 2.75665283203125e-05, "step": 18066, "training_step_time": 0.16022133827209473 }, { "epoch": 2.756805419921875e-05, "model_forward_time": 0.024984121322631836, "step": 18067 }, { "epoch": 2.756805419921875e-05, "step": 18067, "training_step_time": 0.10365819931030273 }, { "epoch": 2.7569580078125e-05, "model_forward_time": 0.024680376052856445, "step": 18068 }, { "epoch": 2.7569580078125e-05, "step": 18068, "training_step_time": 0.14702725410461426 }, { "epoch": 2.757110595703125e-05, "model_forward_time": 0.02456974983215332, "step": 18069 }, { "epoch": 2.757110595703125e-05, "step": 18069, "training_step_time": 0.1059417724609375 }, { "epoch": 2.75726318359375e-05, "grad_norm": 0.21708019077777863, "learning_rate": 3.7352090803104765e-05, "loss": 0.013, "step": 18070 }, { "epoch": 2.75726318359375e-05, "model_forward_time": 0.025348186492919922, "step": 18070 }, { "epoch": 2.75726318359375e-05, "step": 18070, "training_step_time": 0.1965010166168213 }, { "epoch": 2.757415771484375e-05, "model_forward_time": 0.02434229850769043, "step": 18071 }, { "epoch": 2.757415771484375e-05, "step": 18071, "training_step_time": 0.13995099067687988 }, { "epoch": 2.757568359375e-05, "model_forward_time": 0.024852752685546875, "step": 18072 }, { "epoch": 2.757568359375e-05, "step": 18072, "training_step_time": 0.1960604190826416 }, { "epoch": 2.757720947265625e-05, "model_forward_time": 0.024295806884765625, "step": 18073 }, { "epoch": 2.757720947265625e-05, "step": 18073, "training_step_time": 0.10580563545227051 }, { "epoch": 2.75787353515625e-05, "model_forward_time": 0.02414679527282715, "step": 18074 }, { "epoch": 2.75787353515625e-05, "step": 18074, "training_step_time": 0.10179758071899414 }, { "epoch": 2.758026123046875e-05, "model_forward_time": 0.02497386932373047, "step": 18075 }, { "epoch": 2.758026123046875e-05, "step": 18075, "training_step_time": 0.12715911865234375 }, { "epoch": 2.7581787109375e-05, "model_forward_time": 0.025264978408813477, "step": 18076 }, { "epoch": 2.7581787109375e-05, "step": 18076, "training_step_time": 0.15166163444519043 }, { "epoch": 2.758331298828125e-05, "model_forward_time": 0.024080514907836914, "step": 18077 }, { "epoch": 2.758331298828125e-05, "step": 18077, "training_step_time": 0.2084980010986328 }, { "epoch": 2.75848388671875e-05, "model_forward_time": 0.023659229278564453, "step": 18078 }, { "epoch": 2.75848388671875e-05, "step": 18078, "training_step_time": 0.1961688995361328 }, { "epoch": 2.758636474609375e-05, "model_forward_time": 0.02367377281188965, "step": 18079 }, { "epoch": 2.758636474609375e-05, "step": 18079, "training_step_time": 0.18773531913757324 }, { "epoch": 2.7587890625e-05, "grad_norm": 0.2568994462490082, "learning_rate": 3.7298775352419206e-05, "loss": 0.0082, "step": 18080 }, { "epoch": 2.7587890625e-05, "model_forward_time": 0.023906469345092773, "step": 18080 }, { "epoch": 2.7587890625e-05, "step": 18080, "training_step_time": 0.17955398559570312 }, { "epoch": 2.758941650390625e-05, "model_forward_time": 0.02474188804626465, "step": 18081 }, { "epoch": 2.758941650390625e-05, "step": 18081, "training_step_time": 0.1676037311553955 }, { "epoch": 2.75909423828125e-05, "model_forward_time": 0.024628639221191406, "step": 18082 }, { "epoch": 2.75909423828125e-05, "step": 18082, "training_step_time": 0.15869379043579102 }, { "epoch": 2.759246826171875e-05, "model_forward_time": 0.023293018341064453, "step": 18083 }, { "epoch": 2.759246826171875e-05, "step": 18083, "training_step_time": 0.19436335563659668 }, { "epoch": 2.7593994140625e-05, "model_forward_time": 0.02470088005065918, "step": 18084 }, { "epoch": 2.7593994140625e-05, "step": 18084, "training_step_time": 0.1279900074005127 }, { "epoch": 2.759552001953125e-05, "model_forward_time": 0.024484634399414062, "step": 18085 }, { "epoch": 2.759552001953125e-05, "step": 18085, "training_step_time": 0.12510132789611816 }, { "epoch": 2.75970458984375e-05, "model_forward_time": 0.025023698806762695, "step": 18086 }, { "epoch": 2.75970458984375e-05, "step": 18086, "training_step_time": 0.12277531623840332 }, { "epoch": 2.759857177734375e-05, "model_forward_time": 0.0250091552734375, "step": 18087 }, { "epoch": 2.759857177734375e-05, "step": 18087, "training_step_time": 0.1942758560180664 }, { "epoch": 2.760009765625e-05, "model_forward_time": 0.024823427200317383, "step": 18088 }, { "epoch": 2.760009765625e-05, "step": 18088, "training_step_time": 0.12148809432983398 }, { "epoch": 2.760162353515625e-05, "model_forward_time": 0.027415990829467773, "step": 18089 }, { "epoch": 2.760162353515625e-05, "step": 18089, "training_step_time": 0.11230754852294922 }, { "epoch": 2.76031494140625e-05, "grad_norm": 0.19425970315933228, "learning_rate": 3.7245475334919246e-05, "loss": 0.011, "step": 18090 }, { "epoch": 2.76031494140625e-05, "model_forward_time": 0.02588486671447754, "step": 18090 }, { "epoch": 2.76031494140625e-05, "step": 18090, "training_step_time": 0.10913205146789551 }, { "epoch": 2.760467529296875e-05, "model_forward_time": 0.025197267532348633, "step": 18091 }, { "epoch": 2.760467529296875e-05, "step": 18091, "training_step_time": 0.10808968544006348 }, { "epoch": 2.7606201171875e-05, "model_forward_time": 0.025452613830566406, "step": 18092 }, { "epoch": 2.7606201171875e-05, "step": 18092, "training_step_time": 0.1070561408996582 }, { "epoch": 2.760772705078125e-05, "model_forward_time": 0.025223970413208008, "step": 18093 }, { "epoch": 2.760772705078125e-05, "step": 18093, "training_step_time": 0.10733151435852051 }, { "epoch": 2.76092529296875e-05, "model_forward_time": 0.02486133575439453, "step": 18094 }, { "epoch": 2.76092529296875e-05, "step": 18094, "training_step_time": 0.10876965522766113 }, { "epoch": 2.761077880859375e-05, "model_forward_time": 0.024648666381835938, "step": 18095 }, { "epoch": 2.761077880859375e-05, "step": 18095, "training_step_time": 0.11098361015319824 }, { "epoch": 2.76123046875e-05, "model_forward_time": 0.024187088012695312, "step": 18096 }, { "epoch": 2.76123046875e-05, "step": 18096, "training_step_time": 0.10561871528625488 }, { "epoch": 2.761383056640625e-05, "model_forward_time": 0.025218963623046875, "step": 18097 }, { "epoch": 2.761383056640625e-05, "step": 18097, "training_step_time": 0.11185169219970703 }, { "epoch": 2.76153564453125e-05, "model_forward_time": 0.02584671974182129, "step": 18098 }, { "epoch": 2.76153564453125e-05, "step": 18098, "training_step_time": 0.1062629222869873 }, { "epoch": 2.761688232421875e-05, "model_forward_time": 0.025019407272338867, "step": 18099 }, { "epoch": 2.761688232421875e-05, "step": 18099, "training_step_time": 0.10460329055786133 }, { "epoch": 2.7618408203125e-05, "grad_norm": 0.2011028677225113, "learning_rate": 3.719219081536942e-05, "loss": 0.0172, "step": 18100 }, { "epoch": 2.7618408203125e-05, "model_forward_time": 0.028470277786254883, "step": 18100 }, { "epoch": 2.7618408203125e-05, "step": 18100, "training_step_time": 0.10840082168579102 }, { "epoch": 2.761993408203125e-05, "model_forward_time": 0.02505207061767578, "step": 18101 }, { "epoch": 2.761993408203125e-05, "step": 18101, "training_step_time": 0.10689377784729004 }, { "epoch": 2.76214599609375e-05, "model_forward_time": 0.025411367416381836, "step": 18102 }, { "epoch": 2.76214599609375e-05, "step": 18102, "training_step_time": 0.10832095146179199 }, { "epoch": 2.762298583984375e-05, "model_forward_time": 0.02538132667541504, "step": 18103 }, { "epoch": 2.762298583984375e-05, "step": 18103, "training_step_time": 0.10732579231262207 }, { "epoch": 2.762451171875e-05, "model_forward_time": 0.025362491607666016, "step": 18104 }, { "epoch": 2.762451171875e-05, "step": 18104, "training_step_time": 0.17307043075561523 }, { "epoch": 2.762603759765625e-05, "model_forward_time": 0.0247347354888916, "step": 18105 }, { "epoch": 2.762603759765625e-05, "step": 18105, "training_step_time": 0.11890149116516113 }, { "epoch": 2.76275634765625e-05, "model_forward_time": 0.02479076385498047, "step": 18106 }, { "epoch": 2.76275634765625e-05, "step": 18106, "training_step_time": 0.1668558120727539 }, { "epoch": 2.762908935546875e-05, "model_forward_time": 0.02463555335998535, "step": 18107 }, { "epoch": 2.762908935546875e-05, "step": 18107, "training_step_time": 0.14542913436889648 }, { "epoch": 2.7630615234375e-05, "model_forward_time": 0.02484893798828125, "step": 18108 }, { "epoch": 2.7630615234375e-05, "step": 18108, "training_step_time": 0.12649965286254883 }, { "epoch": 2.763214111328125e-05, "model_forward_time": 0.025589466094970703, "step": 18109 }, { "epoch": 2.763214111328125e-05, "step": 18109, "training_step_time": 0.10438394546508789 }, { "epoch": 2.76336669921875e-05, "grad_norm": 0.4140009582042694, "learning_rate": 3.713892185851548e-05, "loss": 0.0087, "step": 18110 }, { "epoch": 2.76336669921875e-05, "model_forward_time": 0.02533268928527832, "step": 18110 }, { "epoch": 2.76336669921875e-05, "step": 18110, "training_step_time": 0.14310765266418457 }, { "epoch": 2.763519287109375e-05, "model_forward_time": 0.025089263916015625, "step": 18111 }, { "epoch": 2.763519287109375e-05, "step": 18111, "training_step_time": 0.14928841590881348 }, { "epoch": 2.763671875e-05, "model_forward_time": 0.025203227996826172, "step": 18112 }, { "epoch": 2.763671875e-05, "step": 18112, "training_step_time": 0.1381986141204834 }, { "epoch": 2.763824462890625e-05, "model_forward_time": 0.02474689483642578, "step": 18113 }, { "epoch": 2.763824462890625e-05, "step": 18113, "training_step_time": 0.1374964714050293 }, { "epoch": 2.76397705078125e-05, "model_forward_time": 0.024968862533569336, "step": 18114 }, { "epoch": 2.76397705078125e-05, "step": 18114, "training_step_time": 0.20070242881774902 }, { "epoch": 2.764129638671875e-05, "model_forward_time": 0.024248838424682617, "step": 18115 }, { "epoch": 2.764129638671875e-05, "step": 18115, "training_step_time": 0.14917302131652832 }, { "epoch": 2.7642822265625e-05, "model_forward_time": 0.024539709091186523, "step": 18116 }, { "epoch": 2.7642822265625e-05, "step": 18116, "training_step_time": 0.13200664520263672 }, { "epoch": 2.764434814453125e-05, "model_forward_time": 0.02427816390991211, "step": 18117 }, { "epoch": 2.764434814453125e-05, "step": 18117, "training_step_time": 0.13616585731506348 }, { "epoch": 2.76458740234375e-05, "model_forward_time": 0.027407169342041016, "step": 18118 }, { "epoch": 2.76458740234375e-05, "step": 18118, "training_step_time": 0.1273195743560791 }, { "epoch": 2.764739990234375e-05, "model_forward_time": 0.025042057037353516, "step": 18119 }, { "epoch": 2.764739990234375e-05, "step": 18119, "training_step_time": 0.11828994750976562 }, { "epoch": 2.764892578125e-05, "grad_norm": 0.5197622776031494, "learning_rate": 3.7085668529084184e-05, "loss": 0.0103, "step": 18120 }, { "epoch": 2.764892578125e-05, "model_forward_time": 0.025221586227416992, "step": 18120 }, { "epoch": 2.764892578125e-05, "step": 18120, "training_step_time": 0.1313326358795166 }, { "epoch": 2.765045166015625e-05, "model_forward_time": 0.02507805824279785, "step": 18121 }, { "epoch": 2.765045166015625e-05, "step": 18121, "training_step_time": 0.10732698440551758 }, { "epoch": 2.76519775390625e-05, "model_forward_time": 0.025742769241333008, "step": 18122 }, { "epoch": 2.76519775390625e-05, "step": 18122, "training_step_time": 0.10694074630737305 }, { "epoch": 2.765350341796875e-05, "model_forward_time": 0.02510380744934082, "step": 18123 }, { "epoch": 2.765350341796875e-05, "step": 18123, "training_step_time": 0.10821866989135742 }, { "epoch": 2.7655029296875e-05, "model_forward_time": 0.025131940841674805, "step": 18124 }, { "epoch": 2.7655029296875e-05, "step": 18124, "training_step_time": 0.10664939880371094 }, { "epoch": 2.765655517578125e-05, "model_forward_time": 0.02499532699584961, "step": 18125 }, { "epoch": 2.765655517578125e-05, "step": 18125, "training_step_time": 0.10856246948242188 }, { "epoch": 2.76580810546875e-05, "model_forward_time": 0.02704596519470215, "step": 18126 }, { "epoch": 2.76580810546875e-05, "step": 18126, "training_step_time": 0.11043643951416016 }, { "epoch": 2.765960693359375e-05, "model_forward_time": 0.025505781173706055, "step": 18127 }, { "epoch": 2.765960693359375e-05, "step": 18127, "training_step_time": 0.1676006317138672 }, { "epoch": 2.76611328125e-05, "model_forward_time": 0.025732040405273438, "step": 18128 }, { "epoch": 2.76611328125e-05, "step": 18128, "training_step_time": 0.1321582794189453 }, { "epoch": 2.766265869140625e-05, "model_forward_time": 0.024463891983032227, "step": 18129 }, { "epoch": 2.766265869140625e-05, "step": 18129, "training_step_time": 0.1138925552368164 }, { "epoch": 2.76641845703125e-05, "grad_norm": 0.3108060359954834, "learning_rate": 3.703243089178337e-05, "loss": 0.013, "step": 18130 }, { "epoch": 2.76641845703125e-05, "model_forward_time": 0.02553868293762207, "step": 18130 }, { "epoch": 2.76641845703125e-05, "step": 18130, "training_step_time": 0.10431957244873047 }, { "epoch": 2.766571044921875e-05, "model_forward_time": 0.025703907012939453, "step": 18131 }, { "epoch": 2.766571044921875e-05, "step": 18131, "training_step_time": 0.11706137657165527 }, { "epoch": 2.7667236328125e-05, "model_forward_time": 0.02522444725036621, "step": 18132 }, { "epoch": 2.7667236328125e-05, "step": 18132, "training_step_time": 0.1340196132659912 }, { "epoch": 2.766876220703125e-05, "model_forward_time": 0.024974346160888672, "step": 18133 }, { "epoch": 2.766876220703125e-05, "step": 18133, "training_step_time": 0.19586873054504395 }, { "epoch": 2.76702880859375e-05, "model_forward_time": 0.024336814880371094, "step": 18134 }, { "epoch": 2.76702880859375e-05, "step": 18134, "training_step_time": 0.153656005859375 }, { "epoch": 2.767181396484375e-05, "model_forward_time": 0.024678707122802734, "step": 18135 }, { "epoch": 2.767181396484375e-05, "step": 18135, "training_step_time": 0.1350870132446289 }, { "epoch": 2.767333984375e-05, "model_forward_time": 0.024815082550048828, "step": 18136 }, { "epoch": 2.767333984375e-05, "step": 18136, "training_step_time": 0.12999582290649414 }, { "epoch": 2.767486572265625e-05, "model_forward_time": 0.025023937225341797, "step": 18137 }, { "epoch": 2.767486572265625e-05, "step": 18137, "training_step_time": 0.12358975410461426 }, { "epoch": 2.76763916015625e-05, "model_forward_time": 0.02514791488647461, "step": 18138 }, { "epoch": 2.76763916015625e-05, "step": 18138, "training_step_time": 0.12296390533447266 }, { "epoch": 2.767791748046875e-05, "model_forward_time": 0.02565932273864746, "step": 18139 }, { "epoch": 2.767791748046875e-05, "step": 18139, "training_step_time": 0.11563706398010254 }, { "epoch": 2.7679443359375e-05, "grad_norm": 0.18115819990634918, "learning_rate": 3.697920901130178e-05, "loss": 0.007, "step": 18140 }, { "epoch": 2.7679443359375e-05, "model_forward_time": 0.027875900268554688, "step": 18140 }, { "epoch": 2.7679443359375e-05, "step": 18140, "training_step_time": 0.11524224281311035 }, { "epoch": 2.768096923828125e-05, "model_forward_time": 0.025517940521240234, "step": 18141 }, { "epoch": 2.768096923828125e-05, "step": 18141, "training_step_time": 0.11596441268920898 }, { "epoch": 2.76824951171875e-05, "model_forward_time": 0.025122880935668945, "step": 18142 }, { "epoch": 2.76824951171875e-05, "step": 18142, "training_step_time": 0.11036348342895508 }, { "epoch": 2.768402099609375e-05, "model_forward_time": 0.02572941780090332, "step": 18143 }, { "epoch": 2.768402099609375e-05, "step": 18143, "training_step_time": 0.10387086868286133 }, { "epoch": 2.7685546875e-05, "model_forward_time": 0.02544093132019043, "step": 18144 }, { "epoch": 2.7685546875e-05, "step": 18144, "training_step_time": 0.10453033447265625 }, { "epoch": 2.768707275390625e-05, "model_forward_time": 0.02531719207763672, "step": 18145 }, { "epoch": 2.768707275390625e-05, "step": 18145, "training_step_time": 0.10442113876342773 }, { "epoch": 2.76885986328125e-05, "model_forward_time": 0.02498149871826172, "step": 18146 }, { "epoch": 2.76885986328125e-05, "step": 18146, "training_step_time": 0.10443997383117676 }, { "epoch": 2.769012451171875e-05, "model_forward_time": 0.025177001953125, "step": 18147 }, { "epoch": 2.769012451171875e-05, "step": 18147, "training_step_time": 0.10314130783081055 }, { "epoch": 2.7691650390625e-05, "model_forward_time": 0.024719715118408203, "step": 18148 }, { "epoch": 2.7691650390625e-05, "step": 18148, "training_step_time": 0.17686700820922852 }, { "epoch": 2.769317626953125e-05, "model_forward_time": 0.024550676345825195, "step": 18149 }, { "epoch": 2.769317626953125e-05, "step": 18149, "training_step_time": 0.12951898574829102 }, { "epoch": 2.76947021484375e-05, "grad_norm": 0.4410998821258545, "learning_rate": 3.6926002952309016e-05, "loss": 0.0094, "step": 18150 }, { "epoch": 2.76947021484375e-05, "model_forward_time": 0.024393558502197266, "step": 18150 }, { "epoch": 2.76947021484375e-05, "step": 18150, "training_step_time": 0.1180570125579834 }, { "epoch": 2.769622802734375e-05, "model_forward_time": 0.02485823631286621, "step": 18151 }, { "epoch": 2.769622802734375e-05, "step": 18151, "training_step_time": 0.11525511741638184 }, { "epoch": 2.769775390625e-05, "model_forward_time": 0.025414228439331055, "step": 18152 }, { "epoch": 2.769775390625e-05, "step": 18152, "training_step_time": 0.18957757949829102 }, { "epoch": 2.769927978515625e-05, "model_forward_time": 0.025480031967163086, "step": 18153 }, { "epoch": 2.769927978515625e-05, "step": 18153, "training_step_time": 0.10802483558654785 }, { "epoch": 2.77008056640625e-05, "model_forward_time": 0.024160385131835938, "step": 18154 }, { "epoch": 2.77008056640625e-05, "step": 18154, "training_step_time": 0.14633464813232422 }, { "epoch": 2.770233154296875e-05, "model_forward_time": 0.025056838989257812, "step": 18155 }, { "epoch": 2.770233154296875e-05, "step": 18155, "training_step_time": 0.1696178913116455 }, { "epoch": 2.7703857421875e-05, "model_forward_time": 0.02483367919921875, "step": 18156 }, { "epoch": 2.7703857421875e-05, "step": 18156, "training_step_time": 0.11711668968200684 }, { "epoch": 2.770538330078125e-05, "model_forward_time": 0.024790048599243164, "step": 18157 }, { "epoch": 2.770538330078125e-05, "step": 18157, "training_step_time": 0.11730551719665527 }, { "epoch": 2.77069091796875e-05, "model_forward_time": 0.02529311180114746, "step": 18158 }, { "epoch": 2.77069091796875e-05, "step": 18158, "training_step_time": 0.12294721603393555 }, { "epoch": 2.770843505859375e-05, "model_forward_time": 0.02542591094970703, "step": 18159 }, { "epoch": 2.770843505859375e-05, "step": 18159, "training_step_time": 0.1450653076171875 }, { "epoch": 2.77099609375e-05, "grad_norm": 0.18899288773536682, "learning_rate": 3.687281277945547e-05, "loss": 0.0105, "step": 18160 }, { "epoch": 2.77099609375e-05, "model_forward_time": 0.02483510971069336, "step": 18160 }, { "epoch": 2.77099609375e-05, "step": 18160, "training_step_time": 0.11096620559692383 }, { "epoch": 2.771148681640625e-05, "model_forward_time": 0.025243520736694336, "step": 18161 }, { "epoch": 2.771148681640625e-05, "step": 18161, "training_step_time": 0.10970807075500488 }, { "epoch": 2.77130126953125e-05, "model_forward_time": 0.024886608123779297, "step": 18162 }, { "epoch": 2.77130126953125e-05, "step": 18162, "training_step_time": 0.12086653709411621 }, { "epoch": 2.771453857421875e-05, "model_forward_time": 0.025333166122436523, "step": 18163 }, { "epoch": 2.771453857421875e-05, "step": 18163, "training_step_time": 0.125319242477417 }, { "epoch": 2.7716064453125e-05, "model_forward_time": 0.02520155906677246, "step": 18164 }, { "epoch": 2.7716064453125e-05, "step": 18164, "training_step_time": 0.11907243728637695 }, { "epoch": 2.771759033203125e-05, "model_forward_time": 0.025361061096191406, "step": 18165 }, { "epoch": 2.771759033203125e-05, "step": 18165, "training_step_time": 0.12551188468933105 }, { "epoch": 2.77191162109375e-05, "model_forward_time": 0.025522947311401367, "step": 18166 }, { "epoch": 2.77191162109375e-05, "step": 18166, "training_step_time": 0.11586213111877441 }, { "epoch": 2.772064208984375e-05, "model_forward_time": 0.027037382125854492, "step": 18167 }, { "epoch": 2.772064208984375e-05, "step": 18167, "training_step_time": 0.13370609283447266 }, { "epoch": 2.772216796875e-05, "model_forward_time": 0.024961233139038086, "step": 18168 }, { "epoch": 2.772216796875e-05, "step": 18168, "training_step_time": 0.10550260543823242 }, { "epoch": 2.772369384765625e-05, "model_forward_time": 0.025358200073242188, "step": 18169 }, { "epoch": 2.772369384765625e-05, "step": 18169, "training_step_time": 0.10926318168640137 }, { "epoch": 2.77252197265625e-05, "grad_norm": 0.39983922243118286, "learning_rate": 3.68196385573722e-05, "loss": 0.0121, "step": 18170 }, { "epoch": 2.77252197265625e-05, "model_forward_time": 0.02585315704345703, "step": 18170 }, { "epoch": 2.77252197265625e-05, "step": 18170, "training_step_time": 0.10893988609313965 }, { "epoch": 2.772674560546875e-05, "model_forward_time": 0.02562117576599121, "step": 18171 }, { "epoch": 2.772674560546875e-05, "step": 18171, "training_step_time": 0.10671329498291016 }, { "epoch": 2.7728271484375e-05, "model_forward_time": 0.025365114212036133, "step": 18172 }, { "epoch": 2.7728271484375e-05, "step": 18172, "training_step_time": 0.1687023639678955 }, { "epoch": 2.772979736328125e-05, "model_forward_time": 0.02462172508239746, "step": 18173 }, { "epoch": 2.772979736328125e-05, "step": 18173, "training_step_time": 0.14101552963256836 }, { "epoch": 2.77313232421875e-05, "model_forward_time": 0.024817466735839844, "step": 18174 }, { "epoch": 2.77313232421875e-05, "step": 18174, "training_step_time": 0.11067318916320801 }, { "epoch": 2.773284912109375e-05, "model_forward_time": 0.025244951248168945, "step": 18175 }, { "epoch": 2.773284912109375e-05, "step": 18175, "training_step_time": 0.10966300964355469 }, { "epoch": 2.7734375e-05, "model_forward_time": 0.026389360427856445, "step": 18176 }, { "epoch": 2.7734375e-05, "step": 18176, "training_step_time": 0.10791897773742676 }, { "epoch": 2.773590087890625e-05, "model_forward_time": 0.025504589080810547, "step": 18177 }, { "epoch": 2.773590087890625e-05, "step": 18177, "training_step_time": 0.1130526065826416 }, { "epoch": 2.77374267578125e-05, "model_forward_time": 0.026491880416870117, "step": 18178 }, { "epoch": 2.77374267578125e-05, "step": 18178, "training_step_time": 0.12763357162475586 }, { "epoch": 2.773895263671875e-05, "model_forward_time": 0.02591109275817871, "step": 18179 }, { "epoch": 2.773895263671875e-05, "step": 18179, "training_step_time": 0.13400602340698242 }, { "epoch": 2.7740478515625e-05, "grad_norm": 0.19778363406658173, "learning_rate": 3.676648035067093e-05, "loss": 0.0074, "step": 18180 }, { "epoch": 2.7740478515625e-05, "model_forward_time": 0.02493882179260254, "step": 18180 }, { "epoch": 2.7740478515625e-05, "step": 18180, "training_step_time": 0.10508608818054199 }, { "epoch": 2.774200439453125e-05, "model_forward_time": 0.02560257911682129, "step": 18181 }, { "epoch": 2.774200439453125e-05, "step": 18181, "training_step_time": 0.1059730052947998 }, { "epoch": 2.77435302734375e-05, "model_forward_time": 0.025562047958374023, "step": 18182 }, { "epoch": 2.77435302734375e-05, "step": 18182, "training_step_time": 0.10698080062866211 }, { "epoch": 2.774505615234375e-05, "model_forward_time": 0.025084972381591797, "step": 18183 }, { "epoch": 2.774505615234375e-05, "step": 18183, "training_step_time": 0.10579085350036621 }, { "epoch": 2.774658203125e-05, "model_forward_time": 0.025585174560546875, "step": 18184 }, { "epoch": 2.774658203125e-05, "step": 18184, "training_step_time": 0.10533595085144043 }, { "epoch": 2.774810791015625e-05, "model_forward_time": 0.026225566864013672, "step": 18185 }, { "epoch": 2.774810791015625e-05, "step": 18185, "training_step_time": 0.10784554481506348 }, { "epoch": 2.77496337890625e-05, "model_forward_time": 0.02575397491455078, "step": 18186 }, { "epoch": 2.77496337890625e-05, "step": 18186, "training_step_time": 0.10736346244812012 }, { "epoch": 2.775115966796875e-05, "model_forward_time": 0.025572776794433594, "step": 18187 }, { "epoch": 2.775115966796875e-05, "step": 18187, "training_step_time": 0.10652446746826172 }, { "epoch": 2.7752685546875e-05, "model_forward_time": 0.025231122970581055, "step": 18188 }, { "epoch": 2.7752685546875e-05, "step": 18188, "training_step_time": 0.10638213157653809 }, { "epoch": 2.775421142578125e-05, "model_forward_time": 0.025702953338623047, "step": 18189 }, { "epoch": 2.775421142578125e-05, "step": 18189, "training_step_time": 0.10791540145874023 }, { "epoch": 2.77557373046875e-05, "grad_norm": 0.18023350834846497, "learning_rate": 3.6713338223943867e-05, "loss": 0.0112, "step": 18190 }, { "epoch": 2.77557373046875e-05, "model_forward_time": 0.025295495986938477, "step": 18190 }, { "epoch": 2.77557373046875e-05, "step": 18190, "training_step_time": 0.10558056831359863 }, { "epoch": 2.775726318359375e-05, "model_forward_time": 0.025256872177124023, "step": 18191 }, { "epoch": 2.775726318359375e-05, "step": 18191, "training_step_time": 0.10480761528015137 }, { "epoch": 2.77587890625e-05, "model_forward_time": 0.02544093132019043, "step": 18192 }, { "epoch": 2.77587890625e-05, "step": 18192, "training_step_time": 0.10573649406433105 }, { "epoch": 2.776031494140625e-05, "model_forward_time": 0.025029420852661133, "step": 18193 }, { "epoch": 2.776031494140625e-05, "step": 18193, "training_step_time": 0.10547256469726562 }, { "epoch": 2.77618408203125e-05, "model_forward_time": 0.025355815887451172, "step": 18194 }, { "epoch": 2.77618408203125e-05, "step": 18194, "training_step_time": 0.11199140548706055 }, { "epoch": 2.776336669921875e-05, "model_forward_time": 0.025072336196899414, "step": 18195 }, { "epoch": 2.776336669921875e-05, "step": 18195, "training_step_time": 0.17957091331481934 }, { "epoch": 2.7764892578125e-05, "model_forward_time": 0.024799585342407227, "step": 18196 }, { "epoch": 2.7764892578125e-05, "step": 18196, "training_step_time": 0.10981535911560059 }, { "epoch": 2.776641845703125e-05, "model_forward_time": 0.025828838348388672, "step": 18197 }, { "epoch": 2.776641845703125e-05, "step": 18197, "training_step_time": 0.1475992202758789 }, { "epoch": 2.77679443359375e-05, "model_forward_time": 0.025468826293945312, "step": 18198 }, { "epoch": 2.77679443359375e-05, "step": 18198, "training_step_time": 0.15376830101013184 }, { "epoch": 2.776947021484375e-05, "model_forward_time": 0.0243833065032959, "step": 18199 }, { "epoch": 2.776947021484375e-05, "step": 18199, "training_step_time": 0.22815275192260742 }, { "epoch": 2.777099609375e-05, "grad_norm": 0.1301114410161972, "learning_rate": 3.666021224176369e-05, "loss": 0.0066, "step": 18200 }, { "epoch": 2.777099609375e-05, "model_forward_time": 0.025046348571777344, "step": 18200 }, { "epoch": 2.777099609375e-05, "step": 18200, "training_step_time": 0.16370177268981934 }, { "epoch": 2.777252197265625e-05, "model_forward_time": 0.024945497512817383, "step": 18201 }, { "epoch": 2.777252197265625e-05, "step": 18201, "training_step_time": 0.12728333473205566 }, { "epoch": 2.77740478515625e-05, "model_forward_time": 0.025346040725708008, "step": 18202 }, { "epoch": 2.77740478515625e-05, "step": 18202, "training_step_time": 0.1124577522277832 }, { "epoch": 2.777557373046875e-05, "model_forward_time": 0.028141260147094727, "step": 18203 }, { "epoch": 2.777557373046875e-05, "step": 18203, "training_step_time": 0.11623358726501465 }, { "epoch": 2.7777099609375e-05, "model_forward_time": 0.02539801597595215, "step": 18204 }, { "epoch": 2.7777099609375e-05, "step": 18204, "training_step_time": 0.1962277889251709 }, { "epoch": 2.777862548828125e-05, "model_forward_time": 0.025658845901489258, "step": 18205 }, { "epoch": 2.777862548828125e-05, "step": 18205, "training_step_time": 0.19392824172973633 }, { "epoch": 2.77801513671875e-05, "model_forward_time": 0.024623870849609375, "step": 18206 }, { "epoch": 2.77801513671875e-05, "step": 18206, "training_step_time": 0.10860705375671387 }, { "epoch": 2.778167724609375e-05, "model_forward_time": 0.025190353393554688, "step": 18207 }, { "epoch": 2.778167724609375e-05, "step": 18207, "training_step_time": 0.11414504051208496 }, { "epoch": 2.7783203125e-05, "model_forward_time": 0.02712726593017578, "step": 18208 }, { "epoch": 2.7783203125e-05, "step": 18208, "training_step_time": 0.11818647384643555 }, { "epoch": 2.778472900390625e-05, "model_forward_time": 0.025449752807617188, "step": 18209 }, { "epoch": 2.778472900390625e-05, "step": 18209, "training_step_time": 0.12958788871765137 }, { "epoch": 2.77862548828125e-05, "grad_norm": 0.24471884965896606, "learning_rate": 3.6607102468683526e-05, "loss": 0.0103, "step": 18210 }, { "epoch": 2.77862548828125e-05, "model_forward_time": 0.025458097457885742, "step": 18210 }, { "epoch": 2.77862548828125e-05, "step": 18210, "training_step_time": 0.1079249382019043 }, { "epoch": 2.778778076171875e-05, "model_forward_time": 0.025824785232543945, "step": 18211 }, { "epoch": 2.778778076171875e-05, "step": 18211, "training_step_time": 0.11292338371276855 }, { "epoch": 2.7789306640625e-05, "model_forward_time": 0.025556564331054688, "step": 18212 }, { "epoch": 2.7789306640625e-05, "step": 18212, "training_step_time": 0.10806417465209961 }, { "epoch": 2.779083251953125e-05, "model_forward_time": 0.025482177734375, "step": 18213 }, { "epoch": 2.779083251953125e-05, "step": 18213, "training_step_time": 0.10474801063537598 }, { "epoch": 2.77923583984375e-05, "model_forward_time": 0.02543783187866211, "step": 18214 }, { "epoch": 2.77923583984375e-05, "step": 18214, "training_step_time": 0.1116631031036377 }, { "epoch": 2.779388427734375e-05, "model_forward_time": 0.02560591697692871, "step": 18215 }, { "epoch": 2.779388427734375e-05, "step": 18215, "training_step_time": 0.10955500602722168 }, { "epoch": 2.779541015625e-05, "model_forward_time": 0.02567601203918457, "step": 18216 }, { "epoch": 2.779541015625e-05, "step": 18216, "training_step_time": 0.10474205017089844 }, { "epoch": 2.779693603515625e-05, "model_forward_time": 0.025799036026000977, "step": 18217 }, { "epoch": 2.779693603515625e-05, "step": 18217, "training_step_time": 0.10496282577514648 }, { "epoch": 2.77984619140625e-05, "model_forward_time": 0.025357484817504883, "step": 18218 }, { "epoch": 2.77984619140625e-05, "step": 18218, "training_step_time": 0.10508894920349121 }, { "epoch": 2.779998779296875e-05, "model_forward_time": 0.02510690689086914, "step": 18219 }, { "epoch": 2.779998779296875e-05, "step": 18219, "training_step_time": 0.12858891487121582 }, { "epoch": 2.7801513671875e-05, "grad_norm": 0.1059555932879448, "learning_rate": 3.655400896923672e-05, "loss": 0.0053, "step": 18220 }, { "epoch": 2.7801513671875e-05, "model_forward_time": 0.025163650512695312, "step": 18220 }, { "epoch": 2.7801513671875e-05, "step": 18220, "training_step_time": 0.13014960289001465 }, { "epoch": 2.780303955078125e-05, "model_forward_time": 0.024983644485473633, "step": 18221 }, { "epoch": 2.780303955078125e-05, "step": 18221, "training_step_time": 0.10358047485351562 }, { "epoch": 2.78045654296875e-05, "model_forward_time": 0.02560138702392578, "step": 18222 }, { "epoch": 2.78045654296875e-05, "step": 18222, "training_step_time": 0.12018156051635742 }, { "epoch": 2.780609130859375e-05, "model_forward_time": 0.026764392852783203, "step": 18223 }, { "epoch": 2.780609130859375e-05, "step": 18223, "training_step_time": 0.1076352596282959 }, { "epoch": 2.78076171875e-05, "model_forward_time": 0.025504589080810547, "step": 18224 }, { "epoch": 2.78076171875e-05, "step": 18224, "training_step_time": 0.10963940620422363 }, { "epoch": 2.780914306640625e-05, "model_forward_time": 0.025185346603393555, "step": 18225 }, { "epoch": 2.780914306640625e-05, "step": 18225, "training_step_time": 0.11594223976135254 }, { "epoch": 2.78106689453125e-05, "model_forward_time": 0.02525615692138672, "step": 18226 }, { "epoch": 2.78106689453125e-05, "step": 18226, "training_step_time": 0.11114740371704102 }, { "epoch": 2.781219482421875e-05, "model_forward_time": 0.025641202926635742, "step": 18227 }, { "epoch": 2.781219482421875e-05, "step": 18227, "training_step_time": 0.10615658760070801 }, { "epoch": 2.7813720703125e-05, "model_forward_time": 0.02593827247619629, "step": 18228 }, { "epoch": 2.7813720703125e-05, "step": 18228, "training_step_time": 0.1060175895690918 }, { "epoch": 2.781524658203125e-05, "model_forward_time": 0.02541208267211914, "step": 18229 }, { "epoch": 2.781524658203125e-05, "step": 18229, "training_step_time": 0.1056976318359375 }, { "epoch": 2.78167724609375e-05, "grad_norm": 0.1257658302783966, "learning_rate": 3.650093180793689e-05, "loss": 0.0081, "step": 18230 }, { "epoch": 2.78167724609375e-05, "model_forward_time": 0.025366783142089844, "step": 18230 }, { "epoch": 2.78167724609375e-05, "step": 18230, "training_step_time": 0.10752391815185547 }, { "epoch": 2.781829833984375e-05, "model_forward_time": 0.025318145751953125, "step": 18231 }, { "epoch": 2.781829833984375e-05, "step": 18231, "training_step_time": 0.11149001121520996 }, { "epoch": 2.781982421875e-05, "model_forward_time": 0.02529764175415039, "step": 18232 }, { "epoch": 2.781982421875e-05, "step": 18232, "training_step_time": 0.10521078109741211 }, { "epoch": 2.782135009765625e-05, "model_forward_time": 0.025228500366210938, "step": 18233 }, { "epoch": 2.782135009765625e-05, "step": 18233, "training_step_time": 0.10685896873474121 }, { "epoch": 2.78228759765625e-05, "model_forward_time": 0.02555370330810547, "step": 18234 }, { "epoch": 2.78228759765625e-05, "step": 18234, "training_step_time": 0.10959029197692871 }, { "epoch": 2.782440185546875e-05, "model_forward_time": 0.025466442108154297, "step": 18235 }, { "epoch": 2.782440185546875e-05, "step": 18235, "training_step_time": 0.10811138153076172 }, { "epoch": 2.7825927734375e-05, "model_forward_time": 0.025349855422973633, "step": 18236 }, { "epoch": 2.7825927734375e-05, "step": 18236, "training_step_time": 0.10788393020629883 }, { "epoch": 2.782745361328125e-05, "model_forward_time": 0.02436208724975586, "step": 18237 }, { "epoch": 2.782745361328125e-05, "step": 18237, "training_step_time": 0.10637521743774414 }, { "epoch": 2.78289794921875e-05, "model_forward_time": 0.02462029457092285, "step": 18238 }, { "epoch": 2.78289794921875e-05, "step": 18238, "training_step_time": 0.10635614395141602 }, { "epoch": 2.783050537109375e-05, "model_forward_time": 0.024852514266967773, "step": 18239 }, { "epoch": 2.783050537109375e-05, "step": 18239, "training_step_time": 0.11096882820129395 }, { "epoch": 2.783203125e-05, "grad_norm": 0.3065794110298157, "learning_rate": 3.6447871049277796e-05, "loss": 0.0086, "step": 18240 }, { "epoch": 2.783203125e-05, "model_forward_time": 0.025746822357177734, "step": 18240 }, { "epoch": 2.783203125e-05, "step": 18240, "training_step_time": 0.1088871955871582 }, { "epoch": 2.783355712890625e-05, "model_forward_time": 0.02533864974975586, "step": 18241 }, { "epoch": 2.783355712890625e-05, "step": 18241, "training_step_time": 0.21394085884094238 }, { "epoch": 2.78350830078125e-05, "model_forward_time": 0.024507761001586914, "step": 18242 }, { "epoch": 2.78350830078125e-05, "step": 18242, "training_step_time": 0.11600279808044434 }, { "epoch": 2.783660888671875e-05, "model_forward_time": 0.024621009826660156, "step": 18243 }, { "epoch": 2.783660888671875e-05, "step": 18243, "training_step_time": 0.11689233779907227 }, { "epoch": 2.7838134765625e-05, "model_forward_time": 0.025287866592407227, "step": 18244 }, { "epoch": 2.7838134765625e-05, "step": 18244, "training_step_time": 0.15595769882202148 }, { "epoch": 2.783966064453125e-05, "model_forward_time": 0.02460789680480957, "step": 18245 }, { "epoch": 2.783966064453125e-05, "step": 18245, "training_step_time": 0.1749579906463623 }, { "epoch": 2.78411865234375e-05, "model_forward_time": 0.028271198272705078, "step": 18246 }, { "epoch": 2.78411865234375e-05, "step": 18246, "training_step_time": 0.12980031967163086 }, { "epoch": 2.784271240234375e-05, "model_forward_time": 0.024895906448364258, "step": 18247 }, { "epoch": 2.784271240234375e-05, "step": 18247, "training_step_time": 0.20570969581604004 }, { "epoch": 2.784423828125e-05, "model_forward_time": 0.024450302124023438, "step": 18248 }, { "epoch": 2.784423828125e-05, "step": 18248, "training_step_time": 0.11033034324645996 }, { "epoch": 2.784576416015625e-05, "model_forward_time": 0.025096654891967773, "step": 18249 }, { "epoch": 2.784576416015625e-05, "step": 18249, "training_step_time": 0.11541080474853516 }, { "epoch": 2.78472900390625e-05, "grad_norm": 0.24270372092723846, "learning_rate": 3.639482675773324e-05, "loss": 0.0137, "step": 18250 }, { "epoch": 2.78472900390625e-05, "model_forward_time": 0.025186538696289062, "step": 18250 }, { "epoch": 2.78472900390625e-05, "step": 18250, "training_step_time": 0.13156437873840332 }, { "epoch": 2.784881591796875e-05, "model_forward_time": 0.025368928909301758, "step": 18251 }, { "epoch": 2.784881591796875e-05, "step": 18251, "training_step_time": 0.1927814483642578 }, { "epoch": 2.7850341796875e-05, "model_forward_time": 0.024472475051879883, "step": 18252 }, { "epoch": 2.7850341796875e-05, "step": 18252, "training_step_time": 0.12273263931274414 }, { "epoch": 2.785186767578125e-05, "model_forward_time": 0.024443626403808594, "step": 18253 }, { "epoch": 2.785186767578125e-05, "step": 18253, "training_step_time": 0.10458683967590332 }, { "epoch": 2.78533935546875e-05, "model_forward_time": 0.025203943252563477, "step": 18254 }, { "epoch": 2.78533935546875e-05, "step": 18254, "training_step_time": 0.11017036437988281 }, { "epoch": 2.785491943359375e-05, "model_forward_time": 0.025699138641357422, "step": 18255 }, { "epoch": 2.785491943359375e-05, "step": 18255, "training_step_time": 0.12563204765319824 }, { "epoch": 2.78564453125e-05, "model_forward_time": 0.02537393569946289, "step": 18256 }, { "epoch": 2.78564453125e-05, "step": 18256, "training_step_time": 0.11905717849731445 }, { "epoch": 2.785797119140625e-05, "model_forward_time": 0.025377988815307617, "step": 18257 }, { "epoch": 2.785797119140625e-05, "step": 18257, "training_step_time": 0.11957621574401855 }, { "epoch": 2.78594970703125e-05, "model_forward_time": 0.025751829147338867, "step": 18258 }, { "epoch": 2.78594970703125e-05, "step": 18258, "training_step_time": 0.11984014511108398 }, { "epoch": 2.786102294921875e-05, "model_forward_time": 0.025429248809814453, "step": 18259 }, { "epoch": 2.786102294921875e-05, "step": 18259, "training_step_time": 0.10334491729736328 }, { "epoch": 2.7862548828125e-05, "grad_norm": 0.25312596559524536, "learning_rate": 3.634179899775708e-05, "loss": 0.0089, "step": 18260 }, { "epoch": 2.7862548828125e-05, "model_forward_time": 0.025567293167114258, "step": 18260 }, { "epoch": 2.7862548828125e-05, "step": 18260, "training_step_time": 0.10787129402160645 }, { "epoch": 2.786407470703125e-05, "model_forward_time": 0.025156497955322266, "step": 18261 }, { "epoch": 2.786407470703125e-05, "step": 18261, "training_step_time": 0.10639476776123047 }, { "epoch": 2.78656005859375e-05, "model_forward_time": 0.02548050880432129, "step": 18262 }, { "epoch": 2.78656005859375e-05, "step": 18262, "training_step_time": 0.10518908500671387 }, { "epoch": 2.786712646484375e-05, "model_forward_time": 0.025381088256835938, "step": 18263 }, { "epoch": 2.786712646484375e-05, "step": 18263, "training_step_time": 0.10773634910583496 }, { "epoch": 2.786865234375e-05, "model_forward_time": 0.025629281997680664, "step": 18264 }, { "epoch": 2.786865234375e-05, "step": 18264, "training_step_time": 0.17014408111572266 }, { "epoch": 2.787017822265625e-05, "model_forward_time": 0.025063514709472656, "step": 18265 }, { "epoch": 2.787017822265625e-05, "step": 18265, "training_step_time": 0.21938323974609375 }, { "epoch": 2.78717041015625e-05, "model_forward_time": 0.024457693099975586, "step": 18266 }, { "epoch": 2.78717041015625e-05, "step": 18266, "training_step_time": 0.18399739265441895 }, { "epoch": 2.787322998046875e-05, "model_forward_time": 0.02755141258239746, "step": 18267 }, { "epoch": 2.787322998046875e-05, "step": 18267, "training_step_time": 0.17259716987609863 }, { "epoch": 2.7874755859375e-05, "model_forward_time": 0.024562358856201172, "step": 18268 }, { "epoch": 2.7874755859375e-05, "step": 18268, "training_step_time": 0.1928555965423584 }, { "epoch": 2.787628173828125e-05, "model_forward_time": 0.02499222755432129, "step": 18269 }, { "epoch": 2.787628173828125e-05, "step": 18269, "training_step_time": 0.2004554271697998 }, { "epoch": 2.78778076171875e-05, "grad_norm": 0.4324885606765747, "learning_rate": 3.628878783378302e-05, "loss": 0.0144, "step": 18270 }, { "epoch": 2.78778076171875e-05, "model_forward_time": 0.02541065216064453, "step": 18270 }, { "epoch": 2.78778076171875e-05, "step": 18270, "training_step_time": 0.13161182403564453 }, { "epoch": 2.787933349609375e-05, "model_forward_time": 0.023491621017456055, "step": 18271 }, { "epoch": 2.787933349609375e-05, "step": 18271, "training_step_time": 0.12691235542297363 }, { "epoch": 2.7880859375e-05, "model_forward_time": 0.024225234985351562, "step": 18272 }, { "epoch": 2.7880859375e-05, "step": 18272, "training_step_time": 0.12451887130737305 }, { "epoch": 2.788238525390625e-05, "model_forward_time": 0.02633523941040039, "step": 18273 }, { "epoch": 2.788238525390625e-05, "step": 18273, "training_step_time": 0.11860537528991699 }, { "epoch": 2.78839111328125e-05, "model_forward_time": 0.025267839431762695, "step": 18274 }, { "epoch": 2.78839111328125e-05, "step": 18274, "training_step_time": 0.11661529541015625 }, { "epoch": 2.788543701171875e-05, "model_forward_time": 0.025425195693969727, "step": 18275 }, { "epoch": 2.788543701171875e-05, "step": 18275, "training_step_time": 0.10848474502563477 }, { "epoch": 2.7886962890625e-05, "model_forward_time": 0.025292396545410156, "step": 18276 }, { "epoch": 2.7886962890625e-05, "step": 18276, "training_step_time": 0.10890531539916992 }, { "epoch": 2.788848876953125e-05, "model_forward_time": 0.02562260627746582, "step": 18277 }, { "epoch": 2.788848876953125e-05, "step": 18277, "training_step_time": 0.1089627742767334 }, { "epoch": 2.78900146484375e-05, "model_forward_time": 0.02563309669494629, "step": 18278 }, { "epoch": 2.78900146484375e-05, "step": 18278, "training_step_time": 0.10908651351928711 }, { "epoch": 2.789154052734375e-05, "model_forward_time": 0.025481700897216797, "step": 18279 }, { "epoch": 2.789154052734375e-05, "step": 18279, "training_step_time": 0.10716485977172852 }, { "epoch": 2.789306640625e-05, "grad_norm": 0.16999660432338715, "learning_rate": 3.6235793330224635e-05, "loss": 0.0064, "step": 18280 }, { "epoch": 2.789306640625e-05, "model_forward_time": 0.02515864372253418, "step": 18280 }, { "epoch": 2.789306640625e-05, "step": 18280, "training_step_time": 0.10670804977416992 }, { "epoch": 2.789459228515625e-05, "model_forward_time": 0.025453805923461914, "step": 18281 }, { "epoch": 2.789459228515625e-05, "step": 18281, "training_step_time": 0.11113977432250977 }, { "epoch": 2.78961181640625e-05, "model_forward_time": 0.02538895606994629, "step": 18282 }, { "epoch": 2.78961181640625e-05, "step": 18282, "training_step_time": 0.10705280303955078 }, { "epoch": 2.789764404296875e-05, "model_forward_time": 0.02567315101623535, "step": 18283 }, { "epoch": 2.789764404296875e-05, "step": 18283, "training_step_time": 0.10621809959411621 }, { "epoch": 2.7899169921875e-05, "model_forward_time": 0.02526998519897461, "step": 18284 }, { "epoch": 2.7899169921875e-05, "step": 18284, "training_step_time": 0.12465810775756836 }, { "epoch": 2.790069580078125e-05, "model_forward_time": 0.025272369384765625, "step": 18285 }, { "epoch": 2.790069580078125e-05, "step": 18285, "training_step_time": 0.11791062355041504 }, { "epoch": 2.79022216796875e-05, "model_forward_time": 0.02541375160217285, "step": 18286 }, { "epoch": 2.79022216796875e-05, "step": 18286, "training_step_time": 0.13313007354736328 }, { "epoch": 2.790374755859375e-05, "model_forward_time": 0.02515268325805664, "step": 18287 }, { "epoch": 2.790374755859375e-05, "step": 18287, "training_step_time": 0.15256285667419434 }, { "epoch": 2.79052734375e-05, "model_forward_time": 0.024395227432250977, "step": 18288 }, { "epoch": 2.79052734375e-05, "step": 18288, "training_step_time": 0.10927748680114746 }, { "epoch": 2.790679931640625e-05, "model_forward_time": 0.02699589729309082, "step": 18289 }, { "epoch": 2.790679931640625e-05, "step": 18289, "training_step_time": 0.11343097686767578 }, { "epoch": 2.79083251953125e-05, "grad_norm": 0.3422635495662689, "learning_rate": 3.618281555147522e-05, "loss": 0.0111, "step": 18290 }, { "epoch": 2.79083251953125e-05, "model_forward_time": 0.02630162239074707, "step": 18290 }, { "epoch": 2.79083251953125e-05, "step": 18290, "training_step_time": 0.10517454147338867 }, { "epoch": 2.790985107421875e-05, "model_forward_time": 0.025934457778930664, "step": 18291 }, { "epoch": 2.790985107421875e-05, "step": 18291, "training_step_time": 0.1387767791748047 }, { "epoch": 2.7911376953125e-05, "model_forward_time": 0.024608850479125977, "step": 18292 }, { "epoch": 2.7911376953125e-05, "step": 18292, "training_step_time": 0.12232422828674316 }, { "epoch": 2.791290283203125e-05, "model_forward_time": 0.025117158889770508, "step": 18293 }, { "epoch": 2.791290283203125e-05, "step": 18293, "training_step_time": 0.10627508163452148 }, { "epoch": 2.79144287109375e-05, "model_forward_time": 0.025742292404174805, "step": 18294 }, { "epoch": 2.79144287109375e-05, "step": 18294, "training_step_time": 0.12194013595581055 }, { "epoch": 2.791595458984375e-05, "model_forward_time": 0.025872468948364258, "step": 18295 }, { "epoch": 2.791595458984375e-05, "step": 18295, "training_step_time": 0.12984180450439453 }, { "epoch": 2.791748046875e-05, "model_forward_time": 0.025594711303710938, "step": 18296 }, { "epoch": 2.791748046875e-05, "step": 18296, "training_step_time": 0.13345718383789062 }, { "epoch": 2.791900634765625e-05, "model_forward_time": 0.0255889892578125, "step": 18297 }, { "epoch": 2.791900634765625e-05, "step": 18297, "training_step_time": 0.1466231346130371 }, { "epoch": 2.79205322265625e-05, "model_forward_time": 0.024729490280151367, "step": 18298 }, { "epoch": 2.79205322265625e-05, "step": 18298, "training_step_time": 0.12056779861450195 }, { "epoch": 2.792205810546875e-05, "model_forward_time": 0.024597883224487305, "step": 18299 }, { "epoch": 2.792205810546875e-05, "step": 18299, "training_step_time": 0.21015191078186035 }, { "epoch": 2.7923583984375e-05, "grad_norm": 0.3045021891593933, "learning_rate": 3.612985456190778e-05, "loss": 0.0083, "step": 18300 }, { "epoch": 2.7923583984375e-05, "model_forward_time": 0.024926424026489258, "step": 18300 }, { "epoch": 2.7923583984375e-05, "step": 18300, "training_step_time": 0.1305677890777588 }, { "epoch": 2.792510986328125e-05, "model_forward_time": 0.027011632919311523, "step": 18301 }, { "epoch": 2.792510986328125e-05, "step": 18301, "training_step_time": 0.11493659019470215 }, { "epoch": 2.79266357421875e-05, "model_forward_time": 0.025228023529052734, "step": 18302 }, { "epoch": 2.79266357421875e-05, "step": 18302, "training_step_time": 0.11197972297668457 }, { "epoch": 2.792816162109375e-05, "model_forward_time": 0.025374889373779297, "step": 18303 }, { "epoch": 2.792816162109375e-05, "step": 18303, "training_step_time": 0.11043453216552734 }, { "epoch": 2.79296875e-05, "model_forward_time": 0.02577376365661621, "step": 18304 }, { "epoch": 2.79296875e-05, "step": 18304, "training_step_time": 0.10995841026306152 }, { "epoch": 2.793121337890625e-05, "model_forward_time": 0.02533745765686035, "step": 18305 }, { "epoch": 2.793121337890625e-05, "step": 18305, "training_step_time": 0.10722613334655762 }, { "epoch": 2.79327392578125e-05, "model_forward_time": 0.027135848999023438, "step": 18306 }, { "epoch": 2.79327392578125e-05, "step": 18306, "training_step_time": 0.10967206954956055 }, { "epoch": 2.793426513671875e-05, "model_forward_time": 0.02510976791381836, "step": 18307 }, { "epoch": 2.793426513671875e-05, "step": 18307, "training_step_time": 0.10702037811279297 }, { "epoch": 2.7935791015625e-05, "model_forward_time": 0.025091886520385742, "step": 18308 }, { "epoch": 2.7935791015625e-05, "step": 18308, "training_step_time": 0.10604023933410645 }, { "epoch": 2.793731689453125e-05, "model_forward_time": 0.025442123413085938, "step": 18309 }, { "epoch": 2.793731689453125e-05, "step": 18309, "training_step_time": 0.1079108715057373 }, { "epoch": 2.79388427734375e-05, "grad_norm": 0.1470704823732376, "learning_rate": 3.607691042587492e-05, "loss": 0.0116, "step": 18310 }, { "epoch": 2.79388427734375e-05, "model_forward_time": 0.024973392486572266, "step": 18310 }, { "epoch": 2.79388427734375e-05, "step": 18310, "training_step_time": 0.10716629028320312 }, { "epoch": 2.794036865234375e-05, "model_forward_time": 0.02525019645690918, "step": 18311 }, { "epoch": 2.794036865234375e-05, "step": 18311, "training_step_time": 0.12679576873779297 }, { "epoch": 2.794189453125e-05, "model_forward_time": 0.025838851928710938, "step": 18312 }, { "epoch": 2.794189453125e-05, "step": 18312, "training_step_time": 0.1123208999633789 }, { "epoch": 2.794342041015625e-05, "model_forward_time": 0.02532339096069336, "step": 18313 }, { "epoch": 2.794342041015625e-05, "step": 18313, "training_step_time": 0.11084699630737305 }, { "epoch": 2.79449462890625e-05, "model_forward_time": 0.025145769119262695, "step": 18314 }, { "epoch": 2.79449462890625e-05, "step": 18314, "training_step_time": 0.11454176902770996 }, { "epoch": 2.794647216796875e-05, "model_forward_time": 0.02503037452697754, "step": 18315 }, { "epoch": 2.794647216796875e-05, "step": 18315, "training_step_time": 0.10746407508850098 }, { "epoch": 2.7947998046875e-05, "model_forward_time": 0.025521516799926758, "step": 18316 }, { "epoch": 2.7947998046875e-05, "step": 18316, "training_step_time": 0.19453072547912598 }, { "epoch": 2.794952392578125e-05, "model_forward_time": 0.02474212646484375, "step": 18317 }, { "epoch": 2.794952392578125e-05, "step": 18317, "training_step_time": 0.11647605895996094 }, { "epoch": 2.79510498046875e-05, "model_forward_time": 0.025281667709350586, "step": 18318 }, { "epoch": 2.79510498046875e-05, "step": 18318, "training_step_time": 0.10419130325317383 }, { "epoch": 2.795257568359375e-05, "model_forward_time": 0.025305747985839844, "step": 18319 }, { "epoch": 2.795257568359375e-05, "step": 18319, "training_step_time": 0.10953283309936523 }, { "epoch": 2.79541015625e-05, "grad_norm": 0.2538145184516907, "learning_rate": 3.602398320770875e-05, "loss": 0.0102, "step": 18320 }, { "epoch": 2.79541015625e-05, "model_forward_time": 0.025603532791137695, "step": 18320 }, { "epoch": 2.79541015625e-05, "step": 18320, "training_step_time": 0.10664629936218262 }, { "epoch": 2.795562744140625e-05, "model_forward_time": 0.02505016326904297, "step": 18321 }, { "epoch": 2.795562744140625e-05, "step": 18321, "training_step_time": 0.10572004318237305 }, { "epoch": 2.79571533203125e-05, "model_forward_time": 0.025905609130859375, "step": 18322 }, { "epoch": 2.79571533203125e-05, "step": 18322, "training_step_time": 0.10814404487609863 }, { "epoch": 2.795867919921875e-05, "model_forward_time": 0.026244401931762695, "step": 18323 }, { "epoch": 2.795867919921875e-05, "step": 18323, "training_step_time": 0.11467742919921875 }, { "epoch": 2.7960205078125e-05, "model_forward_time": 0.025305986404418945, "step": 18324 }, { "epoch": 2.7960205078125e-05, "step": 18324, "training_step_time": 0.10579109191894531 }, { "epoch": 2.796173095703125e-05, "model_forward_time": 0.025427579879760742, "step": 18325 }, { "epoch": 2.796173095703125e-05, "step": 18325, "training_step_time": 0.10454297065734863 }, { "epoch": 2.79632568359375e-05, "model_forward_time": 0.024906158447265625, "step": 18326 }, { "epoch": 2.79632568359375e-05, "step": 18326, "training_step_time": 0.10759568214416504 }, { "epoch": 2.796478271484375e-05, "model_forward_time": 0.025582075119018555, "step": 18327 }, { "epoch": 2.796478271484375e-05, "step": 18327, "training_step_time": 0.10872650146484375 }, { "epoch": 2.796630859375e-05, "model_forward_time": 0.025388717651367188, "step": 18328 }, { "epoch": 2.796630859375e-05, "step": 18328, "training_step_time": 0.1077432632446289 }, { "epoch": 2.796783447265625e-05, "model_forward_time": 0.025267362594604492, "step": 18329 }, { "epoch": 2.796783447265625e-05, "step": 18329, "training_step_time": 0.1051628589630127 }, { "epoch": 2.79693603515625e-05, "grad_norm": 0.1745704561471939, "learning_rate": 3.597107297172084e-05, "loss": 0.009, "step": 18330 }, { "epoch": 2.79693603515625e-05, "model_forward_time": 0.02561354637145996, "step": 18330 }, { "epoch": 2.79693603515625e-05, "step": 18330, "training_step_time": 0.10848546028137207 }, { "epoch": 2.797088623046875e-05, "model_forward_time": 0.02574777603149414, "step": 18331 }, { "epoch": 2.797088623046875e-05, "step": 18331, "training_step_time": 0.17898988723754883 }, { "epoch": 2.7972412109375e-05, "model_forward_time": 0.025195837020874023, "step": 18332 }, { "epoch": 2.7972412109375e-05, "step": 18332, "training_step_time": 0.1186821460723877 }, { "epoch": 2.797393798828125e-05, "model_forward_time": 0.024928808212280273, "step": 18333 }, { "epoch": 2.797393798828125e-05, "step": 18333, "training_step_time": 0.1316087245941162 }, { "epoch": 2.79754638671875e-05, "model_forward_time": 0.02517390251159668, "step": 18334 }, { "epoch": 2.79754638671875e-05, "step": 18334, "training_step_time": 0.16114306449890137 }, { "epoch": 2.797698974609375e-05, "model_forward_time": 0.025583982467651367, "step": 18335 }, { "epoch": 2.797698974609375e-05, "step": 18335, "training_step_time": 0.17955422401428223 }, { "epoch": 2.7978515625e-05, "model_forward_time": 0.024855852127075195, "step": 18336 }, { "epoch": 2.7978515625e-05, "step": 18336, "training_step_time": 0.15543723106384277 }, { "epoch": 2.798004150390625e-05, "model_forward_time": 0.024886369705200195, "step": 18337 }, { "epoch": 2.798004150390625e-05, "step": 18337, "training_step_time": 0.20273065567016602 }, { "epoch": 2.79815673828125e-05, "model_forward_time": 0.02460026741027832, "step": 18338 }, { "epoch": 2.79815673828125e-05, "step": 18338, "training_step_time": 0.12636876106262207 }, { "epoch": 2.798309326171875e-05, "model_forward_time": 0.026241302490234375, "step": 18339 }, { "epoch": 2.798309326171875e-05, "step": 18339, "training_step_time": 0.11115455627441406 }, { "epoch": 2.7984619140625e-05, "grad_norm": 0.25465038418769836, "learning_rate": 3.591817978220212e-05, "loss": 0.0105, "step": 18340 }, { "epoch": 2.7984619140625e-05, "model_forward_time": 0.026186227798461914, "step": 18340 }, { "epoch": 2.7984619140625e-05, "step": 18340, "training_step_time": 0.1095130443572998 }, { "epoch": 2.798614501953125e-05, "model_forward_time": 0.025721073150634766, "step": 18341 }, { "epoch": 2.798614501953125e-05, "step": 18341, "training_step_time": 0.21494841575622559 }, { "epoch": 2.79876708984375e-05, "model_forward_time": 0.025144338607788086, "step": 18342 }, { "epoch": 2.79876708984375e-05, "step": 18342, "training_step_time": 0.16524791717529297 }, { "epoch": 2.798919677734375e-05, "model_forward_time": 0.024668455123901367, "step": 18343 }, { "epoch": 2.798919677734375e-05, "step": 18343, "training_step_time": 0.11128783226013184 }, { "epoch": 2.799072265625e-05, "model_forward_time": 0.02464151382446289, "step": 18344 }, { "epoch": 2.799072265625e-05, "step": 18344, "training_step_time": 0.10782194137573242 }, { "epoch": 2.799224853515625e-05, "model_forward_time": 0.025577545166015625, "step": 18345 }, { "epoch": 2.799224853515625e-05, "step": 18345, "training_step_time": 0.11761832237243652 }, { "epoch": 2.79937744140625e-05, "model_forward_time": 0.024912118911743164, "step": 18346 }, { "epoch": 2.79937744140625e-05, "step": 18346, "training_step_time": 0.12977242469787598 }, { "epoch": 2.799530029296875e-05, "model_forward_time": 0.02516937255859375, "step": 18347 }, { "epoch": 2.799530029296875e-05, "step": 18347, "training_step_time": 0.12907767295837402 }, { "epoch": 2.7996826171875e-05, "model_forward_time": 0.02736210823059082, "step": 18348 }, { "epoch": 2.7996826171875e-05, "step": 18348, "training_step_time": 0.11854243278503418 }, { "epoch": 2.799835205078125e-05, "model_forward_time": 0.02537250518798828, "step": 18349 }, { "epoch": 2.799835205078125e-05, "step": 18349, "training_step_time": 0.10651755332946777 }, { "epoch": 2.79998779296875e-05, "grad_norm": 0.14040468633174896, "learning_rate": 3.586530370342279e-05, "loss": 0.0071, "step": 18350 }, { "epoch": 2.79998779296875e-05, "model_forward_time": 0.025244951248168945, "step": 18350 }, { "epoch": 2.79998779296875e-05, "step": 18350, "training_step_time": 0.10501599311828613 }, { "epoch": 2.800140380859375e-05, "model_forward_time": 0.025095224380493164, "step": 18351 }, { "epoch": 2.800140380859375e-05, "step": 18351, "training_step_time": 0.10441708564758301 }, { "epoch": 2.80029296875e-05, "model_forward_time": 0.02532815933227539, "step": 18352 }, { "epoch": 2.80029296875e-05, "step": 18352, "training_step_time": 0.10570383071899414 }, { "epoch": 2.800445556640625e-05, "model_forward_time": 0.0251924991607666, "step": 18353 }, { "epoch": 2.800445556640625e-05, "step": 18353, "training_step_time": 0.10441207885742188 }, { "epoch": 2.80059814453125e-05, "model_forward_time": 0.025303125381469727, "step": 18354 }, { "epoch": 2.80059814453125e-05, "step": 18354, "training_step_time": 0.10609126091003418 }, { "epoch": 2.800750732421875e-05, "model_forward_time": 0.025273561477661133, "step": 18355 }, { "epoch": 2.800750732421875e-05, "step": 18355, "training_step_time": 0.10513925552368164 }, { "epoch": 2.8009033203125e-05, "model_forward_time": 0.025633811950683594, "step": 18356 }, { "epoch": 2.8009033203125e-05, "step": 18356, "training_step_time": 0.10897946357727051 }, { "epoch": 2.801055908203125e-05, "model_forward_time": 0.025460004806518555, "step": 18357 }, { "epoch": 2.801055908203125e-05, "step": 18357, "training_step_time": 0.12163329124450684 }, { "epoch": 2.80120849609375e-05, "model_forward_time": 0.025160789489746094, "step": 18358 }, { "epoch": 2.80120849609375e-05, "step": 18358, "training_step_time": 0.10438084602355957 }, { "epoch": 2.801361083984375e-05, "model_forward_time": 0.024986743927001953, "step": 18359 }, { "epoch": 2.801361083984375e-05, "step": 18359, "training_step_time": 0.12178850173950195 }, { "epoch": 2.801513671875e-05, "grad_norm": 0.25987479090690613, "learning_rate": 3.581244479963225e-05, "loss": 0.0088, "step": 18360 }, { "epoch": 2.801513671875e-05, "model_forward_time": 0.02523040771484375, "step": 18360 }, { "epoch": 2.801513671875e-05, "step": 18360, "training_step_time": 0.1124269962310791 }, { "epoch": 2.801666259765625e-05, "model_forward_time": 0.02678990364074707, "step": 18361 }, { "epoch": 2.801666259765625e-05, "step": 18361, "training_step_time": 0.10658144950866699 }, { "epoch": 2.80181884765625e-05, "model_forward_time": 0.025498628616333008, "step": 18362 }, { "epoch": 2.80181884765625e-05, "step": 18362, "training_step_time": 0.19771933555603027 }, { "epoch": 2.801971435546875e-05, "model_forward_time": 0.024520158767700195, "step": 18363 }, { "epoch": 2.801971435546875e-05, "step": 18363, "training_step_time": 0.1043539047241211 }, { "epoch": 2.8021240234375e-05, "model_forward_time": 0.02482295036315918, "step": 18364 }, { "epoch": 2.8021240234375e-05, "step": 18364, "training_step_time": 0.10703754425048828 }, { "epoch": 2.802276611328125e-05, "model_forward_time": 0.025222063064575195, "step": 18365 }, { "epoch": 2.802276611328125e-05, "step": 18365, "training_step_time": 0.11151504516601562 }, { "epoch": 2.80242919921875e-05, "model_forward_time": 0.024646997451782227, "step": 18366 }, { "epoch": 2.80242919921875e-05, "step": 18366, "training_step_time": 0.1063683032989502 }, { "epoch": 2.802581787109375e-05, "model_forward_time": 0.024829387664794922, "step": 18367 }, { "epoch": 2.802581787109375e-05, "step": 18367, "training_step_time": 0.10512495040893555 }, { "epoch": 2.802734375e-05, "model_forward_time": 0.02535533905029297, "step": 18368 }, { "epoch": 2.802734375e-05, "step": 18368, "training_step_time": 0.10500288009643555 }, { "epoch": 2.802886962890625e-05, "model_forward_time": 0.025400161743164062, "step": 18369 }, { "epoch": 2.802886962890625e-05, "step": 18369, "training_step_time": 0.1052854061126709 }, { "epoch": 2.80303955078125e-05, "grad_norm": 0.42455539107322693, "learning_rate": 3.57596031350591e-05, "loss": 0.007, "step": 18370 }, { "epoch": 2.80303955078125e-05, "model_forward_time": 0.02508378028869629, "step": 18370 }, { "epoch": 2.80303955078125e-05, "step": 18370, "training_step_time": 0.10556197166442871 }, { "epoch": 2.803192138671875e-05, "model_forward_time": 0.025608539581298828, "step": 18371 }, { "epoch": 2.803192138671875e-05, "step": 18371, "training_step_time": 0.14202189445495605 }, { "epoch": 2.8033447265625e-05, "model_forward_time": 0.025231599807739258, "step": 18372 }, { "epoch": 2.8033447265625e-05, "step": 18372, "training_step_time": 0.15815329551696777 }, { "epoch": 2.803497314453125e-05, "model_forward_time": 0.024489164352416992, "step": 18373 }, { "epoch": 2.803497314453125e-05, "step": 18373, "training_step_time": 0.14473247528076172 }, { "epoch": 2.80364990234375e-05, "model_forward_time": 0.024794816970825195, "step": 18374 }, { "epoch": 2.80364990234375e-05, "step": 18374, "training_step_time": 0.14179587364196777 }, { "epoch": 2.803802490234375e-05, "model_forward_time": 0.02445530891418457, "step": 18375 }, { "epoch": 2.803802490234375e-05, "step": 18375, "training_step_time": 0.1239786148071289 }, { "epoch": 2.803955078125e-05, "model_forward_time": 0.024916648864746094, "step": 18376 }, { "epoch": 2.803955078125e-05, "step": 18376, "training_step_time": 0.12299704551696777 }, { "epoch": 2.804107666015625e-05, "model_forward_time": 0.025094032287597656, "step": 18377 }, { "epoch": 2.804107666015625e-05, "step": 18377, "training_step_time": 0.1181344985961914 }, { "epoch": 2.80426025390625e-05, "model_forward_time": 0.025272369384765625, "step": 18378 }, { "epoch": 2.80426025390625e-05, "step": 18378, "training_step_time": 0.11593127250671387 }, { "epoch": 2.804412841796875e-05, "model_forward_time": 0.02548527717590332, "step": 18379 }, { "epoch": 2.804412841796875e-05, "step": 18379, "training_step_time": 0.15390753746032715 }, { "epoch": 2.8045654296875e-05, "grad_norm": 0.20815393328666687, "learning_rate": 3.570677877391092e-05, "loss": 0.0093, "step": 18380 }, { "epoch": 2.8045654296875e-05, "model_forward_time": 0.024593591690063477, "step": 18380 }, { "epoch": 2.8045654296875e-05, "step": 18380, "training_step_time": 0.20925521850585938 }, { "epoch": 2.804718017578125e-05, "model_forward_time": 0.024608373641967773, "step": 18381 }, { "epoch": 2.804718017578125e-05, "step": 18381, "training_step_time": 0.12543153762817383 }, { "epoch": 2.80487060546875e-05, "model_forward_time": 0.02474665641784668, "step": 18382 }, { "epoch": 2.80487060546875e-05, "step": 18382, "training_step_time": 0.19297575950622559 }, { "epoch": 2.805023193359375e-05, "model_forward_time": 0.024935245513916016, "step": 18383 }, { "epoch": 2.805023193359375e-05, "step": 18383, "training_step_time": 0.11851978302001953 }, { "epoch": 2.80517578125e-05, "model_forward_time": 0.02477264404296875, "step": 18384 }, { "epoch": 2.80517578125e-05, "step": 18384, "training_step_time": 0.10373735427856445 }, { "epoch": 2.805328369140625e-05, "model_forward_time": 0.02557992935180664, "step": 18385 }, { "epoch": 2.805328369140625e-05, "step": 18385, "training_step_time": 0.11870336532592773 }, { "epoch": 2.80548095703125e-05, "model_forward_time": 0.025134801864624023, "step": 18386 }, { "epoch": 2.80548095703125e-05, "step": 18386, "training_step_time": 0.10883426666259766 }, { "epoch": 2.805633544921875e-05, "model_forward_time": 0.02520585060119629, "step": 18387 }, { "epoch": 2.805633544921875e-05, "step": 18387, "training_step_time": 0.22242188453674316 }, { "epoch": 2.8057861328125e-05, "model_forward_time": 0.024539470672607422, "step": 18388 }, { "epoch": 2.8057861328125e-05, "step": 18388, "training_step_time": 0.1521773338317871 }, { "epoch": 2.805938720703125e-05, "model_forward_time": 0.024480819702148438, "step": 18389 }, { "epoch": 2.805938720703125e-05, "step": 18389, "training_step_time": 0.15972447395324707 }, { "epoch": 2.80609130859375e-05, "grad_norm": 0.11801661550998688, "learning_rate": 3.5653971780374295e-05, "loss": 0.0073, "step": 18390 }, { "epoch": 2.80609130859375e-05, "model_forward_time": 0.024174213409423828, "step": 18390 }, { "epoch": 2.80609130859375e-05, "step": 18390, "training_step_time": 0.16575264930725098 }, { "epoch": 2.806243896484375e-05, "model_forward_time": 0.024132251739501953, "step": 18391 }, { "epoch": 2.806243896484375e-05, "step": 18391, "training_step_time": 0.13094019889831543 }, { "epoch": 2.806396484375e-05, "model_forward_time": 0.024740934371948242, "step": 18392 }, { "epoch": 2.806396484375e-05, "step": 18392, "training_step_time": 0.11393380165100098 }, { "epoch": 2.806549072265625e-05, "model_forward_time": 0.025031089782714844, "step": 18393 }, { "epoch": 2.806549072265625e-05, "step": 18393, "training_step_time": 0.12122154235839844 }, { "epoch": 2.80670166015625e-05, "model_forward_time": 0.025151491165161133, "step": 18394 }, { "epoch": 2.80670166015625e-05, "step": 18394, "training_step_time": 0.1111593246459961 }, { "epoch": 2.806854248046875e-05, "model_forward_time": 0.024866580963134766, "step": 18395 }, { "epoch": 2.806854248046875e-05, "step": 18395, "training_step_time": 0.10981321334838867 }, { "epoch": 2.8070068359375e-05, "model_forward_time": 0.02387523651123047, "step": 18396 }, { "epoch": 2.8070068359375e-05, "step": 18396, "training_step_time": 0.10695528984069824 }, { "epoch": 2.807159423828125e-05, "model_forward_time": 0.02476215362548828, "step": 18397 }, { "epoch": 2.807159423828125e-05, "step": 18397, "training_step_time": 0.1074059009552002 }, { "epoch": 2.80731201171875e-05, "model_forward_time": 0.024916410446166992, "step": 18398 }, { "epoch": 2.80731201171875e-05, "step": 18398, "training_step_time": 0.10904502868652344 }, { "epoch": 2.807464599609375e-05, "model_forward_time": 0.02487945556640625, "step": 18399 }, { "epoch": 2.807464599609375e-05, "step": 18399, "training_step_time": 0.10808920860290527 }, { "epoch": 2.8076171875e-05, "grad_norm": 0.6371890902519226, "learning_rate": 3.56011822186147e-05, "loss": 0.0156, "step": 18400 }, { "epoch": 2.8076171875e-05, "model_forward_time": 0.025880098342895508, "step": 18400 }, { "epoch": 2.8076171875e-05, "step": 18400, "training_step_time": 0.1888105869293213 }, { "epoch": 2.807769775390625e-05, "model_forward_time": 0.024862051010131836, "step": 18401 }, { "epoch": 2.807769775390625e-05, "step": 18401, "training_step_time": 0.1420910358428955 }, { "epoch": 2.80792236328125e-05, "model_forward_time": 0.02399420738220215, "step": 18402 }, { "epoch": 2.80792236328125e-05, "step": 18402, "training_step_time": 0.10989713668823242 }, { "epoch": 2.808074951171875e-05, "model_forward_time": 0.02477264404296875, "step": 18403 }, { "epoch": 2.808074951171875e-05, "step": 18403, "training_step_time": 0.11352252960205078 }, { "epoch": 2.8082275390625e-05, "model_forward_time": 0.024726152420043945, "step": 18404 }, { "epoch": 2.8082275390625e-05, "step": 18404, "training_step_time": 0.11578106880187988 }, { "epoch": 2.808380126953125e-05, "model_forward_time": 0.024883031845092773, "step": 18405 }, { "epoch": 2.808380126953125e-05, "step": 18405, "training_step_time": 0.1070871353149414 }, { "epoch": 2.80853271484375e-05, "model_forward_time": 0.024727582931518555, "step": 18406 }, { "epoch": 2.80853271484375e-05, "step": 18406, "training_step_time": 0.2001943588256836 }, { "epoch": 2.808685302734375e-05, "model_forward_time": 0.024663209915161133, "step": 18407 }, { "epoch": 2.808685302734375e-05, "step": 18407, "training_step_time": 0.10566210746765137 }, { "epoch": 2.808837890625e-05, "model_forward_time": 0.024473905563354492, "step": 18408 }, { "epoch": 2.808837890625e-05, "step": 18408, "training_step_time": 0.10317206382751465 }, { "epoch": 2.808990478515625e-05, "model_forward_time": 0.024999380111694336, "step": 18409 }, { "epoch": 2.808990478515625e-05, "step": 18409, "training_step_time": 0.10439682006835938 }, { "epoch": 2.80914306640625e-05, "grad_norm": 0.3864893913269043, "learning_rate": 3.554841015277641e-05, "loss": 0.0127, "step": 18410 }, { "epoch": 2.80914306640625e-05, "model_forward_time": 0.025127649307250977, "step": 18410 }, { "epoch": 2.80914306640625e-05, "step": 18410, "training_step_time": 0.10350608825683594 }, { "epoch": 2.809295654296875e-05, "model_forward_time": 0.02503228187561035, "step": 18411 }, { "epoch": 2.809295654296875e-05, "step": 18411, "training_step_time": 0.10484910011291504 }, { "epoch": 2.8094482421875e-05, "model_forward_time": 0.025030136108398438, "step": 18412 }, { "epoch": 2.8094482421875e-05, "step": 18412, "training_step_time": 0.10496020317077637 }, { "epoch": 2.809600830078125e-05, "model_forward_time": 0.025463342666625977, "step": 18413 }, { "epoch": 2.809600830078125e-05, "step": 18413, "training_step_time": 0.10821986198425293 }, { "epoch": 2.80975341796875e-05, "model_forward_time": 0.024854183197021484, "step": 18414 }, { "epoch": 2.80975341796875e-05, "step": 18414, "training_step_time": 0.10462737083435059 }, { "epoch": 2.809906005859375e-05, "model_forward_time": 0.02503800392150879, "step": 18415 }, { "epoch": 2.809906005859375e-05, "step": 18415, "training_step_time": 0.11236786842346191 }, { "epoch": 2.81005859375e-05, "model_forward_time": 0.02520465850830078, "step": 18416 }, { "epoch": 2.81005859375e-05, "step": 18416, "training_step_time": 0.10600948333740234 }, { "epoch": 2.810211181640625e-05, "model_forward_time": 0.024947643280029297, "step": 18417 }, { "epoch": 2.810211181640625e-05, "step": 18417, "training_step_time": 0.10386013984680176 }, { "epoch": 2.81036376953125e-05, "model_forward_time": 0.025141239166259766, "step": 18418 }, { "epoch": 2.81036376953125e-05, "step": 18418, "training_step_time": 0.1051034927368164 }, { "epoch": 2.810516357421875e-05, "model_forward_time": 0.024964094161987305, "step": 18419 }, { "epoch": 2.810516357421875e-05, "step": 18419, "training_step_time": 0.10456967353820801 }, { "epoch": 2.8106689453125e-05, "grad_norm": 0.2138444483280182, "learning_rate": 3.5495655646982505e-05, "loss": 0.0133, "step": 18420 }, { "epoch": 2.8106689453125e-05, "model_forward_time": 0.0247342586517334, "step": 18420 }, { "epoch": 2.8106689453125e-05, "step": 18420, "training_step_time": 0.21396708488464355 }, { "epoch": 2.810821533203125e-05, "model_forward_time": 0.02487802505493164, "step": 18421 }, { "epoch": 2.810821533203125e-05, "step": 18421, "training_step_time": 0.12329959869384766 }, { "epoch": 2.81097412109375e-05, "model_forward_time": 0.02423095703125, "step": 18422 }, { "epoch": 2.81097412109375e-05, "step": 18422, "training_step_time": 0.13443446159362793 }, { "epoch": 2.811126708984375e-05, "model_forward_time": 0.025026321411132812, "step": 18423 }, { "epoch": 2.811126708984375e-05, "step": 18423, "training_step_time": 0.16050934791564941 }, { "epoch": 2.811279296875e-05, "model_forward_time": 0.024158239364624023, "step": 18424 }, { "epoch": 2.811279296875e-05, "step": 18424, "training_step_time": 0.21318364143371582 }, { "epoch": 2.811431884765625e-05, "model_forward_time": 0.024852991104125977, "step": 18425 }, { "epoch": 2.811431884765625e-05, "step": 18425, "training_step_time": 0.11339306831359863 }, { "epoch": 2.81158447265625e-05, "model_forward_time": 0.02451014518737793, "step": 18426 }, { "epoch": 2.81158447265625e-05, "step": 18426, "training_step_time": 0.10437846183776855 }, { "epoch": 2.811737060546875e-05, "model_forward_time": 0.025655031204223633, "step": 18427 }, { "epoch": 2.811737060546875e-05, "step": 18427, "training_step_time": 0.1085519790649414 }, { "epoch": 2.8118896484375e-05, "model_forward_time": 0.0254213809967041, "step": 18428 }, { "epoch": 2.8118896484375e-05, "step": 18428, "training_step_time": 0.11316752433776855 }, { "epoch": 2.812042236328125e-05, "model_forward_time": 0.025228023529052734, "step": 18429 }, { "epoch": 2.812042236328125e-05, "step": 18429, "training_step_time": 0.11049842834472656 }, { "epoch": 2.81219482421875e-05, "grad_norm": 0.17229513823986053, "learning_rate": 3.544291876533466e-05, "loss": 0.0084, "step": 18430 }, { "epoch": 2.81219482421875e-05, "model_forward_time": 0.02562689781188965, "step": 18430 }, { "epoch": 2.81219482421875e-05, "step": 18430, "training_step_time": 0.1096339225769043 }, { "epoch": 2.812347412109375e-05, "model_forward_time": 0.025182008743286133, "step": 18431 }, { "epoch": 2.812347412109375e-05, "step": 18431, "training_step_time": 0.22024297714233398 }, { "epoch": 2.8125e-05, "model_forward_time": 0.024271488189697266, "step": 18432 }, { "epoch": 2.8125e-05, "step": 18432, "training_step_time": 0.13884210586547852 }, { "epoch": 2.812652587890625e-05, "model_forward_time": 0.024174928665161133, "step": 18433 }, { "epoch": 2.812652587890625e-05, "step": 18433, "training_step_time": 0.14784836769104004 }, { "epoch": 2.81280517578125e-05, "model_forward_time": 0.024707317352294922, "step": 18434 }, { "epoch": 2.81280517578125e-05, "step": 18434, "training_step_time": 0.1278843879699707 }, { "epoch": 2.812957763671875e-05, "model_forward_time": 0.024101734161376953, "step": 18435 }, { "epoch": 2.812957763671875e-05, "step": 18435, "training_step_time": 0.2028806209564209 }, { "epoch": 2.8131103515625e-05, "model_forward_time": 0.025168657302856445, "step": 18436 }, { "epoch": 2.8131103515625e-05, "step": 18436, "training_step_time": 0.13534832000732422 }, { "epoch": 2.813262939453125e-05, "model_forward_time": 0.024213314056396484, "step": 18437 }, { "epoch": 2.813262939453125e-05, "step": 18437, "training_step_time": 0.11329245567321777 }, { "epoch": 2.81341552734375e-05, "model_forward_time": 0.02493906021118164, "step": 18438 }, { "epoch": 2.81341552734375e-05, "step": 18438, "training_step_time": 0.11572647094726562 }, { "epoch": 2.813568115234375e-05, "model_forward_time": 0.023944377899169922, "step": 18439 }, { "epoch": 2.813568115234375e-05, "step": 18439, "training_step_time": 0.11264848709106445 }, { "epoch": 2.813720703125e-05, "grad_norm": 0.3152703642845154, "learning_rate": 3.539019957191315e-05, "loss": 0.0082, "step": 18440 }, { "epoch": 2.813720703125e-05, "model_forward_time": 0.025650501251220703, "step": 18440 }, { "epoch": 2.813720703125e-05, "step": 18440, "training_step_time": 0.10782742500305176 }, { "epoch": 2.813873291015625e-05, "model_forward_time": 0.024502277374267578, "step": 18441 }, { "epoch": 2.813873291015625e-05, "step": 18441, "training_step_time": 0.11281180381774902 }, { "epoch": 2.81402587890625e-05, "model_forward_time": 0.025213956832885742, "step": 18442 }, { "epoch": 2.81402587890625e-05, "step": 18442, "training_step_time": 0.10926604270935059 }, { "epoch": 2.814178466796875e-05, "model_forward_time": 0.024883508682250977, "step": 18443 }, { "epoch": 2.814178466796875e-05, "step": 18443, "training_step_time": 0.10839986801147461 }, { "epoch": 2.8143310546875e-05, "model_forward_time": 0.025763988494873047, "step": 18444 }, { "epoch": 2.8143310546875e-05, "step": 18444, "training_step_time": 0.11085271835327148 }, { "epoch": 2.814483642578125e-05, "model_forward_time": 0.02500605583190918, "step": 18445 }, { "epoch": 2.814483642578125e-05, "step": 18445, "training_step_time": 0.16804289817810059 }, { "epoch": 2.81463623046875e-05, "model_forward_time": 0.02451467514038086, "step": 18446 }, { "epoch": 2.81463623046875e-05, "step": 18446, "training_step_time": 0.14084267616271973 }, { "epoch": 2.814788818359375e-05, "model_forward_time": 0.0244596004486084, "step": 18447 }, { "epoch": 2.814788818359375e-05, "step": 18447, "training_step_time": 0.10860490798950195 }, { "epoch": 2.81494140625e-05, "model_forward_time": 0.02468395233154297, "step": 18448 }, { "epoch": 2.81494140625e-05, "step": 18448, "training_step_time": 0.1138613224029541 }, { "epoch": 2.815093994140625e-05, "model_forward_time": 0.024578571319580078, "step": 18449 }, { "epoch": 2.815093994140625e-05, "step": 18449, "training_step_time": 0.11719465255737305 }, { "epoch": 2.81524658203125e-05, "grad_norm": 0.1702016443014145, "learning_rate": 3.533749813077677e-05, "loss": 0.0144, "step": 18450 }, { "epoch": 2.81524658203125e-05, "model_forward_time": 0.024478435516357422, "step": 18450 }, { "epoch": 2.81524658203125e-05, "step": 18450, "training_step_time": 0.10366058349609375 }, { "epoch": 2.815399169921875e-05, "model_forward_time": 0.02488422393798828, "step": 18451 }, { "epoch": 2.815399169921875e-05, "step": 18451, "training_step_time": 0.20134234428405762 }, { "epoch": 2.8155517578125e-05, "model_forward_time": 0.02420210838317871, "step": 18452 }, { "epoch": 2.8155517578125e-05, "step": 18452, "training_step_time": 0.10092902183532715 }, { "epoch": 2.815704345703125e-05, "model_forward_time": 0.024678945541381836, "step": 18453 }, { "epoch": 2.815704345703125e-05, "step": 18453, "training_step_time": 0.10239624977111816 }, { "epoch": 2.81585693359375e-05, "model_forward_time": 0.024515151977539062, "step": 18454 }, { "epoch": 2.81585693359375e-05, "step": 18454, "training_step_time": 0.10326099395751953 }, { "epoch": 2.816009521484375e-05, "model_forward_time": 0.024840116500854492, "step": 18455 }, { "epoch": 2.816009521484375e-05, "step": 18455, "training_step_time": 0.10522174835205078 }, { "epoch": 2.816162109375e-05, "model_forward_time": 0.024948596954345703, "step": 18456 }, { "epoch": 2.816162109375e-05, "step": 18456, "training_step_time": 0.10436892509460449 }, { "epoch": 2.816314697265625e-05, "model_forward_time": 0.024748802185058594, "step": 18457 }, { "epoch": 2.816314697265625e-05, "step": 18457, "training_step_time": 0.10403943061828613 }, { "epoch": 2.81646728515625e-05, "model_forward_time": 0.0251004695892334, "step": 18458 }, { "epoch": 2.81646728515625e-05, "step": 18458, "training_step_time": 0.10621166229248047 }, { "epoch": 2.816619873046875e-05, "model_forward_time": 0.025872230529785156, "step": 18459 }, { "epoch": 2.816619873046875e-05, "step": 18459, "training_step_time": 0.11031723022460938 }, { "epoch": 2.8167724609375e-05, "grad_norm": 0.4336320161819458, "learning_rate": 3.528481450596274e-05, "loss": 0.0117, "step": 18460 }, { "epoch": 2.8167724609375e-05, "model_forward_time": 0.02498030662536621, "step": 18460 }, { "epoch": 2.8167724609375e-05, "step": 18460, "training_step_time": 0.10858559608459473 }, { "epoch": 2.816925048828125e-05, "model_forward_time": 0.024907350540161133, "step": 18461 }, { "epoch": 2.816925048828125e-05, "step": 18461, "training_step_time": 0.10492515563964844 }, { "epoch": 2.81707763671875e-05, "model_forward_time": 0.025012731552124023, "step": 18462 }, { "epoch": 2.81707763671875e-05, "step": 18462, "training_step_time": 0.10420846939086914 }, { "epoch": 2.817230224609375e-05, "model_forward_time": 0.025034189224243164, "step": 18463 }, { "epoch": 2.817230224609375e-05, "step": 18463, "training_step_time": 0.10517311096191406 }, { "epoch": 2.8173828125e-05, "model_forward_time": 0.024698734283447266, "step": 18464 }, { "epoch": 2.8173828125e-05, "step": 18464, "training_step_time": 0.10434794425964355 }, { "epoch": 2.817535400390625e-05, "model_forward_time": 0.02493000030517578, "step": 18465 }, { "epoch": 2.817535400390625e-05, "step": 18465, "training_step_time": 0.2137312889099121 }, { "epoch": 2.81768798828125e-05, "model_forward_time": 0.023679494857788086, "step": 18466 }, { "epoch": 2.81768798828125e-05, "step": 18466, "training_step_time": 0.11928844451904297 }, { "epoch": 2.817840576171875e-05, "model_forward_time": 0.027657270431518555, "step": 18467 }, { "epoch": 2.817840576171875e-05, "step": 18467, "training_step_time": 0.12653756141662598 }, { "epoch": 2.8179931640625e-05, "model_forward_time": 0.025334835052490234, "step": 18468 }, { "epoch": 2.8179931640625e-05, "step": 18468, "training_step_time": 0.150923490524292 }, { "epoch": 2.818145751953125e-05, "model_forward_time": 0.025049686431884766, "step": 18469 }, { "epoch": 2.818145751953125e-05, "step": 18469, "training_step_time": 0.10610723495483398 }, { "epoch": 2.81829833984375e-05, "grad_norm": 0.6569622755050659, "learning_rate": 3.523214876148664e-05, "loss": 0.016, "step": 18470 }, { "epoch": 2.81829833984375e-05, "model_forward_time": 0.024706602096557617, "step": 18470 }, { "epoch": 2.81829833984375e-05, "step": 18470, "training_step_time": 0.11812901496887207 }, { "epoch": 2.818450927734375e-05, "model_forward_time": 0.024754047393798828, "step": 18471 }, { "epoch": 2.818450927734375e-05, "step": 18471, "training_step_time": 0.11610174179077148 }, { "epoch": 2.818603515625e-05, "model_forward_time": 0.024856090545654297, "step": 18472 }, { "epoch": 2.818603515625e-05, "step": 18472, "training_step_time": 0.1052849292755127 }, { "epoch": 2.818756103515625e-05, "model_forward_time": 0.0242919921875, "step": 18473 }, { "epoch": 2.818756103515625e-05, "step": 18473, "training_step_time": 0.10423660278320312 }, { "epoch": 2.81890869140625e-05, "model_forward_time": 0.02505803108215332, "step": 18474 }, { "epoch": 2.81890869140625e-05, "step": 18474, "training_step_time": 0.11999034881591797 }, { "epoch": 2.819061279296875e-05, "model_forward_time": 0.02505970001220703, "step": 18475 }, { "epoch": 2.819061279296875e-05, "step": 18475, "training_step_time": 0.1090238094329834 }, { "epoch": 2.8192138671875e-05, "model_forward_time": 0.025188207626342773, "step": 18476 }, { "epoch": 2.8192138671875e-05, "step": 18476, "training_step_time": 0.10657024383544922 }, { "epoch": 2.819366455078125e-05, "model_forward_time": 0.024991989135742188, "step": 18477 }, { "epoch": 2.819366455078125e-05, "step": 18477, "training_step_time": 0.10558724403381348 }, { "epoch": 2.81951904296875e-05, "model_forward_time": 0.025236845016479492, "step": 18478 }, { "epoch": 2.81951904296875e-05, "step": 18478, "training_step_time": 0.11783647537231445 }, { "epoch": 2.819671630859375e-05, "model_forward_time": 0.024996280670166016, "step": 18479 }, { "epoch": 2.819671630859375e-05, "step": 18479, "training_step_time": 0.11097168922424316 }, { "epoch": 2.81982421875e-05, "grad_norm": 0.3495062589645386, "learning_rate": 3.517950096134232e-05, "loss": 0.0114, "step": 18480 }, { "epoch": 2.81982421875e-05, "model_forward_time": 0.025040149688720703, "step": 18480 }, { "epoch": 2.81982421875e-05, "step": 18480, "training_step_time": 0.15271759033203125 }, { "epoch": 2.819976806640625e-05, "model_forward_time": 0.024540424346923828, "step": 18481 }, { "epoch": 2.819976806640625e-05, "step": 18481, "training_step_time": 0.10719776153564453 }, { "epoch": 2.82012939453125e-05, "model_forward_time": 0.024373531341552734, "step": 18482 }, { "epoch": 2.82012939453125e-05, "step": 18482, "training_step_time": 0.11297273635864258 }, { "epoch": 2.820281982421875e-05, "model_forward_time": 0.024776458740234375, "step": 18483 }, { "epoch": 2.820281982421875e-05, "step": 18483, "training_step_time": 0.12767314910888672 }, { "epoch": 2.8204345703125e-05, "model_forward_time": 0.02525472640991211, "step": 18484 }, { "epoch": 2.8204345703125e-05, "step": 18484, "training_step_time": 0.12236428260803223 }, { "epoch": 2.820587158203125e-05, "model_forward_time": 0.025211334228515625, "step": 18485 }, { "epoch": 2.820587158203125e-05, "step": 18485, "training_step_time": 0.10983157157897949 }, { "epoch": 2.82073974609375e-05, "model_forward_time": 0.025716781616210938, "step": 18486 }, { "epoch": 2.82073974609375e-05, "step": 18486, "training_step_time": 0.1118934154510498 }, { "epoch": 2.820892333984375e-05, "model_forward_time": 0.024672985076904297, "step": 18487 }, { "epoch": 2.820892333984375e-05, "step": 18487, "training_step_time": 0.10914731025695801 }, { "epoch": 2.821044921875e-05, "model_forward_time": 0.024858951568603516, "step": 18488 }, { "epoch": 2.821044921875e-05, "step": 18488, "training_step_time": 0.10629916191101074 }, { "epoch": 2.821197509765625e-05, "model_forward_time": 0.024823665618896484, "step": 18489 }, { "epoch": 2.821197509765625e-05, "step": 18489, "training_step_time": 0.10515975952148438 }, { "epoch": 2.82135009765625e-05, "grad_norm": 0.4059763550758362, "learning_rate": 3.512687116950182e-05, "loss": 0.0091, "step": 18490 }, { "epoch": 2.82135009765625e-05, "model_forward_time": 0.02484583854675293, "step": 18490 }, { "epoch": 2.82135009765625e-05, "step": 18490, "training_step_time": 0.11062741279602051 }, { "epoch": 2.821502685546875e-05, "model_forward_time": 0.02500009536743164, "step": 18491 }, { "epoch": 2.821502685546875e-05, "step": 18491, "training_step_time": 0.10881686210632324 }, { "epoch": 2.8216552734375e-05, "model_forward_time": 0.024543285369873047, "step": 18492 }, { "epoch": 2.8216552734375e-05, "step": 18492, "training_step_time": 0.10438656806945801 }, { "epoch": 2.821807861328125e-05, "model_forward_time": 0.02492070198059082, "step": 18493 }, { "epoch": 2.821807861328125e-05, "step": 18493, "training_step_time": 0.17460227012634277 }, { "epoch": 2.82196044921875e-05, "model_forward_time": 0.026612520217895508, "step": 18494 }, { "epoch": 2.82196044921875e-05, "step": 18494, "training_step_time": 0.13511252403259277 }, { "epoch": 2.822113037109375e-05, "model_forward_time": 0.024184465408325195, "step": 18495 }, { "epoch": 2.822113037109375e-05, "step": 18495, "training_step_time": 0.11011481285095215 }, { "epoch": 2.822265625e-05, "model_forward_time": 0.025092363357543945, "step": 18496 }, { "epoch": 2.822265625e-05, "step": 18496, "training_step_time": 0.11318206787109375 }, { "epoch": 2.822418212890625e-05, "model_forward_time": 0.02474689483642578, "step": 18497 }, { "epoch": 2.822418212890625e-05, "step": 18497, "training_step_time": 0.11718869209289551 }, { "epoch": 2.82257080078125e-05, "model_forward_time": 0.025321006774902344, "step": 18498 }, { "epoch": 2.82257080078125e-05, "step": 18498, "training_step_time": 0.10908770561218262 }, { "epoch": 2.822723388671875e-05, "model_forward_time": 0.025540590286254883, "step": 18499 }, { "epoch": 2.822723388671875e-05, "step": 18499, "training_step_time": 0.19304633140563965 }, { "epoch": 2.8228759765625e-05, "grad_norm": 0.19981293380260468, "learning_rate": 3.5074259449915284e-05, "loss": 0.0187, "step": 18500 }, { "epoch": 2.8228759765625e-05, "model_forward_time": 0.02621603012084961, "step": 18500 }, { "epoch": 2.8228759765625e-05, "step": 18500, "training_step_time": 0.10569477081298828 }, { "epoch": 2.823028564453125e-05, "model_forward_time": 0.024576902389526367, "step": 18501 }, { "epoch": 2.823028564453125e-05, "step": 18501, "training_step_time": 0.10567927360534668 }, { "epoch": 2.82318115234375e-05, "model_forward_time": 0.025256872177124023, "step": 18502 }, { "epoch": 2.82318115234375e-05, "step": 18502, "training_step_time": 0.10662603378295898 }, { "epoch": 2.823333740234375e-05, "model_forward_time": 0.02559208869934082, "step": 18503 }, { "epoch": 2.823333740234375e-05, "step": 18503, "training_step_time": 0.10790085792541504 }, { "epoch": 2.823486328125e-05, "model_forward_time": 0.02549600601196289, "step": 18504 }, { "epoch": 2.823486328125e-05, "step": 18504, "training_step_time": 0.10847711563110352 }, { "epoch": 2.823638916015625e-05, "model_forward_time": 0.025139331817626953, "step": 18505 }, { "epoch": 2.823638916015625e-05, "step": 18505, "training_step_time": 0.11087536811828613 }, { "epoch": 2.82379150390625e-05, "model_forward_time": 0.02811431884765625, "step": 18506 }, { "epoch": 2.82379150390625e-05, "step": 18506, "training_step_time": 0.1909770965576172 }, { "epoch": 2.823944091796875e-05, "model_forward_time": 0.024459362030029297, "step": 18507 }, { "epoch": 2.823944091796875e-05, "step": 18507, "training_step_time": 0.20830273628234863 }, { "epoch": 2.8240966796875e-05, "model_forward_time": 0.023904800415039062, "step": 18508 }, { "epoch": 2.8240966796875e-05, "step": 18508, "training_step_time": 0.20653676986694336 }, { "epoch": 2.824249267578125e-05, "model_forward_time": 0.02499079704284668, "step": 18509 }, { "epoch": 2.824249267578125e-05, "step": 18509, "training_step_time": 0.19432973861694336 }, { "epoch": 2.82440185546875e-05, "grad_norm": 0.16926681995391846, "learning_rate": 3.5021665866510925e-05, "loss": 0.0111, "step": 18510 }, { "epoch": 2.82440185546875e-05, "model_forward_time": 0.02367877960205078, "step": 18510 }, { "epoch": 2.82440185546875e-05, "step": 18510, "training_step_time": 0.1886577606201172 }, { "epoch": 2.824554443359375e-05, "model_forward_time": 0.024843454360961914, "step": 18511 }, { "epoch": 2.824554443359375e-05, "step": 18511, "training_step_time": 0.2145678997039795 }, { "epoch": 2.82470703125e-05, "model_forward_time": 0.024446964263916016, "step": 18512 }, { "epoch": 2.82470703125e-05, "step": 18512, "training_step_time": 0.17141127586364746 }, { "epoch": 2.824859619140625e-05, "model_forward_time": 0.024988889694213867, "step": 18513 }, { "epoch": 2.824859619140625e-05, "step": 18513, "training_step_time": 0.18264245986938477 }, { "epoch": 2.82501220703125e-05, "model_forward_time": 0.024652481079101562, "step": 18514 }, { "epoch": 2.82501220703125e-05, "step": 18514, "training_step_time": 0.11088371276855469 }, { "epoch": 2.825164794921875e-05, "model_forward_time": 0.025249719619750977, "step": 18515 }, { "epoch": 2.825164794921875e-05, "step": 18515, "training_step_time": 0.10362815856933594 }, { "epoch": 2.8253173828125e-05, "model_forward_time": 0.024415016174316406, "step": 18516 }, { "epoch": 2.8253173828125e-05, "step": 18516, "training_step_time": 0.13913512229919434 }, { "epoch": 2.825469970703125e-05, "model_forward_time": 0.025102615356445312, "step": 18517 }, { "epoch": 2.825469970703125e-05, "step": 18517, "training_step_time": 0.11839723587036133 }, { "epoch": 2.82562255859375e-05, "model_forward_time": 0.025087356567382812, "step": 18518 }, { "epoch": 2.82562255859375e-05, "step": 18518, "training_step_time": 0.10636663436889648 }, { "epoch": 2.825775146484375e-05, "model_forward_time": 0.025368928909301758, "step": 18519 }, { "epoch": 2.825775146484375e-05, "step": 18519, "training_step_time": 0.12046194076538086 }, { "epoch": 2.825927734375e-05, "grad_norm": 0.24763013422489166, "learning_rate": 3.496909048319489e-05, "loss": 0.0086, "step": 18520 }, { "epoch": 2.825927734375e-05, "model_forward_time": 0.025498151779174805, "step": 18520 }, { "epoch": 2.825927734375e-05, "step": 18520, "training_step_time": 0.2077922821044922 }, { "epoch": 2.826080322265625e-05, "model_forward_time": 0.024425983428955078, "step": 18521 }, { "epoch": 2.826080322265625e-05, "step": 18521, "training_step_time": 0.11708188056945801 }, { "epoch": 2.82623291015625e-05, "model_forward_time": 0.02465057373046875, "step": 18522 }, { "epoch": 2.82623291015625e-05, "step": 18522, "training_step_time": 0.10424351692199707 }, { "epoch": 2.826385498046875e-05, "model_forward_time": 0.025422334671020508, "step": 18523 }, { "epoch": 2.826385498046875e-05, "step": 18523, "training_step_time": 0.1126410961151123 }, { "epoch": 2.8265380859375e-05, "model_forward_time": 0.02601146697998047, "step": 18524 }, { "epoch": 2.8265380859375e-05, "step": 18524, "training_step_time": 0.11643123626708984 }, { "epoch": 2.826690673828125e-05, "model_forward_time": 0.025059223175048828, "step": 18525 }, { "epoch": 2.826690673828125e-05, "step": 18525, "training_step_time": 0.10929226875305176 }, { "epoch": 2.82684326171875e-05, "model_forward_time": 0.025072336196899414, "step": 18526 }, { "epoch": 2.82684326171875e-05, "step": 18526, "training_step_time": 0.12286496162414551 }, { "epoch": 2.826995849609375e-05, "model_forward_time": 0.025107145309448242, "step": 18527 }, { "epoch": 2.826995849609375e-05, "step": 18527, "training_step_time": 0.12392258644104004 }, { "epoch": 2.8271484375e-05, "model_forward_time": 0.02485823631286621, "step": 18528 }, { "epoch": 2.8271484375e-05, "step": 18528, "training_step_time": 0.11281752586364746 }, { "epoch": 2.827301025390625e-05, "model_forward_time": 0.02467823028564453, "step": 18529 }, { "epoch": 2.827301025390625e-05, "step": 18529, "training_step_time": 0.1154017448425293 }, { "epoch": 2.82745361328125e-05, "grad_norm": 0.2802363336086273, "learning_rate": 3.491653336385124e-05, "loss": 0.0134, "step": 18530 }, { "epoch": 2.82745361328125e-05, "model_forward_time": 0.02510380744934082, "step": 18530 }, { "epoch": 2.82745361328125e-05, "step": 18530, "training_step_time": 0.11180448532104492 }, { "epoch": 2.827606201171875e-05, "model_forward_time": 0.02531266212463379, "step": 18531 }, { "epoch": 2.827606201171875e-05, "step": 18531, "training_step_time": 0.11049199104309082 }, { "epoch": 2.8277587890625e-05, "model_forward_time": 0.025597810745239258, "step": 18532 }, { "epoch": 2.8277587890625e-05, "step": 18532, "training_step_time": 0.10997414588928223 }, { "epoch": 2.827911376953125e-05, "model_forward_time": 0.025289058685302734, "step": 18533 }, { "epoch": 2.827911376953125e-05, "step": 18533, "training_step_time": 0.11003828048706055 }, { "epoch": 2.82806396484375e-05, "model_forward_time": 0.025589704513549805, "step": 18534 }, { "epoch": 2.82806396484375e-05, "step": 18534, "training_step_time": 0.10817790031433105 }, { "epoch": 2.828216552734375e-05, "model_forward_time": 0.025180339813232422, "step": 18535 }, { "epoch": 2.828216552734375e-05, "step": 18535, "training_step_time": 0.17873191833496094 }, { "epoch": 2.828369140625e-05, "model_forward_time": 0.024404525756835938, "step": 18536 }, { "epoch": 2.828369140625e-05, "step": 18536, "training_step_time": 0.14315271377563477 }, { "epoch": 2.828521728515625e-05, "model_forward_time": 0.02426004409790039, "step": 18537 }, { "epoch": 2.828521728515625e-05, "step": 18537, "training_step_time": 0.11201834678649902 }, { "epoch": 2.82867431640625e-05, "model_forward_time": 0.024942636489868164, "step": 18538 }, { "epoch": 2.82867431640625e-05, "step": 18538, "training_step_time": 0.10585713386535645 }, { "epoch": 2.828826904296875e-05, "model_forward_time": 0.025745630264282227, "step": 18539 }, { "epoch": 2.828826904296875e-05, "step": 18539, "training_step_time": 0.11813926696777344 }, { "epoch": 2.8289794921875e-05, "grad_norm": 0.1632130742073059, "learning_rate": 3.4863994572341843e-05, "loss": 0.0071, "step": 18540 }, { "epoch": 2.8289794921875e-05, "model_forward_time": 0.025328874588012695, "step": 18540 }, { "epoch": 2.8289794921875e-05, "step": 18540, "training_step_time": 0.18405508995056152 }, { "epoch": 2.829132080078125e-05, "model_forward_time": 0.024628162384033203, "step": 18541 }, { "epoch": 2.829132080078125e-05, "step": 18541, "training_step_time": 0.10892105102539062 }, { "epoch": 2.82928466796875e-05, "model_forward_time": 0.024564504623413086, "step": 18542 }, { "epoch": 2.82928466796875e-05, "step": 18542, "training_step_time": 0.10254669189453125 }, { "epoch": 2.829437255859375e-05, "model_forward_time": 0.025267839431762695, "step": 18543 }, { "epoch": 2.829437255859375e-05, "step": 18543, "training_step_time": 0.10536646842956543 }, { "epoch": 2.82958984375e-05, "model_forward_time": 0.025273799896240234, "step": 18544 }, { "epoch": 2.82958984375e-05, "step": 18544, "training_step_time": 0.10665297508239746 }, { "epoch": 2.829742431640625e-05, "model_forward_time": 0.0256502628326416, "step": 18545 }, { "epoch": 2.829742431640625e-05, "step": 18545, "training_step_time": 0.10631251335144043 }, { "epoch": 2.82989501953125e-05, "model_forward_time": 0.024371862411499023, "step": 18546 }, { "epoch": 2.82989501953125e-05, "step": 18546, "training_step_time": 0.10538434982299805 }, { "epoch": 2.830047607421875e-05, "model_forward_time": 0.025464773178100586, "step": 18547 }, { "epoch": 2.830047607421875e-05, "step": 18547, "training_step_time": 0.10851716995239258 }, { "epoch": 2.8302001953125e-05, "model_forward_time": 0.026028871536254883, "step": 18548 }, { "epoch": 2.8302001953125e-05, "step": 18548, "training_step_time": 0.10712552070617676 }, { "epoch": 2.830352783203125e-05, "model_forward_time": 0.025630712509155273, "step": 18549 }, { "epoch": 2.830352783203125e-05, "step": 18549, "training_step_time": 0.10640525817871094 }, { "epoch": 2.83050537109375e-05, "grad_norm": 0.3838638961315155, "learning_rate": 3.4811474172506275e-05, "loss": 0.0095, "step": 18550 }, { "epoch": 2.83050537109375e-05, "model_forward_time": 0.02506422996520996, "step": 18550 }, { "epoch": 2.83050537109375e-05, "step": 18550, "training_step_time": 0.10631585121154785 }, { "epoch": 2.830657958984375e-05, "model_forward_time": 0.02508687973022461, "step": 18551 }, { "epoch": 2.830657958984375e-05, "step": 18551, "training_step_time": 0.10429120063781738 }, { "epoch": 2.830810546875e-05, "model_forward_time": 0.02454686164855957, "step": 18552 }, { "epoch": 2.830810546875e-05, "step": 18552, "training_step_time": 0.1060032844543457 }, { "epoch": 2.830963134765625e-05, "model_forward_time": 0.024301528930664062, "step": 18553 }, { "epoch": 2.830963134765625e-05, "step": 18553, "training_step_time": 0.10813164710998535 }, { "epoch": 2.83111572265625e-05, "model_forward_time": 0.025612831115722656, "step": 18554 }, { "epoch": 2.83111572265625e-05, "step": 18554, "training_step_time": 0.10598039627075195 }, { "epoch": 2.831268310546875e-05, "model_forward_time": 0.025598764419555664, "step": 18555 }, { "epoch": 2.831268310546875e-05, "step": 18555, "training_step_time": 0.15174555778503418 }, { "epoch": 2.8314208984375e-05, "model_forward_time": 0.025571823120117188, "step": 18556 }, { "epoch": 2.8314208984375e-05, "step": 18556, "training_step_time": 0.11921095848083496 }, { "epoch": 2.831573486328125e-05, "model_forward_time": 0.025020122528076172, "step": 18557 }, { "epoch": 2.831573486328125e-05, "step": 18557, "training_step_time": 0.10972023010253906 }, { "epoch": 2.83172607421875e-05, "model_forward_time": 0.025304317474365234, "step": 18558 }, { "epoch": 2.83172607421875e-05, "step": 18558, "training_step_time": 0.12078213691711426 }, { "epoch": 2.831878662109375e-05, "model_forward_time": 0.02517104148864746, "step": 18559 }, { "epoch": 2.831878662109375e-05, "step": 18559, "training_step_time": 0.10640954971313477 }, { "epoch": 2.83203125e-05, "grad_norm": 0.13953132927417755, "learning_rate": 3.475897222816178e-05, "loss": 0.0197, "step": 18560 }, { "epoch": 2.83203125e-05, "model_forward_time": 0.025437593460083008, "step": 18560 }, { "epoch": 2.83203125e-05, "step": 18560, "training_step_time": 0.11421418190002441 }, { "epoch": 2.832183837890625e-05, "model_forward_time": 0.025550127029418945, "step": 18561 }, { "epoch": 2.832183837890625e-05, "step": 18561, "training_step_time": 0.1122283935546875 }, { "epoch": 2.83233642578125e-05, "model_forward_time": 0.025095701217651367, "step": 18562 }, { "epoch": 2.83233642578125e-05, "step": 18562, "training_step_time": 0.1156926155090332 }, { "epoch": 2.832489013671875e-05, "model_forward_time": 0.025767803192138672, "step": 18563 }, { "epoch": 2.832489013671875e-05, "step": 18563, "training_step_time": 0.10941243171691895 }, { "epoch": 2.8326416015625e-05, "model_forward_time": 0.0251617431640625, "step": 18564 }, { "epoch": 2.8326416015625e-05, "step": 18564, "training_step_time": 0.20952510833740234 }, { "epoch": 2.832794189453125e-05, "model_forward_time": 0.0247495174407959, "step": 18565 }, { "epoch": 2.832794189453125e-05, "step": 18565, "training_step_time": 0.1151437759399414 }, { "epoch": 2.83294677734375e-05, "model_forward_time": 0.024554014205932617, "step": 18566 }, { "epoch": 2.83294677734375e-05, "step": 18566, "training_step_time": 0.11259174346923828 }, { "epoch": 2.833099365234375e-05, "model_forward_time": 0.02532482147216797, "step": 18567 }, { "epoch": 2.833099365234375e-05, "step": 18567, "training_step_time": 0.11679863929748535 }, { "epoch": 2.833251953125e-05, "model_forward_time": 0.025187969207763672, "step": 18568 }, { "epoch": 2.833251953125e-05, "step": 18568, "training_step_time": 0.12688899040222168 }, { "epoch": 2.833404541015625e-05, "model_forward_time": 0.025196075439453125, "step": 18569 }, { "epoch": 2.833404541015625e-05, "step": 18569, "training_step_time": 0.10839724540710449 }, { "epoch": 2.83355712890625e-05, "grad_norm": 0.23392415046691895, "learning_rate": 3.470648880310313e-05, "loss": 0.0083, "step": 18570 }, { "epoch": 2.83355712890625e-05, "model_forward_time": 0.025681018829345703, "step": 18570 }, { "epoch": 2.83355712890625e-05, "step": 18570, "training_step_time": 0.10769271850585938 }, { "epoch": 2.833709716796875e-05, "model_forward_time": 0.025830984115600586, "step": 18571 }, { "epoch": 2.833709716796875e-05, "step": 18571, "training_step_time": 0.12471246719360352 }, { "epoch": 2.8338623046875e-05, "model_forward_time": 0.02533745765686035, "step": 18572 }, { "epoch": 2.8338623046875e-05, "step": 18572, "training_step_time": 0.10793638229370117 }, { "epoch": 2.834014892578125e-05, "model_forward_time": 0.025129079818725586, "step": 18573 }, { "epoch": 2.834014892578125e-05, "step": 18573, "training_step_time": 0.11579632759094238 }, { "epoch": 2.83416748046875e-05, "model_forward_time": 0.025225400924682617, "step": 18574 }, { "epoch": 2.83416748046875e-05, "step": 18574, "training_step_time": 0.13214659690856934 }, { "epoch": 2.834320068359375e-05, "model_forward_time": 0.02486109733581543, "step": 18575 }, { "epoch": 2.834320068359375e-05, "step": 18575, "training_step_time": 0.11676359176635742 }, { "epoch": 2.83447265625e-05, "model_forward_time": 0.025183439254760742, "step": 18576 }, { "epoch": 2.83447265625e-05, "step": 18576, "training_step_time": 0.12166857719421387 }, { "epoch": 2.834625244140625e-05, "model_forward_time": 0.024954795837402344, "step": 18577 }, { "epoch": 2.834625244140625e-05, "step": 18577, "training_step_time": 0.10669779777526855 }, { "epoch": 2.83477783203125e-05, "model_forward_time": 0.025155067443847656, "step": 18578 }, { "epoch": 2.83477783203125e-05, "step": 18578, "training_step_time": 0.11272668838500977 }, { "epoch": 2.834930419921875e-05, "model_forward_time": 0.025492429733276367, "step": 18579 }, { "epoch": 2.834930419921875e-05, "step": 18579, "training_step_time": 0.10658526420593262 }, { "epoch": 2.8350830078125e-05, "grad_norm": 0.3779175579547882, "learning_rate": 3.465402396110269e-05, "loss": 0.0074, "step": 18580 }, { "epoch": 2.8350830078125e-05, "model_forward_time": 0.025673866271972656, "step": 18580 }, { "epoch": 2.8350830078125e-05, "step": 18580, "training_step_time": 0.10643339157104492 }, { "epoch": 2.835235595703125e-05, "model_forward_time": 0.025476455688476562, "step": 18581 }, { "epoch": 2.835235595703125e-05, "step": 18581, "training_step_time": 0.10725212097167969 }, { "epoch": 2.83538818359375e-05, "model_forward_time": 0.026587486267089844, "step": 18582 }, { "epoch": 2.83538818359375e-05, "step": 18582, "training_step_time": 0.10766339302062988 }, { "epoch": 2.835540771484375e-05, "model_forward_time": 0.024753570556640625, "step": 18583 }, { "epoch": 2.835540771484375e-05, "step": 18583, "training_step_time": 0.13511013984680176 }, { "epoch": 2.835693359375e-05, "model_forward_time": 0.024773836135864258, "step": 18584 }, { "epoch": 2.835693359375e-05, "step": 18584, "training_step_time": 0.13988494873046875 }, { "epoch": 2.835845947265625e-05, "model_forward_time": 0.024242877960205078, "step": 18585 }, { "epoch": 2.835845947265625e-05, "step": 18585, "training_step_time": 0.10854220390319824 }, { "epoch": 2.83599853515625e-05, "model_forward_time": 0.024918079376220703, "step": 18586 }, { "epoch": 2.83599853515625e-05, "step": 18586, "training_step_time": 0.1144874095916748 }, { "epoch": 2.836151123046875e-05, "model_forward_time": 0.025079727172851562, "step": 18587 }, { "epoch": 2.836151123046875e-05, "step": 18587, "training_step_time": 0.11253857612609863 }, { "epoch": 2.8363037109375e-05, "model_forward_time": 0.024905681610107422, "step": 18588 }, { "epoch": 2.8363037109375e-05, "step": 18588, "training_step_time": 0.10449695587158203 }, { "epoch": 2.836456298828125e-05, "model_forward_time": 0.024859189987182617, "step": 18589 }, { "epoch": 2.836456298828125e-05, "step": 18589, "training_step_time": 0.1983489990234375 }, { "epoch": 2.83660888671875e-05, "grad_norm": 0.27629610896110535, "learning_rate": 3.460157776591018e-05, "loss": 0.0089, "step": 18590 }, { "epoch": 2.83660888671875e-05, "model_forward_time": 0.023938655853271484, "step": 18590 }, { "epoch": 2.83660888671875e-05, "step": 18590, "training_step_time": 0.10195159912109375 }, { "epoch": 2.836761474609375e-05, "model_forward_time": 0.024169445037841797, "step": 18591 }, { "epoch": 2.836761474609375e-05, "step": 18591, "training_step_time": 0.10474252700805664 }, { "epoch": 2.8369140625e-05, "model_forward_time": 0.024951696395874023, "step": 18592 }, { "epoch": 2.8369140625e-05, "step": 18592, "training_step_time": 0.10837626457214355 }, { "epoch": 2.837066650390625e-05, "model_forward_time": 0.024923086166381836, "step": 18593 }, { "epoch": 2.837066650390625e-05, "step": 18593, "training_step_time": 0.1066136360168457 }, { "epoch": 2.83721923828125e-05, "model_forward_time": 0.02521538734436035, "step": 18594 }, { "epoch": 2.83721923828125e-05, "step": 18594, "training_step_time": 0.1096501350402832 }, { "epoch": 2.837371826171875e-05, "model_forward_time": 0.02505970001220703, "step": 18595 }, { "epoch": 2.837371826171875e-05, "step": 18595, "training_step_time": 0.1084134578704834 }, { "epoch": 2.8375244140625e-05, "model_forward_time": 0.02508401870727539, "step": 18596 }, { "epoch": 2.8375244140625e-05, "step": 18596, "training_step_time": 0.10593843460083008 }, { "epoch": 2.837677001953125e-05, "model_forward_time": 0.024842023849487305, "step": 18597 }, { "epoch": 2.837677001953125e-05, "step": 18597, "training_step_time": 0.10787582397460938 }, { "epoch": 2.83782958984375e-05, "model_forward_time": 0.02503204345703125, "step": 18598 }, { "epoch": 2.83782958984375e-05, "step": 18598, "training_step_time": 0.10834431648254395 }, { "epoch": 2.837982177734375e-05, "model_forward_time": 0.02506256103515625, "step": 18599 }, { "epoch": 2.837982177734375e-05, "step": 18599, "training_step_time": 0.10694622993469238 }, { "epoch": 2.838134765625e-05, "grad_norm": 0.2892727255821228, "learning_rate": 3.4549150281252636e-05, "loss": 0.0184, "step": 18600 }, { "epoch": 2.838134765625e-05, "model_forward_time": 0.02483367919921875, "step": 18600 }, { "epoch": 2.838134765625e-05, "step": 18600, "training_step_time": 0.10897397994995117 }, { "epoch": 2.838287353515625e-05, "model_forward_time": 0.024903535842895508, "step": 18601 }, { "epoch": 2.838287353515625e-05, "step": 18601, "training_step_time": 0.10528945922851562 }, { "epoch": 2.83843994140625e-05, "model_forward_time": 0.02633047103881836, "step": 18602 }, { "epoch": 2.83843994140625e-05, "step": 18602, "training_step_time": 0.10734415054321289 }, { "epoch": 2.838592529296875e-05, "model_forward_time": 0.029154539108276367, "step": 18603 }, { "epoch": 2.838592529296875e-05, "step": 18603, "training_step_time": 0.13930416107177734 }, { "epoch": 2.8387451171875e-05, "model_forward_time": 0.025093555450439453, "step": 18604 }, { "epoch": 2.8387451171875e-05, "step": 18604, "training_step_time": 0.19516634941101074 }, { "epoch": 2.838897705078125e-05, "model_forward_time": 0.02456045150756836, "step": 18605 }, { "epoch": 2.838897705078125e-05, "step": 18605, "training_step_time": 0.16394400596618652 }, { "epoch": 2.83905029296875e-05, "model_forward_time": 0.024827003479003906, "step": 18606 }, { "epoch": 2.83905029296875e-05, "step": 18606, "training_step_time": 0.1476726531982422 }, { "epoch": 2.839202880859375e-05, "model_forward_time": 0.024030208587646484, "step": 18607 }, { "epoch": 2.839202880859375e-05, "step": 18607, "training_step_time": 0.11099028587341309 }, { "epoch": 2.83935546875e-05, "model_forward_time": 0.02470231056213379, "step": 18608 }, { "epoch": 2.83935546875e-05, "step": 18608, "training_step_time": 0.13009953498840332 }, { "epoch": 2.839508056640625e-05, "model_forward_time": 0.025391340255737305, "step": 18609 }, { "epoch": 2.839508056640625e-05, "step": 18609, "training_step_time": 0.12215685844421387 }, { "epoch": 2.83966064453125e-05, "grad_norm": 0.1822260022163391, "learning_rate": 3.449674157083443e-05, "loss": 0.0066, "step": 18610 }, { "epoch": 2.83966064453125e-05, "model_forward_time": 0.025275230407714844, "step": 18610 }, { "epoch": 2.83966064453125e-05, "step": 18610, "training_step_time": 0.10413718223571777 }, { "epoch": 2.839813232421875e-05, "model_forward_time": 0.02517557144165039, "step": 18611 }, { "epoch": 2.839813232421875e-05, "step": 18611, "training_step_time": 0.1081998348236084 }, { "epoch": 2.8399658203125e-05, "model_forward_time": 0.02533578872680664, "step": 18612 }, { "epoch": 2.8399658203125e-05, "step": 18612, "training_step_time": 0.11701369285583496 }, { "epoch": 2.840118408203125e-05, "model_forward_time": 0.02523946762084961, "step": 18613 }, { "epoch": 2.840118408203125e-05, "step": 18613, "training_step_time": 0.10722160339355469 }, { "epoch": 2.84027099609375e-05, "model_forward_time": 0.02516007423400879, "step": 18614 }, { "epoch": 2.84027099609375e-05, "step": 18614, "training_step_time": 0.10469698905944824 }, { "epoch": 2.840423583984375e-05, "model_forward_time": 0.02542257308959961, "step": 18615 }, { "epoch": 2.840423583984375e-05, "step": 18615, "training_step_time": 0.16954612731933594 }, { "epoch": 2.840576171875e-05, "model_forward_time": 0.02453470230102539, "step": 18616 }, { "epoch": 2.840576171875e-05, "step": 18616, "training_step_time": 0.16470098495483398 }, { "epoch": 2.840728759765625e-05, "model_forward_time": 0.025079011917114258, "step": 18617 }, { "epoch": 2.840728759765625e-05, "step": 18617, "training_step_time": 0.10504436492919922 }, { "epoch": 2.84088134765625e-05, "model_forward_time": 0.024733781814575195, "step": 18618 }, { "epoch": 2.84088134765625e-05, "step": 18618, "training_step_time": 0.1741955280303955 }, { "epoch": 2.841033935546875e-05, "model_forward_time": 0.024254560470581055, "step": 18619 }, { "epoch": 2.841033935546875e-05, "step": 18619, "training_step_time": 0.1267390251159668 }, { "epoch": 2.8411865234375e-05, "grad_norm": 0.1415482759475708, "learning_rate": 3.444435169833706e-05, "loss": 0.0178, "step": 18620 }, { "epoch": 2.8411865234375e-05, "model_forward_time": 0.024969100952148438, "step": 18620 }, { "epoch": 2.8411865234375e-05, "step": 18620, "training_step_time": 0.2275407314300537 }, { "epoch": 2.841339111328125e-05, "model_forward_time": 0.024435997009277344, "step": 18621 }, { "epoch": 2.841339111328125e-05, "step": 18621, "training_step_time": 0.14788198471069336 }, { "epoch": 2.84149169921875e-05, "model_forward_time": 0.024081945419311523, "step": 18622 }, { "epoch": 2.84149169921875e-05, "step": 18622, "training_step_time": 0.19982457160949707 }, { "epoch": 2.841644287109375e-05, "model_forward_time": 0.02442622184753418, "step": 18623 }, { "epoch": 2.841644287109375e-05, "step": 18623, "training_step_time": 0.1292562484741211 }, { "epoch": 2.841796875e-05, "model_forward_time": 0.024149179458618164, "step": 18624 }, { "epoch": 2.841796875e-05, "step": 18624, "training_step_time": 0.12444806098937988 }, { "epoch": 2.841949462890625e-05, "model_forward_time": 0.02448892593383789, "step": 18625 }, { "epoch": 2.841949462890625e-05, "step": 18625, "training_step_time": 0.11645960807800293 }, { "epoch": 2.84210205078125e-05, "model_forward_time": 0.025170326232910156, "step": 18626 }, { "epoch": 2.84210205078125e-05, "step": 18626, "training_step_time": 0.11583590507507324 }, { "epoch": 2.842254638671875e-05, "model_forward_time": 0.025203704833984375, "step": 18627 }, { "epoch": 2.842254638671875e-05, "step": 18627, "training_step_time": 0.16457271575927734 }, { "epoch": 2.8424072265625e-05, "model_forward_time": 0.02428889274597168, "step": 18628 }, { "epoch": 2.8424072265625e-05, "step": 18628, "training_step_time": 0.13883423805236816 }, { "epoch": 2.842559814453125e-05, "model_forward_time": 0.024995803833007812, "step": 18629 }, { "epoch": 2.842559814453125e-05, "step": 18629, "training_step_time": 0.10922646522521973 }, { "epoch": 2.84271240234375e-05, "grad_norm": 0.6063808798789978, "learning_rate": 3.439198072741921e-05, "loss": 0.0328, "step": 18630 }, { "epoch": 2.84271240234375e-05, "model_forward_time": 0.025669336318969727, "step": 18630 }, { "epoch": 2.84271240234375e-05, "step": 18630, "training_step_time": 0.10818219184875488 }, { "epoch": 2.842864990234375e-05, "model_forward_time": 0.025558948516845703, "step": 18631 }, { "epoch": 2.842864990234375e-05, "step": 18631, "training_step_time": 0.11385750770568848 }, { "epoch": 2.843017578125e-05, "model_forward_time": 0.02555370330810547, "step": 18632 }, { "epoch": 2.843017578125e-05, "step": 18632, "training_step_time": 0.11066532135009766 }, { "epoch": 2.843170166015625e-05, "model_forward_time": 0.025353193283081055, "step": 18633 }, { "epoch": 2.843170166015625e-05, "step": 18633, "training_step_time": 0.18738579750061035 }, { "epoch": 2.84332275390625e-05, "model_forward_time": 0.02469611167907715, "step": 18634 }, { "epoch": 2.84332275390625e-05, "step": 18634, "training_step_time": 0.10416841506958008 }, { "epoch": 2.843475341796875e-05, "model_forward_time": 0.024324417114257812, "step": 18635 }, { "epoch": 2.843475341796875e-05, "step": 18635, "training_step_time": 0.10544133186340332 }, { "epoch": 2.8436279296875e-05, "model_forward_time": 0.0242311954498291, "step": 18636 }, { "epoch": 2.8436279296875e-05, "step": 18636, "training_step_time": 0.10328340530395508 }, { "epoch": 2.843780517578125e-05, "model_forward_time": 0.02495861053466797, "step": 18637 }, { "epoch": 2.843780517578125e-05, "step": 18637, "training_step_time": 0.10842561721801758 }, { "epoch": 2.84393310546875e-05, "model_forward_time": 0.025214433670043945, "step": 18638 }, { "epoch": 2.84393310546875e-05, "step": 18638, "training_step_time": 0.1093759536743164 }, { "epoch": 2.844085693359375e-05, "model_forward_time": 0.024940013885498047, "step": 18639 }, { "epoch": 2.844085693359375e-05, "step": 18639, "training_step_time": 0.10430526733398438 }, { "epoch": 2.84423828125e-05, "grad_norm": 0.240308478474617, "learning_rate": 3.4339628721716505e-05, "loss": 0.0121, "step": 18640 }, { "epoch": 2.84423828125e-05, "model_forward_time": 0.024692296981811523, "step": 18640 }, { "epoch": 2.84423828125e-05, "step": 18640, "training_step_time": 0.10426831245422363 }, { "epoch": 2.844390869140625e-05, "model_forward_time": 0.024988174438476562, "step": 18641 }, { "epoch": 2.844390869140625e-05, "step": 18641, "training_step_time": 0.10498380661010742 }, { "epoch": 2.84454345703125e-05, "model_forward_time": 0.025749683380126953, "step": 18642 }, { "epoch": 2.84454345703125e-05, "step": 18642, "training_step_time": 0.10556173324584961 }, { "epoch": 2.844696044921875e-05, "model_forward_time": 0.027891159057617188, "step": 18643 }, { "epoch": 2.844696044921875e-05, "step": 18643, "training_step_time": 0.10684370994567871 }, { "epoch": 2.8448486328125e-05, "model_forward_time": 0.025338172912597656, "step": 18644 }, { "epoch": 2.8448486328125e-05, "step": 18644, "training_step_time": 0.10548973083496094 }, { "epoch": 2.845001220703125e-05, "model_forward_time": 0.02480316162109375, "step": 18645 }, { "epoch": 2.845001220703125e-05, "step": 18645, "training_step_time": 0.10641169548034668 }, { "epoch": 2.84515380859375e-05, "model_forward_time": 0.025333166122436523, "step": 18646 }, { "epoch": 2.84515380859375e-05, "step": 18646, "training_step_time": 0.10857963562011719 }, { "epoch": 2.845306396484375e-05, "model_forward_time": 0.02534008026123047, "step": 18647 }, { "epoch": 2.845306396484375e-05, "step": 18647, "training_step_time": 0.10523009300231934 }, { "epoch": 2.845458984375e-05, "model_forward_time": 0.025205373764038086, "step": 18648 }, { "epoch": 2.845458984375e-05, "step": 18648, "training_step_time": 0.15774059295654297 }, { "epoch": 2.845611572265625e-05, "model_forward_time": 0.024193763732910156, "step": 18649 }, { "epoch": 2.845611572265625e-05, "step": 18649, "training_step_time": 0.13544654846191406 }, { "epoch": 2.84576416015625e-05, "grad_norm": 0.28116506338119507, "learning_rate": 3.4287295744841586e-05, "loss": 0.0095, "step": 18650 }, { "epoch": 2.84576416015625e-05, "model_forward_time": 0.0254056453704834, "step": 18650 }, { "epoch": 2.84576416015625e-05, "step": 18650, "training_step_time": 0.14436936378479004 }, { "epoch": 2.845916748046875e-05, "model_forward_time": 0.024399757385253906, "step": 18651 }, { "epoch": 2.845916748046875e-05, "step": 18651, "training_step_time": 0.16254973411560059 }, { "epoch": 2.8460693359375e-05, "model_forward_time": 0.024152755737304688, "step": 18652 }, { "epoch": 2.8460693359375e-05, "step": 18652, "training_step_time": 0.18091177940368652 }, { "epoch": 2.846221923828125e-05, "model_forward_time": 0.02463817596435547, "step": 18653 }, { "epoch": 2.846221923828125e-05, "step": 18653, "training_step_time": 0.15601706504821777 }, { "epoch": 2.84637451171875e-05, "model_forward_time": 0.024683475494384766, "step": 18654 }, { "epoch": 2.84637451171875e-05, "step": 18654, "training_step_time": 0.11061573028564453 }, { "epoch": 2.846527099609375e-05, "model_forward_time": 0.02415919303894043, "step": 18655 }, { "epoch": 2.846527099609375e-05, "step": 18655, "training_step_time": 0.20746517181396484 }, { "epoch": 2.8466796875e-05, "model_forward_time": 0.024567842483520508, "step": 18656 }, { "epoch": 2.8466796875e-05, "step": 18656, "training_step_time": 0.11252570152282715 }, { "epoch": 2.846832275390625e-05, "model_forward_time": 0.02443671226501465, "step": 18657 }, { "epoch": 2.846832275390625e-05, "step": 18657, "training_step_time": 0.11144828796386719 }, { "epoch": 2.84698486328125e-05, "model_forward_time": 0.025071382522583008, "step": 18658 }, { "epoch": 2.84698486328125e-05, "step": 18658, "training_step_time": 0.2093505859375 }, { "epoch": 2.847137451171875e-05, "model_forward_time": 0.02839946746826172, "step": 18659 }, { "epoch": 2.847137451171875e-05, "step": 18659, "training_step_time": 0.12821197509765625 }, { "epoch": 2.8472900390625e-05, "grad_norm": 0.22209402918815613, "learning_rate": 3.423498186038393e-05, "loss": 0.0105, "step": 18660 }, { "epoch": 2.8472900390625e-05, "model_forward_time": 0.024376392364501953, "step": 18660 }, { "epoch": 2.8472900390625e-05, "step": 18660, "training_step_time": 0.10211396217346191 }, { "epoch": 2.847442626953125e-05, "model_forward_time": 0.025356054306030273, "step": 18661 }, { "epoch": 2.847442626953125e-05, "step": 18661, "training_step_time": 0.10447525978088379 }, { "epoch": 2.84759521484375e-05, "model_forward_time": 0.025217533111572266, "step": 18662 }, { "epoch": 2.84759521484375e-05, "step": 18662, "training_step_time": 0.10500764846801758 }, { "epoch": 2.847747802734375e-05, "model_forward_time": 0.025259971618652344, "step": 18663 }, { "epoch": 2.847747802734375e-05, "step": 18663, "training_step_time": 0.11558294296264648 }, { "epoch": 2.847900390625e-05, "model_forward_time": 0.024763107299804688, "step": 18664 }, { "epoch": 2.847900390625e-05, "step": 18664, "training_step_time": 0.10416650772094727 }, { "epoch": 2.848052978515625e-05, "model_forward_time": 0.025094032287597656, "step": 18665 }, { "epoch": 2.848052978515625e-05, "step": 18665, "training_step_time": 0.11466670036315918 }, { "epoch": 2.84820556640625e-05, "model_forward_time": 0.025075674057006836, "step": 18666 }, { "epoch": 2.84820556640625e-05, "step": 18666, "training_step_time": 0.1301407814025879 }, { "epoch": 2.848358154296875e-05, "model_forward_time": 0.025715351104736328, "step": 18667 }, { "epoch": 2.848358154296875e-05, "step": 18667, "training_step_time": 0.11510562896728516 }, { "epoch": 2.8485107421875e-05, "model_forward_time": 0.024806976318359375, "step": 18668 }, { "epoch": 2.8485107421875e-05, "step": 18668, "training_step_time": 0.12157678604125977 }, { "epoch": 2.848663330078125e-05, "model_forward_time": 0.025059938430786133, "step": 18669 }, { "epoch": 2.848663330078125e-05, "step": 18669, "training_step_time": 0.11524724960327148 }, { "epoch": 2.84881591796875e-05, "grad_norm": 0.22405150532722473, "learning_rate": 3.418268713190986e-05, "loss": 0.0117, "step": 18670 }, { "epoch": 2.84881591796875e-05, "model_forward_time": 0.024973392486572266, "step": 18670 }, { "epoch": 2.84881591796875e-05, "step": 18670, "training_step_time": 0.1145637035369873 }, { "epoch": 2.848968505859375e-05, "model_forward_time": 0.02456974983215332, "step": 18671 }, { "epoch": 2.848968505859375e-05, "step": 18671, "training_step_time": 0.1123507022857666 }, { "epoch": 2.84912109375e-05, "model_forward_time": 0.02492213249206543, "step": 18672 }, { "epoch": 2.84912109375e-05, "step": 18672, "training_step_time": 0.11482691764831543 }, { "epoch": 2.849273681640625e-05, "model_forward_time": 0.02473759651184082, "step": 18673 }, { "epoch": 2.849273681640625e-05, "step": 18673, "training_step_time": 0.18982887268066406 }, { "epoch": 2.84942626953125e-05, "model_forward_time": 0.023865461349487305, "step": 18674 }, { "epoch": 2.84942626953125e-05, "step": 18674, "training_step_time": 0.12369060516357422 }, { "epoch": 2.849578857421875e-05, "model_forward_time": 0.023891925811767578, "step": 18675 }, { "epoch": 2.849578857421875e-05, "step": 18675, "training_step_time": 0.1091470718383789 }, { "epoch": 2.8497314453125e-05, "model_forward_time": 0.025587797164916992, "step": 18676 }, { "epoch": 2.8497314453125e-05, "step": 18676, "training_step_time": 0.10956287384033203 }, { "epoch": 2.849884033203125e-05, "model_forward_time": 0.02534174919128418, "step": 18677 }, { "epoch": 2.849884033203125e-05, "step": 18677, "training_step_time": 0.12403154373168945 }, { "epoch": 2.85003662109375e-05, "model_forward_time": 0.025243520736694336, "step": 18678 }, { "epoch": 2.85003662109375e-05, "step": 18678, "training_step_time": 0.10702896118164062 }, { "epoch": 2.850189208984375e-05, "model_forward_time": 0.024883270263671875, "step": 18679 }, { "epoch": 2.850189208984375e-05, "step": 18679, "training_step_time": 0.19613409042358398 }, { "epoch": 2.850341796875e-05, "grad_norm": 0.30229589343070984, "learning_rate": 3.413041162296241e-05, "loss": 0.0101, "step": 18680 }, { "epoch": 2.850341796875e-05, "model_forward_time": 0.0244295597076416, "step": 18680 }, { "epoch": 2.850341796875e-05, "step": 18680, "training_step_time": 0.10483694076538086 }, { "epoch": 2.850494384765625e-05, "model_forward_time": 0.024383068084716797, "step": 18681 }, { "epoch": 2.850494384765625e-05, "step": 18681, "training_step_time": 0.10775184631347656 }, { "epoch": 2.85064697265625e-05, "model_forward_time": 0.025890111923217773, "step": 18682 }, { "epoch": 2.85064697265625e-05, "step": 18682, "training_step_time": 0.10780072212219238 }, { "epoch": 2.850799560546875e-05, "model_forward_time": 0.02549433708190918, "step": 18683 }, { "epoch": 2.850799560546875e-05, "step": 18683, "training_step_time": 0.1060638427734375 }, { "epoch": 2.8509521484375e-05, "model_forward_time": 0.025543689727783203, "step": 18684 }, { "epoch": 2.8509521484375e-05, "step": 18684, "training_step_time": 0.10519814491271973 }, { "epoch": 2.851104736328125e-05, "model_forward_time": 0.024728059768676758, "step": 18685 }, { "epoch": 2.851104736328125e-05, "step": 18685, "training_step_time": 0.10474085807800293 }, { "epoch": 2.85125732421875e-05, "model_forward_time": 0.024762630462646484, "step": 18686 }, { "epoch": 2.85125732421875e-05, "step": 18686, "training_step_time": 0.10592412948608398 }, { "epoch": 2.851409912109375e-05, "model_forward_time": 0.025117158889770508, "step": 18687 }, { "epoch": 2.851409912109375e-05, "step": 18687, "training_step_time": 0.10524106025695801 }, { "epoch": 2.8515625e-05, "model_forward_time": 0.025257349014282227, "step": 18688 }, { "epoch": 2.8515625e-05, "step": 18688, "training_step_time": 0.10725855827331543 }, { "epoch": 2.851715087890625e-05, "model_forward_time": 0.02482295036315918, "step": 18689 }, { "epoch": 2.851715087890625e-05, "step": 18689, "training_step_time": 0.10512137413024902 }, { "epoch": 2.85186767578125e-05, "grad_norm": 0.148757666349411, "learning_rate": 3.407815539706124e-05, "loss": 0.0097, "step": 18690 }, { "epoch": 2.85186767578125e-05, "model_forward_time": 0.024880647659301758, "step": 18690 }, { "epoch": 2.85186767578125e-05, "step": 18690, "training_step_time": 0.10797357559204102 }, { "epoch": 2.852020263671875e-05, "model_forward_time": 0.025351285934448242, "step": 18691 }, { "epoch": 2.852020263671875e-05, "step": 18691, "training_step_time": 0.10604691505432129 }, { "epoch": 2.8521728515625e-05, "model_forward_time": 0.024843215942382812, "step": 18692 }, { "epoch": 2.8521728515625e-05, "step": 18692, "training_step_time": 0.10420584678649902 }, { "epoch": 2.852325439453125e-05, "model_forward_time": 0.025293588638305664, "step": 18693 }, { "epoch": 2.852325439453125e-05, "step": 18693, "training_step_time": 0.1042935848236084 }, { "epoch": 2.85247802734375e-05, "model_forward_time": 0.025359392166137695, "step": 18694 }, { "epoch": 2.85247802734375e-05, "step": 18694, "training_step_time": 0.19097590446472168 }, { "epoch": 2.852630615234375e-05, "model_forward_time": 0.0243072509765625, "step": 18695 }, { "epoch": 2.852630615234375e-05, "step": 18695, "training_step_time": 0.12299060821533203 }, { "epoch": 2.852783203125e-05, "model_forward_time": 0.024074554443359375, "step": 18696 }, { "epoch": 2.852783203125e-05, "step": 18696, "training_step_time": 0.131317138671875 }, { "epoch": 2.852935791015625e-05, "model_forward_time": 0.02502727508544922, "step": 18697 }, { "epoch": 2.852935791015625e-05, "step": 18697, "training_step_time": 0.16652536392211914 }, { "epoch": 2.85308837890625e-05, "model_forward_time": 0.024404525756835938, "step": 18698 }, { "epoch": 2.85308837890625e-05, "step": 18698, "training_step_time": 0.21195220947265625 }, { "epoch": 2.853240966796875e-05, "model_forward_time": 0.024669408798217773, "step": 18699 }, { "epoch": 2.853240966796875e-05, "step": 18699, "training_step_time": 0.10062789916992188 }, { "epoch": 2.8533935546875e-05, "grad_norm": 0.18390384316444397, "learning_rate": 3.40259185177026e-05, "loss": 0.0056, "step": 18700 }, { "epoch": 2.8533935546875e-05, "model_forward_time": 0.024452686309814453, "step": 18700 }, { "epoch": 2.8533935546875e-05, "step": 18700, "training_step_time": 0.10303497314453125 }, { "epoch": 2.853546142578125e-05, "model_forward_time": 0.02494215965270996, "step": 18701 }, { "epoch": 2.853546142578125e-05, "step": 18701, "training_step_time": 0.11635208129882812 }, { "epoch": 2.85369873046875e-05, "model_forward_time": 0.025289297103881836, "step": 18702 }, { "epoch": 2.85369873046875e-05, "step": 18702, "training_step_time": 0.10718560218811035 }, { "epoch": 2.853851318359375e-05, "model_forward_time": 0.025139808654785156, "step": 18703 }, { "epoch": 2.853851318359375e-05, "step": 18703, "training_step_time": 0.10726165771484375 }, { "epoch": 2.85400390625e-05, "model_forward_time": 0.02516341209411621, "step": 18704 }, { "epoch": 2.85400390625e-05, "step": 18704, "training_step_time": 0.21512174606323242 }, { "epoch": 2.854156494140625e-05, "model_forward_time": 0.024864673614501953, "step": 18705 }, { "epoch": 2.854156494140625e-05, "step": 18705, "training_step_time": 0.11003494262695312 }, { "epoch": 2.85430908203125e-05, "model_forward_time": 0.024806499481201172, "step": 18706 }, { "epoch": 2.85430908203125e-05, "step": 18706, "training_step_time": 0.10397219657897949 }, { "epoch": 2.854461669921875e-05, "model_forward_time": 0.02498030662536621, "step": 18707 }, { "epoch": 2.854461669921875e-05, "step": 18707, "training_step_time": 0.10543298721313477 }, { "epoch": 2.8546142578125e-05, "model_forward_time": 0.02501392364501953, "step": 18708 }, { "epoch": 2.8546142578125e-05, "step": 18708, "training_step_time": 0.1060023307800293 }, { "epoch": 2.854766845703125e-05, "model_forward_time": 0.02534174919128418, "step": 18709 }, { "epoch": 2.854766845703125e-05, "step": 18709, "training_step_time": 0.20882630348205566 }, { "epoch": 2.85491943359375e-05, "grad_norm": 0.152579203248024, "learning_rate": 3.397370104835922e-05, "loss": 0.0089, "step": 18710 }, { "epoch": 2.85491943359375e-05, "model_forward_time": 0.02409815788269043, "step": 18710 }, { "epoch": 2.85491943359375e-05, "step": 18710, "training_step_time": 0.10209155082702637 }, { "epoch": 2.855072021484375e-05, "model_forward_time": 0.024341583251953125, "step": 18711 }, { "epoch": 2.855072021484375e-05, "step": 18711, "training_step_time": 0.11507081985473633 }, { "epoch": 2.855224609375e-05, "model_forward_time": 0.02759838104248047, "step": 18712 }, { "epoch": 2.855224609375e-05, "step": 18712, "training_step_time": 0.1255021095275879 }, { "epoch": 2.855377197265625e-05, "model_forward_time": 0.025249004364013672, "step": 18713 }, { "epoch": 2.855377197265625e-05, "step": 18713, "training_step_time": 0.12906789779663086 }, { "epoch": 2.85552978515625e-05, "model_forward_time": 0.025418996810913086, "step": 18714 }, { "epoch": 2.85552978515625e-05, "step": 18714, "training_step_time": 0.11107993125915527 }, { "epoch": 2.855682373046875e-05, "model_forward_time": 0.025378942489624023, "step": 18715 }, { "epoch": 2.855682373046875e-05, "step": 18715, "training_step_time": 0.11041736602783203 }, { "epoch": 2.8558349609375e-05, "model_forward_time": 0.024824142456054688, "step": 18716 }, { "epoch": 2.8558349609375e-05, "step": 18716, "training_step_time": 0.10512495040893555 }, { "epoch": 2.855987548828125e-05, "model_forward_time": 0.025124073028564453, "step": 18717 }, { "epoch": 2.855987548828125e-05, "step": 18717, "training_step_time": 0.10397648811340332 }, { "epoch": 2.85614013671875e-05, "model_forward_time": 0.02553248405456543, "step": 18718 }, { "epoch": 2.85614013671875e-05, "step": 18718, "training_step_time": 0.10438919067382812 }, { "epoch": 2.856292724609375e-05, "model_forward_time": 0.02518320083618164, "step": 18719 }, { "epoch": 2.856292724609375e-05, "step": 18719, "training_step_time": 0.16293001174926758 }, { "epoch": 2.8564453125e-05, "grad_norm": 0.24660253524780273, "learning_rate": 3.392150305248024e-05, "loss": 0.0056, "step": 18720 }, { "epoch": 2.8564453125e-05, "model_forward_time": 0.024881601333618164, "step": 18720 }, { "epoch": 2.8564453125e-05, "step": 18720, "training_step_time": 0.13608694076538086 }, { "epoch": 2.856597900390625e-05, "model_forward_time": 0.024523496627807617, "step": 18721 }, { "epoch": 2.856597900390625e-05, "step": 18721, "training_step_time": 0.1143651008605957 }, { "epoch": 2.85675048828125e-05, "model_forward_time": 0.024621009826660156, "step": 18722 }, { "epoch": 2.85675048828125e-05, "step": 18722, "training_step_time": 0.10692644119262695 }, { "epoch": 2.856903076171875e-05, "model_forward_time": 0.025336265563964844, "step": 18723 }, { "epoch": 2.856903076171875e-05, "step": 18723, "training_step_time": 0.10908961296081543 }, { "epoch": 2.8570556640625e-05, "model_forward_time": 0.02513432502746582, "step": 18724 }, { "epoch": 2.8570556640625e-05, "step": 18724, "training_step_time": 0.1632089614868164 }, { "epoch": 2.857208251953125e-05, "model_forward_time": 0.02430558204650879, "step": 18725 }, { "epoch": 2.857208251953125e-05, "step": 18725, "training_step_time": 0.10711979866027832 }, { "epoch": 2.85736083984375e-05, "model_forward_time": 0.024551868438720703, "step": 18726 }, { "epoch": 2.85736083984375e-05, "step": 18726, "training_step_time": 0.10663676261901855 }, { "epoch": 2.857513427734375e-05, "model_forward_time": 0.026419401168823242, "step": 18727 }, { "epoch": 2.857513427734375e-05, "step": 18727, "training_step_time": 0.11676025390625 }, { "epoch": 2.857666015625e-05, "model_forward_time": 0.025155305862426758, "step": 18728 }, { "epoch": 2.857666015625e-05, "step": 18728, "training_step_time": 0.14812874794006348 }, { "epoch": 2.857818603515625e-05, "model_forward_time": 0.023887157440185547, "step": 18729 }, { "epoch": 2.857818603515625e-05, "step": 18729, "training_step_time": 0.12357473373413086 }, { "epoch": 2.85797119140625e-05, "grad_norm": 0.420911580324173, "learning_rate": 3.386932459349114e-05, "loss": 0.0301, "step": 18730 }, { "epoch": 2.85797119140625e-05, "model_forward_time": 0.023508548736572266, "step": 18730 }, { "epoch": 2.85797119140625e-05, "step": 18730, "training_step_time": 0.12253522872924805 }, { "epoch": 2.858123779296875e-05, "model_forward_time": 0.024149417877197266, "step": 18731 }, { "epoch": 2.858123779296875e-05, "step": 18731, "training_step_time": 0.12899017333984375 }, { "epoch": 2.8582763671875e-05, "model_forward_time": 0.024247407913208008, "step": 18732 }, { "epoch": 2.8582763671875e-05, "step": 18732, "training_step_time": 0.12786555290222168 }, { "epoch": 2.858428955078125e-05, "model_forward_time": 0.023737430572509766, "step": 18733 }, { "epoch": 2.858428955078125e-05, "step": 18733, "training_step_time": 0.1237335205078125 }, { "epoch": 2.85858154296875e-05, "model_forward_time": 0.024068832397460938, "step": 18734 }, { "epoch": 2.85858154296875e-05, "step": 18734, "training_step_time": 0.1225881576538086 }, { "epoch": 2.858734130859375e-05, "model_forward_time": 0.024452924728393555, "step": 18735 }, { "epoch": 2.858734130859375e-05, "step": 18735, "training_step_time": 0.12265539169311523 }, { "epoch": 2.85888671875e-05, "model_forward_time": 0.024499893188476562, "step": 18736 }, { "epoch": 2.85888671875e-05, "step": 18736, "training_step_time": 0.12138748168945312 }, { "epoch": 2.859039306640625e-05, "model_forward_time": 0.024144411087036133, "step": 18737 }, { "epoch": 2.859039306640625e-05, "step": 18737, "training_step_time": 0.11595535278320312 }, { "epoch": 2.85919189453125e-05, "model_forward_time": 0.024477243423461914, "step": 18738 }, { "epoch": 2.85919189453125e-05, "step": 18738, "training_step_time": 0.12108898162841797 }, { "epoch": 2.859344482421875e-05, "model_forward_time": 0.025574684143066406, "step": 18739 }, { "epoch": 2.859344482421875e-05, "step": 18739, "training_step_time": 0.17233800888061523 }, { "epoch": 2.8594970703125e-05, "grad_norm": 0.2880837619304657, "learning_rate": 3.3817165734793705e-05, "loss": 0.0067, "step": 18740 }, { "epoch": 2.8594970703125e-05, "model_forward_time": 0.026782512664794922, "step": 18740 }, { "epoch": 2.8594970703125e-05, "step": 18740, "training_step_time": 0.1655445098876953 }, { "epoch": 2.859649658203125e-05, "model_forward_time": 0.027322769165039062, "step": 18741 }, { "epoch": 2.859649658203125e-05, "step": 18741, "training_step_time": 0.2655484676361084 }, { "epoch": 2.85980224609375e-05, "model_forward_time": 0.029373884201049805, "step": 18742 }, { "epoch": 2.85980224609375e-05, "step": 18742, "training_step_time": 0.22951984405517578 }, { "epoch": 2.859954833984375e-05, "model_forward_time": 0.029935598373413086, "step": 18743 }, { "epoch": 2.859954833984375e-05, "step": 18743, "training_step_time": 0.3666236400604248 }, { "epoch": 2.860107421875e-05, "model_forward_time": 0.03392672538757324, "step": 18744 }, { "epoch": 2.860107421875e-05, "step": 18744, "training_step_time": 0.305267333984375 }, { "epoch": 2.860260009765625e-05, "model_forward_time": 0.030944347381591797, "step": 18745 }, { "epoch": 2.860260009765625e-05, "step": 18745, "training_step_time": 0.3896608352661133 }, { "epoch": 2.86041259765625e-05, "model_forward_time": 0.03164196014404297, "step": 18746 }, { "epoch": 2.86041259765625e-05, "step": 18746, "training_step_time": 0.27266907691955566 }, { "epoch": 2.860565185546875e-05, "model_forward_time": 0.030896902084350586, "step": 18747 }, { "epoch": 2.860565185546875e-05, "step": 18747, "training_step_time": 0.3975076675415039 }, { "epoch": 2.8607177734375e-05, "model_forward_time": 0.028563976287841797, "step": 18748 }, { "epoch": 2.8607177734375e-05, "step": 18748, "training_step_time": 0.31119751930236816 }, { "epoch": 2.860870361328125e-05, "model_forward_time": 0.03395700454711914, "step": 18749 }, { "epoch": 2.860870361328125e-05, "step": 18749, "training_step_time": 0.3223867416381836 }, { "epoch": 2.86102294921875e-05, "grad_norm": 0.2670065760612488, "learning_rate": 3.3765026539765834e-05, "loss": 0.0063, "step": 18750 }, { "epoch": 2.86102294921875e-05, "model_forward_time": 0.033078670501708984, "step": 18750 }, { "epoch": 2.86102294921875e-05, "step": 18750, "training_step_time": 0.29189157485961914 }, { "epoch": 2.861175537109375e-05, "model_forward_time": 0.02948737144470215, "step": 18751 }, { "epoch": 2.861175537109375e-05, "step": 18751, "training_step_time": 0.21408891677856445 }, { "epoch": 2.861328125e-05, "model_forward_time": 0.02936577796936035, "step": 18752 }, { "epoch": 2.861328125e-05, "step": 18752, "training_step_time": 0.2611715793609619 }, { "epoch": 2.861480712890625e-05, "model_forward_time": 0.029602766036987305, "step": 18753 }, { "epoch": 2.861480712890625e-05, "step": 18753, "training_step_time": 0.17420554161071777 }, { "epoch": 2.86163330078125e-05, "model_forward_time": 0.030362606048583984, "step": 18754 }, { "epoch": 2.86163330078125e-05, "step": 18754, "training_step_time": 0.24542832374572754 }, { "epoch": 2.861785888671875e-05, "model_forward_time": 0.03265523910522461, "step": 18755 }, { "epoch": 2.861785888671875e-05, "step": 18755, "training_step_time": 0.1388874053955078 }, { "epoch": 2.8619384765625e-05, "model_forward_time": 0.0278778076171875, "step": 18756 }, { "epoch": 2.8619384765625e-05, "step": 18756, "training_step_time": 0.12894439697265625 }, { "epoch": 2.862091064453125e-05, "model_forward_time": 0.027524948120117188, "step": 18757 }, { "epoch": 2.862091064453125e-05, "step": 18757, "training_step_time": 0.1291515827178955 }, { "epoch": 2.86224365234375e-05, "model_forward_time": 0.02694082260131836, "step": 18758 }, { "epoch": 2.86224365234375e-05, "step": 18758, "training_step_time": 0.12988972663879395 }, { "epoch": 2.862396240234375e-05, "model_forward_time": 0.026509761810302734, "step": 18759 }, { "epoch": 2.862396240234375e-05, "step": 18759, "training_step_time": 0.1183319091796875 }, { "epoch": 2.862548828125e-05, "grad_norm": 0.3997817933559418, "learning_rate": 3.371290707176158e-05, "loss": 0.0092, "step": 18760 }, { "epoch": 2.862548828125e-05, "model_forward_time": 0.026118040084838867, "step": 18760 }, { "epoch": 2.862548828125e-05, "step": 18760, "training_step_time": 0.11597084999084473 }, { "epoch": 2.862701416015625e-05, "model_forward_time": 0.025996923446655273, "step": 18761 }, { "epoch": 2.862701416015625e-05, "step": 18761, "training_step_time": 0.11576342582702637 }, { "epoch": 2.86285400390625e-05, "model_forward_time": 0.025664329528808594, "step": 18762 }, { "epoch": 2.86285400390625e-05, "step": 18762, "training_step_time": 0.1083831787109375 }, { "epoch": 2.863006591796875e-05, "model_forward_time": 0.02584528923034668, "step": 18763 }, { "epoch": 2.863006591796875e-05, "step": 18763, "training_step_time": 0.10778164863586426 }, { "epoch": 2.8631591796875e-05, "model_forward_time": 0.024608135223388672, "step": 18764 }, { "epoch": 2.8631591796875e-05, "step": 18764, "training_step_time": 0.1079263687133789 }, { "epoch": 2.863311767578125e-05, "model_forward_time": 0.024626970291137695, "step": 18765 }, { "epoch": 2.863311767578125e-05, "step": 18765, "training_step_time": 0.10841751098632812 }, { "epoch": 2.86346435546875e-05, "model_forward_time": 0.024639129638671875, "step": 18766 }, { "epoch": 2.86346435546875e-05, "step": 18766, "training_step_time": 0.11050081253051758 }, { "epoch": 2.863616943359375e-05, "model_forward_time": 0.024825334548950195, "step": 18767 }, { "epoch": 2.863616943359375e-05, "step": 18767, "training_step_time": 0.10764741897583008 }, { "epoch": 2.86376953125e-05, "model_forward_time": 0.02548837661743164, "step": 18768 }, { "epoch": 2.86376953125e-05, "step": 18768, "training_step_time": 0.1622178554534912 }, { "epoch": 2.863922119140625e-05, "model_forward_time": 0.02474236488342285, "step": 18769 }, { "epoch": 2.863922119140625e-05, "step": 18769, "training_step_time": 0.12578749656677246 }, { "epoch": 2.86407470703125e-05, "grad_norm": 0.10352246463298798, "learning_rate": 3.366080739411101e-05, "loss": 0.0225, "step": 18770 }, { "epoch": 2.86407470703125e-05, "model_forward_time": 0.024154186248779297, "step": 18770 }, { "epoch": 2.86407470703125e-05, "step": 18770, "training_step_time": 0.12054085731506348 }, { "epoch": 2.864227294921875e-05, "model_forward_time": 0.02568507194519043, "step": 18771 }, { "epoch": 2.864227294921875e-05, "step": 18771, "training_step_time": 0.10467982292175293 }, { "epoch": 2.8643798828125e-05, "model_forward_time": 0.024552583694458008, "step": 18772 }, { "epoch": 2.8643798828125e-05, "step": 18772, "training_step_time": 0.1508169174194336 }, { "epoch": 2.864532470703125e-05, "model_forward_time": 0.025323152542114258, "step": 18773 }, { "epoch": 2.864532470703125e-05, "step": 18773, "training_step_time": 0.13198232650756836 }, { "epoch": 2.86468505859375e-05, "model_forward_time": 0.024127483367919922, "step": 18774 }, { "epoch": 2.86468505859375e-05, "step": 18774, "training_step_time": 0.11276078224182129 }, { "epoch": 2.864837646484375e-05, "model_forward_time": 0.024816274642944336, "step": 18775 }, { "epoch": 2.864837646484375e-05, "step": 18775, "training_step_time": 0.10796546936035156 }, { "epoch": 2.864990234375e-05, "model_forward_time": 0.02396845817565918, "step": 18776 }, { "epoch": 2.864990234375e-05, "step": 18776, "training_step_time": 0.14810919761657715 }, { "epoch": 2.865142822265625e-05, "model_forward_time": 0.024390459060668945, "step": 18777 }, { "epoch": 2.865142822265625e-05, "step": 18777, "training_step_time": 0.1488649845123291 }, { "epoch": 2.86529541015625e-05, "model_forward_time": 0.024006128311157227, "step": 18778 }, { "epoch": 2.86529541015625e-05, "step": 18778, "training_step_time": 0.13861393928527832 }, { "epoch": 2.865447998046875e-05, "model_forward_time": 0.02414989471435547, "step": 18779 }, { "epoch": 2.865447998046875e-05, "step": 18779, "training_step_time": 0.1290268898010254 }, { "epoch": 2.8656005859375e-05, "grad_norm": 0.3721567392349243, "learning_rate": 3.360872757012011e-05, "loss": 0.0195, "step": 18780 }, { "epoch": 2.8656005859375e-05, "model_forward_time": 0.024308443069458008, "step": 18780 }, { "epoch": 2.8656005859375e-05, "step": 18780, "training_step_time": 0.19645261764526367 }, { "epoch": 2.865753173828125e-05, "model_forward_time": 0.02265334129333496, "step": 18781 }, { "epoch": 2.865753173828125e-05, "step": 18781, "training_step_time": 0.11078906059265137 }, { "epoch": 2.86590576171875e-05, "model_forward_time": 0.024447202682495117, "step": 18782 }, { "epoch": 2.86590576171875e-05, "step": 18782, "training_step_time": 0.1097869873046875 }, { "epoch": 2.866058349609375e-05, "model_forward_time": 0.025259733200073242, "step": 18783 }, { "epoch": 2.866058349609375e-05, "step": 18783, "training_step_time": 0.11209535598754883 }, { "epoch": 2.8662109375e-05, "model_forward_time": 0.02499842643737793, "step": 18784 }, { "epoch": 2.8662109375e-05, "step": 18784, "training_step_time": 0.1071779727935791 }, { "epoch": 2.866363525390625e-05, "model_forward_time": 0.0244600772857666, "step": 18785 }, { "epoch": 2.866363525390625e-05, "step": 18785, "training_step_time": 0.10940384864807129 }, { "epoch": 2.86651611328125e-05, "model_forward_time": 0.024889230728149414, "step": 18786 }, { "epoch": 2.86651611328125e-05, "step": 18786, "training_step_time": 0.15638971328735352 }, { "epoch": 2.866668701171875e-05, "model_forward_time": 0.024312496185302734, "step": 18787 }, { "epoch": 2.866668701171875e-05, "step": 18787, "training_step_time": 0.1111001968383789 }, { "epoch": 2.8668212890625e-05, "model_forward_time": 0.02421259880065918, "step": 18788 }, { "epoch": 2.8668212890625e-05, "step": 18788, "training_step_time": 0.10922741889953613 }, { "epoch": 2.866973876953125e-05, "model_forward_time": 0.02550053596496582, "step": 18789 }, { "epoch": 2.866973876953125e-05, "step": 18789, "training_step_time": 0.11527729034423828 }, { "epoch": 2.86712646484375e-05, "grad_norm": 0.3040342926979065, "learning_rate": 3.355666766307084e-05, "loss": 0.0082, "step": 18790 }, { "epoch": 2.86712646484375e-05, "model_forward_time": 0.025507450103759766, "step": 18790 }, { "epoch": 2.86712646484375e-05, "step": 18790, "training_step_time": 0.12912964820861816 }, { "epoch": 2.867279052734375e-05, "model_forward_time": 0.02561044692993164, "step": 18791 }, { "epoch": 2.867279052734375e-05, "step": 18791, "training_step_time": 0.10654735565185547 }, { "epoch": 2.867431640625e-05, "model_forward_time": 0.02562093734741211, "step": 18792 }, { "epoch": 2.867431640625e-05, "step": 18792, "training_step_time": 0.11953258514404297 }, { "epoch": 2.867584228515625e-05, "model_forward_time": 0.02463698387145996, "step": 18793 }, { "epoch": 2.867584228515625e-05, "step": 18793, "training_step_time": 0.10654902458190918 }, { "epoch": 2.86773681640625e-05, "model_forward_time": 0.025615215301513672, "step": 18794 }, { "epoch": 2.86773681640625e-05, "step": 18794, "training_step_time": 0.12594366073608398 }, { "epoch": 2.867889404296875e-05, "model_forward_time": 0.024945497512817383, "step": 18795 }, { "epoch": 2.867889404296875e-05, "step": 18795, "training_step_time": 0.13231778144836426 }, { "epoch": 2.8680419921875e-05, "model_forward_time": 0.02504277229309082, "step": 18796 }, { "epoch": 2.8680419921875e-05, "step": 18796, "training_step_time": 0.1078488826751709 }, { "epoch": 2.868194580078125e-05, "model_forward_time": 0.025126934051513672, "step": 18797 }, { "epoch": 2.868194580078125e-05, "step": 18797, "training_step_time": 0.11077523231506348 }, { "epoch": 2.86834716796875e-05, "model_forward_time": 0.02547144889831543, "step": 18798 }, { "epoch": 2.86834716796875e-05, "step": 18798, "training_step_time": 0.12065887451171875 }, { "epoch": 2.868499755859375e-05, "model_forward_time": 0.025050640106201172, "step": 18799 }, { "epoch": 2.868499755859375e-05, "step": 18799, "training_step_time": 0.10941720008850098 }, { "epoch": 2.86865234375e-05, "grad_norm": 0.2032412737607956, "learning_rate": 3.350462773622086e-05, "loss": 0.0081, "step": 18800 }, { "epoch": 2.86865234375e-05, "model_forward_time": 0.025215864181518555, "step": 18800 }, { "epoch": 2.86865234375e-05, "step": 18800, "training_step_time": 0.19737601280212402 }, { "epoch": 2.868804931640625e-05, "model_forward_time": 0.025456666946411133, "step": 18801 }, { "epoch": 2.868804931640625e-05, "step": 18801, "training_step_time": 0.1844949722290039 }, { "epoch": 2.86895751953125e-05, "model_forward_time": 0.023549556732177734, "step": 18802 }, { "epoch": 2.86895751953125e-05, "step": 18802, "training_step_time": 0.17142558097839355 }, { "epoch": 2.869110107421875e-05, "model_forward_time": 0.023768186569213867, "step": 18803 }, { "epoch": 2.869110107421875e-05, "step": 18803, "training_step_time": 0.14828705787658691 }, { "epoch": 2.8692626953125e-05, "model_forward_time": 0.0239255428314209, "step": 18804 }, { "epoch": 2.8692626953125e-05, "step": 18804, "training_step_time": 0.13823962211608887 }, { "epoch": 2.869415283203125e-05, "model_forward_time": 0.02364063262939453, "step": 18805 }, { "epoch": 2.869415283203125e-05, "step": 18805, "training_step_time": 0.13443231582641602 }, { "epoch": 2.86956787109375e-05, "model_forward_time": 0.02341628074645996, "step": 18806 }, { "epoch": 2.86956787109375e-05, "step": 18806, "training_step_time": 0.12470197677612305 }, { "epoch": 2.869720458984375e-05, "model_forward_time": 0.02453446388244629, "step": 18807 }, { "epoch": 2.869720458984375e-05, "step": 18807, "training_step_time": 0.12043285369873047 }, { "epoch": 2.869873046875e-05, "model_forward_time": 0.025365829467773438, "step": 18808 }, { "epoch": 2.869873046875e-05, "step": 18808, "training_step_time": 0.11916899681091309 }, { "epoch": 2.870025634765625e-05, "model_forward_time": 0.02422189712524414, "step": 18809 }, { "epoch": 2.870025634765625e-05, "step": 18809, "training_step_time": 0.11345839500427246 }, { "epoch": 2.87017822265625e-05, "grad_norm": 0.24520441889762878, "learning_rate": 3.3452607852803584e-05, "loss": 0.0102, "step": 18810 }, { "epoch": 2.87017822265625e-05, "model_forward_time": 0.024350881576538086, "step": 18810 }, { "epoch": 2.87017822265625e-05, "step": 18810, "training_step_time": 0.11013674736022949 }, { "epoch": 2.870330810546875e-05, "model_forward_time": 0.0273134708404541, "step": 18811 }, { "epoch": 2.870330810546875e-05, "step": 18811, "training_step_time": 0.19591379165649414 }, { "epoch": 2.8704833984375e-05, "model_forward_time": 0.02513575553894043, "step": 18812 }, { "epoch": 2.8704833984375e-05, "step": 18812, "training_step_time": 0.12509989738464355 }, { "epoch": 2.870635986328125e-05, "model_forward_time": 0.0239107608795166, "step": 18813 }, { "epoch": 2.870635986328125e-05, "step": 18813, "training_step_time": 0.12426233291625977 }, { "epoch": 2.87078857421875e-05, "model_forward_time": 0.025073528289794922, "step": 18814 }, { "epoch": 2.87078857421875e-05, "step": 18814, "training_step_time": 0.13442611694335938 }, { "epoch": 2.870941162109375e-05, "model_forward_time": 0.02499532699584961, "step": 18815 }, { "epoch": 2.870941162109375e-05, "step": 18815, "training_step_time": 0.11906003952026367 }, { "epoch": 2.87109375e-05, "model_forward_time": 0.0249788761138916, "step": 18816 }, { "epoch": 2.87109375e-05, "step": 18816, "training_step_time": 0.1353907585144043 }, { "epoch": 2.871246337890625e-05, "model_forward_time": 0.025841474533081055, "step": 18817 }, { "epoch": 2.871246337890625e-05, "step": 18817, "training_step_time": 0.10914278030395508 }, { "epoch": 2.87139892578125e-05, "model_forward_time": 0.025089502334594727, "step": 18818 }, { "epoch": 2.87139892578125e-05, "step": 18818, "training_step_time": 0.10712289810180664 }, { "epoch": 2.871551513671875e-05, "model_forward_time": 0.025639057159423828, "step": 18819 }, { "epoch": 2.871551513671875e-05, "step": 18819, "training_step_time": 0.10757946968078613 }, { "epoch": 2.8717041015625e-05, "grad_norm": 0.2976893186569214, "learning_rate": 3.3400608076028094e-05, "loss": 0.011, "step": 18820 }, { "epoch": 2.8717041015625e-05, "model_forward_time": 0.024344921112060547, "step": 18820 }, { "epoch": 2.8717041015625e-05, "step": 18820, "training_step_time": 0.14330577850341797 }, { "epoch": 2.871856689453125e-05, "model_forward_time": 0.024757862091064453, "step": 18821 }, { "epoch": 2.871856689453125e-05, "step": 18821, "training_step_time": 0.16858768463134766 }, { "epoch": 2.87200927734375e-05, "model_forward_time": 0.02429342269897461, "step": 18822 }, { "epoch": 2.87200927734375e-05, "step": 18822, "training_step_time": 0.1139528751373291 }, { "epoch": 2.872161865234375e-05, "model_forward_time": 0.02384352684020996, "step": 18823 }, { "epoch": 2.872161865234375e-05, "step": 18823, "training_step_time": 0.12934017181396484 }, { "epoch": 2.872314453125e-05, "model_forward_time": 0.025946617126464844, "step": 18824 }, { "epoch": 2.872314453125e-05, "step": 18824, "training_step_time": 0.21086716651916504 }, { "epoch": 2.872467041015625e-05, "model_forward_time": 0.024882793426513672, "step": 18825 }, { "epoch": 2.872467041015625e-05, "step": 18825, "training_step_time": 0.11085939407348633 }, { "epoch": 2.87261962890625e-05, "model_forward_time": 0.02462482452392578, "step": 18826 }, { "epoch": 2.87261962890625e-05, "step": 18826, "training_step_time": 0.10790777206420898 }, { "epoch": 2.872772216796875e-05, "model_forward_time": 0.02523350715637207, "step": 18827 }, { "epoch": 2.872772216796875e-05, "step": 18827, "training_step_time": 0.10863780975341797 }, { "epoch": 2.8729248046875e-05, "model_forward_time": 0.025264978408813477, "step": 18828 }, { "epoch": 2.8729248046875e-05, "step": 18828, "training_step_time": 0.10814142227172852 }, { "epoch": 2.873077392578125e-05, "model_forward_time": 0.024739742279052734, "step": 18829 }, { "epoch": 2.873077392578125e-05, "step": 18829, "training_step_time": 0.10726213455200195 }, { "epoch": 2.87322998046875e-05, "grad_norm": 0.14117495715618134, "learning_rate": 3.3348628469079e-05, "loss": 0.0074, "step": 18830 }, { "epoch": 2.87322998046875e-05, "model_forward_time": 0.024840831756591797, "step": 18830 }, { "epoch": 2.87322998046875e-05, "step": 18830, "training_step_time": 0.1357409954071045 }, { "epoch": 2.873382568359375e-05, "model_forward_time": 0.025346040725708008, "step": 18831 }, { "epoch": 2.873382568359375e-05, "step": 18831, "training_step_time": 0.1106564998626709 }, { "epoch": 2.87353515625e-05, "model_forward_time": 0.0249481201171875, "step": 18832 }, { "epoch": 2.87353515625e-05, "step": 18832, "training_step_time": 0.11199545860290527 }, { "epoch": 2.873687744140625e-05, "model_forward_time": 0.02504134178161621, "step": 18833 }, { "epoch": 2.873687744140625e-05, "step": 18833, "training_step_time": 0.12252545356750488 }, { "epoch": 2.87384033203125e-05, "model_forward_time": 0.02536916732788086, "step": 18834 }, { "epoch": 2.87384033203125e-05, "step": 18834, "training_step_time": 0.13723444938659668 }, { "epoch": 2.873992919921875e-05, "model_forward_time": 0.02453160285949707, "step": 18835 }, { "epoch": 2.873992919921875e-05, "step": 18835, "training_step_time": 0.10772299766540527 }, { "epoch": 2.8741455078125e-05, "model_forward_time": 0.0253903865814209, "step": 18836 }, { "epoch": 2.8741455078125e-05, "step": 18836, "training_step_time": 0.11319422721862793 }, { "epoch": 2.874298095703125e-05, "model_forward_time": 0.024934768676757812, "step": 18837 }, { "epoch": 2.874298095703125e-05, "step": 18837, "training_step_time": 0.18870186805725098 }, { "epoch": 2.87445068359375e-05, "model_forward_time": 0.024133920669555664, "step": 18838 }, { "epoch": 2.87445068359375e-05, "step": 18838, "training_step_time": 0.13958191871643066 }, { "epoch": 2.874603271484375e-05, "model_forward_time": 0.02398085594177246, "step": 18839 }, { "epoch": 2.874603271484375e-05, "step": 18839, "training_step_time": 0.1088249683380127 }, { "epoch": 2.874755859375e-05, "grad_norm": 0.34608063101768494, "learning_rate": 3.329666909511645e-05, "loss": 0.0085, "step": 18840 }, { "epoch": 2.874755859375e-05, "model_forward_time": 0.024825572967529297, "step": 18840 }, { "epoch": 2.874755859375e-05, "step": 18840, "training_step_time": 0.10867643356323242 }, { "epoch": 2.874908447265625e-05, "model_forward_time": 0.025487661361694336, "step": 18841 }, { "epoch": 2.874908447265625e-05, "step": 18841, "training_step_time": 0.11101269721984863 }, { "epoch": 2.87506103515625e-05, "model_forward_time": 0.025203704833984375, "step": 18842 }, { "epoch": 2.87506103515625e-05, "step": 18842, "training_step_time": 0.15793371200561523 }, { "epoch": 2.875213623046875e-05, "model_forward_time": 0.025016307830810547, "step": 18843 }, { "epoch": 2.875213623046875e-05, "step": 18843, "training_step_time": 0.1491403579711914 }, { "epoch": 2.8753662109375e-05, "model_forward_time": 0.023955821990966797, "step": 18844 }, { "epoch": 2.8753662109375e-05, "step": 18844, "training_step_time": 0.10569334030151367 }, { "epoch": 2.875518798828125e-05, "model_forward_time": 0.024773597717285156, "step": 18845 }, { "epoch": 2.875518798828125e-05, "step": 18845, "training_step_time": 0.1034698486328125 }, { "epoch": 2.87567138671875e-05, "model_forward_time": 0.025126218795776367, "step": 18846 }, { "epoch": 2.87567138671875e-05, "step": 18846, "training_step_time": 0.10629153251647949 }, { "epoch": 2.875823974609375e-05, "model_forward_time": 0.02525019645690918, "step": 18847 }, { "epoch": 2.875823974609375e-05, "step": 18847, "training_step_time": 0.10510468482971191 }, { "epoch": 2.8759765625e-05, "model_forward_time": 0.02504134178161621, "step": 18848 }, { "epoch": 2.8759765625e-05, "step": 18848, "training_step_time": 0.1090080738067627 }, { "epoch": 2.876129150390625e-05, "model_forward_time": 0.025463581085205078, "step": 18849 }, { "epoch": 2.876129150390625e-05, "step": 18849, "training_step_time": 0.10808038711547852 }, { "epoch": 2.87628173828125e-05, "grad_norm": 0.21739520132541656, "learning_rate": 3.324473001727597e-05, "loss": 0.0102, "step": 18850 }, { "epoch": 2.87628173828125e-05, "model_forward_time": 0.025060176849365234, "step": 18850 }, { "epoch": 2.87628173828125e-05, "step": 18850, "training_step_time": 0.10759091377258301 }, { "epoch": 2.876434326171875e-05, "model_forward_time": 0.024988174438476562, "step": 18851 }, { "epoch": 2.876434326171875e-05, "step": 18851, "training_step_time": 0.11188125610351562 }, { "epoch": 2.8765869140625e-05, "model_forward_time": 0.02524566650390625, "step": 18852 }, { "epoch": 2.8765869140625e-05, "step": 18852, "training_step_time": 0.11302471160888672 }, { "epoch": 2.876739501953125e-05, "model_forward_time": 0.02375006675720215, "step": 18853 }, { "epoch": 2.876739501953125e-05, "step": 18853, "training_step_time": 0.1068568229675293 }, { "epoch": 2.87689208984375e-05, "model_forward_time": 0.02502727508544922, "step": 18854 }, { "epoch": 2.87689208984375e-05, "step": 18854, "training_step_time": 0.11102986335754395 }, { "epoch": 2.877044677734375e-05, "model_forward_time": 0.027033090591430664, "step": 18855 }, { "epoch": 2.877044677734375e-05, "step": 18855, "training_step_time": 0.11098694801330566 }, { "epoch": 2.877197265625e-05, "model_forward_time": 0.02537369728088379, "step": 18856 }, { "epoch": 2.877197265625e-05, "step": 18856, "training_step_time": 0.11242079734802246 }, { "epoch": 2.877349853515625e-05, "model_forward_time": 0.025435686111450195, "step": 18857 }, { "epoch": 2.877349853515625e-05, "step": 18857, "training_step_time": 0.14428019523620605 }, { "epoch": 2.87750244140625e-05, "model_forward_time": 0.02502298355102539, "step": 18858 }, { "epoch": 2.87750244140625e-05, "step": 18858, "training_step_time": 0.11732602119445801 }, { "epoch": 2.877655029296875e-05, "model_forward_time": 0.024841785430908203, "step": 18859 }, { "epoch": 2.877655029296875e-05, "step": 18859, "training_step_time": 0.12676048278808594 }, { "epoch": 2.8778076171875e-05, "grad_norm": 0.25564438104629517, "learning_rate": 3.3192811298668434e-05, "loss": 0.0134, "step": 18860 }, { "epoch": 2.8778076171875e-05, "model_forward_time": 0.024605751037597656, "step": 18860 }, { "epoch": 2.8778076171875e-05, "step": 18860, "training_step_time": 0.16477441787719727 }, { "epoch": 2.877960205078125e-05, "model_forward_time": 0.024392366409301758, "step": 18861 }, { "epoch": 2.877960205078125e-05, "step": 18861, "training_step_time": 0.2183387279510498 }, { "epoch": 2.87811279296875e-05, "model_forward_time": 0.02406620979309082, "step": 18862 }, { "epoch": 2.87811279296875e-05, "step": 18862, "training_step_time": 0.11933612823486328 }, { "epoch": 2.878265380859375e-05, "model_forward_time": 0.02436375617980957, "step": 18863 }, { "epoch": 2.878265380859375e-05, "step": 18863, "training_step_time": 0.10696196556091309 }, { "epoch": 2.87841796875e-05, "model_forward_time": 0.024748802185058594, "step": 18864 }, { "epoch": 2.87841796875e-05, "step": 18864, "training_step_time": 0.10242009162902832 }, { "epoch": 2.878570556640625e-05, "model_forward_time": 0.023896455764770508, "step": 18865 }, { "epoch": 2.878570556640625e-05, "step": 18865, "training_step_time": 0.13183164596557617 }, { "epoch": 2.87872314453125e-05, "model_forward_time": 0.02444171905517578, "step": 18866 }, { "epoch": 2.87872314453125e-05, "step": 18866, "training_step_time": 0.12638354301452637 }, { "epoch": 2.878875732421875e-05, "model_forward_time": 0.024751901626586914, "step": 18867 }, { "epoch": 2.878875732421875e-05, "step": 18867, "training_step_time": 0.10514569282531738 }, { "epoch": 2.8790283203125e-05, "model_forward_time": 0.025467872619628906, "step": 18868 }, { "epoch": 2.8790283203125e-05, "step": 18868, "training_step_time": 0.10662603378295898 }, { "epoch": 2.879180908203125e-05, "model_forward_time": 0.026114702224731445, "step": 18869 }, { "epoch": 2.879180908203125e-05, "step": 18869, "training_step_time": 0.12800121307373047 }, { "epoch": 2.87933349609375e-05, "grad_norm": 0.2927517592906952, "learning_rate": 3.3140913002379995e-05, "loss": 0.0118, "step": 18870 }, { "epoch": 2.87933349609375e-05, "model_forward_time": 0.025313854217529297, "step": 18870 }, { "epoch": 2.87933349609375e-05, "step": 18870, "training_step_time": 0.20552921295166016 }, { "epoch": 2.879486083984375e-05, "model_forward_time": 0.0244293212890625, "step": 18871 }, { "epoch": 2.879486083984375e-05, "step": 18871, "training_step_time": 0.1035158634185791 }, { "epoch": 2.879638671875e-05, "model_forward_time": 0.02451610565185547, "step": 18872 }, { "epoch": 2.879638671875e-05, "step": 18872, "training_step_time": 0.10397028923034668 }, { "epoch": 2.879791259765625e-05, "model_forward_time": 0.02529168128967285, "step": 18873 }, { "epoch": 2.879791259765625e-05, "step": 18873, "training_step_time": 0.10483884811401367 }, { "epoch": 2.87994384765625e-05, "model_forward_time": 0.025180816650390625, "step": 18874 }, { "epoch": 2.87994384765625e-05, "step": 18874, "training_step_time": 0.10379862785339355 }, { "epoch": 2.880096435546875e-05, "model_forward_time": 0.02538132667541504, "step": 18875 }, { "epoch": 2.880096435546875e-05, "step": 18875, "training_step_time": 0.20954585075378418 }, { "epoch": 2.8802490234375e-05, "model_forward_time": 0.02449345588684082, "step": 18876 }, { "epoch": 2.8802490234375e-05, "step": 18876, "training_step_time": 0.10798287391662598 }, { "epoch": 2.880401611328125e-05, "model_forward_time": 0.024219989776611328, "step": 18877 }, { "epoch": 2.880401611328125e-05, "step": 18877, "training_step_time": 0.11140227317810059 }, { "epoch": 2.88055419921875e-05, "model_forward_time": 0.02522587776184082, "step": 18878 }, { "epoch": 2.88055419921875e-05, "step": 18878, "training_step_time": 0.12182855606079102 }, { "epoch": 2.880706787109375e-05, "model_forward_time": 0.024939775466918945, "step": 18879 }, { "epoch": 2.880706787109375e-05, "step": 18879, "training_step_time": 0.1306607723236084 }, { "epoch": 2.880859375e-05, "grad_norm": 0.17261019349098206, "learning_rate": 3.308903519147194e-05, "loss": 0.0082, "step": 18880 }, { "epoch": 2.880859375e-05, "model_forward_time": 0.02469038963317871, "step": 18880 }, { "epoch": 2.880859375e-05, "step": 18880, "training_step_time": 0.10517716407775879 }, { "epoch": 2.881011962890625e-05, "model_forward_time": 0.025018930435180664, "step": 18881 }, { "epoch": 2.881011962890625e-05, "step": 18881, "training_step_time": 0.11643123626708984 }, { "epoch": 2.88116455078125e-05, "model_forward_time": 0.0251157283782959, "step": 18882 }, { "epoch": 2.88116455078125e-05, "step": 18882, "training_step_time": 0.12379169464111328 }, { "epoch": 2.881317138671875e-05, "model_forward_time": 0.025612831115722656, "step": 18883 }, { "epoch": 2.881317138671875e-05, "step": 18883, "training_step_time": 0.143751859664917 }, { "epoch": 2.8814697265625e-05, "model_forward_time": 0.024871349334716797, "step": 18884 }, { "epoch": 2.8814697265625e-05, "step": 18884, "training_step_time": 0.13404083251953125 }, { "epoch": 2.881622314453125e-05, "model_forward_time": 0.024524688720703125, "step": 18885 }, { "epoch": 2.881622314453125e-05, "step": 18885, "training_step_time": 0.19545984268188477 }, { "epoch": 2.88177490234375e-05, "model_forward_time": 0.02385711669921875, "step": 18886 }, { "epoch": 2.88177490234375e-05, "step": 18886, "training_step_time": 0.10447454452514648 }, { "epoch": 2.881927490234375e-05, "model_forward_time": 0.024504899978637695, "step": 18887 }, { "epoch": 2.881927490234375e-05, "step": 18887, "training_step_time": 0.10370159149169922 }, { "epoch": 2.882080078125e-05, "model_forward_time": 0.025098562240600586, "step": 18888 }, { "epoch": 2.882080078125e-05, "step": 18888, "training_step_time": 0.10895490646362305 }, { "epoch": 2.882232666015625e-05, "model_forward_time": 0.02541327476501465, "step": 18889 }, { "epoch": 2.882232666015625e-05, "step": 18889, "training_step_time": 0.1052088737487793 }, { "epoch": 2.88238525390625e-05, "grad_norm": 0.3890341520309448, "learning_rate": 3.3037177928980735e-05, "loss": 0.009, "step": 18890 }, { "epoch": 2.88238525390625e-05, "model_forward_time": 0.025043010711669922, "step": 18890 }, { "epoch": 2.88238525390625e-05, "step": 18890, "training_step_time": 0.10892295837402344 }, { "epoch": 2.882537841796875e-05, "model_forward_time": 0.024786949157714844, "step": 18891 }, { "epoch": 2.882537841796875e-05, "step": 18891, "training_step_time": 0.10414409637451172 }, { "epoch": 2.8826904296875e-05, "model_forward_time": 0.024820566177368164, "step": 18892 }, { "epoch": 2.8826904296875e-05, "step": 18892, "training_step_time": 0.11092233657836914 }, { "epoch": 2.882843017578125e-05, "model_forward_time": 0.025178194046020508, "step": 18893 }, { "epoch": 2.882843017578125e-05, "step": 18893, "training_step_time": 0.1123661994934082 }, { "epoch": 2.88299560546875e-05, "model_forward_time": 0.02521991729736328, "step": 18894 }, { "epoch": 2.88299560546875e-05, "step": 18894, "training_step_time": 0.13781023025512695 }, { "epoch": 2.883148193359375e-05, "model_forward_time": 0.024681806564331055, "step": 18895 }, { "epoch": 2.883148193359375e-05, "step": 18895, "training_step_time": 0.15827655792236328 }, { "epoch": 2.88330078125e-05, "model_forward_time": 0.024061203002929688, "step": 18896 }, { "epoch": 2.88330078125e-05, "step": 18896, "training_step_time": 0.14672541618347168 }, { "epoch": 2.883453369140625e-05, "model_forward_time": 0.024347782135009766, "step": 18897 }, { "epoch": 2.883453369140625e-05, "step": 18897, "training_step_time": 0.12808585166931152 }, { "epoch": 2.88360595703125e-05, "model_forward_time": 0.024402379989624023, "step": 18898 }, { "epoch": 2.88360595703125e-05, "step": 18898, "training_step_time": 0.12591242790222168 }, { "epoch": 2.883758544921875e-05, "model_forward_time": 0.024922609329223633, "step": 18899 }, { "epoch": 2.883758544921875e-05, "step": 18899, "training_step_time": 0.11777973175048828 }, { "epoch": 2.8839111328125e-05, "grad_norm": 0.12064553052186966, "learning_rate": 3.298534127791785e-05, "loss": 0.0209, "step": 18900 }, { "epoch": 2.8839111328125e-05, "model_forward_time": 0.025566577911376953, "step": 18900 }, { "epoch": 2.8839111328125e-05, "step": 18900, "training_step_time": 0.22234463691711426 }, { "epoch": 2.884063720703125e-05, "model_forward_time": 0.024265766143798828, "step": 18901 }, { "epoch": 2.884063720703125e-05, "step": 18901, "training_step_time": 0.11425304412841797 }, { "epoch": 2.88421630859375e-05, "model_forward_time": 0.024271249771118164, "step": 18902 }, { "epoch": 2.88421630859375e-05, "step": 18902, "training_step_time": 0.13135027885437012 }, { "epoch": 2.884368896484375e-05, "model_forward_time": 0.024553298950195312, "step": 18903 }, { "epoch": 2.884368896484375e-05, "step": 18903, "training_step_time": 0.10552382469177246 }, { "epoch": 2.884521484375e-05, "model_forward_time": 0.0251922607421875, "step": 18904 }, { "epoch": 2.884521484375e-05, "step": 18904, "training_step_time": 0.15097498893737793 }, { "epoch": 2.884674072265625e-05, "model_forward_time": 0.024839162826538086, "step": 18905 }, { "epoch": 2.884674072265625e-05, "step": 18905, "training_step_time": 0.12385249137878418 }, { "epoch": 2.88482666015625e-05, "model_forward_time": 0.02461862564086914, "step": 18906 }, { "epoch": 2.88482666015625e-05, "step": 18906, "training_step_time": 0.1228024959564209 }, { "epoch": 2.884979248046875e-05, "model_forward_time": 0.025215625762939453, "step": 18907 }, { "epoch": 2.884979248046875e-05, "step": 18907, "training_step_time": 0.11106204986572266 }, { "epoch": 2.8851318359375e-05, "model_forward_time": 0.025383949279785156, "step": 18908 }, { "epoch": 2.8851318359375e-05, "step": 18908, "training_step_time": 0.10761308670043945 }, { "epoch": 2.885284423828125e-05, "model_forward_time": 0.025107622146606445, "step": 18909 }, { "epoch": 2.885284423828125e-05, "step": 18909, "training_step_time": 0.10526371002197266 }, { "epoch": 2.88543701171875e-05, "grad_norm": 0.14067591726779938, "learning_rate": 3.2933525301269684e-05, "loss": 0.0062, "step": 18910 }, { "epoch": 2.88543701171875e-05, "model_forward_time": 0.02489185333251953, "step": 18910 }, { "epoch": 2.88543701171875e-05, "step": 18910, "training_step_time": 0.11405158042907715 }, { "epoch": 2.885589599609375e-05, "model_forward_time": 0.02472662925720215, "step": 18911 }, { "epoch": 2.885589599609375e-05, "step": 18911, "training_step_time": 0.11493587493896484 }, { "epoch": 2.8857421875e-05, "model_forward_time": 0.025727272033691406, "step": 18912 }, { "epoch": 2.8857421875e-05, "step": 18912, "training_step_time": 0.10759878158569336 }, { "epoch": 2.885894775390625e-05, "model_forward_time": 0.02562856674194336, "step": 18913 }, { "epoch": 2.885894775390625e-05, "step": 18913, "training_step_time": 0.12086153030395508 }, { "epoch": 2.88604736328125e-05, "model_forward_time": 0.025790929794311523, "step": 18914 }, { "epoch": 2.88604736328125e-05, "step": 18914, "training_step_time": 0.11515378952026367 }, { "epoch": 2.886199951171875e-05, "model_forward_time": 0.02579212188720703, "step": 18915 }, { "epoch": 2.886199951171875e-05, "step": 18915, "training_step_time": 0.11622762680053711 }, { "epoch": 2.8863525390625e-05, "model_forward_time": 0.02542877197265625, "step": 18916 }, { "epoch": 2.8863525390625e-05, "step": 18916, "training_step_time": 0.11065387725830078 }, { "epoch": 2.886505126953125e-05, "model_forward_time": 0.025629520416259766, "step": 18917 }, { "epoch": 2.886505126953125e-05, "step": 18917, "training_step_time": 0.10785579681396484 }, { "epoch": 2.88665771484375e-05, "model_forward_time": 0.025242090225219727, "step": 18918 }, { "epoch": 2.88665771484375e-05, "step": 18918, "training_step_time": 0.10523772239685059 }, { "epoch": 2.886810302734375e-05, "model_forward_time": 0.02515697479248047, "step": 18919 }, { "epoch": 2.886810302734375e-05, "step": 18919, "training_step_time": 0.10547852516174316 }, { "epoch": 2.886962890625e-05, "grad_norm": 0.22273331880569458, "learning_rate": 3.288173006199755e-05, "loss": 0.0078, "step": 18920 }, { "epoch": 2.886962890625e-05, "model_forward_time": 0.025501251220703125, "step": 18920 }, { "epoch": 2.886962890625e-05, "step": 18920, "training_step_time": 0.1068274974822998 }, { "epoch": 2.887115478515625e-05, "model_forward_time": 0.024096965789794922, "step": 18921 }, { "epoch": 2.887115478515625e-05, "step": 18921, "training_step_time": 0.18000006675720215 }, { "epoch": 2.88726806640625e-05, "model_forward_time": 0.02391815185546875, "step": 18922 }, { "epoch": 2.88726806640625e-05, "step": 18922, "training_step_time": 0.1116325855255127 }, { "epoch": 2.887420654296875e-05, "model_forward_time": 0.023923873901367188, "step": 18923 }, { "epoch": 2.887420654296875e-05, "step": 18923, "training_step_time": 0.11114668846130371 }, { "epoch": 2.8875732421875e-05, "model_forward_time": 0.0239408016204834, "step": 18924 }, { "epoch": 2.8875732421875e-05, "step": 18924, "training_step_time": 0.12334465980529785 }, { "epoch": 2.887725830078125e-05, "model_forward_time": 0.0251007080078125, "step": 18925 }, { "epoch": 2.887725830078125e-05, "step": 18925, "training_step_time": 0.11938762664794922 }, { "epoch": 2.88787841796875e-05, "model_forward_time": 0.02538275718688965, "step": 18926 }, { "epoch": 2.88787841796875e-05, "step": 18926, "training_step_time": 0.12231326103210449 }, { "epoch": 2.888031005859375e-05, "model_forward_time": 0.02518010139465332, "step": 18927 }, { "epoch": 2.888031005859375e-05, "step": 18927, "training_step_time": 0.18668341636657715 }, { "epoch": 2.88818359375e-05, "model_forward_time": 0.025701045989990234, "step": 18928 }, { "epoch": 2.88818359375e-05, "step": 18928, "training_step_time": 0.13675308227539062 }, { "epoch": 2.888336181640625e-05, "model_forward_time": 0.024899721145629883, "step": 18929 }, { "epoch": 2.888336181640625e-05, "step": 18929, "training_step_time": 0.10778069496154785 }, { "epoch": 2.88848876953125e-05, "grad_norm": 0.2892070412635803, "learning_rate": 3.282995562303754e-05, "loss": 0.0125, "step": 18930 }, { "epoch": 2.88848876953125e-05, "model_forward_time": 0.025589704513549805, "step": 18930 }, { "epoch": 2.88848876953125e-05, "step": 18930, "training_step_time": 0.1076207160949707 }, { "epoch": 2.888641357421875e-05, "model_forward_time": 0.025669336318969727, "step": 18931 }, { "epoch": 2.888641357421875e-05, "step": 18931, "training_step_time": 0.11063218116760254 }, { "epoch": 2.8887939453125e-05, "model_forward_time": 0.02666759490966797, "step": 18932 }, { "epoch": 2.8887939453125e-05, "step": 18932, "training_step_time": 0.10848879814147949 }, { "epoch": 2.888946533203125e-05, "model_forward_time": 0.025424480438232422, "step": 18933 }, { "epoch": 2.888946533203125e-05, "step": 18933, "training_step_time": 0.194899320602417 }, { "epoch": 2.88909912109375e-05, "model_forward_time": 0.024498701095581055, "step": 18934 }, { "epoch": 2.88909912109375e-05, "step": 18934, "training_step_time": 0.10633397102355957 }, { "epoch": 2.889251708984375e-05, "model_forward_time": 0.02478957176208496, "step": 18935 }, { "epoch": 2.889251708984375e-05, "step": 18935, "training_step_time": 0.10169696807861328 }, { "epoch": 2.889404296875e-05, "model_forward_time": 0.025592565536499023, "step": 18936 }, { "epoch": 2.889404296875e-05, "step": 18936, "training_step_time": 0.10772538185119629 }, { "epoch": 2.889556884765625e-05, "model_forward_time": 0.025572538375854492, "step": 18937 }, { "epoch": 2.889556884765625e-05, "step": 18937, "training_step_time": 0.10413980484008789 }, { "epoch": 2.88970947265625e-05, "model_forward_time": 0.025280475616455078, "step": 18938 }, { "epoch": 2.88970947265625e-05, "step": 18938, "training_step_time": 0.10274791717529297 }, { "epoch": 2.889862060546875e-05, "model_forward_time": 0.02498912811279297, "step": 18939 }, { "epoch": 2.889862060546875e-05, "step": 18939, "training_step_time": 0.10342025756835938 }, { "epoch": 2.8900146484375e-05, "grad_norm": 0.3651007115840912, "learning_rate": 3.2778202047300444e-05, "loss": 0.0068, "step": 18940 }, { "epoch": 2.8900146484375e-05, "model_forward_time": 0.02526116371154785, "step": 18940 }, { "epoch": 2.8900146484375e-05, "step": 18940, "training_step_time": 0.10467076301574707 }, { "epoch": 2.890167236328125e-05, "model_forward_time": 0.025110960006713867, "step": 18941 }, { "epoch": 2.890167236328125e-05, "step": 18941, "training_step_time": 0.10449719429016113 }, { "epoch": 2.89031982421875e-05, "model_forward_time": 0.025330305099487305, "step": 18942 }, { "epoch": 2.89031982421875e-05, "step": 18942, "training_step_time": 0.10906863212585449 }, { "epoch": 2.890472412109375e-05, "model_forward_time": 0.024963855743408203, "step": 18943 }, { "epoch": 2.890472412109375e-05, "step": 18943, "training_step_time": 0.10756993293762207 }, { "epoch": 2.890625e-05, "model_forward_time": 0.02512216567993164, "step": 18944 }, { "epoch": 2.890625e-05, "step": 18944, "training_step_time": 0.1048429012298584 }, { "epoch": 2.890777587890625e-05, "model_forward_time": 0.025414705276489258, "step": 18945 }, { "epoch": 2.890777587890625e-05, "step": 18945, "training_step_time": 0.10666513442993164 }, { "epoch": 2.89093017578125e-05, "model_forward_time": 0.02587747573852539, "step": 18946 }, { "epoch": 2.89093017578125e-05, "step": 18946, "training_step_time": 0.10871744155883789 }, { "epoch": 2.891082763671875e-05, "model_forward_time": 0.025266170501708984, "step": 18947 }, { "epoch": 2.891082763671875e-05, "step": 18947, "training_step_time": 0.10972309112548828 }, { "epoch": 2.8912353515625e-05, "model_forward_time": 0.026609420776367188, "step": 18948 }, { "epoch": 2.8912353515625e-05, "step": 18948, "training_step_time": 0.12633204460144043 }, { "epoch": 2.891387939453125e-05, "model_forward_time": 0.025199413299560547, "step": 18949 }, { "epoch": 2.891387939453125e-05, "step": 18949, "training_step_time": 0.11192011833190918 }, { "epoch": 2.89154052734375e-05, "grad_norm": 0.21987299621105194, "learning_rate": 3.272646939767179e-05, "loss": 0.0062, "step": 18950 }, { "epoch": 2.89154052734375e-05, "model_forward_time": 0.02509331703186035, "step": 18950 }, { "epoch": 2.89154052734375e-05, "step": 18950, "training_step_time": 0.1367940902709961 }, { "epoch": 2.891693115234375e-05, "model_forward_time": 0.024996042251586914, "step": 18951 }, { "epoch": 2.891693115234375e-05, "step": 18951, "training_step_time": 0.16524410247802734 }, { "epoch": 2.891845703125e-05, "model_forward_time": 0.02559947967529297, "step": 18952 }, { "epoch": 2.891845703125e-05, "step": 18952, "training_step_time": 0.21694207191467285 }, { "epoch": 2.891998291015625e-05, "model_forward_time": 0.02449488639831543, "step": 18953 }, { "epoch": 2.891998291015625e-05, "step": 18953, "training_step_time": 0.1099400520324707 }, { "epoch": 2.89215087890625e-05, "model_forward_time": 0.024363994598388672, "step": 18954 }, { "epoch": 2.89215087890625e-05, "step": 18954, "training_step_time": 0.10453367233276367 }, { "epoch": 2.892303466796875e-05, "model_forward_time": 0.024936914443969727, "step": 18955 }, { "epoch": 2.892303466796875e-05, "step": 18955, "training_step_time": 0.10587525367736816 }, { "epoch": 2.8924560546875e-05, "model_forward_time": 0.024736881256103516, "step": 18956 }, { "epoch": 2.8924560546875e-05, "step": 18956, "training_step_time": 0.10306644439697266 }, { "epoch": 2.892608642578125e-05, "model_forward_time": 0.02451920509338379, "step": 18957 }, { "epoch": 2.892608642578125e-05, "step": 18957, "training_step_time": 0.1050574779510498 }, { "epoch": 2.89276123046875e-05, "model_forward_time": 0.024784326553344727, "step": 18958 }, { "epoch": 2.89276123046875e-05, "step": 18958, "training_step_time": 0.11274957656860352 }, { "epoch": 2.892913818359375e-05, "model_forward_time": 0.02568531036376953, "step": 18959 }, { "epoch": 2.892913818359375e-05, "step": 18959, "training_step_time": 0.1206810474395752 }, { "epoch": 2.89306640625e-05, "grad_norm": 0.28759485483169556, "learning_rate": 3.267475773701161e-05, "loss": 0.011, "step": 18960 }, { "epoch": 2.89306640625e-05, "model_forward_time": 0.025647401809692383, "step": 18960 }, { "epoch": 2.89306640625e-05, "step": 18960, "training_step_time": 0.10648632049560547 }, { "epoch": 2.893218994140625e-05, "model_forward_time": 0.025524139404296875, "step": 18961 }, { "epoch": 2.893218994140625e-05, "step": 18961, "training_step_time": 0.23932456970214844 }, { "epoch": 2.89337158203125e-05, "model_forward_time": 0.024968385696411133, "step": 18962 }, { "epoch": 2.89337158203125e-05, "step": 18962, "training_step_time": 0.2108609676361084 }, { "epoch": 2.893524169921875e-05, "model_forward_time": 0.024247169494628906, "step": 18963 }, { "epoch": 2.893524169921875e-05, "step": 18963, "training_step_time": 0.21035385131835938 }, { "epoch": 2.8936767578125e-05, "model_forward_time": 0.024971485137939453, "step": 18964 }, { "epoch": 2.8936767578125e-05, "step": 18964, "training_step_time": 0.2032630443572998 }, { "epoch": 2.893829345703125e-05, "model_forward_time": 0.028270721435546875, "step": 18965 }, { "epoch": 2.893829345703125e-05, "step": 18965, "training_step_time": 0.20017123222351074 }, { "epoch": 2.89398193359375e-05, "model_forward_time": 0.024212360382080078, "step": 18966 }, { "epoch": 2.89398193359375e-05, "step": 18966, "training_step_time": 0.19023537635803223 }, { "epoch": 2.894134521484375e-05, "model_forward_time": 0.024563312530517578, "step": 18967 }, { "epoch": 2.894134521484375e-05, "step": 18967, "training_step_time": 0.21092939376831055 }, { "epoch": 2.894287109375e-05, "model_forward_time": 0.02434849739074707, "step": 18968 }, { "epoch": 2.894287109375e-05, "step": 18968, "training_step_time": 0.17253661155700684 }, { "epoch": 2.894439697265625e-05, "model_forward_time": 0.024616241455078125, "step": 18969 }, { "epoch": 2.894439697265625e-05, "step": 18969, "training_step_time": 0.1607038974761963 }, { "epoch": 2.89459228515625e-05, "grad_norm": 0.33754733204841614, "learning_rate": 3.262306712815444e-05, "loss": 0.0106, "step": 18970 }, { "epoch": 2.89459228515625e-05, "model_forward_time": 0.0240936279296875, "step": 18970 }, { "epoch": 2.89459228515625e-05, "step": 18970, "training_step_time": 0.11397457122802734 }, { "epoch": 2.894744873046875e-05, "model_forward_time": 0.025021076202392578, "step": 18971 }, { "epoch": 2.894744873046875e-05, "step": 18971, "training_step_time": 0.10851359367370605 }, { "epoch": 2.8948974609375e-05, "model_forward_time": 0.025980472564697266, "step": 18972 }, { "epoch": 2.8948974609375e-05, "step": 18972, "training_step_time": 0.11702823638916016 }, { "epoch": 2.895050048828125e-05, "model_forward_time": 0.025939464569091797, "step": 18973 }, { "epoch": 2.895050048828125e-05, "step": 18973, "training_step_time": 0.10886120796203613 }, { "epoch": 2.89520263671875e-05, "model_forward_time": 0.025954484939575195, "step": 18974 }, { "epoch": 2.89520263671875e-05, "step": 18974, "training_step_time": 0.10896635055541992 }, { "epoch": 2.895355224609375e-05, "model_forward_time": 0.025478601455688477, "step": 18975 }, { "epoch": 2.895355224609375e-05, "step": 18975, "training_step_time": 0.19779515266418457 }, { "epoch": 2.8955078125e-05, "model_forward_time": 0.02449941635131836, "step": 18976 }, { "epoch": 2.8955078125e-05, "step": 18976, "training_step_time": 0.10136103630065918 }, { "epoch": 2.895660400390625e-05, "model_forward_time": 0.024809598922729492, "step": 18977 }, { "epoch": 2.895660400390625e-05, "step": 18977, "training_step_time": 0.10315537452697754 }, { "epoch": 2.89581298828125e-05, "model_forward_time": 0.027747631072998047, "step": 18978 }, { "epoch": 2.89581298828125e-05, "step": 18978, "training_step_time": 0.10747432708740234 }, { "epoch": 2.895965576171875e-05, "model_forward_time": 0.025395631790161133, "step": 18979 }, { "epoch": 2.895965576171875e-05, "step": 18979, "training_step_time": 0.10577106475830078 }, { "epoch": 2.8961181640625e-05, "grad_norm": 0.3815549612045288, "learning_rate": 3.257139763390925e-05, "loss": 0.0107, "step": 18980 }, { "epoch": 2.8961181640625e-05, "model_forward_time": 0.02577948570251465, "step": 18980 }, { "epoch": 2.8961181640625e-05, "step": 18980, "training_step_time": 0.10616731643676758 }, { "epoch": 2.896270751953125e-05, "model_forward_time": 0.025363683700561523, "step": 18981 }, { "epoch": 2.896270751953125e-05, "step": 18981, "training_step_time": 0.1056203842163086 }, { "epoch": 2.89642333984375e-05, "model_forward_time": 0.025207042694091797, "step": 18982 }, { "epoch": 2.89642333984375e-05, "step": 18982, "training_step_time": 0.10625195503234863 }, { "epoch": 2.896575927734375e-05, "model_forward_time": 0.025478601455688477, "step": 18983 }, { "epoch": 2.896575927734375e-05, "step": 18983, "training_step_time": 0.10497713088989258 }, { "epoch": 2.896728515625e-05, "model_forward_time": 0.024956226348876953, "step": 18984 }, { "epoch": 2.896728515625e-05, "step": 18984, "training_step_time": 0.1054391860961914 }, { "epoch": 2.896881103515625e-05, "model_forward_time": 0.02547168731689453, "step": 18985 }, { "epoch": 2.896881103515625e-05, "step": 18985, "training_step_time": 0.10461187362670898 }, { "epoch": 2.89703369140625e-05, "model_forward_time": 0.025010347366333008, "step": 18986 }, { "epoch": 2.89703369140625e-05, "step": 18986, "training_step_time": 0.10944485664367676 }, { "epoch": 2.897186279296875e-05, "model_forward_time": 0.028072357177734375, "step": 18987 }, { "epoch": 2.897186279296875e-05, "step": 18987, "training_step_time": 0.11066174507141113 }, { "epoch": 2.8973388671875e-05, "model_forward_time": 0.0254364013671875, "step": 18988 }, { "epoch": 2.8973388671875e-05, "step": 18988, "training_step_time": 0.16875052452087402 }, { "epoch": 2.897491455078125e-05, "model_forward_time": 0.024350643157958984, "step": 18989 }, { "epoch": 2.897491455078125e-05, "step": 18989, "training_step_time": 0.1988825798034668 }, { "epoch": 2.89764404296875e-05, "grad_norm": 0.26489847898483276, "learning_rate": 3.251974931705933e-05, "loss": 0.0064, "step": 18990 }, { "epoch": 2.89764404296875e-05, "model_forward_time": 0.024118423461914062, "step": 18990 }, { "epoch": 2.89764404296875e-05, "step": 18990, "training_step_time": 0.2319011688232422 }, { "epoch": 2.897796630859375e-05, "model_forward_time": 0.024244070053100586, "step": 18991 }, { "epoch": 2.897796630859375e-05, "step": 18991, "training_step_time": 0.18625211715698242 }, { "epoch": 2.89794921875e-05, "model_forward_time": 0.030688762664794922, "step": 18992 }, { "epoch": 2.89794921875e-05, "step": 18992, "training_step_time": 0.20748448371887207 }, { "epoch": 2.898101806640625e-05, "model_forward_time": 0.0240786075592041, "step": 18993 }, { "epoch": 2.898101806640625e-05, "step": 18993, "training_step_time": 0.16120147705078125 }, { "epoch": 2.89825439453125e-05, "model_forward_time": 0.023673534393310547, "step": 18994 }, { "epoch": 2.89825439453125e-05, "step": 18994, "training_step_time": 0.13706064224243164 }, { "epoch": 2.898406982421875e-05, "model_forward_time": 0.02485370635986328, "step": 18995 }, { "epoch": 2.898406982421875e-05, "step": 18995, "training_step_time": 0.10296082496643066 }, { "epoch": 2.8985595703125e-05, "model_forward_time": 0.02431321144104004, "step": 18996 }, { "epoch": 2.8985595703125e-05, "step": 18996, "training_step_time": 0.10668230056762695 }, { "epoch": 2.898712158203125e-05, "model_forward_time": 0.02469182014465332, "step": 18997 }, { "epoch": 2.898712158203125e-05, "step": 18997, "training_step_time": 0.11056208610534668 }, { "epoch": 2.89886474609375e-05, "model_forward_time": 0.025389909744262695, "step": 18998 }, { "epoch": 2.89886474609375e-05, "step": 18998, "training_step_time": 0.11526060104370117 }, { "epoch": 2.899017333984375e-05, "model_forward_time": 0.025658607482910156, "step": 18999 }, { "epoch": 2.899017333984375e-05, "step": 18999, "training_step_time": 0.10592842102050781 }, { "epoch": 2.899169921875e-05, "grad_norm": 0.339771568775177, "learning_rate": 3.2468122240362284e-05, "loss": 0.0139, "step": 19000 }, { "epoch": 2.899169921875e-05, "model_forward_time": 0.02521228790283203, "step": 19000 }, { "epoch": 2.899169921875e-05, "step": 19000, "training_step_time": 0.0976862907409668 }, { "epoch": 2.899322509765625e-05, "model_forward_time": 0.023215532302856445, "step": 19001 }, { "epoch": 2.899322509765625e-05, "step": 19001, "training_step_time": 0.09881019592285156 }, { "epoch": 2.89947509765625e-05, "model_forward_time": 0.024942874908447266, "step": 19002 }, { "epoch": 2.89947509765625e-05, "step": 19002, "training_step_time": 0.10318708419799805 }, { "epoch": 2.899627685546875e-05, "model_forward_time": 0.025781631469726562, "step": 19003 }, { "epoch": 2.899627685546875e-05, "step": 19003, "training_step_time": 0.10401296615600586 }, { "epoch": 2.8997802734375e-05, "model_forward_time": 0.025183439254760742, "step": 19004 }, { "epoch": 2.8997802734375e-05, "step": 19004, "training_step_time": 0.11464095115661621 }, { "epoch": 2.899932861328125e-05, "model_forward_time": 0.027678966522216797, "step": 19005 }, { "epoch": 2.899932861328125e-05, "step": 19005, "training_step_time": 0.13311052322387695 }, { "epoch": 2.90008544921875e-05, "model_forward_time": 0.025727033615112305, "step": 19006 }, { "epoch": 2.90008544921875e-05, "step": 19006, "training_step_time": 0.10665631294250488 }, { "epoch": 2.900238037109375e-05, "model_forward_time": 0.025464296340942383, "step": 19007 }, { "epoch": 2.900238037109375e-05, "step": 19007, "training_step_time": 0.11227750778198242 }, { "epoch": 2.900390625e-05, "model_forward_time": 0.025686264038085938, "step": 19008 }, { "epoch": 2.900390625e-05, "step": 19008, "training_step_time": 0.12848639488220215 }, { "epoch": 2.900543212890625e-05, "model_forward_time": 0.025217056274414062, "step": 19009 }, { "epoch": 2.900543212890625e-05, "step": 19009, "training_step_time": 0.19919180870056152 }, { "epoch": 2.90069580078125e-05, "grad_norm": 0.24529385566711426, "learning_rate": 3.241651646654986e-05, "loss": 0.0111, "step": 19010 }, { "epoch": 2.90069580078125e-05, "model_forward_time": 0.024768590927124023, "step": 19010 }, { "epoch": 2.90069580078125e-05, "step": 19010, "training_step_time": 0.10742807388305664 }, { "epoch": 2.900848388671875e-05, "model_forward_time": 0.02458667755126953, "step": 19011 }, { "epoch": 2.900848388671875e-05, "step": 19011, "training_step_time": 0.10912013053894043 }, { "epoch": 2.9010009765625e-05, "model_forward_time": 0.025495290756225586, "step": 19012 }, { "epoch": 2.9010009765625e-05, "step": 19012, "training_step_time": 0.11466550827026367 }, { "epoch": 2.901153564453125e-05, "model_forward_time": 0.02555561065673828, "step": 19013 }, { "epoch": 2.901153564453125e-05, "step": 19013, "training_step_time": 0.1107628345489502 }, { "epoch": 2.90130615234375e-05, "model_forward_time": 0.025579452514648438, "step": 19014 }, { "epoch": 2.90130615234375e-05, "step": 19014, "training_step_time": 0.1791071891784668 }, { "epoch": 2.901458740234375e-05, "model_forward_time": 0.02455282211303711, "step": 19015 }, { "epoch": 2.901458740234375e-05, "step": 19015, "training_step_time": 0.1121516227722168 }, { "epoch": 2.901611328125e-05, "model_forward_time": 0.024873733520507812, "step": 19016 }, { "epoch": 2.901611328125e-05, "step": 19016, "training_step_time": 0.1127316951751709 }, { "epoch": 2.901763916015625e-05, "model_forward_time": 0.02541375160217285, "step": 19017 }, { "epoch": 2.901763916015625e-05, "step": 19017, "training_step_time": 0.12691140174865723 }, { "epoch": 2.90191650390625e-05, "model_forward_time": 0.02562570571899414, "step": 19018 }, { "epoch": 2.90191650390625e-05, "step": 19018, "training_step_time": 0.1147010326385498 }, { "epoch": 2.902069091796875e-05, "model_forward_time": 0.025420427322387695, "step": 19019 }, { "epoch": 2.902069091796875e-05, "step": 19019, "training_step_time": 0.12376856803894043 }, { "epoch": 2.9022216796875e-05, "grad_norm": 0.27364978194236755, "learning_rate": 3.236493205832795e-05, "loss": 0.009, "step": 19020 }, { "epoch": 2.9022216796875e-05, "model_forward_time": 0.02543807029724121, "step": 19020 }, { "epoch": 2.9022216796875e-05, "step": 19020, "training_step_time": 0.1524949073791504 }, { "epoch": 2.902374267578125e-05, "model_forward_time": 0.0251312255859375, "step": 19021 }, { "epoch": 2.902374267578125e-05, "step": 19021, "training_step_time": 0.11231803894042969 }, { "epoch": 2.90252685546875e-05, "model_forward_time": 0.025129079818725586, "step": 19022 }, { "epoch": 2.90252685546875e-05, "step": 19022, "training_step_time": 0.11091828346252441 }, { "epoch": 2.902679443359375e-05, "model_forward_time": 0.024714231491088867, "step": 19023 }, { "epoch": 2.902679443359375e-05, "step": 19023, "training_step_time": 0.10529446601867676 }, { "epoch": 2.90283203125e-05, "model_forward_time": 0.025334596633911133, "step": 19024 }, { "epoch": 2.90283203125e-05, "step": 19024, "training_step_time": 0.11040306091308594 }, { "epoch": 2.902984619140625e-05, "model_forward_time": 0.025220155715942383, "step": 19025 }, { "epoch": 2.902984619140625e-05, "step": 19025, "training_step_time": 0.18441152572631836 }, { "epoch": 2.90313720703125e-05, "model_forward_time": 0.024129390716552734, "step": 19026 }, { "epoch": 2.90313720703125e-05, "step": 19026, "training_step_time": 0.16012978553771973 }, { "epoch": 2.903289794921875e-05, "model_forward_time": 0.024023771286010742, "step": 19027 }, { "epoch": 2.903289794921875e-05, "step": 19027, "training_step_time": 0.10694622993469238 }, { "epoch": 2.9034423828125e-05, "model_forward_time": 0.024895191192626953, "step": 19028 }, { "epoch": 2.9034423828125e-05, "step": 19028, "training_step_time": 0.10486650466918945 }, { "epoch": 2.903594970703125e-05, "model_forward_time": 0.025260448455810547, "step": 19029 }, { "epoch": 2.903594970703125e-05, "step": 19029, "training_step_time": 0.10531473159790039 }, { "epoch": 2.90374755859375e-05, "grad_norm": 0.24423016607761383, "learning_rate": 3.231336907837646e-05, "loss": 0.0058, "step": 19030 }, { "epoch": 2.90374755859375e-05, "model_forward_time": 0.02558612823486328, "step": 19030 }, { "epoch": 2.90374755859375e-05, "step": 19030, "training_step_time": 0.10730934143066406 }, { "epoch": 2.903900146484375e-05, "model_forward_time": 0.02528691291809082, "step": 19031 }, { "epoch": 2.903900146484375e-05, "step": 19031, "training_step_time": 0.11119508743286133 }, { "epoch": 2.904052734375e-05, "model_forward_time": 0.025862932205200195, "step": 19032 }, { "epoch": 2.904052734375e-05, "step": 19032, "training_step_time": 0.10508871078491211 }, { "epoch": 2.904205322265625e-05, "model_forward_time": 0.02496814727783203, "step": 19033 }, { "epoch": 2.904205322265625e-05, "step": 19033, "training_step_time": 0.10631990432739258 }, { "epoch": 2.90435791015625e-05, "model_forward_time": 0.025215864181518555, "step": 19034 }, { "epoch": 2.90435791015625e-05, "step": 19034, "training_step_time": 0.10454440116882324 }, { "epoch": 2.904510498046875e-05, "model_forward_time": 0.025423288345336914, "step": 19035 }, { "epoch": 2.904510498046875e-05, "step": 19035, "training_step_time": 0.10506534576416016 }, { "epoch": 2.9046630859375e-05, "model_forward_time": 0.02563023567199707, "step": 19036 }, { "epoch": 2.9046630859375e-05, "step": 19036, "training_step_time": 0.10559296607971191 }, { "epoch": 2.904815673828125e-05, "model_forward_time": 0.025359153747558594, "step": 19037 }, { "epoch": 2.904815673828125e-05, "step": 19037, "training_step_time": 0.10909175872802734 }, { "epoch": 2.90496826171875e-05, "model_forward_time": 0.025300979614257812, "step": 19038 }, { "epoch": 2.90496826171875e-05, "step": 19038, "training_step_time": 0.10549330711364746 }, { "epoch": 2.905120849609375e-05, "model_forward_time": 0.025072574615478516, "step": 19039 }, { "epoch": 2.905120849609375e-05, "step": 19039, "training_step_time": 0.10524821281433105 }, { "epoch": 2.9052734375e-05, "grad_norm": 0.18859590590000153, "learning_rate": 3.226182758934927e-05, "loss": 0.0125, "step": 19040 }, { "epoch": 2.9052734375e-05, "model_forward_time": 0.02525925636291504, "step": 19040 }, { "epoch": 2.9052734375e-05, "step": 19040, "training_step_time": 0.10652732849121094 }, { "epoch": 2.905426025390625e-05, "model_forward_time": 0.025722742080688477, "step": 19041 }, { "epoch": 2.905426025390625e-05, "step": 19041, "training_step_time": 0.19288396835327148 }, { "epoch": 2.90557861328125e-05, "model_forward_time": 0.024660110473632812, "step": 19042 }, { "epoch": 2.90557861328125e-05, "step": 19042, "training_step_time": 0.11337065696716309 }, { "epoch": 2.905731201171875e-05, "model_forward_time": 0.024973630905151367, "step": 19043 }, { "epoch": 2.905731201171875e-05, "step": 19043, "training_step_time": 0.11511659622192383 }, { "epoch": 2.9058837890625e-05, "model_forward_time": 0.025489330291748047, "step": 19044 }, { "epoch": 2.9058837890625e-05, "step": 19044, "training_step_time": 0.12489986419677734 }, { "epoch": 2.906036376953125e-05, "model_forward_time": 0.025351762771606445, "step": 19045 }, { "epoch": 2.906036376953125e-05, "step": 19045, "training_step_time": 0.15376496315002441 }, { "epoch": 2.90618896484375e-05, "model_forward_time": 0.025669097900390625, "step": 19046 }, { "epoch": 2.90618896484375e-05, "step": 19046, "training_step_time": 0.1374986171722412 }, { "epoch": 2.906341552734375e-05, "model_forward_time": 0.024574756622314453, "step": 19047 }, { "epoch": 2.906341552734375e-05, "step": 19047, "training_step_time": 0.12349486351013184 }, { "epoch": 2.906494140625e-05, "model_forward_time": 0.02469158172607422, "step": 19048 }, { "epoch": 2.906494140625e-05, "step": 19048, "training_step_time": 0.11615395545959473 }, { "epoch": 2.906646728515625e-05, "model_forward_time": 0.0249788761138916, "step": 19049 }, { "epoch": 2.906646728515625e-05, "step": 19049, "training_step_time": 0.10844206809997559 }, { "epoch": 2.90679931640625e-05, "grad_norm": 0.14127777516841888, "learning_rate": 3.221030765387417e-05, "loss": 0.012, "step": 19050 }, { "epoch": 2.90679931640625e-05, "model_forward_time": 0.025008678436279297, "step": 19050 }, { "epoch": 2.90679931640625e-05, "step": 19050, "training_step_time": 0.13796615600585938 }, { "epoch": 2.906951904296875e-05, "model_forward_time": 0.024981260299682617, "step": 19051 }, { "epoch": 2.906951904296875e-05, "step": 19051, "training_step_time": 0.10809111595153809 }, { "epoch": 2.9071044921875e-05, "model_forward_time": 0.02520895004272461, "step": 19052 }, { "epoch": 2.9071044921875e-05, "step": 19052, "training_step_time": 0.19357848167419434 }, { "epoch": 2.907257080078125e-05, "model_forward_time": 0.024652957916259766, "step": 19053 }, { "epoch": 2.907257080078125e-05, "step": 19053, "training_step_time": 0.12835478782653809 }, { "epoch": 2.90740966796875e-05, "model_forward_time": 0.02606368064880371, "step": 19054 }, { "epoch": 2.90740966796875e-05, "step": 19054, "training_step_time": 0.16054463386535645 }, { "epoch": 2.907562255859375e-05, "model_forward_time": 0.024468660354614258, "step": 19055 }, { "epoch": 2.907562255859375e-05, "step": 19055, "training_step_time": 0.10909700393676758 }, { "epoch": 2.90771484375e-05, "model_forward_time": 0.0249786376953125, "step": 19056 }, { "epoch": 2.90771484375e-05, "step": 19056, "training_step_time": 0.11012411117553711 }, { "epoch": 2.907867431640625e-05, "model_forward_time": 0.025438785552978516, "step": 19057 }, { "epoch": 2.907867431640625e-05, "step": 19057, "training_step_time": 0.10492491722106934 }, { "epoch": 2.90802001953125e-05, "model_forward_time": 0.025462865829467773, "step": 19058 }, { "epoch": 2.90802001953125e-05, "step": 19058, "training_step_time": 0.10672140121459961 }, { "epoch": 2.908172607421875e-05, "model_forward_time": 0.02576756477355957, "step": 19059 }, { "epoch": 2.908172607421875e-05, "step": 19059, "training_step_time": 0.2056431770324707 }, { "epoch": 2.9083251953125e-05, "grad_norm": 0.243336021900177, "learning_rate": 3.2158809334552745e-05, "loss": 0.0105, "step": 19060 }, { "epoch": 2.9083251953125e-05, "model_forward_time": 0.024601221084594727, "step": 19060 }, { "epoch": 2.9083251953125e-05, "step": 19060, "training_step_time": 0.10589480400085449 }, { "epoch": 2.908477783203125e-05, "model_forward_time": 0.023767471313476562, "step": 19061 }, { "epoch": 2.908477783203125e-05, "step": 19061, "training_step_time": 0.10448718070983887 }, { "epoch": 2.90863037109375e-05, "model_forward_time": 0.025023221969604492, "step": 19062 }, { "epoch": 2.90863037109375e-05, "step": 19062, "training_step_time": 0.12453031539916992 }, { "epoch": 2.908782958984375e-05, "model_forward_time": 0.025569915771484375, "step": 19063 }, { "epoch": 2.908782958984375e-05, "step": 19063, "training_step_time": 0.12300658226013184 }, { "epoch": 2.908935546875e-05, "model_forward_time": 0.025118589401245117, "step": 19064 }, { "epoch": 2.908935546875e-05, "step": 19064, "training_step_time": 0.1176755428314209 }, { "epoch": 2.909088134765625e-05, "model_forward_time": 0.025014162063598633, "step": 19065 }, { "epoch": 2.909088134765625e-05, "step": 19065, "training_step_time": 0.18131256103515625 }, { "epoch": 2.90924072265625e-05, "model_forward_time": 0.02466559410095215, "step": 19066 }, { "epoch": 2.90924072265625e-05, "step": 19066, "training_step_time": 0.10652422904968262 }, { "epoch": 2.909393310546875e-05, "model_forward_time": 0.02463507652282715, "step": 19067 }, { "epoch": 2.909393310546875e-05, "step": 19067, "training_step_time": 0.19243121147155762 }, { "epoch": 2.9095458984375e-05, "model_forward_time": 0.024670124053955078, "step": 19068 }, { "epoch": 2.9095458984375e-05, "step": 19068, "training_step_time": 0.12286734580993652 }, { "epoch": 2.909698486328125e-05, "model_forward_time": 0.023886442184448242, "step": 19069 }, { "epoch": 2.909698486328125e-05, "step": 19069, "training_step_time": 0.10750627517700195 }, { "epoch": 2.90985107421875e-05, "grad_norm": 0.22614695131778717, "learning_rate": 3.210733269396028e-05, "loss": 0.0127, "step": 19070 }, { "epoch": 2.90985107421875e-05, "model_forward_time": 0.025374889373779297, "step": 19070 }, { "epoch": 2.90985107421875e-05, "step": 19070, "training_step_time": 0.11409902572631836 }, { "epoch": 2.910003662109375e-05, "model_forward_time": 0.025227785110473633, "step": 19071 }, { "epoch": 2.910003662109375e-05, "step": 19071, "training_step_time": 0.11868739128112793 }, { "epoch": 2.91015625e-05, "model_forward_time": 0.025865554809570312, "step": 19072 }, { "epoch": 2.91015625e-05, "step": 19072, "training_step_time": 0.10512804985046387 }, { "epoch": 2.910308837890625e-05, "model_forward_time": 0.025388002395629883, "step": 19073 }, { "epoch": 2.910308837890625e-05, "step": 19073, "training_step_time": 0.10745716094970703 }, { "epoch": 2.91046142578125e-05, "model_forward_time": 0.025316476821899414, "step": 19074 }, { "epoch": 2.91046142578125e-05, "step": 19074, "training_step_time": 0.10283613204956055 }, { "epoch": 2.910614013671875e-05, "model_forward_time": 0.02558445930480957, "step": 19075 }, { "epoch": 2.910614013671875e-05, "step": 19075, "training_step_time": 0.10521364212036133 }, { "epoch": 2.9107666015625e-05, "model_forward_time": 0.025439023971557617, "step": 19076 }, { "epoch": 2.9107666015625e-05, "step": 19076, "training_step_time": 0.10368180274963379 }, { "epoch": 2.910919189453125e-05, "model_forward_time": 0.02516627311706543, "step": 19077 }, { "epoch": 2.910919189453125e-05, "step": 19077, "training_step_time": 0.10654211044311523 }, { "epoch": 2.91107177734375e-05, "model_forward_time": 0.025326251983642578, "step": 19078 }, { "epoch": 2.91107177734375e-05, "step": 19078, "training_step_time": 0.16875123977661133 }, { "epoch": 2.911224365234375e-05, "model_forward_time": 0.02497720718383789, "step": 19079 }, { "epoch": 2.911224365234375e-05, "step": 19079, "training_step_time": 0.19500088691711426 }, { "epoch": 2.911376953125e-05, "grad_norm": 0.1602323353290558, "learning_rate": 3.205587779464576e-05, "loss": 0.0065, "step": 19080 }, { "epoch": 2.911376953125e-05, "model_forward_time": 0.024251461029052734, "step": 19080 }, { "epoch": 2.911376953125e-05, "step": 19080, "training_step_time": 0.18586254119873047 }, { "epoch": 2.911529541015625e-05, "model_forward_time": 0.024263620376586914, "step": 19081 }, { "epoch": 2.911529541015625e-05, "step": 19081, "training_step_time": 0.18398070335388184 }, { "epoch": 2.91168212890625e-05, "model_forward_time": 0.02419567108154297, "step": 19082 }, { "epoch": 2.91168212890625e-05, "step": 19082, "training_step_time": 0.16943860054016113 }, { "epoch": 2.911834716796875e-05, "model_forward_time": 0.024126529693603516, "step": 19083 }, { "epoch": 2.911834716796875e-05, "step": 19083, "training_step_time": 0.21080756187438965 }, { "epoch": 2.9119873046875e-05, "model_forward_time": 0.025321483612060547, "step": 19084 }, { "epoch": 2.9119873046875e-05, "step": 19084, "training_step_time": 0.13036131858825684 }, { "epoch": 2.912139892578125e-05, "model_forward_time": 0.024187803268432617, "step": 19085 }, { "epoch": 2.912139892578125e-05, "step": 19085, "training_step_time": 0.12222146987915039 }, { "epoch": 2.91229248046875e-05, "model_forward_time": 0.025311708450317383, "step": 19086 }, { "epoch": 2.91229248046875e-05, "step": 19086, "training_step_time": 0.1370401382446289 }, { "epoch": 2.912445068359375e-05, "model_forward_time": 0.024813175201416016, "step": 19087 }, { "epoch": 2.912445068359375e-05, "step": 19087, "training_step_time": 0.1163029670715332 }, { "epoch": 2.91259765625e-05, "model_forward_time": 0.02492690086364746, "step": 19088 }, { "epoch": 2.91259765625e-05, "step": 19088, "training_step_time": 0.13187146186828613 }, { "epoch": 2.912750244140625e-05, "model_forward_time": 0.02523636817932129, "step": 19089 }, { "epoch": 2.912750244140625e-05, "step": 19089, "training_step_time": 0.11110043525695801 }, { "epoch": 2.91290283203125e-05, "grad_norm": 0.10157324373722076, "learning_rate": 3.2004444699131727e-05, "loss": 0.0142, "step": 19090 }, { "epoch": 2.91290283203125e-05, "model_forward_time": 0.02501225471496582, "step": 19090 }, { "epoch": 2.91290283203125e-05, "step": 19090, "training_step_time": 0.10602593421936035 }, { "epoch": 2.913055419921875e-05, "model_forward_time": 0.02499246597290039, "step": 19091 }, { "epoch": 2.913055419921875e-05, "step": 19091, "training_step_time": 0.10865950584411621 }, { "epoch": 2.9132080078125e-05, "model_forward_time": 0.02554464340209961, "step": 19092 }, { "epoch": 2.9132080078125e-05, "step": 19092, "training_step_time": 0.1098332405090332 }, { "epoch": 2.913360595703125e-05, "model_forward_time": 0.024596214294433594, "step": 19093 }, { "epoch": 2.913360595703125e-05, "step": 19093, "training_step_time": 0.13742828369140625 }, { "epoch": 2.91351318359375e-05, "model_forward_time": 0.02461695671081543, "step": 19094 }, { "epoch": 2.91351318359375e-05, "step": 19094, "training_step_time": 0.16405320167541504 }, { "epoch": 2.913665771484375e-05, "model_forward_time": 0.025019407272338867, "step": 19095 }, { "epoch": 2.913665771484375e-05, "step": 19095, "training_step_time": 0.11177968978881836 }, { "epoch": 2.913818359375e-05, "model_forward_time": 0.024298667907714844, "step": 19096 }, { "epoch": 2.913818359375e-05, "step": 19096, "training_step_time": 0.1304616928100586 }, { "epoch": 2.913970947265625e-05, "model_forward_time": 0.024941444396972656, "step": 19097 }, { "epoch": 2.913970947265625e-05, "step": 19097, "training_step_time": 0.21898126602172852 }, { "epoch": 2.91412353515625e-05, "model_forward_time": 0.024863719940185547, "step": 19098 }, { "epoch": 2.91412353515625e-05, "step": 19098, "training_step_time": 0.11178970336914062 }, { "epoch": 2.914276123046875e-05, "model_forward_time": 0.024909257888793945, "step": 19099 }, { "epoch": 2.914276123046875e-05, "step": 19099, "training_step_time": 0.10520005226135254 }, { "epoch": 2.9144287109375e-05, "grad_norm": 0.14794135093688965, "learning_rate": 3.1953033469914276e-05, "loss": 0.0151, "step": 19100 }, { "epoch": 2.9144287109375e-05, "model_forward_time": 0.025289535522460938, "step": 19100 }, { "epoch": 2.9144287109375e-05, "step": 19100, "training_step_time": 0.10608530044555664 }, { "epoch": 2.914581298828125e-05, "model_forward_time": 0.025264263153076172, "step": 19101 }, { "epoch": 2.914581298828125e-05, "step": 19101, "training_step_time": 0.10851550102233887 }, { "epoch": 2.91473388671875e-05, "model_forward_time": 0.025126218795776367, "step": 19102 }, { "epoch": 2.91473388671875e-05, "step": 19102, "training_step_time": 0.1536405086517334 }, { "epoch": 2.914886474609375e-05, "model_forward_time": 0.024659156799316406, "step": 19103 }, { "epoch": 2.914886474609375e-05, "step": 19103, "training_step_time": 0.10673069953918457 }, { "epoch": 2.9150390625e-05, "model_forward_time": 0.026833534240722656, "step": 19104 }, { "epoch": 2.9150390625e-05, "step": 19104, "training_step_time": 0.10928058624267578 }, { "epoch": 2.915191650390625e-05, "model_forward_time": 0.025177717208862305, "step": 19105 }, { "epoch": 2.915191650390625e-05, "step": 19105, "training_step_time": 0.12807059288024902 }, { "epoch": 2.91534423828125e-05, "model_forward_time": 0.025484323501586914, "step": 19106 }, { "epoch": 2.91534423828125e-05, "step": 19106, "training_step_time": 0.12203383445739746 }, { "epoch": 2.915496826171875e-05, "model_forward_time": 0.02539825439453125, "step": 19107 }, { "epoch": 2.915496826171875e-05, "step": 19107, "training_step_time": 0.12215495109558105 }, { "epoch": 2.9156494140625e-05, "model_forward_time": 0.02493143081665039, "step": 19108 }, { "epoch": 2.9156494140625e-05, "step": 19108, "training_step_time": 0.148115873336792 }, { "epoch": 2.915802001953125e-05, "model_forward_time": 0.02460765838623047, "step": 19109 }, { "epoch": 2.915802001953125e-05, "step": 19109, "training_step_time": 0.14225530624389648 }, { "epoch": 2.91595458984375e-05, "grad_norm": 0.10831483453512192, "learning_rate": 3.190164416946285e-05, "loss": 0.0081, "step": 19110 }, { "epoch": 2.91595458984375e-05, "model_forward_time": 0.02434086799621582, "step": 19110 }, { "epoch": 2.91595458984375e-05, "step": 19110, "training_step_time": 0.10690784454345703 }, { "epoch": 2.916107177734375e-05, "model_forward_time": 0.024959564208984375, "step": 19111 }, { "epoch": 2.916107177734375e-05, "step": 19111, "training_step_time": 0.12288403511047363 }, { "epoch": 2.916259765625e-05, "model_forward_time": 0.025275468826293945, "step": 19112 }, { "epoch": 2.916259765625e-05, "step": 19112, "training_step_time": 0.12377786636352539 }, { "epoch": 2.916412353515625e-05, "model_forward_time": 0.025150299072265625, "step": 19113 }, { "epoch": 2.916412353515625e-05, "step": 19113, "training_step_time": 0.10667181015014648 }, { "epoch": 2.91656494140625e-05, "model_forward_time": 0.02527022361755371, "step": 19114 }, { "epoch": 2.91656494140625e-05, "step": 19114, "training_step_time": 0.19100117683410645 }, { "epoch": 2.916717529296875e-05, "model_forward_time": 0.024626493453979492, "step": 19115 }, { "epoch": 2.916717529296875e-05, "step": 19115, "training_step_time": 0.10141468048095703 }, { "epoch": 2.9168701171875e-05, "model_forward_time": 0.024565458297729492, "step": 19116 }, { "epoch": 2.9168701171875e-05, "step": 19116, "training_step_time": 0.10399723052978516 }, { "epoch": 2.917022705078125e-05, "model_forward_time": 0.02532052993774414, "step": 19117 }, { "epoch": 2.917022705078125e-05, "step": 19117, "training_step_time": 0.10423493385314941 }, { "epoch": 2.91717529296875e-05, "model_forward_time": 0.02520275115966797, "step": 19118 }, { "epoch": 2.91717529296875e-05, "step": 19118, "training_step_time": 0.10827040672302246 }, { "epoch": 2.917327880859375e-05, "model_forward_time": 0.025252819061279297, "step": 19119 }, { "epoch": 2.917327880859375e-05, "step": 19119, "training_step_time": 0.10921502113342285 }, { "epoch": 2.91748046875e-05, "grad_norm": 0.07747375965118408, "learning_rate": 3.1850276860220346e-05, "loss": 0.0057, "step": 19120 }, { "epoch": 2.91748046875e-05, "model_forward_time": 0.024976253509521484, "step": 19120 }, { "epoch": 2.91748046875e-05, "step": 19120, "training_step_time": 0.1065826416015625 }, { "epoch": 2.917633056640625e-05, "model_forward_time": 0.025442123413085938, "step": 19121 }, { "epoch": 2.917633056640625e-05, "step": 19121, "training_step_time": 0.10892963409423828 }, { "epoch": 2.91778564453125e-05, "model_forward_time": 0.02556586265563965, "step": 19122 }, { "epoch": 2.91778564453125e-05, "step": 19122, "training_step_time": 0.10950636863708496 }, { "epoch": 2.917938232421875e-05, "model_forward_time": 0.02536463737487793, "step": 19123 }, { "epoch": 2.917938232421875e-05, "step": 19123, "training_step_time": 0.10784459114074707 }, { "epoch": 2.9180908203125e-05, "model_forward_time": 0.02524089813232422, "step": 19124 }, { "epoch": 2.9180908203125e-05, "step": 19124, "training_step_time": 0.10599493980407715 }, { "epoch": 2.918243408203125e-05, "model_forward_time": 0.025662660598754883, "step": 19125 }, { "epoch": 2.918243408203125e-05, "step": 19125, "training_step_time": 0.1109166145324707 }, { "epoch": 2.91839599609375e-05, "model_forward_time": 0.025733470916748047, "step": 19126 }, { "epoch": 2.91839599609375e-05, "step": 19126, "training_step_time": 0.10693073272705078 }, { "epoch": 2.918548583984375e-05, "model_forward_time": 0.02564382553100586, "step": 19127 }, { "epoch": 2.918548583984375e-05, "step": 19127, "training_step_time": 0.10742783546447754 }, { "epoch": 2.918701171875e-05, "model_forward_time": 0.025267362594604492, "step": 19128 }, { "epoch": 2.918701171875e-05, "step": 19128, "training_step_time": 0.10490202903747559 }, { "epoch": 2.918853759765625e-05, "model_forward_time": 0.02510547637939453, "step": 19129 }, { "epoch": 2.918853759765625e-05, "step": 19129, "training_step_time": 0.16948533058166504 }, { "epoch": 2.91900634765625e-05, "grad_norm": 0.09368924051523209, "learning_rate": 3.1798931604602864e-05, "loss": 0.0173, "step": 19130 }, { "epoch": 2.91900634765625e-05, "model_forward_time": 0.025140762329101562, "step": 19130 }, { "epoch": 2.91900634765625e-05, "step": 19130, "training_step_time": 0.11592221260070801 }, { "epoch": 2.919158935546875e-05, "model_forward_time": 0.024262428283691406, "step": 19131 }, { "epoch": 2.919158935546875e-05, "step": 19131, "training_step_time": 0.10565185546875 }, { "epoch": 2.9193115234375e-05, "model_forward_time": 0.025305986404418945, "step": 19132 }, { "epoch": 2.9193115234375e-05, "step": 19132, "training_step_time": 0.12161064147949219 }, { "epoch": 2.919464111328125e-05, "model_forward_time": 0.025467395782470703, "step": 19133 }, { "epoch": 2.919464111328125e-05, "step": 19133, "training_step_time": 0.10840535163879395 }, { "epoch": 2.91961669921875e-05, "model_forward_time": 0.02503204345703125, "step": 19134 }, { "epoch": 2.91961669921875e-05, "step": 19134, "training_step_time": 0.11540889739990234 }, { "epoch": 2.919769287109375e-05, "model_forward_time": 0.025942564010620117, "step": 19135 }, { "epoch": 2.919769287109375e-05, "step": 19135, "training_step_time": 0.11507081985473633 }, { "epoch": 2.919921875e-05, "model_forward_time": 0.025327205657958984, "step": 19136 }, { "epoch": 2.919921875e-05, "step": 19136, "training_step_time": 0.11298227310180664 }, { "epoch": 2.920074462890625e-05, "model_forward_time": 0.025176286697387695, "step": 19137 }, { "epoch": 2.920074462890625e-05, "step": 19137, "training_step_time": 0.10883688926696777 }, { "epoch": 2.92022705078125e-05, "model_forward_time": 0.025403261184692383, "step": 19138 }, { "epoch": 2.92022705078125e-05, "step": 19138, "training_step_time": 0.10388469696044922 }, { "epoch": 2.920379638671875e-05, "model_forward_time": 0.025003671646118164, "step": 19139 }, { "epoch": 2.920379638671875e-05, "step": 19139, "training_step_time": 0.12038946151733398 }, { "epoch": 2.9205322265625e-05, "grad_norm": 0.12927605211734772, "learning_rate": 3.1747608464999725e-05, "loss": 0.0055, "step": 19140 }, { "epoch": 2.9205322265625e-05, "model_forward_time": 0.02436971664428711, "step": 19140 }, { "epoch": 2.9205322265625e-05, "step": 19140, "training_step_time": 0.11724567413330078 }, { "epoch": 2.920684814453125e-05, "model_forward_time": 0.025753259658813477, "step": 19141 }, { "epoch": 2.920684814453125e-05, "step": 19141, "training_step_time": 0.10664081573486328 }, { "epoch": 2.92083740234375e-05, "model_forward_time": 0.025559425354003906, "step": 19142 }, { "epoch": 2.92083740234375e-05, "step": 19142, "training_step_time": 0.1165318489074707 }, { "epoch": 2.920989990234375e-05, "model_forward_time": 0.0253753662109375, "step": 19143 }, { "epoch": 2.920989990234375e-05, "step": 19143, "training_step_time": 0.11254453659057617 }, { "epoch": 2.921142578125e-05, "model_forward_time": 0.025761127471923828, "step": 19144 }, { "epoch": 2.921142578125e-05, "step": 19144, "training_step_time": 0.12736988067626953 }, { "epoch": 2.921295166015625e-05, "model_forward_time": 0.02578902244567871, "step": 19145 }, { "epoch": 2.921295166015625e-05, "step": 19145, "training_step_time": 0.12256217002868652 }, { "epoch": 2.92144775390625e-05, "model_forward_time": 0.02558135986328125, "step": 19146 }, { "epoch": 2.92144775390625e-05, "step": 19146, "training_step_time": 0.10568642616271973 }, { "epoch": 2.921600341796875e-05, "model_forward_time": 0.025492429733276367, "step": 19147 }, { "epoch": 2.921600341796875e-05, "step": 19147, "training_step_time": 0.10626721382141113 }, { "epoch": 2.9217529296875e-05, "model_forward_time": 0.025780916213989258, "step": 19148 }, { "epoch": 2.9217529296875e-05, "step": 19148, "training_step_time": 0.11117005348205566 }, { "epoch": 2.921905517578125e-05, "model_forward_time": 0.025304317474365234, "step": 19149 }, { "epoch": 2.921905517578125e-05, "step": 19149, "training_step_time": 0.10689902305603027 }, { "epoch": 2.92205810546875e-05, "grad_norm": 0.25562000274658203, "learning_rate": 3.169630750377337e-05, "loss": 0.0036, "step": 19150 }, { "epoch": 2.92205810546875e-05, "model_forward_time": 0.025221824645996094, "step": 19150 }, { "epoch": 2.92205810546875e-05, "step": 19150, "training_step_time": 0.1511378288269043 }, { "epoch": 2.922210693359375e-05, "model_forward_time": 0.025392770767211914, "step": 19151 }, { "epoch": 2.922210693359375e-05, "step": 19151, "training_step_time": 0.10863232612609863 }, { "epoch": 2.92236328125e-05, "model_forward_time": 0.025344371795654297, "step": 19152 }, { "epoch": 2.92236328125e-05, "step": 19152, "training_step_time": 0.1107943058013916 }, { "epoch": 2.922515869140625e-05, "model_forward_time": 0.02565622329711914, "step": 19153 }, { "epoch": 2.922515869140625e-05, "step": 19153, "training_step_time": 0.12649130821228027 }, { "epoch": 2.92266845703125e-05, "model_forward_time": 0.025661945343017578, "step": 19154 }, { "epoch": 2.92266845703125e-05, "step": 19154, "training_step_time": 0.1254258155822754 }, { "epoch": 2.922821044921875e-05, "model_forward_time": 0.0252377986907959, "step": 19155 }, { "epoch": 2.922821044921875e-05, "step": 19155, "training_step_time": 0.11474895477294922 }, { "epoch": 2.9229736328125e-05, "model_forward_time": 0.025528430938720703, "step": 19156 }, { "epoch": 2.9229736328125e-05, "step": 19156, "training_step_time": 0.13181471824645996 }, { "epoch": 2.923126220703125e-05, "model_forward_time": 0.025496959686279297, "step": 19157 }, { "epoch": 2.923126220703125e-05, "step": 19157, "training_step_time": 0.13738679885864258 }, { "epoch": 2.92327880859375e-05, "model_forward_time": 0.024787187576293945, "step": 19158 }, { "epoch": 2.92327880859375e-05, "step": 19158, "training_step_time": 0.11504459381103516 }, { "epoch": 2.923431396484375e-05, "model_forward_time": 0.02484440803527832, "step": 19159 }, { "epoch": 2.923431396484375e-05, "step": 19159, "training_step_time": 0.10437202453613281 }, { "epoch": 2.923583984375e-05, "grad_norm": 0.20046649873256683, "learning_rate": 3.1645028783259345e-05, "loss": 0.0087, "step": 19160 }, { "epoch": 2.923583984375e-05, "model_forward_time": 0.025221824645996094, "step": 19160 }, { "epoch": 2.923583984375e-05, "step": 19160, "training_step_time": 0.1115577220916748 }, { "epoch": 2.923736572265625e-05, "model_forward_time": 0.02541637420654297, "step": 19161 }, { "epoch": 2.923736572265625e-05, "step": 19161, "training_step_time": 0.10995650291442871 }, { "epoch": 2.92388916015625e-05, "model_forward_time": 0.025473356246948242, "step": 19162 }, { "epoch": 2.92388916015625e-05, "step": 19162, "training_step_time": 0.1905820369720459 }, { "epoch": 2.924041748046875e-05, "model_forward_time": 0.024717092514038086, "step": 19163 }, { "epoch": 2.924041748046875e-05, "step": 19163, "training_step_time": 0.10255980491638184 }, { "epoch": 2.9241943359375e-05, "model_forward_time": 0.024859905242919922, "step": 19164 }, { "epoch": 2.9241943359375e-05, "step": 19164, "training_step_time": 0.1027383804321289 }, { "epoch": 2.924346923828125e-05, "model_forward_time": 0.025934934616088867, "step": 19165 }, { "epoch": 2.924346923828125e-05, "step": 19165, "training_step_time": 0.10698938369750977 }, { "epoch": 2.92449951171875e-05, "model_forward_time": 0.0251619815826416, "step": 19166 }, { "epoch": 2.92449951171875e-05, "step": 19166, "training_step_time": 0.10729384422302246 }, { "epoch": 2.924652099609375e-05, "model_forward_time": 0.025444746017456055, "step": 19167 }, { "epoch": 2.924652099609375e-05, "step": 19167, "training_step_time": 0.10352468490600586 }, { "epoch": 2.9248046875e-05, "model_forward_time": 0.025356531143188477, "step": 19168 }, { "epoch": 2.9248046875e-05, "step": 19168, "training_step_time": 0.1057133674621582 }, { "epoch": 2.924957275390625e-05, "model_forward_time": 0.025600671768188477, "step": 19169 }, { "epoch": 2.924957275390625e-05, "step": 19169, "training_step_time": 0.10941004753112793 }, { "epoch": 2.92510986328125e-05, "grad_norm": 0.11145558208227158, "learning_rate": 3.1593772365766105e-05, "loss": 0.0063, "step": 19170 }, { "epoch": 2.92510986328125e-05, "model_forward_time": 0.02516627311706543, "step": 19170 }, { "epoch": 2.92510986328125e-05, "step": 19170, "training_step_time": 0.10455107688903809 }, { "epoch": 2.925262451171875e-05, "model_forward_time": 0.026804685592651367, "step": 19171 }, { "epoch": 2.925262451171875e-05, "step": 19171, "training_step_time": 0.10468578338623047 }, { "epoch": 2.9254150390625e-05, "model_forward_time": 0.025454282760620117, "step": 19172 }, { "epoch": 2.9254150390625e-05, "step": 19172, "training_step_time": 0.10536623001098633 }, { "epoch": 2.925567626953125e-05, "model_forward_time": 0.025073528289794922, "step": 19173 }, { "epoch": 2.925567626953125e-05, "step": 19173, "training_step_time": 0.1084897518157959 }, { "epoch": 2.92572021484375e-05, "model_forward_time": 0.025296926498413086, "step": 19174 }, { "epoch": 2.92572021484375e-05, "step": 19174, "training_step_time": 0.10695695877075195 }, { "epoch": 2.925872802734375e-05, "model_forward_time": 0.02520275115966797, "step": 19175 }, { "epoch": 2.925872802734375e-05, "step": 19175, "training_step_time": 0.10709023475646973 }, { "epoch": 2.926025390625e-05, "model_forward_time": 0.02548527717590332, "step": 19176 }, { "epoch": 2.926025390625e-05, "step": 19176, "training_step_time": 0.10820221900939941 }, { "epoch": 2.926177978515625e-05, "model_forward_time": 0.025419235229492188, "step": 19177 }, { "epoch": 2.926177978515625e-05, "step": 19177, "training_step_time": 0.17939376831054688 }, { "epoch": 2.92633056640625e-05, "model_forward_time": 0.024326801300048828, "step": 19178 }, { "epoch": 2.92633056640625e-05, "step": 19178, "training_step_time": 0.24491429328918457 }, { "epoch": 2.926483154296875e-05, "model_forward_time": 0.024651288986206055, "step": 19179 }, { "epoch": 2.926483154296875e-05, "step": 19179, "training_step_time": 0.21010470390319824 }, { "epoch": 2.9266357421875e-05, "grad_norm": 0.27768412232398987, "learning_rate": 3.1542538313575035e-05, "loss": 0.0071, "step": 19180 }, { "epoch": 2.9266357421875e-05, "model_forward_time": 0.024679899215698242, "step": 19180 }, { "epoch": 2.9266357421875e-05, "step": 19180, "training_step_time": 0.2001194953918457 }, { "epoch": 2.926788330078125e-05, "model_forward_time": 0.024219036102294922, "step": 19181 }, { "epoch": 2.926788330078125e-05, "step": 19181, "training_step_time": 0.22867369651794434 }, { "epoch": 2.92694091796875e-05, "model_forward_time": 0.023774385452270508, "step": 19182 }, { "epoch": 2.92694091796875e-05, "step": 19182, "training_step_time": 0.18787074089050293 }, { "epoch": 2.927093505859375e-05, "model_forward_time": 0.02380228042602539, "step": 19183 }, { "epoch": 2.927093505859375e-05, "step": 19183, "training_step_time": 0.17838454246520996 }, { "epoch": 2.92724609375e-05, "model_forward_time": 0.02523493766784668, "step": 19184 }, { "epoch": 2.92724609375e-05, "step": 19184, "training_step_time": 0.1281139850616455 }, { "epoch": 2.927398681640625e-05, "model_forward_time": 0.024333715438842773, "step": 19185 }, { "epoch": 2.927398681640625e-05, "step": 19185, "training_step_time": 0.1433396339416504 }, { "epoch": 2.92755126953125e-05, "model_forward_time": 0.02483534812927246, "step": 19186 }, { "epoch": 2.92755126953125e-05, "step": 19186, "training_step_time": 0.17440414428710938 }, { "epoch": 2.927703857421875e-05, "model_forward_time": 0.0246124267578125, "step": 19187 }, { "epoch": 2.927703857421875e-05, "step": 19187, "training_step_time": 0.1164860725402832 }, { "epoch": 2.9278564453125e-05, "model_forward_time": 0.024529457092285156, "step": 19188 }, { "epoch": 2.9278564453125e-05, "step": 19188, "training_step_time": 0.13065767288208008 }, { "epoch": 2.928009033203125e-05, "model_forward_time": 0.02524852752685547, "step": 19189 }, { "epoch": 2.928009033203125e-05, "step": 19189, "training_step_time": 0.10392308235168457 }, { "epoch": 2.92816162109375e-05, "grad_norm": 0.4354826509952545, "learning_rate": 3.1491326688940345e-05, "loss": 0.02, "step": 19190 }, { "epoch": 2.92816162109375e-05, "model_forward_time": 0.02585911750793457, "step": 19190 }, { "epoch": 2.92816162109375e-05, "step": 19190, "training_step_time": 0.11215877532958984 }, { "epoch": 2.928314208984375e-05, "model_forward_time": 0.02551579475402832, "step": 19191 }, { "epoch": 2.928314208984375e-05, "step": 19191, "training_step_time": 0.10480785369873047 }, { "epoch": 2.928466796875e-05, "model_forward_time": 0.02552652359008789, "step": 19192 }, { "epoch": 2.928466796875e-05, "step": 19192, "training_step_time": 0.10409688949584961 }, { "epoch": 2.928619384765625e-05, "model_forward_time": 0.025800466537475586, "step": 19193 }, { "epoch": 2.928619384765625e-05, "step": 19193, "training_step_time": 0.17763137817382812 }, { "epoch": 2.92877197265625e-05, "model_forward_time": 0.025051116943359375, "step": 19194 }, { "epoch": 2.92877197265625e-05, "step": 19194, "training_step_time": 0.1086416244506836 }, { "epoch": 2.928924560546875e-05, "model_forward_time": 0.025941133499145508, "step": 19195 }, { "epoch": 2.928924560546875e-05, "step": 19195, "training_step_time": 0.11163568496704102 }, { "epoch": 2.9290771484375e-05, "model_forward_time": 0.025202035903930664, "step": 19196 }, { "epoch": 2.9290771484375e-05, "step": 19196, "training_step_time": 0.13249540328979492 }, { "epoch": 2.929229736328125e-05, "model_forward_time": 0.02553558349609375, "step": 19197 }, { "epoch": 2.929229736328125e-05, "step": 19197, "training_step_time": 0.13285374641418457 }, { "epoch": 2.92938232421875e-05, "model_forward_time": 0.02493453025817871, "step": 19198 }, { "epoch": 2.92938232421875e-05, "step": 19198, "training_step_time": 0.10608410835266113 }, { "epoch": 2.929534912109375e-05, "model_forward_time": 0.02508378028869629, "step": 19199 }, { "epoch": 2.929534912109375e-05, "step": 19199, "training_step_time": 0.10982751846313477 }, { "epoch": 2.9296875e-05, "grad_norm": 0.15480276942253113, "learning_rate": 3.144013755408895e-05, "loss": 0.0088, "step": 19200 }, { "epoch": 2.9296875e-05, "model_forward_time": 0.025064468383789062, "step": 19200 }, { "epoch": 2.9296875e-05, "step": 19200, "training_step_time": 0.13068723678588867 }, { "epoch": 2.929840087890625e-05, "model_forward_time": 0.02538585662841797, "step": 19201 }, { "epoch": 2.929840087890625e-05, "step": 19201, "training_step_time": 0.10827827453613281 }, { "epoch": 2.92999267578125e-05, "model_forward_time": 0.025325298309326172, "step": 19202 }, { "epoch": 2.92999267578125e-05, "step": 19202, "training_step_time": 0.11127042770385742 }, { "epoch": 2.930145263671875e-05, "model_forward_time": 0.0254213809967041, "step": 19203 }, { "epoch": 2.930145263671875e-05, "step": 19203, "training_step_time": 0.12284207344055176 }, { "epoch": 2.9302978515625e-05, "model_forward_time": 0.025451183319091797, "step": 19204 }, { "epoch": 2.9302978515625e-05, "step": 19204, "training_step_time": 0.18102502822875977 }, { "epoch": 2.930450439453125e-05, "model_forward_time": 0.024601459503173828, "step": 19205 }, { "epoch": 2.930450439453125e-05, "step": 19205, "training_step_time": 0.11902356147766113 }, { "epoch": 2.93060302734375e-05, "model_forward_time": 0.024265766143798828, "step": 19206 }, { "epoch": 2.93060302734375e-05, "step": 19206, "training_step_time": 0.1156916618347168 }, { "epoch": 2.930755615234375e-05, "model_forward_time": 0.024874210357666016, "step": 19207 }, { "epoch": 2.930755615234375e-05, "step": 19207, "training_step_time": 0.11552190780639648 }, { "epoch": 2.930908203125e-05, "model_forward_time": 0.025356769561767578, "step": 19208 }, { "epoch": 2.930908203125e-05, "step": 19208, "training_step_time": 0.1118307113647461 }, { "epoch": 2.931060791015625e-05, "model_forward_time": 0.025333166122436523, "step": 19209 }, { "epoch": 2.931060791015625e-05, "step": 19209, "training_step_time": 0.11358356475830078 }, { "epoch": 2.93121337890625e-05, "grad_norm": 0.21166066825389862, "learning_rate": 3.1388970971220546e-05, "loss": 0.0064, "step": 19210 }, { "epoch": 2.93121337890625e-05, "model_forward_time": 0.028171539306640625, "step": 19210 }, { "epoch": 2.93121337890625e-05, "step": 19210, "training_step_time": 0.11153912544250488 }, { "epoch": 2.931365966796875e-05, "model_forward_time": 0.026267528533935547, "step": 19211 }, { "epoch": 2.931365966796875e-05, "step": 19211, "training_step_time": 0.10839009284973145 }, { "epoch": 2.9315185546875e-05, "model_forward_time": 0.025510549545288086, "step": 19212 }, { "epoch": 2.9315185546875e-05, "step": 19212, "training_step_time": 0.10545110702514648 }, { "epoch": 2.931671142578125e-05, "model_forward_time": 0.025348186492919922, "step": 19213 }, { "epoch": 2.931671142578125e-05, "step": 19213, "training_step_time": 0.10542774200439453 }, { "epoch": 2.93182373046875e-05, "model_forward_time": 0.0253450870513916, "step": 19214 }, { "epoch": 2.93182373046875e-05, "step": 19214, "training_step_time": 0.10511970520019531 }, { "epoch": 2.931976318359375e-05, "model_forward_time": 0.025597333908081055, "step": 19215 }, { "epoch": 2.931976318359375e-05, "step": 19215, "training_step_time": 0.10569214820861816 }, { "epoch": 2.93212890625e-05, "model_forward_time": 0.02530694007873535, "step": 19216 }, { "epoch": 2.93212890625e-05, "step": 19216, "training_step_time": 0.10933709144592285 }, { "epoch": 2.932281494140625e-05, "model_forward_time": 0.025393009185791016, "step": 19217 }, { "epoch": 2.932281494140625e-05, "step": 19217, "training_step_time": 0.10900425910949707 }, { "epoch": 2.93243408203125e-05, "model_forward_time": 0.025661468505859375, "step": 19218 }, { "epoch": 2.93243408203125e-05, "step": 19218, "training_step_time": 0.11128401756286621 }, { "epoch": 2.932586669921875e-05, "model_forward_time": 0.025545120239257812, "step": 19219 }, { "epoch": 2.932586669921875e-05, "step": 19219, "training_step_time": 0.10916948318481445 }, { "epoch": 2.9327392578125e-05, "grad_norm": 0.3978164494037628, "learning_rate": 3.133782700250731e-05, "loss": 0.0061, "step": 19220 }, { "epoch": 2.9327392578125e-05, "model_forward_time": 0.025174856185913086, "step": 19220 }, { "epoch": 2.9327392578125e-05, "step": 19220, "training_step_time": 0.10708427429199219 }, { "epoch": 2.932891845703125e-05, "model_forward_time": 0.025544404983520508, "step": 19221 }, { "epoch": 2.932891845703125e-05, "step": 19221, "training_step_time": 0.11556410789489746 }, { "epoch": 2.93304443359375e-05, "model_forward_time": 0.0253140926361084, "step": 19222 }, { "epoch": 2.93304443359375e-05, "step": 19222, "training_step_time": 0.11324119567871094 }, { "epoch": 2.933197021484375e-05, "model_forward_time": 0.0253143310546875, "step": 19223 }, { "epoch": 2.933197021484375e-05, "step": 19223, "training_step_time": 0.1232759952545166 }, { "epoch": 2.933349609375e-05, "model_forward_time": 0.025258779525756836, "step": 19224 }, { "epoch": 2.933349609375e-05, "step": 19224, "training_step_time": 0.11106419563293457 }, { "epoch": 2.933502197265625e-05, "model_forward_time": 0.025220632553100586, "step": 19225 }, { "epoch": 2.933502197265625e-05, "step": 19225, "training_step_time": 0.16109538078308105 }, { "epoch": 2.93365478515625e-05, "model_forward_time": 0.025310039520263672, "step": 19226 }, { "epoch": 2.93365478515625e-05, "step": 19226, "training_step_time": 0.13904047012329102 }, { "epoch": 2.933807373046875e-05, "model_forward_time": 0.024864912033081055, "step": 19227 }, { "epoch": 2.933807373046875e-05, "step": 19227, "training_step_time": 0.1054375171661377 }, { "epoch": 2.9339599609375e-05, "model_forward_time": 0.025435209274291992, "step": 19228 }, { "epoch": 2.9339599609375e-05, "step": 19228, "training_step_time": 0.10371661186218262 }, { "epoch": 2.934112548828125e-05, "model_forward_time": 0.024962663650512695, "step": 19229 }, { "epoch": 2.934112548828125e-05, "step": 19229, "training_step_time": 0.11001133918762207 }, { "epoch": 2.93426513671875e-05, "grad_norm": 0.21541450917720795, "learning_rate": 3.128670571009399e-05, "loss": 0.0107, "step": 19230 }, { "epoch": 2.93426513671875e-05, "model_forward_time": 0.025121688842773438, "step": 19230 }, { "epoch": 2.93426513671875e-05, "step": 19230, "training_step_time": 0.10422134399414062 }, { "epoch": 2.934417724609375e-05, "model_forward_time": 0.02557516098022461, "step": 19231 }, { "epoch": 2.934417724609375e-05, "step": 19231, "training_step_time": 0.10710906982421875 }, { "epoch": 2.9345703125e-05, "model_forward_time": 0.024446725845336914, "step": 19232 }, { "epoch": 2.9345703125e-05, "step": 19232, "training_step_time": 0.14668703079223633 }, { "epoch": 2.934722900390625e-05, "model_forward_time": 0.02451014518737793, "step": 19233 }, { "epoch": 2.934722900390625e-05, "step": 19233, "training_step_time": 0.17050552368164062 }, { "epoch": 2.93487548828125e-05, "model_forward_time": 0.02489471435546875, "step": 19234 }, { "epoch": 2.93487548828125e-05, "step": 19234, "training_step_time": 0.11841702461242676 }, { "epoch": 2.935028076171875e-05, "model_forward_time": 0.025107383728027344, "step": 19235 }, { "epoch": 2.935028076171875e-05, "step": 19235, "training_step_time": 0.13881921768188477 }, { "epoch": 2.9351806640625e-05, "model_forward_time": 0.025017738342285156, "step": 19236 }, { "epoch": 2.9351806640625e-05, "step": 19236, "training_step_time": 0.18053746223449707 }, { "epoch": 2.935333251953125e-05, "model_forward_time": 0.02479696273803711, "step": 19237 }, { "epoch": 2.935333251953125e-05, "step": 19237, "training_step_time": 0.14353537559509277 }, { "epoch": 2.93548583984375e-05, "model_forward_time": 0.023694276809692383, "step": 19238 }, { "epoch": 2.93548583984375e-05, "step": 19238, "training_step_time": 0.12405109405517578 }, { "epoch": 2.935638427734375e-05, "model_forward_time": 0.023906946182250977, "step": 19239 }, { "epoch": 2.935638427734375e-05, "step": 19239, "training_step_time": 0.18067145347595215 }, { "epoch": 2.935791015625e-05, "grad_norm": 0.18409797549247742, "learning_rate": 3.123560715609777e-05, "loss": 0.0088, "step": 19240 }, { "epoch": 2.935791015625e-05, "model_forward_time": 0.024544477462768555, "step": 19240 }, { "epoch": 2.935791015625e-05, "step": 19240, "training_step_time": 0.11346936225891113 }, { "epoch": 2.935943603515625e-05, "model_forward_time": 0.02455306053161621, "step": 19241 }, { "epoch": 2.935943603515625e-05, "step": 19241, "training_step_time": 0.17413115501403809 }, { "epoch": 2.93609619140625e-05, "model_forward_time": 0.02481818199157715, "step": 19242 }, { "epoch": 2.93609619140625e-05, "step": 19242, "training_step_time": 0.17853260040283203 }, { "epoch": 2.936248779296875e-05, "model_forward_time": 0.024631738662719727, "step": 19243 }, { "epoch": 2.936248779296875e-05, "step": 19243, "training_step_time": 0.17427682876586914 }, { "epoch": 2.9364013671875e-05, "model_forward_time": 0.024524211883544922, "step": 19244 }, { "epoch": 2.9364013671875e-05, "step": 19244, "training_step_time": 0.15829873085021973 }, { "epoch": 2.936553955078125e-05, "model_forward_time": 0.024396181106567383, "step": 19245 }, { "epoch": 2.936553955078125e-05, "step": 19245, "training_step_time": 0.18975448608398438 }, { "epoch": 2.93670654296875e-05, "model_forward_time": 0.025730133056640625, "step": 19246 }, { "epoch": 2.93670654296875e-05, "step": 19246, "training_step_time": 0.1287071704864502 }, { "epoch": 2.936859130859375e-05, "model_forward_time": 0.024869203567504883, "step": 19247 }, { "epoch": 2.936859130859375e-05, "step": 19247, "training_step_time": 0.10538482666015625 }, { "epoch": 2.93701171875e-05, "model_forward_time": 0.025168657302856445, "step": 19248 }, { "epoch": 2.93701171875e-05, "step": 19248, "training_step_time": 0.19133901596069336 }, { "epoch": 2.937164306640625e-05, "model_forward_time": 0.02466106414794922, "step": 19249 }, { "epoch": 2.937164306640625e-05, "step": 19249, "training_step_time": 0.10312056541442871 }, { "epoch": 2.93731689453125e-05, "grad_norm": 0.24338629841804504, "learning_rate": 3.118453140260823e-05, "loss": 0.008, "step": 19250 }, { "epoch": 2.93731689453125e-05, "model_forward_time": 0.024415016174316406, "step": 19250 }, { "epoch": 2.93731689453125e-05, "step": 19250, "training_step_time": 0.10437583923339844 }, { "epoch": 2.937469482421875e-05, "model_forward_time": 0.02513909339904785, "step": 19251 }, { "epoch": 2.937469482421875e-05, "step": 19251, "training_step_time": 0.10594749450683594 }, { "epoch": 2.9376220703125e-05, "model_forward_time": 0.027559757232666016, "step": 19252 }, { "epoch": 2.9376220703125e-05, "step": 19252, "training_step_time": 0.1090250015258789 }, { "epoch": 2.937774658203125e-05, "model_forward_time": 0.02554798126220703, "step": 19253 }, { "epoch": 2.937774658203125e-05, "step": 19253, "training_step_time": 0.11000180244445801 }, { "epoch": 2.93792724609375e-05, "model_forward_time": 0.024583101272583008, "step": 19254 }, { "epoch": 2.93792724609375e-05, "step": 19254, "training_step_time": 0.10407161712646484 }, { "epoch": 2.938079833984375e-05, "model_forward_time": 0.026499032974243164, "step": 19255 }, { "epoch": 2.938079833984375e-05, "step": 19255, "training_step_time": 0.10717511177062988 }, { "epoch": 2.938232421875e-05, "model_forward_time": 0.02484273910522461, "step": 19256 }, { "epoch": 2.938232421875e-05, "step": 19256, "training_step_time": 0.10487723350524902 }, { "epoch": 2.938385009765625e-05, "model_forward_time": 0.02489328384399414, "step": 19257 }, { "epoch": 2.938385009765625e-05, "step": 19257, "training_step_time": 0.10401272773742676 }, { "epoch": 2.93853759765625e-05, "model_forward_time": 0.02525162696838379, "step": 19258 }, { "epoch": 2.93853759765625e-05, "step": 19258, "training_step_time": 0.10507607460021973 }, { "epoch": 2.938690185546875e-05, "model_forward_time": 0.025394678115844727, "step": 19259 }, { "epoch": 2.938690185546875e-05, "step": 19259, "training_step_time": 0.10581135749816895 }, { "epoch": 2.9388427734375e-05, "grad_norm": 0.21936672925949097, "learning_rate": 3.113347851168721e-05, "loss": 0.0064, "step": 19260 }, { "epoch": 2.9388427734375e-05, "model_forward_time": 0.02534008026123047, "step": 19260 }, { "epoch": 2.9388427734375e-05, "step": 19260, "training_step_time": 0.10618782043457031 }, { "epoch": 2.938995361328125e-05, "model_forward_time": 0.025007009506225586, "step": 19261 }, { "epoch": 2.938995361328125e-05, "step": 19261, "training_step_time": 0.10863447189331055 }, { "epoch": 2.93914794921875e-05, "model_forward_time": 0.025121688842773438, "step": 19262 }, { "epoch": 2.93914794921875e-05, "step": 19262, "training_step_time": 0.10734128952026367 }, { "epoch": 2.939300537109375e-05, "model_forward_time": 0.02500629425048828, "step": 19263 }, { "epoch": 2.939300537109375e-05, "step": 19263, "training_step_time": 0.104827880859375 }, { "epoch": 2.939453125e-05, "model_forward_time": 0.025671005249023438, "step": 19264 }, { "epoch": 2.939453125e-05, "step": 19264, "training_step_time": 0.22210192680358887 }, { "epoch": 2.939605712890625e-05, "model_forward_time": 0.024561643600463867, "step": 19265 }, { "epoch": 2.939605712890625e-05, "step": 19265, "training_step_time": 0.11351919174194336 }, { "epoch": 2.93975830078125e-05, "model_forward_time": 0.024650096893310547, "step": 19266 }, { "epoch": 2.93975830078125e-05, "step": 19266, "training_step_time": 0.19347000122070312 }, { "epoch": 2.939910888671875e-05, "model_forward_time": 0.02416682243347168, "step": 19267 }, { "epoch": 2.939910888671875e-05, "step": 19267, "training_step_time": 0.16080927848815918 }, { "epoch": 2.9400634765625e-05, "model_forward_time": 0.025736570358276367, "step": 19268 }, { "epoch": 2.9400634765625e-05, "step": 19268, "training_step_time": 0.13562917709350586 }, { "epoch": 2.940216064453125e-05, "model_forward_time": 0.024166584014892578, "step": 19269 }, { "epoch": 2.940216064453125e-05, "step": 19269, "training_step_time": 0.10534954071044922 }, { "epoch": 2.94036865234375e-05, "grad_norm": 0.1270855814218521, "learning_rate": 3.1082448545368814e-05, "loss": 0.0053, "step": 19270 }, { "epoch": 2.94036865234375e-05, "model_forward_time": 0.025366783142089844, "step": 19270 }, { "epoch": 2.94036865234375e-05, "step": 19270, "training_step_time": 0.10294461250305176 }, { "epoch": 2.940521240234375e-05, "model_forward_time": 0.025060415267944336, "step": 19271 }, { "epoch": 2.940521240234375e-05, "step": 19271, "training_step_time": 0.10331988334655762 }, { "epoch": 2.940673828125e-05, "model_forward_time": 0.025148391723632812, "step": 19272 }, { "epoch": 2.940673828125e-05, "step": 19272, "training_step_time": 0.10428619384765625 }, { "epoch": 2.940826416015625e-05, "model_forward_time": 0.025613069534301758, "step": 19273 }, { "epoch": 2.940826416015625e-05, "step": 19273, "training_step_time": 0.10636520385742188 }, { "epoch": 2.94097900390625e-05, "model_forward_time": 0.025888919830322266, "step": 19274 }, { "epoch": 2.94097900390625e-05, "step": 19274, "training_step_time": 0.10785150527954102 }, { "epoch": 2.941131591796875e-05, "model_forward_time": 0.024164199829101562, "step": 19275 }, { "epoch": 2.941131591796875e-05, "step": 19275, "training_step_time": 0.19426488876342773 }, { "epoch": 2.9412841796875e-05, "model_forward_time": 0.025041818618774414, "step": 19276 }, { "epoch": 2.9412841796875e-05, "step": 19276, "training_step_time": 0.21092438697814941 }, { "epoch": 2.941436767578125e-05, "model_forward_time": 0.02442622184753418, "step": 19277 }, { "epoch": 2.941436767578125e-05, "step": 19277, "training_step_time": 0.22937846183776855 }, { "epoch": 2.94158935546875e-05, "model_forward_time": 0.024703502655029297, "step": 19278 }, { "epoch": 2.94158935546875e-05, "step": 19278, "training_step_time": 0.21656465530395508 }, { "epoch": 2.941741943359375e-05, "model_forward_time": 0.024412155151367188, "step": 19279 }, { "epoch": 2.941741943359375e-05, "step": 19279, "training_step_time": 0.20065855979919434 }, { "epoch": 2.94189453125e-05, "grad_norm": 0.15332883596420288, "learning_rate": 3.1031441565659235e-05, "loss": 0.0094, "step": 19280 }, { "epoch": 2.94189453125e-05, "model_forward_time": 0.028171300888061523, "step": 19280 }, { "epoch": 2.94189453125e-05, "step": 19280, "training_step_time": 0.22831201553344727 }, { "epoch": 2.942047119140625e-05, "model_forward_time": 0.02398991584777832, "step": 19281 }, { "epoch": 2.942047119140625e-05, "step": 19281, "training_step_time": 0.2129652500152588 }, { "epoch": 2.94219970703125e-05, "model_forward_time": 0.02465367317199707, "step": 19282 }, { "epoch": 2.94219970703125e-05, "step": 19282, "training_step_time": 0.1670525074005127 }, { "epoch": 2.942352294921875e-05, "model_forward_time": 0.024639606475830078, "step": 19283 }, { "epoch": 2.942352294921875e-05, "step": 19283, "training_step_time": 0.1670832633972168 }, { "epoch": 2.9425048828125e-05, "model_forward_time": 0.024389266967773438, "step": 19284 }, { "epoch": 2.9425048828125e-05, "step": 19284, "training_step_time": 0.20344996452331543 }, { "epoch": 2.942657470703125e-05, "model_forward_time": 0.024549484252929688, "step": 19285 }, { "epoch": 2.942657470703125e-05, "step": 19285, "training_step_time": 0.10524249076843262 }, { "epoch": 2.94281005859375e-05, "model_forward_time": 0.023940563201904297, "step": 19286 }, { "epoch": 2.94281005859375e-05, "step": 19286, "training_step_time": 0.1128244400024414 }, { "epoch": 2.942962646484375e-05, "model_forward_time": 0.0252225399017334, "step": 19287 }, { "epoch": 2.942962646484375e-05, "step": 19287, "training_step_time": 0.11997103691101074 }, { "epoch": 2.943115234375e-05, "model_forward_time": 0.025533437728881836, "step": 19288 }, { "epoch": 2.943115234375e-05, "step": 19288, "training_step_time": 0.1034090518951416 }, { "epoch": 2.943267822265625e-05, "model_forward_time": 0.025382041931152344, "step": 19289 }, { "epoch": 2.943267822265625e-05, "step": 19289, "training_step_time": 0.19354677200317383 }, { "epoch": 2.94342041015625e-05, "grad_norm": 0.4299980103969574, "learning_rate": 3.098045763453678e-05, "loss": 0.0137, "step": 19290 }, { "epoch": 2.94342041015625e-05, "model_forward_time": 0.024603843688964844, "step": 19290 }, { "epoch": 2.94342041015625e-05, "step": 19290, "training_step_time": 0.10291409492492676 }, { "epoch": 2.943572998046875e-05, "model_forward_time": 0.02489471435546875, "step": 19291 }, { "epoch": 2.943572998046875e-05, "step": 19291, "training_step_time": 0.10221529006958008 }, { "epoch": 2.9437255859375e-05, "model_forward_time": 0.025199174880981445, "step": 19292 }, { "epoch": 2.9437255859375e-05, "step": 19292, "training_step_time": 0.11051440238952637 }, { "epoch": 2.943878173828125e-05, "model_forward_time": 0.0252225399017334, "step": 19293 }, { "epoch": 2.943878173828125e-05, "step": 19293, "training_step_time": 0.10925531387329102 }, { "epoch": 2.94403076171875e-05, "model_forward_time": 0.02550983428955078, "step": 19294 }, { "epoch": 2.94403076171875e-05, "step": 19294, "training_step_time": 0.10435199737548828 }, { "epoch": 2.944183349609375e-05, "model_forward_time": 0.025310993194580078, "step": 19295 }, { "epoch": 2.944183349609375e-05, "step": 19295, "training_step_time": 0.11001157760620117 }, { "epoch": 2.9443359375e-05, "model_forward_time": 0.025522232055664062, "step": 19296 }, { "epoch": 2.9443359375e-05, "step": 19296, "training_step_time": 0.10387015342712402 }, { "epoch": 2.944488525390625e-05, "model_forward_time": 0.0252993106842041, "step": 19297 }, { "epoch": 2.944488525390625e-05, "step": 19297, "training_step_time": 0.10490798950195312 }, { "epoch": 2.94464111328125e-05, "model_forward_time": 0.02535867691040039, "step": 19298 }, { "epoch": 2.94464111328125e-05, "step": 19298, "training_step_time": 0.10591840744018555 }, { "epoch": 2.944793701171875e-05, "model_forward_time": 0.025668621063232422, "step": 19299 }, { "epoch": 2.944793701171875e-05, "step": 19299, "training_step_time": 0.10705065727233887 }, { "epoch": 2.9449462890625e-05, "grad_norm": 0.22110362350940704, "learning_rate": 3.0929496813951694e-05, "loss": 0.0096, "step": 19300 }, { "epoch": 2.9449462890625e-05, "model_forward_time": 0.025661706924438477, "step": 19300 }, { "epoch": 2.9449462890625e-05, "step": 19300, "training_step_time": 0.10493612289428711 }, { "epoch": 2.945098876953125e-05, "model_forward_time": 0.025415658950805664, "step": 19301 }, { "epoch": 2.945098876953125e-05, "step": 19301, "training_step_time": 0.1504533290863037 }, { "epoch": 2.94525146484375e-05, "model_forward_time": 0.025029897689819336, "step": 19302 }, { "epoch": 2.94525146484375e-05, "step": 19302, "training_step_time": 0.18638205528259277 }, { "epoch": 2.945404052734375e-05, "model_forward_time": 0.024095773696899414, "step": 19303 }, { "epoch": 2.945404052734375e-05, "step": 19303, "training_step_time": 0.1727466583251953 }, { "epoch": 2.945556640625e-05, "model_forward_time": 0.024483680725097656, "step": 19304 }, { "epoch": 2.945556640625e-05, "step": 19304, "training_step_time": 0.19438791275024414 }, { "epoch": 2.945709228515625e-05, "model_forward_time": 0.024800539016723633, "step": 19305 }, { "epoch": 2.945709228515625e-05, "step": 19305, "training_step_time": 0.21100258827209473 }, { "epoch": 2.94586181640625e-05, "model_forward_time": 0.024143695831298828, "step": 19306 }, { "epoch": 2.94586181640625e-05, "step": 19306, "training_step_time": 0.13538241386413574 }, { "epoch": 2.946014404296875e-05, "model_forward_time": 0.023680925369262695, "step": 19307 }, { "epoch": 2.946014404296875e-05, "step": 19307, "training_step_time": 0.20956993103027344 }, { "epoch": 2.9461669921875e-05, "model_forward_time": 0.024394989013671875, "step": 19308 }, { "epoch": 2.9461669921875e-05, "step": 19308, "training_step_time": 0.12218403816223145 }, { "epoch": 2.946319580078125e-05, "model_forward_time": 0.024245023727416992, "step": 19309 }, { "epoch": 2.946319580078125e-05, "step": 19309, "training_step_time": 0.11400556564331055 }, { "epoch": 2.94647216796875e-05, "grad_norm": 0.4304065406322479, "learning_rate": 3.0878559165826236e-05, "loss": 0.0074, "step": 19310 }, { "epoch": 2.94647216796875e-05, "model_forward_time": 0.025407075881958008, "step": 19310 }, { "epoch": 2.94647216796875e-05, "step": 19310, "training_step_time": 0.11446046829223633 }, { "epoch": 2.946624755859375e-05, "model_forward_time": 0.024799346923828125, "step": 19311 }, { "epoch": 2.946624755859375e-05, "step": 19311, "training_step_time": 0.11042380332946777 }, { "epoch": 2.94677734375e-05, "model_forward_time": 0.025327444076538086, "step": 19312 }, { "epoch": 2.94677734375e-05, "step": 19312, "training_step_time": 0.10468006134033203 }, { "epoch": 2.946929931640625e-05, "model_forward_time": 0.02501654624938965, "step": 19313 }, { "epoch": 2.946929931640625e-05, "step": 19313, "training_step_time": 0.14600300788879395 }, { "epoch": 2.94708251953125e-05, "model_forward_time": 0.024906635284423828, "step": 19314 }, { "epoch": 2.94708251953125e-05, "step": 19314, "training_step_time": 0.1129465103149414 }, { "epoch": 2.947235107421875e-05, "model_forward_time": 0.024831533432006836, "step": 19315 }, { "epoch": 2.947235107421875e-05, "step": 19315, "training_step_time": 0.11356425285339355 }, { "epoch": 2.9473876953125e-05, "model_forward_time": 0.025443315505981445, "step": 19316 }, { "epoch": 2.9473876953125e-05, "step": 19316, "training_step_time": 0.11955547332763672 }, { "epoch": 2.947540283203125e-05, "model_forward_time": 0.025089263916015625, "step": 19317 }, { "epoch": 2.947540283203125e-05, "step": 19317, "training_step_time": 0.12111520767211914 }, { "epoch": 2.94769287109375e-05, "model_forward_time": 0.02630305290222168, "step": 19318 }, { "epoch": 2.94769287109375e-05, "step": 19318, "training_step_time": 0.12194705009460449 }, { "epoch": 2.947845458984375e-05, "model_forward_time": 0.02557659149169922, "step": 19319 }, { "epoch": 2.947845458984375e-05, "step": 19319, "training_step_time": 0.1193840503692627 }, { "epoch": 2.947998046875e-05, "grad_norm": 0.2681468427181244, "learning_rate": 3.082764475205442e-05, "loss": 0.0152, "step": 19320 }, { "epoch": 2.947998046875e-05, "model_forward_time": 0.02525162696838379, "step": 19320 }, { "epoch": 2.947998046875e-05, "step": 19320, "training_step_time": 0.11241459846496582 }, { "epoch": 2.948150634765625e-05, "model_forward_time": 0.025516271591186523, "step": 19321 }, { "epoch": 2.948150634765625e-05, "step": 19321, "training_step_time": 0.1083076000213623 }, { "epoch": 2.94830322265625e-05, "model_forward_time": 0.025372982025146484, "step": 19322 }, { "epoch": 2.94830322265625e-05, "step": 19322, "training_step_time": 0.16967296600341797 }, { "epoch": 2.948455810546875e-05, "model_forward_time": 0.024569034576416016, "step": 19323 }, { "epoch": 2.948455810546875e-05, "step": 19323, "training_step_time": 0.10860252380371094 }, { "epoch": 2.9486083984375e-05, "model_forward_time": 0.02464604377746582, "step": 19324 }, { "epoch": 2.9486083984375e-05, "step": 19324, "training_step_time": 0.10744929313659668 }, { "epoch": 2.948760986328125e-05, "model_forward_time": 0.02521657943725586, "step": 19325 }, { "epoch": 2.948760986328125e-05, "step": 19325, "training_step_time": 0.13216662406921387 }, { "epoch": 2.94891357421875e-05, "model_forward_time": 0.0252835750579834, "step": 19326 }, { "epoch": 2.94891357421875e-05, "step": 19326, "training_step_time": 0.12733864784240723 }, { "epoch": 2.949066162109375e-05, "model_forward_time": 0.024904966354370117, "step": 19327 }, { "epoch": 2.949066162109375e-05, "step": 19327, "training_step_time": 0.13619756698608398 }, { "epoch": 2.94921875e-05, "model_forward_time": 0.02569127082824707, "step": 19328 }, { "epoch": 2.94921875e-05, "step": 19328, "training_step_time": 0.13528990745544434 }, { "epoch": 2.949371337890625e-05, "model_forward_time": 0.02448296546936035, "step": 19329 }, { "epoch": 2.949371337890625e-05, "step": 19329, "training_step_time": 0.10435247421264648 }, { "epoch": 2.94952392578125e-05, "grad_norm": 0.3525830805301666, "learning_rate": 3.077675363450207e-05, "loss": 0.0211, "step": 19330 }, { "epoch": 2.94952392578125e-05, "model_forward_time": 0.025161266326904297, "step": 19330 }, { "epoch": 2.94952392578125e-05, "step": 19330, "training_step_time": 0.10590624809265137 }, { "epoch": 2.949676513671875e-05, "model_forward_time": 0.025621891021728516, "step": 19331 }, { "epoch": 2.949676513671875e-05, "step": 19331, "training_step_time": 0.11840987205505371 }, { "epoch": 2.9498291015625e-05, "model_forward_time": 0.02527165412902832, "step": 19332 }, { "epoch": 2.9498291015625e-05, "step": 19332, "training_step_time": 0.11003470420837402 }, { "epoch": 2.949981689453125e-05, "model_forward_time": 0.026260852813720703, "step": 19333 }, { "epoch": 2.949981689453125e-05, "step": 19333, "training_step_time": 0.1905043125152588 }, { "epoch": 2.95013427734375e-05, "model_forward_time": 0.02552008628845215, "step": 19334 }, { "epoch": 2.95013427734375e-05, "step": 19334, "training_step_time": 0.10716557502746582 }, { "epoch": 2.950286865234375e-05, "model_forward_time": 0.02435016632080078, "step": 19335 }, { "epoch": 2.950286865234375e-05, "step": 19335, "training_step_time": 0.10715746879577637 }, { "epoch": 2.950439453125e-05, "model_forward_time": 0.025578022003173828, "step": 19336 }, { "epoch": 2.950439453125e-05, "step": 19336, "training_step_time": 0.11125636100769043 }, { "epoch": 2.950592041015625e-05, "model_forward_time": 0.025347232818603516, "step": 19337 }, { "epoch": 2.950592041015625e-05, "step": 19337, "training_step_time": 0.10583090782165527 }, { "epoch": 2.95074462890625e-05, "model_forward_time": 0.025775432586669922, "step": 19338 }, { "epoch": 2.95074462890625e-05, "step": 19338, "training_step_time": 0.11068367958068848 }, { "epoch": 2.950897216796875e-05, "model_forward_time": 0.025562286376953125, "step": 19339 }, { "epoch": 2.950897216796875e-05, "step": 19339, "training_step_time": 0.11296343803405762 }, { "epoch": 2.9510498046875e-05, "grad_norm": 0.36293911933898926, "learning_rate": 3.072588587500669e-05, "loss": 0.026, "step": 19340 }, { "epoch": 2.9510498046875e-05, "model_forward_time": 0.024964094161987305, "step": 19340 }, { "epoch": 2.9510498046875e-05, "step": 19340, "training_step_time": 0.11462855339050293 }, { "epoch": 2.951202392578125e-05, "model_forward_time": 0.02418375015258789, "step": 19341 }, { "epoch": 2.951202392578125e-05, "step": 19341, "training_step_time": 0.11475729942321777 }, { "epoch": 2.95135498046875e-05, "model_forward_time": 0.025511980056762695, "step": 19342 }, { "epoch": 2.95135498046875e-05, "step": 19342, "training_step_time": 0.11611342430114746 }, { "epoch": 2.951507568359375e-05, "model_forward_time": 0.02539515495300293, "step": 19343 }, { "epoch": 2.951507568359375e-05, "step": 19343, "training_step_time": 0.11210036277770996 }, { "epoch": 2.95166015625e-05, "model_forward_time": 0.025484800338745117, "step": 19344 }, { "epoch": 2.95166015625e-05, "step": 19344, "training_step_time": 0.10908985137939453 }, { "epoch": 2.951812744140625e-05, "model_forward_time": 0.025290727615356445, "step": 19345 }, { "epoch": 2.951812744140625e-05, "step": 19345, "training_step_time": 0.10505819320678711 }, { "epoch": 2.95196533203125e-05, "model_forward_time": 0.025363445281982422, "step": 19346 }, { "epoch": 2.95196533203125e-05, "step": 19346, "training_step_time": 0.10651993751525879 }, { "epoch": 2.952117919921875e-05, "model_forward_time": 0.025542259216308594, "step": 19347 }, { "epoch": 2.952117919921875e-05, "step": 19347, "training_step_time": 0.1069936752319336 }, { "epoch": 2.9522705078125e-05, "model_forward_time": 0.02524089813232422, "step": 19348 }, { "epoch": 2.9522705078125e-05, "step": 19348, "training_step_time": 0.10680007934570312 }, { "epoch": 2.952423095703125e-05, "model_forward_time": 0.025068044662475586, "step": 19349 }, { "epoch": 2.952423095703125e-05, "step": 19349, "training_step_time": 0.10931158065795898 }, { "epoch": 2.95257568359375e-05, "grad_norm": 0.17749305069446564, "learning_rate": 3.0675041535377405e-05, "loss": 0.0135, "step": 19350 }, { "epoch": 2.95257568359375e-05, "model_forward_time": 0.02513885498046875, "step": 19350 }, { "epoch": 2.95257568359375e-05, "step": 19350, "training_step_time": 0.10729122161865234 }, { "epoch": 2.952728271484375e-05, "model_forward_time": 0.025162935256958008, "step": 19351 }, { "epoch": 2.952728271484375e-05, "step": 19351, "training_step_time": 0.2151949405670166 }, { "epoch": 2.952880859375e-05, "model_forward_time": 0.025155305862426758, "step": 19352 }, { "epoch": 2.952880859375e-05, "step": 19352, "training_step_time": 0.10356616973876953 }, { "epoch": 2.953033447265625e-05, "model_forward_time": 0.02480030059814453, "step": 19353 }, { "epoch": 2.953033447265625e-05, "step": 19353, "training_step_time": 0.12329268455505371 }, { "epoch": 2.95318603515625e-05, "model_forward_time": 0.025142431259155273, "step": 19354 }, { "epoch": 2.95318603515625e-05, "step": 19354, "training_step_time": 0.19245362281799316 }, { "epoch": 2.953338623046875e-05, "model_forward_time": 0.0240933895111084, "step": 19355 }, { "epoch": 2.953338623046875e-05, "step": 19355, "training_step_time": 0.10188412666320801 }, { "epoch": 2.9534912109375e-05, "model_forward_time": 0.024489164352416992, "step": 19356 }, { "epoch": 2.9534912109375e-05, "step": 19356, "training_step_time": 0.10262680053710938 }, { "epoch": 2.953643798828125e-05, "model_forward_time": 0.024477720260620117, "step": 19357 }, { "epoch": 2.953643798828125e-05, "step": 19357, "training_step_time": 0.10724163055419922 }, { "epoch": 2.95379638671875e-05, "model_forward_time": 0.02522134780883789, "step": 19358 }, { "epoch": 2.95379638671875e-05, "step": 19358, "training_step_time": 0.10643744468688965 }, { "epoch": 2.953948974609375e-05, "model_forward_time": 0.02412104606628418, "step": 19359 }, { "epoch": 2.953948974609375e-05, "step": 19359, "training_step_time": 0.10055661201477051 }, { "epoch": 2.9541015625e-05, "grad_norm": 0.21281743049621582, "learning_rate": 3.062422067739485e-05, "loss": 0.0047, "step": 19360 }, { "epoch": 2.9541015625e-05, "model_forward_time": 0.024124860763549805, "step": 19360 }, { "epoch": 2.9541015625e-05, "step": 19360, "training_step_time": 0.14296603202819824 }, { "epoch": 2.954254150390625e-05, "model_forward_time": 0.02445220947265625, "step": 19361 }, { "epoch": 2.954254150390625e-05, "step": 19361, "training_step_time": 0.10367655754089355 }, { "epoch": 2.95440673828125e-05, "model_forward_time": 0.024925947189331055, "step": 19362 }, { "epoch": 2.95440673828125e-05, "step": 19362, "training_step_time": 0.19614362716674805 }, { "epoch": 2.954559326171875e-05, "model_forward_time": 0.024068117141723633, "step": 19363 }, { "epoch": 2.954559326171875e-05, "step": 19363, "training_step_time": 0.1382288932800293 }, { "epoch": 2.9547119140625e-05, "model_forward_time": 0.024335622787475586, "step": 19364 }, { "epoch": 2.9547119140625e-05, "step": 19364, "training_step_time": 0.11353278160095215 }, { "epoch": 2.954864501953125e-05, "model_forward_time": 0.026988744735717773, "step": 19365 }, { "epoch": 2.954864501953125e-05, "step": 19365, "training_step_time": 0.10896944999694824 }, { "epoch": 2.95501708984375e-05, "model_forward_time": 0.025338411331176758, "step": 19366 }, { "epoch": 2.95501708984375e-05, "step": 19366, "training_step_time": 0.1040339469909668 }, { "epoch": 2.955169677734375e-05, "model_forward_time": 0.025185346603393555, "step": 19367 }, { "epoch": 2.955169677734375e-05, "step": 19367, "training_step_time": 0.1069643497467041 }, { "epoch": 2.955322265625e-05, "model_forward_time": 0.025547266006469727, "step": 19368 }, { "epoch": 2.955322265625e-05, "step": 19368, "training_step_time": 0.10523128509521484 }, { "epoch": 2.955474853515625e-05, "model_forward_time": 0.02496027946472168, "step": 19369 }, { "epoch": 2.955474853515625e-05, "step": 19369, "training_step_time": 0.11645388603210449 }, { "epoch": 2.95562744140625e-05, "grad_norm": 0.25978705286979675, "learning_rate": 3.057342336281122e-05, "loss": 0.0075, "step": 19370 }, { "epoch": 2.95562744140625e-05, "model_forward_time": 0.025130510330200195, "step": 19370 }, { "epoch": 2.95562744140625e-05, "step": 19370, "training_step_time": 0.1526503562927246 }, { "epoch": 2.955780029296875e-05, "model_forward_time": 0.024779319763183594, "step": 19371 }, { "epoch": 2.955780029296875e-05, "step": 19371, "training_step_time": 0.1971452236175537 }, { "epoch": 2.9559326171875e-05, "model_forward_time": 0.025882720947265625, "step": 19372 }, { "epoch": 2.9559326171875e-05, "step": 19372, "training_step_time": 0.14332938194274902 }, { "epoch": 2.956085205078125e-05, "model_forward_time": 0.024105310440063477, "step": 19373 }, { "epoch": 2.956085205078125e-05, "step": 19373, "training_step_time": 0.19579315185546875 }, { "epoch": 2.95623779296875e-05, "model_forward_time": 0.024492263793945312, "step": 19374 }, { "epoch": 2.95623779296875e-05, "step": 19374, "training_step_time": 0.12819886207580566 }, { "epoch": 2.956390380859375e-05, "model_forward_time": 0.02418375015258789, "step": 19375 }, { "epoch": 2.956390380859375e-05, "step": 19375, "training_step_time": 0.16190361976623535 }, { "epoch": 2.95654296875e-05, "model_forward_time": 0.024300575256347656, "step": 19376 }, { "epoch": 2.95654296875e-05, "step": 19376, "training_step_time": 0.1443929672241211 }, { "epoch": 2.956695556640625e-05, "model_forward_time": 0.02418231964111328, "step": 19377 }, { "epoch": 2.956695556640625e-05, "step": 19377, "training_step_time": 0.10849189758300781 }, { "epoch": 2.95684814453125e-05, "model_forward_time": 0.02497243881225586, "step": 19378 }, { "epoch": 2.95684814453125e-05, "step": 19378, "training_step_time": 0.10912656784057617 }, { "epoch": 2.957000732421875e-05, "model_forward_time": 0.028501510620117188, "step": 19379 }, { "epoch": 2.957000732421875e-05, "step": 19379, "training_step_time": 0.1096353530883789 }, { "epoch": 2.9571533203125e-05, "grad_norm": 0.16195520758628845, "learning_rate": 3.052264965335e-05, "loss": 0.0167, "step": 19380 }, { "epoch": 2.9571533203125e-05, "model_forward_time": 0.025025367736816406, "step": 19380 }, { "epoch": 2.9571533203125e-05, "step": 19380, "training_step_time": 0.10773277282714844 }, { "epoch": 2.957305908203125e-05, "model_forward_time": 0.025027751922607422, "step": 19381 }, { "epoch": 2.957305908203125e-05, "step": 19381, "training_step_time": 0.10712695121765137 }, { "epoch": 2.95745849609375e-05, "model_forward_time": 0.025179386138916016, "step": 19382 }, { "epoch": 2.95745849609375e-05, "step": 19382, "training_step_time": 0.10597777366638184 }, { "epoch": 2.957611083984375e-05, "model_forward_time": 0.02498602867126465, "step": 19383 }, { "epoch": 2.957611083984375e-05, "step": 19383, "training_step_time": 0.10631966590881348 }, { "epoch": 2.957763671875e-05, "model_forward_time": 0.026386022567749023, "step": 19384 }, { "epoch": 2.957763671875e-05, "step": 19384, "training_step_time": 0.1076200008392334 }, { "epoch": 2.957916259765625e-05, "model_forward_time": 0.025328397750854492, "step": 19385 }, { "epoch": 2.957916259765625e-05, "step": 19385, "training_step_time": 0.10784387588500977 }, { "epoch": 2.95806884765625e-05, "model_forward_time": 0.025004148483276367, "step": 19386 }, { "epoch": 2.95806884765625e-05, "step": 19386, "training_step_time": 0.10548710823059082 }, { "epoch": 2.958221435546875e-05, "model_forward_time": 0.025218963623046875, "step": 19387 }, { "epoch": 2.958221435546875e-05, "step": 19387, "training_step_time": 0.10984134674072266 }, { "epoch": 2.9583740234375e-05, "model_forward_time": 0.02541637420654297, "step": 19388 }, { "epoch": 2.9583740234375e-05, "step": 19388, "training_step_time": 0.11308121681213379 }, { "epoch": 2.958526611328125e-05, "model_forward_time": 0.02499866485595703, "step": 19389 }, { "epoch": 2.958526611328125e-05, "step": 19389, "training_step_time": 0.10491418838500977 }, { "epoch": 2.95867919921875e-05, "grad_norm": 0.1905003786087036, "learning_rate": 3.0471899610706038e-05, "loss": 0.0099, "step": 19390 }, { "epoch": 2.95867919921875e-05, "model_forward_time": 0.024810314178466797, "step": 19390 }, { "epoch": 2.95867919921875e-05, "step": 19390, "training_step_time": 0.10346698760986328 }, { "epoch": 2.958831787109375e-05, "model_forward_time": 0.024114608764648438, "step": 19391 }, { "epoch": 2.958831787109375e-05, "step": 19391, "training_step_time": 0.10409021377563477 }, { "epoch": 2.958984375e-05, "model_forward_time": 0.024738550186157227, "step": 19392 }, { "epoch": 2.958984375e-05, "step": 19392, "training_step_time": 0.10511088371276855 }, { "epoch": 2.959136962890625e-05, "model_forward_time": 0.02518153190612793, "step": 19393 }, { "epoch": 2.959136962890625e-05, "step": 19393, "training_step_time": 0.10576367378234863 }, { "epoch": 2.95928955078125e-05, "model_forward_time": 0.02478194236755371, "step": 19394 }, { "epoch": 2.95928955078125e-05, "step": 19394, "training_step_time": 0.1092371940612793 }, { "epoch": 2.959442138671875e-05, "model_forward_time": 0.024996280670166016, "step": 19395 }, { "epoch": 2.959442138671875e-05, "step": 19395, "training_step_time": 0.10967040061950684 }, { "epoch": 2.9595947265625e-05, "model_forward_time": 0.02518177032470703, "step": 19396 }, { "epoch": 2.9595947265625e-05, "step": 19396, "training_step_time": 0.11954307556152344 }, { "epoch": 2.959747314453125e-05, "model_forward_time": 0.024852514266967773, "step": 19397 }, { "epoch": 2.959747314453125e-05, "step": 19397, "training_step_time": 0.10904169082641602 }, { "epoch": 2.95989990234375e-05, "model_forward_time": 0.025056838989257812, "step": 19398 }, { "epoch": 2.95989990234375e-05, "step": 19398, "training_step_time": 0.17362737655639648 }, { "epoch": 2.960052490234375e-05, "model_forward_time": 0.024135112762451172, "step": 19399 }, { "epoch": 2.960052490234375e-05, "step": 19399, "training_step_time": 0.13387346267700195 }, { "epoch": 2.960205078125e-05, "grad_norm": 0.25740137696266174, "learning_rate": 3.042117329654544e-05, "loss": 0.0078, "step": 19400 }, { "epoch": 2.960205078125e-05, "model_forward_time": 0.024519920349121094, "step": 19400 }, { "epoch": 2.960205078125e-05, "step": 19400, "training_step_time": 0.11896014213562012 }, { "epoch": 2.960357666015625e-05, "model_forward_time": 0.024959802627563477, "step": 19401 }, { "epoch": 2.960357666015625e-05, "step": 19401, "training_step_time": 0.10890579223632812 }, { "epoch": 2.96051025390625e-05, "model_forward_time": 0.02514481544494629, "step": 19402 }, { "epoch": 2.96051025390625e-05, "step": 19402, "training_step_time": 0.10313129425048828 }, { "epoch": 2.960662841796875e-05, "model_forward_time": 0.025104045867919922, "step": 19403 }, { "epoch": 2.960662841796875e-05, "step": 19403, "training_step_time": 0.10685443878173828 }, { "epoch": 2.9608154296875e-05, "model_forward_time": 0.025518417358398438, "step": 19404 }, { "epoch": 2.9608154296875e-05, "step": 19404, "training_step_time": 0.10692310333251953 }, { "epoch": 2.960968017578125e-05, "model_forward_time": 0.025260448455810547, "step": 19405 }, { "epoch": 2.960968017578125e-05, "step": 19405, "training_step_time": 0.12213921546936035 }, { "epoch": 2.96112060546875e-05, "model_forward_time": 0.025303363800048828, "step": 19406 }, { "epoch": 2.96112060546875e-05, "step": 19406, "training_step_time": 0.13008928298950195 }, { "epoch": 2.961273193359375e-05, "model_forward_time": 0.02525043487548828, "step": 19407 }, { "epoch": 2.961273193359375e-05, "step": 19407, "training_step_time": 0.14973711967468262 }, { "epoch": 2.96142578125e-05, "model_forward_time": 0.024605274200439453, "step": 19408 }, { "epoch": 2.96142578125e-05, "step": 19408, "training_step_time": 0.15762591361999512 }, { "epoch": 2.961578369140625e-05, "model_forward_time": 0.024689197540283203, "step": 19409 }, { "epoch": 2.961578369140625e-05, "step": 19409, "training_step_time": 0.1307682991027832 }, { "epoch": 2.96173095703125e-05, "grad_norm": 0.2595687806606293, "learning_rate": 3.0370470772505433e-05, "loss": 0.0061, "step": 19410 }, { "epoch": 2.96173095703125e-05, "model_forward_time": 0.024315595626831055, "step": 19410 }, { "epoch": 2.96173095703125e-05, "step": 19410, "training_step_time": 0.1808323860168457 }, { "epoch": 2.961883544921875e-05, "model_forward_time": 0.024743318557739258, "step": 19411 }, { "epoch": 2.961883544921875e-05, "step": 19411, "training_step_time": 0.11928200721740723 }, { "epoch": 2.9620361328125e-05, "model_forward_time": 0.023906946182250977, "step": 19412 }, { "epoch": 2.9620361328125e-05, "step": 19412, "training_step_time": 0.1158149242401123 }, { "epoch": 2.962188720703125e-05, "model_forward_time": 0.023882627487182617, "step": 19413 }, { "epoch": 2.962188720703125e-05, "step": 19413, "training_step_time": 0.1131587028503418 }, { "epoch": 2.96234130859375e-05, "model_forward_time": 0.023974895477294922, "step": 19414 }, { "epoch": 2.96234130859375e-05, "step": 19414, "training_step_time": 0.10978293418884277 }, { "epoch": 2.962493896484375e-05, "model_forward_time": 0.024952173233032227, "step": 19415 }, { "epoch": 2.962493896484375e-05, "step": 19415, "training_step_time": 0.12639260292053223 }, { "epoch": 2.962646484375e-05, "model_forward_time": 0.024823904037475586, "step": 19416 }, { "epoch": 2.962646484375e-05, "step": 19416, "training_step_time": 0.11259102821350098 }, { "epoch": 2.962799072265625e-05, "model_forward_time": 0.02462005615234375, "step": 19417 }, { "epoch": 2.962799072265625e-05, "step": 19417, "training_step_time": 0.11641240119934082 }, { "epoch": 2.96295166015625e-05, "model_forward_time": 0.024954795837402344, "step": 19418 }, { "epoch": 2.96295166015625e-05, "step": 19418, "training_step_time": 0.11258506774902344 }, { "epoch": 2.963104248046875e-05, "model_forward_time": 0.025301456451416016, "step": 19419 }, { "epoch": 2.963104248046875e-05, "step": 19419, "training_step_time": 0.16359305381774902 }, { "epoch": 2.9632568359375e-05, "grad_norm": 0.14640112221240997, "learning_rate": 3.03197921001944e-05, "loss": 0.0068, "step": 19420 }, { "epoch": 2.9632568359375e-05, "model_forward_time": 0.024810791015625, "step": 19420 }, { "epoch": 2.9632568359375e-05, "step": 19420, "training_step_time": 0.19085407257080078 }, { "epoch": 2.963409423828125e-05, "model_forward_time": 0.024271726608276367, "step": 19421 }, { "epoch": 2.963409423828125e-05, "step": 19421, "training_step_time": 0.16853785514831543 }, { "epoch": 2.96356201171875e-05, "model_forward_time": 0.024113893508911133, "step": 19422 }, { "epoch": 2.96356201171875e-05, "step": 19422, "training_step_time": 0.11551141738891602 }, { "epoch": 2.963714599609375e-05, "model_forward_time": 0.024299144744873047, "step": 19423 }, { "epoch": 2.963714599609375e-05, "step": 19423, "training_step_time": 0.1065371036529541 }, { "epoch": 2.9638671875e-05, "model_forward_time": 0.025440216064453125, "step": 19424 }, { "epoch": 2.9638671875e-05, "step": 19424, "training_step_time": 0.1945357322692871 }, { "epoch": 2.964019775390625e-05, "model_forward_time": 0.024390220642089844, "step": 19425 }, { "epoch": 2.964019775390625e-05, "step": 19425, "training_step_time": 0.10519695281982422 }, { "epoch": 2.96417236328125e-05, "model_forward_time": 0.024646759033203125, "step": 19426 }, { "epoch": 2.96417236328125e-05, "step": 19426, "training_step_time": 0.10261917114257812 }, { "epoch": 2.964324951171875e-05, "model_forward_time": 0.024927377700805664, "step": 19427 }, { "epoch": 2.964324951171875e-05, "step": 19427, "training_step_time": 0.10620450973510742 }, { "epoch": 2.9644775390625e-05, "model_forward_time": 0.02482771873474121, "step": 19428 }, { "epoch": 2.9644775390625e-05, "step": 19428, "training_step_time": 0.10649251937866211 }, { "epoch": 2.964630126953125e-05, "model_forward_time": 0.025658369064331055, "step": 19429 }, { "epoch": 2.964630126953125e-05, "step": 19429, "training_step_time": 0.10568952560424805 }, { "epoch": 2.96478271484375e-05, "grad_norm": 0.319711297750473, "learning_rate": 3.0269137341191677e-05, "loss": 0.0195, "step": 19430 }, { "epoch": 2.96478271484375e-05, "model_forward_time": 0.024947643280029297, "step": 19430 }, { "epoch": 2.96478271484375e-05, "step": 19430, "training_step_time": 0.11019659042358398 }, { "epoch": 2.964935302734375e-05, "model_forward_time": 0.02520585060119629, "step": 19431 }, { "epoch": 2.964935302734375e-05, "step": 19431, "training_step_time": 0.10590481758117676 }, { "epoch": 2.965087890625e-05, "model_forward_time": 0.024945974349975586, "step": 19432 }, { "epoch": 2.965087890625e-05, "step": 19432, "training_step_time": 0.10547399520874023 }, { "epoch": 2.965240478515625e-05, "model_forward_time": 0.026547908782958984, "step": 19433 }, { "epoch": 2.965240478515625e-05, "step": 19433, "training_step_time": 0.10596537590026855 }, { "epoch": 2.96539306640625e-05, "model_forward_time": 0.025240659713745117, "step": 19434 }, { "epoch": 2.96539306640625e-05, "step": 19434, "training_step_time": 0.10838079452514648 }, { "epoch": 2.965545654296875e-05, "model_forward_time": 0.025402307510375977, "step": 19435 }, { "epoch": 2.965545654296875e-05, "step": 19435, "training_step_time": 0.10556578636169434 }, { "epoch": 2.9656982421875e-05, "model_forward_time": 0.025099515914916992, "step": 19436 }, { "epoch": 2.9656982421875e-05, "step": 19436, "training_step_time": 0.10491061210632324 }, { "epoch": 2.965850830078125e-05, "model_forward_time": 0.02480316162109375, "step": 19437 }, { "epoch": 2.965850830078125e-05, "step": 19437, "training_step_time": 0.1044769287109375 }, { "epoch": 2.96600341796875e-05, "model_forward_time": 0.02489781379699707, "step": 19438 }, { "epoch": 2.96600341796875e-05, "step": 19438, "training_step_time": 0.1067502498626709 }, { "epoch": 2.966156005859375e-05, "model_forward_time": 0.025623083114624023, "step": 19439 }, { "epoch": 2.966156005859375e-05, "step": 19439, "training_step_time": 0.11151862144470215 }, { "epoch": 2.96630859375e-05, "grad_norm": 0.15751205384731293, "learning_rate": 3.0218506557047598e-05, "loss": 0.0068, "step": 19440 }, { "epoch": 2.96630859375e-05, "model_forward_time": 0.025697946548461914, "step": 19440 }, { "epoch": 2.96630859375e-05, "step": 19440, "training_step_time": 0.14061498641967773 }, { "epoch": 2.966461181640625e-05, "model_forward_time": 0.02424907684326172, "step": 19441 }, { "epoch": 2.966461181640625e-05, "step": 19441, "training_step_time": 0.1075296401977539 }, { "epoch": 2.96661376953125e-05, "model_forward_time": 0.02544999122619629, "step": 19442 }, { "epoch": 2.96661376953125e-05, "step": 19442, "training_step_time": 0.10875582695007324 }, { "epoch": 2.966766357421875e-05, "model_forward_time": 0.02531599998474121, "step": 19443 }, { "epoch": 2.966766357421875e-05, "step": 19443, "training_step_time": 0.12152576446533203 }, { "epoch": 2.9669189453125e-05, "model_forward_time": 0.024504423141479492, "step": 19444 }, { "epoch": 2.9669189453125e-05, "step": 19444, "training_step_time": 0.1664745807647705 }, { "epoch": 2.967071533203125e-05, "model_forward_time": 0.02435302734375, "step": 19445 }, { "epoch": 2.967071533203125e-05, "step": 19445, "training_step_time": 0.16179132461547852 }, { "epoch": 2.96722412109375e-05, "model_forward_time": 0.023926973342895508, "step": 19446 }, { "epoch": 2.96722412109375e-05, "step": 19446, "training_step_time": 0.11522841453552246 }, { "epoch": 2.967376708984375e-05, "model_forward_time": 0.024471282958984375, "step": 19447 }, { "epoch": 2.967376708984375e-05, "step": 19447, "training_step_time": 0.1034231185913086 }, { "epoch": 2.967529296875e-05, "model_forward_time": 0.02530074119567871, "step": 19448 }, { "epoch": 2.967529296875e-05, "step": 19448, "training_step_time": 0.10602331161499023 }, { "epoch": 2.967681884765625e-05, "model_forward_time": 0.025386810302734375, "step": 19449 }, { "epoch": 2.967681884765625e-05, "step": 19449, "training_step_time": 0.10744285583496094 }, { "epoch": 2.96783447265625e-05, "grad_norm": 0.1679406464099884, "learning_rate": 3.0167899809283308e-05, "loss": 0.0089, "step": 19450 }, { "epoch": 2.96783447265625e-05, "model_forward_time": 0.02478933334350586, "step": 19450 }, { "epoch": 2.96783447265625e-05, "step": 19450, "training_step_time": 0.10740232467651367 }, { "epoch": 2.967987060546875e-05, "model_forward_time": 0.025142192840576172, "step": 19451 }, { "epoch": 2.967987060546875e-05, "step": 19451, "training_step_time": 0.10234665870666504 }, { "epoch": 2.9681396484375e-05, "model_forward_time": 0.024281978607177734, "step": 19452 }, { "epoch": 2.9681396484375e-05, "step": 19452, "training_step_time": 0.14697265625 }, { "epoch": 2.968292236328125e-05, "model_forward_time": 0.024203777313232422, "step": 19453 }, { "epoch": 2.968292236328125e-05, "step": 19453, "training_step_time": 0.15729093551635742 }, { "epoch": 2.96844482421875e-05, "model_forward_time": 0.024791955947875977, "step": 19454 }, { "epoch": 2.96844482421875e-05, "step": 19454, "training_step_time": 0.11343812942504883 }, { "epoch": 2.968597412109375e-05, "model_forward_time": 0.02443385124206543, "step": 19455 }, { "epoch": 2.968597412109375e-05, "step": 19455, "training_step_time": 0.13426446914672852 }, { "epoch": 2.96875e-05, "model_forward_time": 0.025545835494995117, "step": 19456 }, { "epoch": 2.96875e-05, "step": 19456, "training_step_time": 0.2019481658935547 }, { "epoch": 2.968902587890625e-05, "model_forward_time": 0.024195432662963867, "step": 19457 }, { "epoch": 2.968902587890625e-05, "step": 19457, "training_step_time": 0.11447429656982422 }, { "epoch": 2.96905517578125e-05, "model_forward_time": 0.024137258529663086, "step": 19458 }, { "epoch": 2.96905517578125e-05, "step": 19458, "training_step_time": 0.12416815757751465 }, { "epoch": 2.969207763671875e-05, "model_forward_time": 0.02571415901184082, "step": 19459 }, { "epoch": 2.969207763671875e-05, "step": 19459, "training_step_time": 0.12586021423339844 }, { "epoch": 2.9693603515625e-05, "grad_norm": 0.08849591016769409, "learning_rate": 3.0117317159390794e-05, "loss": 0.0076, "step": 19460 }, { "epoch": 2.9693603515625e-05, "model_forward_time": 0.0251004695892334, "step": 19460 }, { "epoch": 2.9693603515625e-05, "step": 19460, "training_step_time": 0.16143274307250977 }, { "epoch": 2.969512939453125e-05, "model_forward_time": 0.02446913719177246, "step": 19461 }, { "epoch": 2.969512939453125e-05, "step": 19461, "training_step_time": 0.11586260795593262 }, { "epoch": 2.96966552734375e-05, "model_forward_time": 0.02630615234375, "step": 19462 }, { "epoch": 2.96966552734375e-05, "step": 19462, "training_step_time": 0.11466217041015625 }, { "epoch": 2.969818115234375e-05, "model_forward_time": 0.024039745330810547, "step": 19463 }, { "epoch": 2.969818115234375e-05, "step": 19463, "training_step_time": 0.11170721054077148 }, { "epoch": 2.969970703125e-05, "model_forward_time": 0.026463985443115234, "step": 19464 }, { "epoch": 2.969970703125e-05, "step": 19464, "training_step_time": 0.13919734954833984 }, { "epoch": 2.970123291015625e-05, "model_forward_time": 0.026562213897705078, "step": 19465 }, { "epoch": 2.970123291015625e-05, "step": 19465, "training_step_time": 0.13667678833007812 }, { "epoch": 2.97027587890625e-05, "model_forward_time": 0.026032686233520508, "step": 19466 }, { "epoch": 2.97027587890625e-05, "step": 19466, "training_step_time": 0.17267203330993652 }, { "epoch": 2.970428466796875e-05, "model_forward_time": 0.023658275604248047, "step": 19467 }, { "epoch": 2.970428466796875e-05, "step": 19467, "training_step_time": 0.17602777481079102 }, { "epoch": 2.9705810546875e-05, "model_forward_time": 0.023304462432861328, "step": 19468 }, { "epoch": 2.9705810546875e-05, "step": 19468, "training_step_time": 0.10495972633361816 }, { "epoch": 2.970733642578125e-05, "model_forward_time": 0.027095317840576172, "step": 19469 }, { "epoch": 2.970733642578125e-05, "step": 19469, "training_step_time": 0.19354033470153809 }, { "epoch": 2.97088623046875e-05, "grad_norm": 0.1485823690891266, "learning_rate": 3.006675866883275e-05, "loss": 0.0134, "step": 19470 }, { "epoch": 2.97088623046875e-05, "model_forward_time": 0.02401113510131836, "step": 19470 }, { "epoch": 2.97088623046875e-05, "step": 19470, "training_step_time": 0.10397219657897949 }, { "epoch": 2.971038818359375e-05, "model_forward_time": 0.024376869201660156, "step": 19471 }, { "epoch": 2.971038818359375e-05, "step": 19471, "training_step_time": 0.10249710083007812 }, { "epoch": 2.97119140625e-05, "model_forward_time": 0.025582075119018555, "step": 19472 }, { "epoch": 2.97119140625e-05, "step": 19472, "training_step_time": 0.1060645580291748 }, { "epoch": 2.971343994140625e-05, "model_forward_time": 0.02597808837890625, "step": 19473 }, { "epoch": 2.971343994140625e-05, "step": 19473, "training_step_time": 0.10935664176940918 }, { "epoch": 2.97149658203125e-05, "model_forward_time": 0.025186538696289062, "step": 19474 }, { "epoch": 2.97149658203125e-05, "step": 19474, "training_step_time": 0.10999536514282227 }, { "epoch": 2.971649169921875e-05, "model_forward_time": 0.02534031867980957, "step": 19475 }, { "epoch": 2.971649169921875e-05, "step": 19475, "training_step_time": 0.10572290420532227 }, { "epoch": 2.9718017578125e-05, "model_forward_time": 0.025584697723388672, "step": 19476 }, { "epoch": 2.9718017578125e-05, "step": 19476, "training_step_time": 0.10885095596313477 }, { "epoch": 2.971954345703125e-05, "model_forward_time": 0.025394916534423828, "step": 19477 }, { "epoch": 2.971954345703125e-05, "step": 19477, "training_step_time": 0.1063392162322998 }, { "epoch": 2.97210693359375e-05, "model_forward_time": 0.025259733200073242, "step": 19478 }, { "epoch": 2.97210693359375e-05, "step": 19478, "training_step_time": 0.1060783863067627 }, { "epoch": 2.972259521484375e-05, "model_forward_time": 0.02486705780029297, "step": 19479 }, { "epoch": 2.972259521484375e-05, "step": 19479, "training_step_time": 0.10518336296081543 }, { "epoch": 2.972412109375e-05, "grad_norm": 0.2886893153190613, "learning_rate": 3.0016224399042515e-05, "loss": 0.006, "step": 19480 }, { "epoch": 2.972412109375e-05, "model_forward_time": 0.02496337890625, "step": 19480 }, { "epoch": 2.972412109375e-05, "step": 19480, "training_step_time": 0.10764956474304199 }, { "epoch": 2.972564697265625e-05, "model_forward_time": 0.025459766387939453, "step": 19481 }, { "epoch": 2.972564697265625e-05, "step": 19481, "training_step_time": 0.10448813438415527 }, { "epoch": 2.97271728515625e-05, "model_forward_time": 0.02514934539794922, "step": 19482 }, { "epoch": 2.97271728515625e-05, "step": 19482, "training_step_time": 0.10755276679992676 }, { "epoch": 2.972869873046875e-05, "model_forward_time": 0.025012493133544922, "step": 19483 }, { "epoch": 2.972869873046875e-05, "step": 19483, "training_step_time": 0.10634422302246094 }, { "epoch": 2.9730224609375e-05, "model_forward_time": 0.025177717208862305, "step": 19484 }, { "epoch": 2.9730224609375e-05, "step": 19484, "training_step_time": 0.10847735404968262 }, { "epoch": 2.973175048828125e-05, "model_forward_time": 0.02456188201904297, "step": 19485 }, { "epoch": 2.973175048828125e-05, "step": 19485, "training_step_time": 0.10931944847106934 }, { "epoch": 2.97332763671875e-05, "model_forward_time": 0.02558279037475586, "step": 19486 }, { "epoch": 2.97332763671875e-05, "step": 19486, "training_step_time": 0.10979151725769043 }, { "epoch": 2.973480224609375e-05, "model_forward_time": 0.025223493576049805, "step": 19487 }, { "epoch": 2.973480224609375e-05, "step": 19487, "training_step_time": 0.20734429359436035 }, { "epoch": 2.9736328125e-05, "model_forward_time": 0.02458047866821289, "step": 19488 }, { "epoch": 2.9736328125e-05, "step": 19488, "training_step_time": 0.10582351684570312 }, { "epoch": 2.973785400390625e-05, "model_forward_time": 0.02407550811767578, "step": 19489 }, { "epoch": 2.973785400390625e-05, "step": 19489, "training_step_time": 0.12486124038696289 }, { "epoch": 2.97393798828125e-05, "grad_norm": 0.23423996567726135, "learning_rate": 2.9965714411423972e-05, "loss": 0.0188, "step": 19490 }, { "epoch": 2.97393798828125e-05, "model_forward_time": 0.025362730026245117, "step": 19490 }, { "epoch": 2.97393798828125e-05, "step": 19490, "training_step_time": 0.16048336029052734 }, { "epoch": 2.974090576171875e-05, "model_forward_time": 0.024205923080444336, "step": 19491 }, { "epoch": 2.974090576171875e-05, "step": 19491, "training_step_time": 0.10168719291687012 }, { "epoch": 2.9742431640625e-05, "model_forward_time": 0.0248870849609375, "step": 19492 }, { "epoch": 2.9742431640625e-05, "step": 19492, "training_step_time": 0.10680079460144043 }, { "epoch": 2.974395751953125e-05, "model_forward_time": 0.024321794509887695, "step": 19493 }, { "epoch": 2.974395751953125e-05, "step": 19493, "training_step_time": 0.10918498039245605 }, { "epoch": 2.97454833984375e-05, "model_forward_time": 0.02410435676574707, "step": 19494 }, { "epoch": 2.97454833984375e-05, "step": 19494, "training_step_time": 0.11081957817077637 }, { "epoch": 2.974700927734375e-05, "model_forward_time": 0.02460026741027832, "step": 19495 }, { "epoch": 2.974700927734375e-05, "step": 19495, "training_step_time": 0.10891437530517578 }, { "epoch": 2.974853515625e-05, "model_forward_time": 0.025681018829345703, "step": 19496 }, { "epoch": 2.974853515625e-05, "step": 19496, "training_step_time": 0.10690689086914062 }, { "epoch": 2.975006103515625e-05, "model_forward_time": 0.02449774742126465, "step": 19497 }, { "epoch": 2.975006103515625e-05, "step": 19497, "training_step_time": 0.13414430618286133 }, { "epoch": 2.97515869140625e-05, "model_forward_time": 0.024885177612304688, "step": 19498 }, { "epoch": 2.97515869140625e-05, "step": 19498, "training_step_time": 0.12016129493713379 }, { "epoch": 2.975311279296875e-05, "model_forward_time": 0.024668216705322266, "step": 19499 }, { "epoch": 2.975311279296875e-05, "step": 19499, "training_step_time": 0.19413399696350098 }, { "epoch": 2.9754638671875e-05, "grad_norm": 0.26104629039764404, "learning_rate": 2.991522876735154e-05, "loss": 0.0066, "step": 19500 }, { "epoch": 2.9754638671875e-05, "model_forward_time": 0.024544715881347656, "step": 19500 }, { "epoch": 2.9754638671875e-05, "step": 19500, "training_step_time": 0.12636113166809082 }, { "epoch": 2.975616455078125e-05, "model_forward_time": 0.02457118034362793, "step": 19501 }, { "epoch": 2.975616455078125e-05, "step": 19501, "training_step_time": 0.19796109199523926 }, { "epoch": 2.97576904296875e-05, "model_forward_time": 0.024531841278076172, "step": 19502 }, { "epoch": 2.97576904296875e-05, "step": 19502, "training_step_time": 0.10522866249084473 }, { "epoch": 2.975921630859375e-05, "model_forward_time": 0.024940967559814453, "step": 19503 }, { "epoch": 2.975921630859375e-05, "step": 19503, "training_step_time": 0.1037909984588623 }, { "epoch": 2.97607421875e-05, "model_forward_time": 0.025203466415405273, "step": 19504 }, { "epoch": 2.97607421875e-05, "step": 19504, "training_step_time": 0.10757970809936523 }, { "epoch": 2.976226806640625e-05, "model_forward_time": 0.025115966796875, "step": 19505 }, { "epoch": 2.976226806640625e-05, "step": 19505, "training_step_time": 0.17981576919555664 }, { "epoch": 2.97637939453125e-05, "model_forward_time": 0.026972055435180664, "step": 19506 }, { "epoch": 2.97637939453125e-05, "step": 19506, "training_step_time": 0.14259886741638184 }, { "epoch": 2.976531982421875e-05, "model_forward_time": 0.024760961532592773, "step": 19507 }, { "epoch": 2.976531982421875e-05, "step": 19507, "training_step_time": 0.11158537864685059 }, { "epoch": 2.9766845703125e-05, "model_forward_time": 0.02513885498046875, "step": 19508 }, { "epoch": 2.9766845703125e-05, "step": 19508, "training_step_time": 0.1312885284423828 }, { "epoch": 2.976837158203125e-05, "model_forward_time": 0.025066614151000977, "step": 19509 }, { "epoch": 2.976837158203125e-05, "step": 19509, "training_step_time": 0.15006566047668457 }, { "epoch": 2.97698974609375e-05, "grad_norm": 0.3384643793106079, "learning_rate": 2.9864767528170002e-05, "loss": 0.0171, "step": 19510 }, { "epoch": 2.97698974609375e-05, "model_forward_time": 0.024920225143432617, "step": 19510 }, { "epoch": 2.97698974609375e-05, "step": 19510, "training_step_time": 0.17083978652954102 }, { "epoch": 2.977142333984375e-05, "model_forward_time": 0.02491450309753418, "step": 19511 }, { "epoch": 2.977142333984375e-05, "step": 19511, "training_step_time": 0.18915295600891113 }, { "epoch": 2.977294921875e-05, "model_forward_time": 0.02492499351501465, "step": 19512 }, { "epoch": 2.977294921875e-05, "step": 19512, "training_step_time": 0.11075377464294434 }, { "epoch": 2.977447509765625e-05, "model_forward_time": 0.024904966354370117, "step": 19513 }, { "epoch": 2.977447509765625e-05, "step": 19513, "training_step_time": 0.1067497730255127 }, { "epoch": 2.97760009765625e-05, "model_forward_time": 0.0250701904296875, "step": 19514 }, { "epoch": 2.97760009765625e-05, "step": 19514, "training_step_time": 0.10480237007141113 }, { "epoch": 2.977752685546875e-05, "model_forward_time": 0.024966955184936523, "step": 19515 }, { "epoch": 2.977752685546875e-05, "step": 19515, "training_step_time": 0.10515904426574707 }, { "epoch": 2.9779052734375e-05, "model_forward_time": 0.025329113006591797, "step": 19516 }, { "epoch": 2.9779052734375e-05, "step": 19516, "training_step_time": 0.10393309593200684 }, { "epoch": 2.978057861328125e-05, "model_forward_time": 0.02542710304260254, "step": 19517 }, { "epoch": 2.978057861328125e-05, "step": 19517, "training_step_time": 0.10538196563720703 }, { "epoch": 2.97821044921875e-05, "model_forward_time": 0.025069713592529297, "step": 19518 }, { "epoch": 2.97821044921875e-05, "step": 19518, "training_step_time": 0.10492539405822754 }, { "epoch": 2.978363037109375e-05, "model_forward_time": 0.025235891342163086, "step": 19519 }, { "epoch": 2.978363037109375e-05, "step": 19519, "training_step_time": 0.10514688491821289 }, { "epoch": 2.978515625e-05, "grad_norm": 0.31845393776893616, "learning_rate": 2.9814330755194564e-05, "loss": 0.0064, "step": 19520 }, { "epoch": 2.978515625e-05, "model_forward_time": 0.025456905364990234, "step": 19520 }, { "epoch": 2.978515625e-05, "step": 19520, "training_step_time": 0.10809993743896484 }, { "epoch": 2.978668212890625e-05, "model_forward_time": 0.024985790252685547, "step": 19521 }, { "epoch": 2.978668212890625e-05, "step": 19521, "training_step_time": 0.1073908805847168 }, { "epoch": 2.97882080078125e-05, "model_forward_time": 0.025693178176879883, "step": 19522 }, { "epoch": 2.97882080078125e-05, "step": 19522, "training_step_time": 0.10714316368103027 }, { "epoch": 2.978973388671875e-05, "model_forward_time": 0.02463841438293457, "step": 19523 }, { "epoch": 2.978973388671875e-05, "step": 19523, "training_step_time": 0.1046140193939209 }, { "epoch": 2.9791259765625e-05, "model_forward_time": 0.024739503860473633, "step": 19524 }, { "epoch": 2.9791259765625e-05, "step": 19524, "training_step_time": 0.10309576988220215 }, { "epoch": 2.979278564453125e-05, "model_forward_time": 0.02532958984375, "step": 19525 }, { "epoch": 2.979278564453125e-05, "step": 19525, "training_step_time": 0.1053006649017334 }, { "epoch": 2.97943115234375e-05, "model_forward_time": 0.025354862213134766, "step": 19526 }, { "epoch": 2.97943115234375e-05, "step": 19526, "training_step_time": 0.1072547435760498 }, { "epoch": 2.979583740234375e-05, "model_forward_time": 0.025319576263427734, "step": 19527 }, { "epoch": 2.979583740234375e-05, "step": 19527, "training_step_time": 0.10787343978881836 }, { "epoch": 2.979736328125e-05, "model_forward_time": 0.025336503982543945, "step": 19528 }, { "epoch": 2.979736328125e-05, "step": 19528, "training_step_time": 0.10618138313293457 }, { "epoch": 2.979888916015625e-05, "model_forward_time": 0.024941682815551758, "step": 19529 }, { "epoch": 2.979888916015625e-05, "step": 19529, "training_step_time": 0.10894060134887695 }, { "epoch": 2.98004150390625e-05, "grad_norm": 0.18962590396404266, "learning_rate": 2.976391850971065e-05, "loss": 0.0066, "step": 19530 }, { "epoch": 2.98004150390625e-05, "model_forward_time": 0.028411865234375, "step": 19530 }, { "epoch": 2.98004150390625e-05, "step": 19530, "training_step_time": 0.15261578559875488 }, { "epoch": 2.980194091796875e-05, "model_forward_time": 0.025447607040405273, "step": 19531 }, { "epoch": 2.980194091796875e-05, "step": 19531, "training_step_time": 0.11745095252990723 }, { "epoch": 2.9803466796875e-05, "model_forward_time": 0.024374723434448242, "step": 19532 }, { "epoch": 2.9803466796875e-05, "step": 19532, "training_step_time": 0.21504592895507812 }, { "epoch": 2.980499267578125e-05, "model_forward_time": 0.024563074111938477, "step": 19533 }, { "epoch": 2.980499267578125e-05, "step": 19533, "training_step_time": 0.11453509330749512 }, { "epoch": 2.98065185546875e-05, "model_forward_time": 0.024665117263793945, "step": 19534 }, { "epoch": 2.98065185546875e-05, "step": 19534, "training_step_time": 0.11942505836486816 }, { "epoch": 2.980804443359375e-05, "model_forward_time": 0.02391505241394043, "step": 19535 }, { "epoch": 2.980804443359375e-05, "step": 19535, "training_step_time": 0.19457292556762695 }, { "epoch": 2.98095703125e-05, "model_forward_time": 0.02424454689025879, "step": 19536 }, { "epoch": 2.98095703125e-05, "step": 19536, "training_step_time": 0.11475229263305664 }, { "epoch": 2.981109619140625e-05, "model_forward_time": 0.024344921112060547, "step": 19537 }, { "epoch": 2.981109619140625e-05, "step": 19537, "training_step_time": 0.11418747901916504 }, { "epoch": 2.98126220703125e-05, "model_forward_time": 0.02504277229309082, "step": 19538 }, { "epoch": 2.98126220703125e-05, "step": 19538, "training_step_time": 0.11642599105834961 }, { "epoch": 2.981414794921875e-05, "model_forward_time": 0.0252532958984375, "step": 19539 }, { "epoch": 2.981414794921875e-05, "step": 19539, "training_step_time": 0.11401176452636719 }, { "epoch": 2.9815673828125e-05, "grad_norm": 0.15322192013263702, "learning_rate": 2.971353085297387e-05, "loss": 0.0071, "step": 19540 }, { "epoch": 2.9815673828125e-05, "model_forward_time": 0.0251615047454834, "step": 19540 }, { "epoch": 2.9815673828125e-05, "step": 19540, "training_step_time": 0.11014580726623535 }, { "epoch": 2.981719970703125e-05, "model_forward_time": 0.025318384170532227, "step": 19541 }, { "epoch": 2.981719970703125e-05, "step": 19541, "training_step_time": 0.1055152416229248 }, { "epoch": 2.98187255859375e-05, "model_forward_time": 0.024315834045410156, "step": 19542 }, { "epoch": 2.98187255859375e-05, "step": 19542, "training_step_time": 0.14560866355895996 }, { "epoch": 2.982025146484375e-05, "model_forward_time": 0.024071216583251953, "step": 19543 }, { "epoch": 2.982025146484375e-05, "step": 19543, "training_step_time": 0.10537433624267578 }, { "epoch": 2.982177734375e-05, "model_forward_time": 0.025277137756347656, "step": 19544 }, { "epoch": 2.982177734375e-05, "step": 19544, "training_step_time": 0.18425846099853516 }, { "epoch": 2.982330322265625e-05, "model_forward_time": 0.025066614151000977, "step": 19545 }, { "epoch": 2.982330322265625e-05, "step": 19545, "training_step_time": 0.13294649124145508 }, { "epoch": 2.98248291015625e-05, "model_forward_time": 0.024482250213623047, "step": 19546 }, { "epoch": 2.98248291015625e-05, "step": 19546, "training_step_time": 0.19315671920776367 }, { "epoch": 2.982635498046875e-05, "model_forward_time": 0.023893117904663086, "step": 19547 }, { "epoch": 2.982635498046875e-05, "step": 19547, "training_step_time": 0.10288095474243164 }, { "epoch": 2.9827880859375e-05, "model_forward_time": 0.024553775787353516, "step": 19548 }, { "epoch": 2.9827880859375e-05, "step": 19548, "training_step_time": 0.10717582702636719 }, { "epoch": 2.982940673828125e-05, "model_forward_time": 0.025120019912719727, "step": 19549 }, { "epoch": 2.982940673828125e-05, "step": 19549, "training_step_time": 0.10562658309936523 }, { "epoch": 2.98309326171875e-05, "grad_norm": 0.20454606413841248, "learning_rate": 2.9663167846209998e-05, "loss": 0.0085, "step": 19550 }, { "epoch": 2.98309326171875e-05, "model_forward_time": 0.025198698043823242, "step": 19550 }, { "epoch": 2.98309326171875e-05, "step": 19550, "training_step_time": 0.10779333114624023 }, { "epoch": 2.983245849609375e-05, "model_forward_time": 0.025770902633666992, "step": 19551 }, { "epoch": 2.983245849609375e-05, "step": 19551, "training_step_time": 0.11975407600402832 }, { "epoch": 2.9833984375e-05, "model_forward_time": 0.025444746017456055, "step": 19552 }, { "epoch": 2.9833984375e-05, "step": 19552, "training_step_time": 0.14971399307250977 }, { "epoch": 2.983551025390625e-05, "model_forward_time": 0.024919748306274414, "step": 19553 }, { "epoch": 2.983551025390625e-05, "step": 19553, "training_step_time": 0.13924837112426758 }, { "epoch": 2.98370361328125e-05, "model_forward_time": 0.024295806884765625, "step": 19554 }, { "epoch": 2.98370361328125e-05, "step": 19554, "training_step_time": 0.16065597534179688 }, { "epoch": 2.983856201171875e-05, "model_forward_time": 0.024028539657592773, "step": 19555 }, { "epoch": 2.983856201171875e-05, "step": 19555, "training_step_time": 0.15027952194213867 }, { "epoch": 2.9840087890625e-05, "model_forward_time": 0.0243227481842041, "step": 19556 }, { "epoch": 2.9840087890625e-05, "step": 19556, "training_step_time": 0.10836601257324219 }, { "epoch": 2.984161376953125e-05, "model_forward_time": 0.024856090545654297, "step": 19557 }, { "epoch": 2.984161376953125e-05, "step": 19557, "training_step_time": 0.10556936264038086 }, { "epoch": 2.98431396484375e-05, "model_forward_time": 0.02561330795288086, "step": 19558 }, { "epoch": 2.98431396484375e-05, "step": 19558, "training_step_time": 0.1068260669708252 }, { "epoch": 2.984466552734375e-05, "model_forward_time": 0.024974584579467773, "step": 19559 }, { "epoch": 2.984466552734375e-05, "step": 19559, "training_step_time": 0.10716128349304199 }, { "epoch": 2.984619140625e-05, "grad_norm": 0.1670469492673874, "learning_rate": 2.9612829550614836e-05, "loss": 0.0145, "step": 19560 }, { "epoch": 2.984619140625e-05, "model_forward_time": 0.026113271713256836, "step": 19560 }, { "epoch": 2.984619140625e-05, "step": 19560, "training_step_time": 0.19251441955566406 }, { "epoch": 2.984771728515625e-05, "model_forward_time": 0.024308443069458008, "step": 19561 }, { "epoch": 2.984771728515625e-05, "step": 19561, "training_step_time": 0.10585212707519531 }, { "epoch": 2.98492431640625e-05, "model_forward_time": 0.024365901947021484, "step": 19562 }, { "epoch": 2.98492431640625e-05, "step": 19562, "training_step_time": 0.10660457611083984 }, { "epoch": 2.985076904296875e-05, "model_forward_time": 0.025064468383789062, "step": 19563 }, { "epoch": 2.985076904296875e-05, "step": 19563, "training_step_time": 0.11022615432739258 }, { "epoch": 2.9852294921875e-05, "model_forward_time": 0.025190114974975586, "step": 19564 }, { "epoch": 2.9852294921875e-05, "step": 19564, "training_step_time": 0.11057066917419434 }, { "epoch": 2.985382080078125e-05, "model_forward_time": 0.024777650833129883, "step": 19565 }, { "epoch": 2.985382080078125e-05, "step": 19565, "training_step_time": 0.11058807373046875 }, { "epoch": 2.98553466796875e-05, "model_forward_time": 0.025362491607666016, "step": 19566 }, { "epoch": 2.98553466796875e-05, "step": 19566, "training_step_time": 0.10823297500610352 }, { "epoch": 2.985687255859375e-05, "model_forward_time": 0.024789810180664062, "step": 19567 }, { "epoch": 2.985687255859375e-05, "step": 19567, "training_step_time": 0.10738968849182129 }, { "epoch": 2.98583984375e-05, "model_forward_time": 0.025118112564086914, "step": 19568 }, { "epoch": 2.98583984375e-05, "step": 19568, "training_step_time": 0.10932087898254395 }, { "epoch": 2.985992431640625e-05, "model_forward_time": 0.024895906448364258, "step": 19569 }, { "epoch": 2.985992431640625e-05, "step": 19569, "training_step_time": 0.10595917701721191 }, { "epoch": 2.98614501953125e-05, "grad_norm": 0.13820356130599976, "learning_rate": 2.956251602735413e-05, "loss": 0.0089, "step": 19570 }, { "epoch": 2.98614501953125e-05, "model_forward_time": 0.02538585662841797, "step": 19570 }, { "epoch": 2.98614501953125e-05, "step": 19570, "training_step_time": 0.10759091377258301 }, { "epoch": 2.986297607421875e-05, "model_forward_time": 0.025141239166259766, "step": 19571 }, { "epoch": 2.986297607421875e-05, "step": 19571, "training_step_time": 0.10559511184692383 }, { "epoch": 2.9864501953125e-05, "model_forward_time": 0.02512812614440918, "step": 19572 }, { "epoch": 2.9864501953125e-05, "step": 19572, "training_step_time": 0.10876679420471191 }, { "epoch": 2.986602783203125e-05, "model_forward_time": 0.02518773078918457, "step": 19573 }, { "epoch": 2.986602783203125e-05, "step": 19573, "training_step_time": 0.1080482006072998 }, { "epoch": 2.98675537109375e-05, "model_forward_time": 0.029588937759399414, "step": 19574 }, { "epoch": 2.98675537109375e-05, "step": 19574, "training_step_time": 0.11270737648010254 }, { "epoch": 2.986907958984375e-05, "model_forward_time": 0.02529764175415039, "step": 19575 }, { "epoch": 2.986907958984375e-05, "step": 19575, "training_step_time": 0.16208887100219727 }, { "epoch": 2.987060546875e-05, "model_forward_time": 0.02442193031311035, "step": 19576 }, { "epoch": 2.987060546875e-05, "step": 19576, "training_step_time": 0.10684776306152344 }, { "epoch": 2.987213134765625e-05, "model_forward_time": 0.025663137435913086, "step": 19577 }, { "epoch": 2.987213134765625e-05, "step": 19577, "training_step_time": 0.1072843074798584 }, { "epoch": 2.98736572265625e-05, "model_forward_time": 0.025342226028442383, "step": 19578 }, { "epoch": 2.98736572265625e-05, "step": 19578, "training_step_time": 0.15394115447998047 }, { "epoch": 2.987518310546875e-05, "model_forward_time": 0.024675846099853516, "step": 19579 }, { "epoch": 2.987518310546875e-05, "step": 19579, "training_step_time": 0.1824800968170166 }, { "epoch": 2.9876708984375e-05, "grad_norm": 0.16472890973091125, "learning_rate": 2.9512227337563604e-05, "loss": 0.01, "step": 19580 }, { "epoch": 2.9876708984375e-05, "model_forward_time": 0.024648666381835938, "step": 19580 }, { "epoch": 2.9876708984375e-05, "step": 19580, "training_step_time": 0.1094059944152832 }, { "epoch": 2.987823486328125e-05, "model_forward_time": 0.02759575843811035, "step": 19581 }, { "epoch": 2.987823486328125e-05, "step": 19581, "training_step_time": 0.10732865333557129 }, { "epoch": 2.98797607421875e-05, "model_forward_time": 0.02532219886779785, "step": 19582 }, { "epoch": 2.98797607421875e-05, "step": 19582, "training_step_time": 0.10691976547241211 }, { "epoch": 2.988128662109375e-05, "model_forward_time": 0.02490830421447754, "step": 19583 }, { "epoch": 2.988128662109375e-05, "step": 19583, "training_step_time": 0.10532093048095703 }, { "epoch": 2.98828125e-05, "model_forward_time": 0.02523517608642578, "step": 19584 }, { "epoch": 2.98828125e-05, "step": 19584, "training_step_time": 0.10985898971557617 }, { "epoch": 2.988433837890625e-05, "model_forward_time": 0.025485515594482422, "step": 19585 }, { "epoch": 2.988433837890625e-05, "step": 19585, "training_step_time": 0.1072542667388916 }, { "epoch": 2.98858642578125e-05, "model_forward_time": 0.025235414505004883, "step": 19586 }, { "epoch": 2.98858642578125e-05, "step": 19586, "training_step_time": 0.10715627670288086 }, { "epoch": 2.988739013671875e-05, "model_forward_time": 0.025811195373535156, "step": 19587 }, { "epoch": 2.988739013671875e-05, "step": 19587, "training_step_time": 0.10572934150695801 }, { "epoch": 2.9888916015625e-05, "model_forward_time": 0.02470254898071289, "step": 19588 }, { "epoch": 2.9888916015625e-05, "step": 19588, "training_step_time": 0.15378355979919434 }, { "epoch": 2.989044189453125e-05, "model_forward_time": 0.025005102157592773, "step": 19589 }, { "epoch": 2.989044189453125e-05, "step": 19589, "training_step_time": 0.16457486152648926 }, { "epoch": 2.98919677734375e-05, "grad_norm": 0.1409149467945099, "learning_rate": 2.9461963542348737e-05, "loss": 0.0073, "step": 19590 }, { "epoch": 2.98919677734375e-05, "model_forward_time": 0.024463415145874023, "step": 19590 }, { "epoch": 2.98919677734375e-05, "step": 19590, "training_step_time": 0.1068716049194336 }, { "epoch": 2.989349365234375e-05, "model_forward_time": 0.025065183639526367, "step": 19591 }, { "epoch": 2.989349365234375e-05, "step": 19591, "training_step_time": 0.12686634063720703 }, { "epoch": 2.989501953125e-05, "model_forward_time": 0.025710105895996094, "step": 19592 }, { "epoch": 2.989501953125e-05, "step": 19592, "training_step_time": 0.1659994125366211 }, { "epoch": 2.989654541015625e-05, "model_forward_time": 0.02454066276550293, "step": 19593 }, { "epoch": 2.989654541015625e-05, "step": 19593, "training_step_time": 0.10453462600708008 }, { "epoch": 2.98980712890625e-05, "model_forward_time": 0.02484726905822754, "step": 19594 }, { "epoch": 2.98980712890625e-05, "step": 19594, "training_step_time": 0.10209393501281738 }, { "epoch": 2.989959716796875e-05, "model_forward_time": 0.02641892433166504, "step": 19595 }, { "epoch": 2.989959716796875e-05, "step": 19595, "training_step_time": 0.10591387748718262 }, { "epoch": 2.9901123046875e-05, "model_forward_time": 0.02702498435974121, "step": 19596 }, { "epoch": 2.9901123046875e-05, "step": 19596, "training_step_time": 0.17159152030944824 }, { "epoch": 2.990264892578125e-05, "model_forward_time": 0.024260997772216797, "step": 19597 }, { "epoch": 2.990264892578125e-05, "step": 19597, "training_step_time": 0.10137414932250977 }, { "epoch": 2.99041748046875e-05, "model_forward_time": 0.024391651153564453, "step": 19598 }, { "epoch": 2.99041748046875e-05, "step": 19598, "training_step_time": 0.18944954872131348 }, { "epoch": 2.990570068359375e-05, "model_forward_time": 0.023992538452148438, "step": 19599 }, { "epoch": 2.990570068359375e-05, "step": 19599, "training_step_time": 0.10377621650695801 }, { "epoch": 2.99072265625e-05, "grad_norm": 0.32406118512153625, "learning_rate": 2.9411724702784758e-05, "loss": 0.0122, "step": 19600 }, { "epoch": 2.99072265625e-05, "model_forward_time": 0.023989439010620117, "step": 19600 }, { "epoch": 2.99072265625e-05, "step": 19600, "training_step_time": 0.15044164657592773 }, { "epoch": 2.990875244140625e-05, "model_forward_time": 0.02483534812927246, "step": 19601 }, { "epoch": 2.990875244140625e-05, "step": 19601, "training_step_time": 0.19647574424743652 }, { "epoch": 2.99102783203125e-05, "model_forward_time": 0.024031400680541992, "step": 19602 }, { "epoch": 2.99102783203125e-05, "step": 19602, "training_step_time": 0.10714411735534668 }, { "epoch": 2.991180419921875e-05, "model_forward_time": 0.024303913116455078, "step": 19603 }, { "epoch": 2.991180419921875e-05, "step": 19603, "training_step_time": 0.12185359001159668 }, { "epoch": 2.9913330078125e-05, "model_forward_time": 0.025064468383789062, "step": 19604 }, { "epoch": 2.9913330078125e-05, "step": 19604, "training_step_time": 0.10674762725830078 }, { "epoch": 2.991485595703125e-05, "model_forward_time": 0.025110244750976562, "step": 19605 }, { "epoch": 2.991485595703125e-05, "step": 19605, "training_step_time": 0.188767671585083 }, { "epoch": 2.99163818359375e-05, "model_forward_time": 0.024672746658325195, "step": 19606 }, { "epoch": 2.99163818359375e-05, "step": 19606, "training_step_time": 0.10637378692626953 }, { "epoch": 2.991790771484375e-05, "model_forward_time": 0.024598121643066406, "step": 19607 }, { "epoch": 2.991790771484375e-05, "step": 19607, "training_step_time": 0.1020803451538086 }, { "epoch": 2.991943359375e-05, "model_forward_time": 0.024354219436645508, "step": 19608 }, { "epoch": 2.991943359375e-05, "step": 19608, "training_step_time": 0.10612273216247559 }, { "epoch": 2.992095947265625e-05, "model_forward_time": 0.025546789169311523, "step": 19609 }, { "epoch": 2.992095947265625e-05, "step": 19609, "training_step_time": 0.10814785957336426 }, { "epoch": 2.99224853515625e-05, "grad_norm": 0.14034727215766907, "learning_rate": 2.936151087991663e-05, "loss": 0.0077, "step": 19610 }, { "epoch": 2.99224853515625e-05, "model_forward_time": 0.03080463409423828, "step": 19610 }, { "epoch": 2.99224853515625e-05, "step": 19610, "training_step_time": 0.11222672462463379 }, { "epoch": 2.992401123046875e-05, "model_forward_time": 0.025627851486206055, "step": 19611 }, { "epoch": 2.992401123046875e-05, "step": 19611, "training_step_time": 0.11129617691040039 }, { "epoch": 2.9925537109375e-05, "model_forward_time": 0.026285409927368164, "step": 19612 }, { "epoch": 2.9925537109375e-05, "step": 19612, "training_step_time": 0.11401128768920898 }, { "epoch": 2.992706298828125e-05, "model_forward_time": 0.024807214736938477, "step": 19613 }, { "epoch": 2.992706298828125e-05, "step": 19613, "training_step_time": 0.10528182983398438 }, { "epoch": 2.99285888671875e-05, "model_forward_time": 0.025075435638427734, "step": 19614 }, { "epoch": 2.99285888671875e-05, "step": 19614, "training_step_time": 0.10641241073608398 }, { "epoch": 2.993011474609375e-05, "model_forward_time": 0.025201797485351562, "step": 19615 }, { "epoch": 2.993011474609375e-05, "step": 19615, "training_step_time": 0.10706758499145508 }, { "epoch": 2.9931640625e-05, "model_forward_time": 0.025257110595703125, "step": 19616 }, { "epoch": 2.9931640625e-05, "step": 19616, "training_step_time": 0.10868287086486816 }, { "epoch": 2.993316650390625e-05, "model_forward_time": 0.024718761444091797, "step": 19617 }, { "epoch": 2.993316650390625e-05, "step": 19617, "training_step_time": 0.10501384735107422 }, { "epoch": 2.99346923828125e-05, "model_forward_time": 0.025174379348754883, "step": 19618 }, { "epoch": 2.99346923828125e-05, "step": 19618, "training_step_time": 0.10816812515258789 }, { "epoch": 2.993621826171875e-05, "model_forward_time": 0.025418996810913086, "step": 19619 }, { "epoch": 2.993621826171875e-05, "step": 19619, "training_step_time": 0.10870885848999023 }, { "epoch": 2.9937744140625e-05, "grad_norm": 0.42201992869377136, "learning_rate": 2.931132213475884e-05, "loss": 0.0116, "step": 19620 }, { "epoch": 2.9937744140625e-05, "model_forward_time": 0.025223970413208008, "step": 19620 }, { "epoch": 2.9937744140625e-05, "step": 19620, "training_step_time": 0.10993289947509766 }, { "epoch": 2.993927001953125e-05, "model_forward_time": 0.025276660919189453, "step": 19621 }, { "epoch": 2.993927001953125e-05, "step": 19621, "training_step_time": 0.11061716079711914 }, { "epoch": 2.99407958984375e-05, "model_forward_time": 0.02518606185913086, "step": 19622 }, { "epoch": 2.99407958984375e-05, "step": 19622, "training_step_time": 0.11000728607177734 }, { "epoch": 2.994232177734375e-05, "model_forward_time": 0.025216102600097656, "step": 19623 }, { "epoch": 2.994232177734375e-05, "step": 19623, "training_step_time": 0.12096762657165527 }, { "epoch": 2.994384765625e-05, "model_forward_time": 0.02507948875427246, "step": 19624 }, { "epoch": 2.994384765625e-05, "step": 19624, "training_step_time": 0.10816836357116699 }, { "epoch": 2.994537353515625e-05, "model_forward_time": 0.025690793991088867, "step": 19625 }, { "epoch": 2.994537353515625e-05, "step": 19625, "training_step_time": 0.21753358840942383 }, { "epoch": 2.99468994140625e-05, "model_forward_time": 0.02683711051940918, "step": 19626 }, { "epoch": 2.99468994140625e-05, "step": 19626, "training_step_time": 0.1206669807434082 }, { "epoch": 2.994842529296875e-05, "model_forward_time": 0.024258136749267578, "step": 19627 }, { "epoch": 2.994842529296875e-05, "step": 19627, "training_step_time": 0.1037757396697998 }, { "epoch": 2.9949951171875e-05, "model_forward_time": 0.025513172149658203, "step": 19628 }, { "epoch": 2.9949951171875e-05, "step": 19628, "training_step_time": 0.10775232315063477 }, { "epoch": 2.995147705078125e-05, "model_forward_time": 0.025191783905029297, "step": 19629 }, { "epoch": 2.995147705078125e-05, "step": 19629, "training_step_time": 0.10503959655761719 }, { "epoch": 2.99530029296875e-05, "grad_norm": 0.14935193955898285, "learning_rate": 2.9261158528295495e-05, "loss": 0.0114, "step": 19630 }, { "epoch": 2.99530029296875e-05, "model_forward_time": 0.025160551071166992, "step": 19630 }, { "epoch": 2.99530029296875e-05, "step": 19630, "training_step_time": 0.10604548454284668 }, { "epoch": 2.995452880859375e-05, "model_forward_time": 0.02519392967224121, "step": 19631 }, { "epoch": 2.995452880859375e-05, "step": 19631, "training_step_time": 0.10671377182006836 }, { "epoch": 2.99560546875e-05, "model_forward_time": 0.025049924850463867, "step": 19632 }, { "epoch": 2.99560546875e-05, "step": 19632, "training_step_time": 0.10622239112854004 }, { "epoch": 2.995758056640625e-05, "model_forward_time": 0.025531291961669922, "step": 19633 }, { "epoch": 2.995758056640625e-05, "step": 19633, "training_step_time": 0.10392546653747559 }, { "epoch": 2.99591064453125e-05, "model_forward_time": 0.02480769157409668, "step": 19634 }, { "epoch": 2.99591064453125e-05, "step": 19634, "training_step_time": 0.15025877952575684 }, { "epoch": 2.996063232421875e-05, "model_forward_time": 0.02473902702331543, "step": 19635 }, { "epoch": 2.996063232421875e-05, "step": 19635, "training_step_time": 0.16156482696533203 }, { "epoch": 2.9962158203125e-05, "model_forward_time": 0.024419307708740234, "step": 19636 }, { "epoch": 2.9962158203125e-05, "step": 19636, "training_step_time": 0.10918855667114258 }, { "epoch": 2.996368408203125e-05, "model_forward_time": 0.0249786376953125, "step": 19637 }, { "epoch": 2.996368408203125e-05, "step": 19637, "training_step_time": 0.1332385540008545 }, { "epoch": 2.99652099609375e-05, "model_forward_time": 0.025173664093017578, "step": 19638 }, { "epoch": 2.99652099609375e-05, "step": 19638, "training_step_time": 0.19610214233398438 }, { "epoch": 2.996673583984375e-05, "model_forward_time": 0.024643659591674805, "step": 19639 }, { "epoch": 2.996673583984375e-05, "step": 19639, "training_step_time": 0.10800290107727051 }, { "epoch": 2.996826171875e-05, "grad_norm": 0.22325342893600464, "learning_rate": 2.9211020121480083e-05, "loss": 0.0073, "step": 19640 }, { "epoch": 2.996826171875e-05, "model_forward_time": 0.024869441986083984, "step": 19640 }, { "epoch": 2.996826171875e-05, "step": 19640, "training_step_time": 0.19586706161499023 }, { "epoch": 2.996978759765625e-05, "model_forward_time": 0.024135828018188477, "step": 19641 }, { "epoch": 2.996978759765625e-05, "step": 19641, "training_step_time": 0.19405770301818848 }, { "epoch": 2.99713134765625e-05, "model_forward_time": 0.024590492248535156, "step": 19642 }, { "epoch": 2.99713134765625e-05, "step": 19642, "training_step_time": 0.16583728790283203 }, { "epoch": 2.997283935546875e-05, "model_forward_time": 0.023678064346313477, "step": 19643 }, { "epoch": 2.997283935546875e-05, "step": 19643, "training_step_time": 0.14778590202331543 }, { "epoch": 2.9974365234375e-05, "model_forward_time": 0.0245211124420166, "step": 19644 }, { "epoch": 2.9974365234375e-05, "step": 19644, "training_step_time": 0.173325777053833 }, { "epoch": 2.997589111328125e-05, "model_forward_time": 0.024397850036621094, "step": 19645 }, { "epoch": 2.997589111328125e-05, "step": 19645, "training_step_time": 0.14224982261657715 }, { "epoch": 2.99774169921875e-05, "model_forward_time": 0.027773618698120117, "step": 19646 }, { "epoch": 2.99774169921875e-05, "step": 19646, "training_step_time": 0.19606757164001465 }, { "epoch": 2.997894287109375e-05, "model_forward_time": 0.02436542510986328, "step": 19647 }, { "epoch": 2.997894287109375e-05, "step": 19647, "training_step_time": 0.1328599452972412 }, { "epoch": 2.998046875e-05, "model_forward_time": 0.025101423263549805, "step": 19648 }, { "epoch": 2.998046875e-05, "step": 19648, "training_step_time": 0.1195838451385498 }, { "epoch": 2.998199462890625e-05, "model_forward_time": 0.025168657302856445, "step": 19649 }, { "epoch": 2.998199462890625e-05, "step": 19649, "training_step_time": 0.18176770210266113 }, { "epoch": 2.99835205078125e-05, "grad_norm": 0.23292744159698486, "learning_rate": 2.916090697523549e-05, "loss": 0.0097, "step": 19650 }, { "epoch": 2.99835205078125e-05, "model_forward_time": 0.02465987205505371, "step": 19650 }, { "epoch": 2.99835205078125e-05, "step": 19650, "training_step_time": 0.11229872703552246 }, { "epoch": 2.998504638671875e-05, "model_forward_time": 0.024750947952270508, "step": 19651 }, { "epoch": 2.998504638671875e-05, "step": 19651, "training_step_time": 0.11211442947387695 }, { "epoch": 2.9986572265625e-05, "model_forward_time": 0.024960041046142578, "step": 19652 }, { "epoch": 2.9986572265625e-05, "step": 19652, "training_step_time": 0.11193346977233887 }, { "epoch": 2.998809814453125e-05, "model_forward_time": 0.025344133377075195, "step": 19653 }, { "epoch": 2.998809814453125e-05, "step": 19653, "training_step_time": 0.10980868339538574 }, { "epoch": 2.99896240234375e-05, "model_forward_time": 0.025328636169433594, "step": 19654 }, { "epoch": 2.99896240234375e-05, "step": 19654, "training_step_time": 0.1088714599609375 }, { "epoch": 2.999114990234375e-05, "model_forward_time": 0.02562117576599121, "step": 19655 }, { "epoch": 2.999114990234375e-05, "step": 19655, "training_step_time": 0.10650014877319336 }, { "epoch": 2.999267578125e-05, "model_forward_time": 0.025331735610961914, "step": 19656 }, { "epoch": 2.999267578125e-05, "step": 19656, "training_step_time": 0.10725831985473633 }, { "epoch": 2.999420166015625e-05, "model_forward_time": 0.02508068084716797, "step": 19657 }, { "epoch": 2.999420166015625e-05, "step": 19657, "training_step_time": 0.10398125648498535 }, { "epoch": 2.99957275390625e-05, "model_forward_time": 0.025248050689697266, "step": 19658 }, { "epoch": 2.99957275390625e-05, "step": 19658, "training_step_time": 0.10518431663513184 }, { "epoch": 2.999725341796875e-05, "model_forward_time": 0.025169849395751953, "step": 19659 }, { "epoch": 2.999725341796875e-05, "step": 19659, "training_step_time": 0.10518312454223633 }, { "epoch": 2.9998779296875e-05, "grad_norm": 0.17813117802143097, "learning_rate": 2.9110819150453927e-05, "loss": 0.0066, "step": 19660 }, { "epoch": 2.9998779296875e-05, "model_forward_time": 0.025392532348632812, "step": 19660 }, { "epoch": 2.9998779296875e-05, "step": 19660, "training_step_time": 0.11640501022338867 }, { "epoch": 3.000030517578125e-05, "model_forward_time": 0.025184154510498047, "step": 19661 }, { "epoch": 3.000030517578125e-05, "step": 19661, "training_step_time": 0.11286234855651855 }, { "epoch": 3.00018310546875e-05, "model_forward_time": 0.02505040168762207, "step": 19662 }, { "epoch": 3.00018310546875e-05, "step": 19662, "training_step_time": 0.10632491111755371 }, { "epoch": 3.000335693359375e-05, "model_forward_time": 0.024850845336914062, "step": 19663 }, { "epoch": 3.000335693359375e-05, "step": 19663, "training_step_time": 0.1812114715576172 }, { "epoch": 3.00048828125e-05, "model_forward_time": 0.02472686767578125, "step": 19664 }, { "epoch": 3.00048828125e-05, "step": 19664, "training_step_time": 0.18145751953125 }, { "epoch": 3.000640869140625e-05, "model_forward_time": 0.025064706802368164, "step": 19665 }, { "epoch": 3.000640869140625e-05, "step": 19665, "training_step_time": 0.13115763664245605 }, { "epoch": 3.00079345703125e-05, "model_forward_time": 0.024035215377807617, "step": 19666 }, { "epoch": 3.00079345703125e-05, "step": 19666, "training_step_time": 0.10888504981994629 }, { "epoch": 3.000946044921875e-05, "model_forward_time": 0.025581836700439453, "step": 19667 }, { "epoch": 3.000946044921875e-05, "step": 19667, "training_step_time": 0.18021655082702637 }, { "epoch": 3.0010986328125e-05, "model_forward_time": 0.025094270706176758, "step": 19668 }, { "epoch": 3.0010986328125e-05, "step": 19668, "training_step_time": 0.13316726684570312 }, { "epoch": 3.001251220703125e-05, "model_forward_time": 0.024628400802612305, "step": 19669 }, { "epoch": 3.001251220703125e-05, "step": 19669, "training_step_time": 0.11265873908996582 }, { "epoch": 3.00140380859375e-05, "grad_norm": 0.19238826632499695, "learning_rate": 2.9060756707996796e-05, "loss": 0.007, "step": 19670 }, { "epoch": 3.00140380859375e-05, "model_forward_time": 0.02514052391052246, "step": 19670 }, { "epoch": 3.00140380859375e-05, "step": 19670, "training_step_time": 0.11207342147827148 }, { "epoch": 3.001556396484375e-05, "model_forward_time": 0.0253603458404541, "step": 19671 }, { "epoch": 3.001556396484375e-05, "step": 19671, "training_step_time": 0.11382198333740234 }, { "epoch": 3.001708984375e-05, "model_forward_time": 0.025166034698486328, "step": 19672 }, { "epoch": 3.001708984375e-05, "step": 19672, "training_step_time": 0.11002564430236816 }, { "epoch": 3.001861572265625e-05, "model_forward_time": 0.025319337844848633, "step": 19673 }, { "epoch": 3.001861572265625e-05, "step": 19673, "training_step_time": 0.11128664016723633 }, { "epoch": 3.00201416015625e-05, "model_forward_time": 0.025250673294067383, "step": 19674 }, { "epoch": 3.00201416015625e-05, "step": 19674, "training_step_time": 0.10958981513977051 }, { "epoch": 3.002166748046875e-05, "model_forward_time": 0.026613473892211914, "step": 19675 }, { "epoch": 3.002166748046875e-05, "step": 19675, "training_step_time": 0.11628365516662598 }, { "epoch": 3.0023193359375e-05, "model_forward_time": 0.024479389190673828, "step": 19676 }, { "epoch": 3.0023193359375e-05, "step": 19676, "training_step_time": 0.13940095901489258 }, { "epoch": 3.002471923828125e-05, "model_forward_time": 0.024273157119750977, "step": 19677 }, { "epoch": 3.002471923828125e-05, "step": 19677, "training_step_time": 0.15883731842041016 }, { "epoch": 3.00262451171875e-05, "model_forward_time": 0.02582263946533203, "step": 19678 }, { "epoch": 3.00262451171875e-05, "step": 19678, "training_step_time": 0.11578488349914551 }, { "epoch": 3.002777099609375e-05, "model_forward_time": 0.024660348892211914, "step": 19679 }, { "epoch": 3.002777099609375e-05, "step": 19679, "training_step_time": 0.13032269477844238 }, { "epoch": 3.0029296875e-05, "grad_norm": 0.17850922048091888, "learning_rate": 2.9010719708694722e-05, "loss": 0.0061, "step": 19680 }, { "epoch": 3.0029296875e-05, "model_forward_time": 0.025351285934448242, "step": 19680 }, { "epoch": 3.0029296875e-05, "step": 19680, "training_step_time": 0.19426560401916504 }, { "epoch": 3.003082275390625e-05, "model_forward_time": 0.02360057830810547, "step": 19681 }, { "epoch": 3.003082275390625e-05, "step": 19681, "training_step_time": 0.10700106620788574 }, { "epoch": 3.00323486328125e-05, "model_forward_time": 0.025031328201293945, "step": 19682 }, { "epoch": 3.00323486328125e-05, "step": 19682, "training_step_time": 0.15339183807373047 }, { "epoch": 3.003387451171875e-05, "model_forward_time": 0.026000261306762695, "step": 19683 }, { "epoch": 3.003387451171875e-05, "step": 19683, "training_step_time": 0.11007094383239746 }, { "epoch": 3.0035400390625e-05, "model_forward_time": 0.024726152420043945, "step": 19684 }, { "epoch": 3.0035400390625e-05, "step": 19684, "training_step_time": 0.10822200775146484 }, { "epoch": 3.003692626953125e-05, "model_forward_time": 0.024831295013427734, "step": 19685 }, { "epoch": 3.003692626953125e-05, "step": 19685, "training_step_time": 0.11777949333190918 }, { "epoch": 3.00384521484375e-05, "model_forward_time": 0.025411128997802734, "step": 19686 }, { "epoch": 3.00384521484375e-05, "step": 19686, "training_step_time": 0.12631964683532715 }, { "epoch": 3.003997802734375e-05, "model_forward_time": 0.025126218795776367, "step": 19687 }, { "epoch": 3.003997802734375e-05, "step": 19687, "training_step_time": 0.1203007698059082 }, { "epoch": 3.004150390625e-05, "model_forward_time": 0.024927854537963867, "step": 19688 }, { "epoch": 3.004150390625e-05, "step": 19688, "training_step_time": 0.157027006149292 }, { "epoch": 3.004302978515625e-05, "model_forward_time": 0.024726390838623047, "step": 19689 }, { "epoch": 3.004302978515625e-05, "step": 19689, "training_step_time": 0.15761280059814453 }, { "epoch": 3.00445556640625e-05, "grad_norm": 0.11738183349370956, "learning_rate": 2.8960708213347366e-05, "loss": 0.0124, "step": 19690 }, { "epoch": 3.00445556640625e-05, "model_forward_time": 0.024455785751342773, "step": 19690 }, { "epoch": 3.00445556640625e-05, "step": 19690, "training_step_time": 0.10438394546508789 }, { "epoch": 3.004608154296875e-05, "model_forward_time": 0.02504253387451172, "step": 19691 }, { "epoch": 3.004608154296875e-05, "step": 19691, "training_step_time": 0.10595107078552246 }, { "epoch": 3.0047607421875e-05, "model_forward_time": 0.025060653686523438, "step": 19692 }, { "epoch": 3.0047607421875e-05, "step": 19692, "training_step_time": 0.11174416542053223 }, { "epoch": 3.004913330078125e-05, "model_forward_time": 0.024715423583984375, "step": 19693 }, { "epoch": 3.004913330078125e-05, "step": 19693, "training_step_time": 0.1072533130645752 }, { "epoch": 3.00506591796875e-05, "model_forward_time": 0.025342702865600586, "step": 19694 }, { "epoch": 3.00506591796875e-05, "step": 19694, "training_step_time": 0.20294976234436035 }, { "epoch": 3.005218505859375e-05, "model_forward_time": 0.02500438690185547, "step": 19695 }, { "epoch": 3.005218505859375e-05, "step": 19695, "training_step_time": 0.10686826705932617 }, { "epoch": 3.00537109375e-05, "model_forward_time": 0.02388310432434082, "step": 19696 }, { "epoch": 3.00537109375e-05, "step": 19696, "training_step_time": 0.10616850852966309 }, { "epoch": 3.005523681640625e-05, "model_forward_time": 0.025279760360717773, "step": 19697 }, { "epoch": 3.005523681640625e-05, "step": 19697, "training_step_time": 0.10956525802612305 }, { "epoch": 3.00567626953125e-05, "model_forward_time": 0.025069713592529297, "step": 19698 }, { "epoch": 3.00567626953125e-05, "step": 19698, "training_step_time": 0.1048283576965332 }, { "epoch": 3.005828857421875e-05, "model_forward_time": 0.024960756301879883, "step": 19699 }, { "epoch": 3.005828857421875e-05, "step": 19699, "training_step_time": 0.10488224029541016 }, { "epoch": 3.0059814453125e-05, "grad_norm": 0.10064233839511871, "learning_rate": 2.89107222827234e-05, "loss": 0.0069, "step": 19700 }, { "epoch": 3.0059814453125e-05, "model_forward_time": 0.025174617767333984, "step": 19700 }, { "epoch": 3.0059814453125e-05, "step": 19700, "training_step_time": 0.10704684257507324 }, { "epoch": 3.006134033203125e-05, "model_forward_time": 0.02544999122619629, "step": 19701 }, { "epoch": 3.006134033203125e-05, "step": 19701, "training_step_time": 0.1064295768737793 }, { "epoch": 3.00628662109375e-05, "model_forward_time": 0.025246381759643555, "step": 19702 }, { "epoch": 3.00628662109375e-05, "step": 19702, "training_step_time": 0.10637378692626953 }, { "epoch": 3.006439208984375e-05, "model_forward_time": 0.025086641311645508, "step": 19703 }, { "epoch": 3.006439208984375e-05, "step": 19703, "training_step_time": 0.10762429237365723 }, { "epoch": 3.006591796875e-05, "model_forward_time": 0.025159120559692383, "step": 19704 }, { "epoch": 3.006591796875e-05, "step": 19704, "training_step_time": 0.10619640350341797 }, { "epoch": 3.006744384765625e-05, "model_forward_time": 0.027978897094726562, "step": 19705 }, { "epoch": 3.006744384765625e-05, "step": 19705, "training_step_time": 0.10939788818359375 }, { "epoch": 3.00689697265625e-05, "model_forward_time": 0.025289297103881836, "step": 19706 }, { "epoch": 3.00689697265625e-05, "step": 19706, "training_step_time": 0.10675477981567383 }, { "epoch": 3.007049560546875e-05, "model_forward_time": 0.025128602981567383, "step": 19707 }, { "epoch": 3.007049560546875e-05, "step": 19707, "training_step_time": 0.10494041442871094 }, { "epoch": 3.0072021484375e-05, "model_forward_time": 0.0251772403717041, "step": 19708 }, { "epoch": 3.0072021484375e-05, "step": 19708, "training_step_time": 0.10567831993103027 }, { "epoch": 3.007354736328125e-05, "model_forward_time": 0.02515888214111328, "step": 19709 }, { "epoch": 3.007354736328125e-05, "step": 19709, "training_step_time": 0.13473892211914062 }, { "epoch": 3.00750732421875e-05, "grad_norm": 0.08206330239772797, "learning_rate": 2.8860761977560436e-05, "loss": 0.0101, "step": 19710 }, { "epoch": 3.00750732421875e-05, "model_forward_time": 0.025106430053710938, "step": 19710 }, { "epoch": 3.00750732421875e-05, "step": 19710, "training_step_time": 0.10725617408752441 }, { "epoch": 3.007659912109375e-05, "model_forward_time": 0.02534770965576172, "step": 19711 }, { "epoch": 3.007659912109375e-05, "step": 19711, "training_step_time": 0.13705062866210938 }, { "epoch": 3.0078125e-05, "model_forward_time": 0.025715351104736328, "step": 19712 }, { "epoch": 3.0078125e-05, "step": 19712, "training_step_time": 0.1617591381072998 }, { "epoch": 3.007965087890625e-05, "model_forward_time": 0.02455592155456543, "step": 19713 }, { "epoch": 3.007965087890625e-05, "step": 19713, "training_step_time": 0.21434688568115234 }, { "epoch": 3.00811767578125e-05, "model_forward_time": 0.024642229080200195, "step": 19714 }, { "epoch": 3.00811767578125e-05, "step": 19714, "training_step_time": 0.11678814888000488 }, { "epoch": 3.008270263671875e-05, "model_forward_time": 0.024316787719726562, "step": 19715 }, { "epoch": 3.008270263671875e-05, "step": 19715, "training_step_time": 0.1009368896484375 }, { "epoch": 3.0084228515625e-05, "model_forward_time": 0.025431394577026367, "step": 19716 }, { "epoch": 3.0084228515625e-05, "step": 19716, "training_step_time": 0.10310125350952148 }, { "epoch": 3.008575439453125e-05, "model_forward_time": 0.02516651153564453, "step": 19717 }, { "epoch": 3.008575439453125e-05, "step": 19717, "training_step_time": 0.10382294654846191 }, { "epoch": 3.00872802734375e-05, "model_forward_time": 0.025153636932373047, "step": 19718 }, { "epoch": 3.00872802734375e-05, "step": 19718, "training_step_time": 0.10476183891296387 }, { "epoch": 3.008880615234375e-05, "model_forward_time": 0.025460481643676758, "step": 19719 }, { "epoch": 3.008880615234375e-05, "step": 19719, "training_step_time": 0.1080467700958252 }, { "epoch": 3.009033203125e-05, "grad_norm": 0.3016381859779358, "learning_rate": 2.881082735856499e-05, "loss": 0.0107, "step": 19720 }, { "epoch": 3.009033203125e-05, "model_forward_time": 0.025172948837280273, "step": 19720 }, { "epoch": 3.009033203125e-05, "step": 19720, "training_step_time": 0.11602568626403809 }, { "epoch": 3.009185791015625e-05, "model_forward_time": 0.025673627853393555, "step": 19721 }, { "epoch": 3.009185791015625e-05, "step": 19721, "training_step_time": 0.1671898365020752 }, { "epoch": 3.00933837890625e-05, "model_forward_time": 0.02429342269897461, "step": 19722 }, { "epoch": 3.00933837890625e-05, "step": 19722, "training_step_time": 0.26038360595703125 }, { "epoch": 3.009490966796875e-05, "model_forward_time": 0.02397298812866211, "step": 19723 }, { "epoch": 3.009490966796875e-05, "step": 19723, "training_step_time": 0.2115795612335205 }, { "epoch": 3.0096435546875e-05, "model_forward_time": 0.024787187576293945, "step": 19724 }, { "epoch": 3.0096435546875e-05, "step": 19724, "training_step_time": 0.21196889877319336 }, { "epoch": 3.009796142578125e-05, "model_forward_time": 0.02483201026916504, "step": 19725 }, { "epoch": 3.009796142578125e-05, "step": 19725, "training_step_time": 0.20707225799560547 }, { "epoch": 3.00994873046875e-05, "model_forward_time": 0.025256872177124023, "step": 19726 }, { "epoch": 3.00994873046875e-05, "step": 19726, "training_step_time": 0.19124531745910645 }, { "epoch": 3.010101318359375e-05, "model_forward_time": 0.02504706382751465, "step": 19727 }, { "epoch": 3.010101318359375e-05, "step": 19727, "training_step_time": 0.10524868965148926 }, { "epoch": 3.01025390625e-05, "model_forward_time": 0.024422645568847656, "step": 19728 }, { "epoch": 3.01025390625e-05, "step": 19728, "training_step_time": 0.13104915618896484 }, { "epoch": 3.010406494140625e-05, "model_forward_time": 0.025175094604492188, "step": 19729 }, { "epoch": 3.010406494140625e-05, "step": 19729, "training_step_time": 0.13187837600708008 }, { "epoch": 3.01055908203125e-05, "grad_norm": 0.3020986020565033, "learning_rate": 2.8760918486412292e-05, "loss": 0.0082, "step": 19730 }, { "epoch": 3.01055908203125e-05, "model_forward_time": 0.02501058578491211, "step": 19730 }, { "epoch": 3.01055908203125e-05, "step": 19730, "training_step_time": 0.11344385147094727 }, { "epoch": 3.010711669921875e-05, "model_forward_time": 0.02485489845275879, "step": 19731 }, { "epoch": 3.010711669921875e-05, "step": 19731, "training_step_time": 0.1938316822052002 }, { "epoch": 3.0108642578125e-05, "model_forward_time": 0.023894071578979492, "step": 19732 }, { "epoch": 3.0108642578125e-05, "step": 19732, "training_step_time": 0.14981698989868164 }, { "epoch": 3.011016845703125e-05, "model_forward_time": 0.02461981773376465, "step": 19733 }, { "epoch": 3.011016845703125e-05, "step": 19733, "training_step_time": 0.12836933135986328 }, { "epoch": 3.01116943359375e-05, "model_forward_time": 0.022846460342407227, "step": 19734 }, { "epoch": 3.01116943359375e-05, "step": 19734, "training_step_time": 0.19185757637023926 }, { "epoch": 3.011322021484375e-05, "model_forward_time": 0.02409219741821289, "step": 19735 }, { "epoch": 3.011322021484375e-05, "step": 19735, "training_step_time": 0.11886119842529297 }, { "epoch": 3.011474609375e-05, "model_forward_time": 0.022912025451660156, "step": 19736 }, { "epoch": 3.011474609375e-05, "step": 19736, "training_step_time": 0.19974970817565918 }, { "epoch": 3.011627197265625e-05, "model_forward_time": 0.024472475051879883, "step": 19737 }, { "epoch": 3.011627197265625e-05, "step": 19737, "training_step_time": 0.11124706268310547 }, { "epoch": 3.01177978515625e-05, "model_forward_time": 0.024484872817993164, "step": 19738 }, { "epoch": 3.01177978515625e-05, "step": 19738, "training_step_time": 0.10616135597229004 }, { "epoch": 3.011932373046875e-05, "model_forward_time": 0.023968935012817383, "step": 19739 }, { "epoch": 3.011932373046875e-05, "step": 19739, "training_step_time": 0.10559701919555664 }, { "epoch": 3.0120849609375e-05, "grad_norm": 0.18054917454719543, "learning_rate": 2.8711035421746367e-05, "loss": 0.008, "step": 19740 }, { "epoch": 3.0120849609375e-05, "model_forward_time": 0.025109529495239258, "step": 19740 }, { "epoch": 3.0120849609375e-05, "step": 19740, "training_step_time": 0.10874342918395996 }, { "epoch": 3.012237548828125e-05, "model_forward_time": 0.02528238296508789, "step": 19741 }, { "epoch": 3.012237548828125e-05, "step": 19741, "training_step_time": 0.10560011863708496 }, { "epoch": 3.01239013671875e-05, "model_forward_time": 0.025239229202270508, "step": 19742 }, { "epoch": 3.01239013671875e-05, "step": 19742, "training_step_time": 0.10644865036010742 }, { "epoch": 3.012542724609375e-05, "model_forward_time": 0.025270938873291016, "step": 19743 }, { "epoch": 3.012542724609375e-05, "step": 19743, "training_step_time": 0.10535812377929688 }, { "epoch": 3.0126953125e-05, "model_forward_time": 0.02537393569946289, "step": 19744 }, { "epoch": 3.0126953125e-05, "step": 19744, "training_step_time": 0.1062171459197998 }, { "epoch": 3.012847900390625e-05, "model_forward_time": 0.02492380142211914, "step": 19745 }, { "epoch": 3.012847900390625e-05, "step": 19745, "training_step_time": 0.10589098930358887 }, { "epoch": 3.01300048828125e-05, "model_forward_time": 0.02548384666442871, "step": 19746 }, { "epoch": 3.01300048828125e-05, "step": 19746, "training_step_time": 0.10793066024780273 }, { "epoch": 3.013153076171875e-05, "model_forward_time": 0.02533864974975586, "step": 19747 }, { "epoch": 3.013153076171875e-05, "step": 19747, "training_step_time": 0.10680818557739258 }, { "epoch": 3.0133056640625e-05, "model_forward_time": 0.02514958381652832, "step": 19748 }, { "epoch": 3.0133056640625e-05, "step": 19748, "training_step_time": 0.10398435592651367 }, { "epoch": 3.013458251953125e-05, "model_forward_time": 0.025442838668823242, "step": 19749 }, { "epoch": 3.013458251953125e-05, "step": 19749, "training_step_time": 0.10552382469177246 }, { "epoch": 3.01361083984375e-05, "grad_norm": 0.2245168834924698, "learning_rate": 2.866117822517982e-05, "loss": 0.0055, "step": 19750 }, { "epoch": 3.01361083984375e-05, "model_forward_time": 0.024711132049560547, "step": 19750 }, { "epoch": 3.01361083984375e-05, "step": 19750, "training_step_time": 0.18805241584777832 }, { "epoch": 3.013763427734375e-05, "model_forward_time": 0.024533748626708984, "step": 19751 }, { "epoch": 3.013763427734375e-05, "step": 19751, "training_step_time": 0.11631083488464355 }, { "epoch": 3.013916015625e-05, "model_forward_time": 0.02417159080505371, "step": 19752 }, { "epoch": 3.013916015625e-05, "step": 19752, "training_step_time": 0.132887601852417 }, { "epoch": 3.014068603515625e-05, "model_forward_time": 0.02510380744934082, "step": 19753 }, { "epoch": 3.014068603515625e-05, "step": 19753, "training_step_time": 0.1390397548675537 }, { "epoch": 3.01422119140625e-05, "model_forward_time": 0.024521827697753906, "step": 19754 }, { "epoch": 3.01422119140625e-05, "step": 19754, "training_step_time": 0.11911988258361816 }, { "epoch": 3.014373779296875e-05, "model_forward_time": 0.024769306182861328, "step": 19755 }, { "epoch": 3.014373779296875e-05, "step": 19755, "training_step_time": 0.12621855735778809 }, { "epoch": 3.0145263671875e-05, "model_forward_time": 0.025578975677490234, "step": 19756 }, { "epoch": 3.0145263671875e-05, "step": 19756, "training_step_time": 0.10920333862304688 }, { "epoch": 3.014678955078125e-05, "model_forward_time": 0.02537822723388672, "step": 19757 }, { "epoch": 3.014678955078125e-05, "step": 19757, "training_step_time": 0.10340428352355957 }, { "epoch": 3.01483154296875e-05, "model_forward_time": 0.02498316764831543, "step": 19758 }, { "epoch": 3.01483154296875e-05, "step": 19758, "training_step_time": 0.10287833213806152 }, { "epoch": 3.014984130859375e-05, "model_forward_time": 0.02574920654296875, "step": 19759 }, { "epoch": 3.014984130859375e-05, "step": 19759, "training_step_time": 0.11110448837280273 }, { "epoch": 3.01513671875e-05, "grad_norm": 0.29145142436027527, "learning_rate": 2.861134695729385e-05, "loss": 0.0146, "step": 19760 }, { "epoch": 3.01513671875e-05, "model_forward_time": 0.025334596633911133, "step": 19760 }, { "epoch": 3.01513671875e-05, "step": 19760, "training_step_time": 0.1078188419342041 }, { "epoch": 3.015289306640625e-05, "model_forward_time": 0.025133609771728516, "step": 19761 }, { "epoch": 3.015289306640625e-05, "step": 19761, "training_step_time": 0.14405155181884766 }, { "epoch": 3.01544189453125e-05, "model_forward_time": 0.025588035583496094, "step": 19762 }, { "epoch": 3.01544189453125e-05, "step": 19762, "training_step_time": 0.11105871200561523 }, { "epoch": 3.015594482421875e-05, "model_forward_time": 0.0244295597076416, "step": 19763 }, { "epoch": 3.015594482421875e-05, "step": 19763, "training_step_time": 0.15865612030029297 }, { "epoch": 3.0157470703125e-05, "model_forward_time": 0.024277210235595703, "step": 19764 }, { "epoch": 3.0157470703125e-05, "step": 19764, "training_step_time": 0.15872550010681152 }, { "epoch": 3.015899658203125e-05, "model_forward_time": 0.02430272102355957, "step": 19765 }, { "epoch": 3.015899658203125e-05, "step": 19765, "training_step_time": 0.12417864799499512 }, { "epoch": 3.01605224609375e-05, "model_forward_time": 0.024353742599487305, "step": 19766 }, { "epoch": 3.01605224609375e-05, "step": 19766, "training_step_time": 0.11871552467346191 }, { "epoch": 3.016204833984375e-05, "model_forward_time": 0.024990081787109375, "step": 19767 }, { "epoch": 3.016204833984375e-05, "step": 19767, "training_step_time": 0.19194269180297852 }, { "epoch": 3.016357421875e-05, "model_forward_time": 0.024880647659301758, "step": 19768 }, { "epoch": 3.016357421875e-05, "step": 19768, "training_step_time": 0.11201882362365723 }, { "epoch": 3.016510009765625e-05, "model_forward_time": 0.024860858917236328, "step": 19769 }, { "epoch": 3.016510009765625e-05, "step": 19769, "training_step_time": 0.15588951110839844 }, { "epoch": 3.01666259765625e-05, "grad_norm": 0.25361666083335876, "learning_rate": 2.8561541678638142e-05, "loss": 0.0076, "step": 19770 }, { "epoch": 3.01666259765625e-05, "model_forward_time": 0.024788618087768555, "step": 19770 }, { "epoch": 3.01666259765625e-05, "step": 19770, "training_step_time": 0.14987468719482422 }, { "epoch": 3.016815185546875e-05, "model_forward_time": 0.024391651153564453, "step": 19771 }, { "epoch": 3.016815185546875e-05, "step": 19771, "training_step_time": 0.14062976837158203 }, { "epoch": 3.0169677734375e-05, "model_forward_time": 0.02464890480041504, "step": 19772 }, { "epoch": 3.0169677734375e-05, "step": 19772, "training_step_time": 0.10789680480957031 }, { "epoch": 3.017120361328125e-05, "model_forward_time": 0.02524733543395996, "step": 19773 }, { "epoch": 3.017120361328125e-05, "step": 19773, "training_step_time": 0.18741989135742188 }, { "epoch": 3.01727294921875e-05, "model_forward_time": 0.02436065673828125, "step": 19774 }, { "epoch": 3.01727294921875e-05, "step": 19774, "training_step_time": 0.1067206859588623 }, { "epoch": 3.017425537109375e-05, "model_forward_time": 0.024643898010253906, "step": 19775 }, { "epoch": 3.017425537109375e-05, "step": 19775, "training_step_time": 0.11038470268249512 }, { "epoch": 3.017578125e-05, "model_forward_time": 0.025953054428100586, "step": 19776 }, { "epoch": 3.017578125e-05, "step": 19776, "training_step_time": 0.13510823249816895 }, { "epoch": 3.017730712890625e-05, "model_forward_time": 0.025248050689697266, "step": 19777 }, { "epoch": 3.017730712890625e-05, "step": 19777, "training_step_time": 0.10892057418823242 }, { "epoch": 3.01788330078125e-05, "model_forward_time": 0.025916099548339844, "step": 19778 }, { "epoch": 3.01788330078125e-05, "step": 19778, "training_step_time": 0.1108860969543457 }, { "epoch": 3.018035888671875e-05, "model_forward_time": 0.025270938873291016, "step": 19779 }, { "epoch": 3.018035888671875e-05, "step": 19779, "training_step_time": 0.11475658416748047 }, { "epoch": 3.0181884765625e-05, "grad_norm": 0.3906853497028351, "learning_rate": 2.8511762449730795e-05, "loss": 0.0115, "step": 19780 }, { "epoch": 3.0181884765625e-05, "model_forward_time": 0.025171518325805664, "step": 19780 }, { "epoch": 3.0181884765625e-05, "step": 19780, "training_step_time": 0.10463690757751465 }, { "epoch": 3.018341064453125e-05, "model_forward_time": 0.025354862213134766, "step": 19781 }, { "epoch": 3.018341064453125e-05, "step": 19781, "training_step_time": 0.1950528621673584 }, { "epoch": 3.01849365234375e-05, "model_forward_time": 0.024790287017822266, "step": 19782 }, { "epoch": 3.01849365234375e-05, "step": 19782, "training_step_time": 0.10386395454406738 }, { "epoch": 3.018646240234375e-05, "model_forward_time": 0.024918794631958008, "step": 19783 }, { "epoch": 3.018646240234375e-05, "step": 19783, "training_step_time": 0.1031959056854248 }, { "epoch": 3.018798828125e-05, "model_forward_time": 0.0251462459564209, "step": 19784 }, { "epoch": 3.018798828125e-05, "step": 19784, "training_step_time": 0.10685253143310547 }, { "epoch": 3.018951416015625e-05, "model_forward_time": 0.0254213809967041, "step": 19785 }, { "epoch": 3.018951416015625e-05, "step": 19785, "training_step_time": 0.10639286041259766 }, { "epoch": 3.01910400390625e-05, "model_forward_time": 0.024895429611206055, "step": 19786 }, { "epoch": 3.01910400390625e-05, "step": 19786, "training_step_time": 0.10412025451660156 }, { "epoch": 3.019256591796875e-05, "model_forward_time": 0.025119304656982422, "step": 19787 }, { "epoch": 3.019256591796875e-05, "step": 19787, "training_step_time": 0.10592508316040039 }, { "epoch": 3.0194091796875e-05, "model_forward_time": 0.025200366973876953, "step": 19788 }, { "epoch": 3.0194091796875e-05, "step": 19788, "training_step_time": 0.10486984252929688 }, { "epoch": 3.019561767578125e-05, "model_forward_time": 0.025241613388061523, "step": 19789 }, { "epoch": 3.019561767578125e-05, "step": 19789, "training_step_time": 0.11138772964477539 }, { "epoch": 3.01971435546875e-05, "grad_norm": 0.23596696555614471, "learning_rate": 2.846200933105829e-05, "loss": 0.0141, "step": 19790 }, { "epoch": 3.01971435546875e-05, "model_forward_time": 0.02523493766784668, "step": 19790 }, { "epoch": 3.01971435546875e-05, "step": 19790, "training_step_time": 0.10589385032653809 }, { "epoch": 3.019866943359375e-05, "model_forward_time": 0.0256655216217041, "step": 19791 }, { "epoch": 3.019866943359375e-05, "step": 19791, "training_step_time": 0.10993361473083496 }, { "epoch": 3.02001953125e-05, "model_forward_time": 0.024235963821411133, "step": 19792 }, { "epoch": 3.02001953125e-05, "step": 19792, "training_step_time": 0.12194037437438965 }, { "epoch": 3.020172119140625e-05, "model_forward_time": 0.025089740753173828, "step": 19793 }, { "epoch": 3.020172119140625e-05, "step": 19793, "training_step_time": 0.1191873550415039 }, { "epoch": 3.02032470703125e-05, "model_forward_time": 0.025371789932250977, "step": 19794 }, { "epoch": 3.02032470703125e-05, "step": 19794, "training_step_time": 0.11788129806518555 }, { "epoch": 3.020477294921875e-05, "model_forward_time": 0.025250911712646484, "step": 19795 }, { "epoch": 3.020477294921875e-05, "step": 19795, "training_step_time": 0.14443731307983398 }, { "epoch": 3.0206298828125e-05, "model_forward_time": 0.025148630142211914, "step": 19796 }, { "epoch": 3.0206298828125e-05, "step": 19796, "training_step_time": 0.12986183166503906 }, { "epoch": 3.020782470703125e-05, "model_forward_time": 0.02463364601135254, "step": 19797 }, { "epoch": 3.020782470703125e-05, "step": 19797, "training_step_time": 0.11286616325378418 }, { "epoch": 3.02093505859375e-05, "model_forward_time": 0.025294065475463867, "step": 19798 }, { "epoch": 3.02093505859375e-05, "step": 19798, "training_step_time": 0.11988234519958496 }, { "epoch": 3.021087646484375e-05, "model_forward_time": 0.025386810302734375, "step": 19799 }, { "epoch": 3.021087646484375e-05, "step": 19799, "training_step_time": 0.11135029792785645 }, { "epoch": 3.021240234375e-05, "grad_norm": 0.16461493074893951, "learning_rate": 2.8412282383075363e-05, "loss": 0.0088, "step": 19800 }, { "epoch": 3.021240234375e-05, "model_forward_time": 0.024890422821044922, "step": 19800 }, { "epoch": 3.021240234375e-05, "step": 19800, "training_step_time": 0.11123418807983398 }, { "epoch": 3.021392822265625e-05, "model_forward_time": 0.0250089168548584, "step": 19801 }, { "epoch": 3.021392822265625e-05, "step": 19801, "training_step_time": 0.11397910118103027 }, { "epoch": 3.02154541015625e-05, "model_forward_time": 0.02497100830078125, "step": 19802 }, { "epoch": 3.02154541015625e-05, "step": 19802, "training_step_time": 0.11146736145019531 }, { "epoch": 3.021697998046875e-05, "model_forward_time": 0.02507925033569336, "step": 19803 }, { "epoch": 3.021697998046875e-05, "step": 19803, "training_step_time": 0.10859322547912598 }, { "epoch": 3.0218505859375e-05, "model_forward_time": 0.024953603744506836, "step": 19804 }, { "epoch": 3.0218505859375e-05, "step": 19804, "training_step_time": 0.10517644882202148 }, { "epoch": 3.022003173828125e-05, "model_forward_time": 0.02515411376953125, "step": 19805 }, { "epoch": 3.022003173828125e-05, "step": 19805, "training_step_time": 0.11144542694091797 }, { "epoch": 3.02215576171875e-05, "model_forward_time": 0.02464437484741211, "step": 19806 }, { "epoch": 3.02215576171875e-05, "step": 19806, "training_step_time": 0.10696268081665039 }, { "epoch": 3.022308349609375e-05, "model_forward_time": 0.024903297424316406, "step": 19807 }, { "epoch": 3.022308349609375e-05, "step": 19807, "training_step_time": 0.10630345344543457 }, { "epoch": 3.0224609375e-05, "model_forward_time": 0.02466726303100586, "step": 19808 }, { "epoch": 3.0224609375e-05, "step": 19808, "training_step_time": 0.10550141334533691 }, { "epoch": 3.022613525390625e-05, "model_forward_time": 0.02500176429748535, "step": 19809 }, { "epoch": 3.022613525390625e-05, "step": 19809, "training_step_time": 0.10499358177185059 }, { "epoch": 3.02276611328125e-05, "grad_norm": 0.19356225430965424, "learning_rate": 2.8362581666204918e-05, "loss": 0.0079, "step": 19810 }, { "epoch": 3.02276611328125e-05, "model_forward_time": 0.026335716247558594, "step": 19810 }, { "epoch": 3.02276611328125e-05, "step": 19810, "training_step_time": 0.10510730743408203 }, { "epoch": 3.022918701171875e-05, "model_forward_time": 0.024923324584960938, "step": 19811 }, { "epoch": 3.022918701171875e-05, "step": 19811, "training_step_time": 0.1270143985748291 }, { "epoch": 3.0230712890625e-05, "model_forward_time": 0.024589061737060547, "step": 19812 }, { "epoch": 3.0230712890625e-05, "step": 19812, "training_step_time": 0.11534667015075684 }, { "epoch": 3.023223876953125e-05, "model_forward_time": 0.025110960006713867, "step": 19813 }, { "epoch": 3.023223876953125e-05, "step": 19813, "training_step_time": 0.2022702693939209 }, { "epoch": 3.02337646484375e-05, "model_forward_time": 0.024135351181030273, "step": 19814 }, { "epoch": 3.02337646484375e-05, "step": 19814, "training_step_time": 0.13481831550598145 }, { "epoch": 3.023529052734375e-05, "model_forward_time": 0.024395465850830078, "step": 19815 }, { "epoch": 3.023529052734375e-05, "step": 19815, "training_step_time": 0.2055823802947998 }, { "epoch": 3.023681640625e-05, "model_forward_time": 0.024216651916503906, "step": 19816 }, { "epoch": 3.023681640625e-05, "step": 19816, "training_step_time": 0.12240171432495117 }, { "epoch": 3.023834228515625e-05, "model_forward_time": 0.024003267288208008, "step": 19817 }, { "epoch": 3.023834228515625e-05, "step": 19817, "training_step_time": 0.10724472999572754 }, { "epoch": 3.02398681640625e-05, "model_forward_time": 0.025269269943237305, "step": 19818 }, { "epoch": 3.02398681640625e-05, "step": 19818, "training_step_time": 0.10634040832519531 }, { "epoch": 3.024139404296875e-05, "model_forward_time": 0.025299787521362305, "step": 19819 }, { "epoch": 3.024139404296875e-05, "step": 19819, "training_step_time": 0.152357816696167 }, { "epoch": 3.0242919921875e-05, "grad_norm": 0.12694837152957916, "learning_rate": 2.8312907240838027e-05, "loss": 0.0085, "step": 19820 }, { "epoch": 3.0242919921875e-05, "model_forward_time": 0.02709197998046875, "step": 19820 }, { "epoch": 3.0242919921875e-05, "step": 19820, "training_step_time": 0.12467336654663086 }, { "epoch": 3.024444580078125e-05, "model_forward_time": 0.024280548095703125, "step": 19821 }, { "epoch": 3.024444580078125e-05, "step": 19821, "training_step_time": 0.11790108680725098 }, { "epoch": 3.02459716796875e-05, "model_forward_time": 0.023859739303588867, "step": 19822 }, { "epoch": 3.02459716796875e-05, "step": 19822, "training_step_time": 0.1369180679321289 }, { "epoch": 3.024749755859375e-05, "model_forward_time": 0.02498769760131836, "step": 19823 }, { "epoch": 3.024749755859375e-05, "step": 19823, "training_step_time": 0.12589168548583984 }, { "epoch": 3.02490234375e-05, "model_forward_time": 0.023369312286376953, "step": 19824 }, { "epoch": 3.02490234375e-05, "step": 19824, "training_step_time": 0.20339035987854004 }, { "epoch": 3.025054931640625e-05, "model_forward_time": 0.02436661720275879, "step": 19825 }, { "epoch": 3.025054931640625e-05, "step": 19825, "training_step_time": 0.12917017936706543 }, { "epoch": 3.02520751953125e-05, "model_forward_time": 0.023841142654418945, "step": 19826 }, { "epoch": 3.02520751953125e-05, "step": 19826, "training_step_time": 0.1820850372314453 }, { "epoch": 3.025360107421875e-05, "model_forward_time": 0.024190425872802734, "step": 19827 }, { "epoch": 3.025360107421875e-05, "step": 19827, "training_step_time": 0.1123194694519043 }, { "epoch": 3.0255126953125e-05, "model_forward_time": 0.024533748626708984, "step": 19828 }, { "epoch": 3.0255126953125e-05, "step": 19828, "training_step_time": 0.10678887367248535 }, { "epoch": 3.025665283203125e-05, "model_forward_time": 0.02523946762084961, "step": 19829 }, { "epoch": 3.025665283203125e-05, "step": 19829, "training_step_time": 0.10825634002685547 }, { "epoch": 3.02581787109375e-05, "grad_norm": 0.1579444855451584, "learning_rate": 2.8263259167333777e-05, "loss": 0.0038, "step": 19830 }, { "epoch": 3.02581787109375e-05, "model_forward_time": 0.024755001068115234, "step": 19830 }, { "epoch": 3.02581787109375e-05, "step": 19830, "training_step_time": 0.10955500602722168 }, { "epoch": 3.025970458984375e-05, "model_forward_time": 0.025429964065551758, "step": 19831 }, { "epoch": 3.025970458984375e-05, "step": 19831, "training_step_time": 0.10622549057006836 }, { "epoch": 3.026123046875e-05, "model_forward_time": 0.025336742401123047, "step": 19832 }, { "epoch": 3.026123046875e-05, "step": 19832, "training_step_time": 0.10773372650146484 }, { "epoch": 3.026275634765625e-05, "model_forward_time": 0.025203227996826172, "step": 19833 }, { "epoch": 3.026275634765625e-05, "step": 19833, "training_step_time": 0.10687088966369629 }, { "epoch": 3.02642822265625e-05, "model_forward_time": 0.025011301040649414, "step": 19834 }, { "epoch": 3.02642822265625e-05, "step": 19834, "training_step_time": 0.10758781433105469 }, { "epoch": 3.026580810546875e-05, "model_forward_time": 0.025038719177246094, "step": 19835 }, { "epoch": 3.026580810546875e-05, "step": 19835, "training_step_time": 0.10650014877319336 }, { "epoch": 3.0267333984375e-05, "model_forward_time": 0.025045394897460938, "step": 19836 }, { "epoch": 3.0267333984375e-05, "step": 19836, "training_step_time": 0.10807585716247559 }, { "epoch": 3.026885986328125e-05, "model_forward_time": 0.0250246524810791, "step": 19837 }, { "epoch": 3.026885986328125e-05, "step": 19837, "training_step_time": 0.10412359237670898 }, { "epoch": 3.02703857421875e-05, "model_forward_time": 0.0250241756439209, "step": 19838 }, { "epoch": 3.02703857421875e-05, "step": 19838, "training_step_time": 0.10488319396972656 }, { "epoch": 3.027191162109375e-05, "model_forward_time": 0.025052785873413086, "step": 19839 }, { "epoch": 3.027191162109375e-05, "step": 19839, "training_step_time": 0.10551786422729492 }, { "epoch": 3.02734375e-05, "grad_norm": 0.19443681836128235, "learning_rate": 2.8213637506019304e-05, "loss": 0.0059, "step": 19840 }, { "epoch": 3.02734375e-05, "model_forward_time": 0.025023460388183594, "step": 19840 }, { "epoch": 3.02734375e-05, "step": 19840, "training_step_time": 0.10430026054382324 }, { "epoch": 3.027496337890625e-05, "model_forward_time": 0.025120258331298828, "step": 19841 }, { "epoch": 3.027496337890625e-05, "step": 19841, "training_step_time": 0.10541749000549316 }, { "epoch": 3.02764892578125e-05, "model_forward_time": 0.024962663650512695, "step": 19842 }, { "epoch": 3.02764892578125e-05, "step": 19842, "training_step_time": 0.1202249526977539 }, { "epoch": 3.027801513671875e-05, "model_forward_time": 0.02823781967163086, "step": 19843 }, { "epoch": 3.027801513671875e-05, "step": 19843, "training_step_time": 0.11669015884399414 }, { "epoch": 3.0279541015625e-05, "model_forward_time": 0.024492979049682617, "step": 19844 }, { "epoch": 3.0279541015625e-05, "step": 19844, "training_step_time": 0.13006806373596191 }, { "epoch": 3.028106689453125e-05, "model_forward_time": 0.024974584579467773, "step": 19845 }, { "epoch": 3.028106689453125e-05, "step": 19845, "training_step_time": 0.1073763370513916 }, { "epoch": 3.02825927734375e-05, "model_forward_time": 0.025258541107177734, "step": 19846 }, { "epoch": 3.02825927734375e-05, "step": 19846, "training_step_time": 0.18275856971740723 }, { "epoch": 3.028411865234375e-05, "model_forward_time": 0.02449321746826172, "step": 19847 }, { "epoch": 3.028411865234375e-05, "step": 19847, "training_step_time": 0.13634634017944336 }, { "epoch": 3.028564453125e-05, "model_forward_time": 0.02402663230895996, "step": 19848 }, { "epoch": 3.028564453125e-05, "step": 19848, "training_step_time": 0.1191554069519043 }, { "epoch": 3.028717041015625e-05, "model_forward_time": 0.02448868751525879, "step": 19849 }, { "epoch": 3.028717041015625e-05, "step": 19849, "training_step_time": 0.10529541969299316 }, { "epoch": 3.02886962890625e-05, "grad_norm": 0.631939709186554, "learning_rate": 2.8164042317189575e-05, "loss": 0.0165, "step": 19850 }, { "epoch": 3.02886962890625e-05, "model_forward_time": 0.025736093521118164, "step": 19850 }, { "epoch": 3.02886962890625e-05, "step": 19850, "training_step_time": 0.10680341720581055 }, { "epoch": 3.029022216796875e-05, "model_forward_time": 0.025399208068847656, "step": 19851 }, { "epoch": 3.029022216796875e-05, "step": 19851, "training_step_time": 0.10507965087890625 }, { "epoch": 3.0291748046875e-05, "model_forward_time": 0.02488112449645996, "step": 19852 }, { "epoch": 3.0291748046875e-05, "step": 19852, "training_step_time": 0.10835671424865723 }, { "epoch": 3.029327392578125e-05, "model_forward_time": 0.024398088455200195, "step": 19853 }, { "epoch": 3.029327392578125e-05, "step": 19853, "training_step_time": 0.10718417167663574 }, { "epoch": 3.02947998046875e-05, "model_forward_time": 0.024988174438476562, "step": 19854 }, { "epoch": 3.02947998046875e-05, "step": 19854, "training_step_time": 0.10629868507385254 }, { "epoch": 3.029632568359375e-05, "model_forward_time": 0.02503824234008789, "step": 19855 }, { "epoch": 3.029632568359375e-05, "step": 19855, "training_step_time": 0.1041262149810791 }, { "epoch": 3.02978515625e-05, "model_forward_time": 0.025023937225341797, "step": 19856 }, { "epoch": 3.02978515625e-05, "step": 19856, "training_step_time": 0.10327363014221191 }, { "epoch": 3.029937744140625e-05, "model_forward_time": 0.02416539192199707, "step": 19857 }, { "epoch": 3.029937744140625e-05, "step": 19857, "training_step_time": 0.10901689529418945 }, { "epoch": 3.03009033203125e-05, "model_forward_time": 0.02634429931640625, "step": 19858 }, { "epoch": 3.03009033203125e-05, "step": 19858, "training_step_time": 0.1147758960723877 }, { "epoch": 3.030242919921875e-05, "model_forward_time": 0.025261402130126953, "step": 19859 }, { "epoch": 3.030242919921875e-05, "step": 19859, "training_step_time": 0.11034345626831055 }, { "epoch": 3.0303955078125e-05, "grad_norm": 0.3624440133571625, "learning_rate": 2.811447366110741e-05, "loss": 0.0159, "step": 19860 }, { "epoch": 3.0303955078125e-05, "model_forward_time": 0.025297164916992188, "step": 19860 }, { "epoch": 3.0303955078125e-05, "step": 19860, "training_step_time": 0.21103334426879883 }, { "epoch": 3.030548095703125e-05, "model_forward_time": 0.024688005447387695, "step": 19861 }, { "epoch": 3.030548095703125e-05, "step": 19861, "training_step_time": 0.15964627265930176 }, { "epoch": 3.03070068359375e-05, "model_forward_time": 0.024794816970825195, "step": 19862 }, { "epoch": 3.03070068359375e-05, "step": 19862, "training_step_time": 0.18436074256896973 }, { "epoch": 3.030853271484375e-05, "model_forward_time": 0.024187564849853516, "step": 19863 }, { "epoch": 3.030853271484375e-05, "step": 19863, "training_step_time": 0.13009309768676758 }, { "epoch": 3.031005859375e-05, "model_forward_time": 0.02405405044555664, "step": 19864 }, { "epoch": 3.031005859375e-05, "step": 19864, "training_step_time": 0.11369442939758301 }, { "epoch": 3.031158447265625e-05, "model_forward_time": 0.024793148040771484, "step": 19865 }, { "epoch": 3.031158447265625e-05, "step": 19865, "training_step_time": 0.11489129066467285 }, { "epoch": 3.03131103515625e-05, "model_forward_time": 0.025031566619873047, "step": 19866 }, { "epoch": 3.03131103515625e-05, "step": 19866, "training_step_time": 0.10381817817687988 }, { "epoch": 3.031463623046875e-05, "model_forward_time": 0.025305986404418945, "step": 19867 }, { "epoch": 3.031463623046875e-05, "step": 19867, "training_step_time": 0.1776280403137207 }, { "epoch": 3.0316162109375e-05, "model_forward_time": 0.02449512481689453, "step": 19868 }, { "epoch": 3.0316162109375e-05, "step": 19868, "training_step_time": 0.17885732650756836 }, { "epoch": 3.031768798828125e-05, "model_forward_time": 0.02428150177001953, "step": 19869 }, { "epoch": 3.031768798828125e-05, "step": 19869, "training_step_time": 0.2342391014099121 }, { "epoch": 3.03192138671875e-05, "grad_norm": 0.1861395537853241, "learning_rate": 2.8064931598003436e-05, "loss": 0.0062, "step": 19870 }, { "epoch": 3.03192138671875e-05, "model_forward_time": 0.024134159088134766, "step": 19870 }, { "epoch": 3.03192138671875e-05, "step": 19870, "training_step_time": 0.19604158401489258 }, { "epoch": 3.032073974609375e-05, "model_forward_time": 0.02410888671875, "step": 19871 }, { "epoch": 3.032073974609375e-05, "step": 19871, "training_step_time": 0.1863994598388672 }, { "epoch": 3.0322265625e-05, "model_forward_time": 0.02411341667175293, "step": 19872 }, { "epoch": 3.0322265625e-05, "step": 19872, "training_step_time": 0.1749098300933838 }, { "epoch": 3.032379150390625e-05, "model_forward_time": 0.024414539337158203, "step": 19873 }, { "epoch": 3.032379150390625e-05, "step": 19873, "training_step_time": 0.15276575088500977 }, { "epoch": 3.03253173828125e-05, "model_forward_time": 0.024059057235717773, "step": 19874 }, { "epoch": 3.03253173828125e-05, "step": 19874, "training_step_time": 0.13388323783874512 }, { "epoch": 3.032684326171875e-05, "model_forward_time": 0.024216175079345703, "step": 19875 }, { "epoch": 3.032684326171875e-05, "step": 19875, "training_step_time": 0.12927699089050293 }, { "epoch": 3.0328369140625e-05, "model_forward_time": 0.02425861358642578, "step": 19876 }, { "epoch": 3.0328369140625e-05, "step": 19876, "training_step_time": 0.1265885829925537 }, { "epoch": 3.032989501953125e-05, "model_forward_time": 0.024297714233398438, "step": 19877 }, { "epoch": 3.032989501953125e-05, "step": 19877, "training_step_time": 0.12507915496826172 }, { "epoch": 3.03314208984375e-05, "model_forward_time": 0.02460479736328125, "step": 19878 }, { "epoch": 3.03314208984375e-05, "step": 19878, "training_step_time": 0.11876940727233887 }, { "epoch": 3.033294677734375e-05, "model_forward_time": 0.024849891662597656, "step": 19879 }, { "epoch": 3.033294677734375e-05, "step": 19879, "training_step_time": 0.11263322830200195 }, { "epoch": 3.033447265625e-05, "grad_norm": 0.2712714672088623, "learning_rate": 2.8015416188075893e-05, "loss": 0.0168, "step": 19880 }, { "epoch": 3.033447265625e-05, "model_forward_time": 0.02477431297302246, "step": 19880 }, { "epoch": 3.033447265625e-05, "step": 19880, "training_step_time": 0.11446380615234375 }, { "epoch": 3.033599853515625e-05, "model_forward_time": 0.025043725967407227, "step": 19881 }, { "epoch": 3.033599853515625e-05, "step": 19881, "training_step_time": 0.11242508888244629 }, { "epoch": 3.03375244140625e-05, "model_forward_time": 0.02487659454345703, "step": 19882 }, { "epoch": 3.03375244140625e-05, "step": 19882, "training_step_time": 0.11130809783935547 }, { "epoch": 3.033905029296875e-05, "model_forward_time": 0.02488875389099121, "step": 19883 }, { "epoch": 3.033905029296875e-05, "step": 19883, "training_step_time": 0.10930323600769043 }, { "epoch": 3.0340576171875e-05, "model_forward_time": 0.025063276290893555, "step": 19884 }, { "epoch": 3.0340576171875e-05, "step": 19884, "training_step_time": 0.18225908279418945 }, { "epoch": 3.034210205078125e-05, "model_forward_time": 0.024791240692138672, "step": 19885 }, { "epoch": 3.034210205078125e-05, "step": 19885, "training_step_time": 0.11960983276367188 }, { "epoch": 3.03436279296875e-05, "model_forward_time": 0.02390146255493164, "step": 19886 }, { "epoch": 3.03436279296875e-05, "step": 19886, "training_step_time": 0.13649940490722656 }, { "epoch": 3.034515380859375e-05, "model_forward_time": 0.02491307258605957, "step": 19887 }, { "epoch": 3.034515380859375e-05, "step": 19887, "training_step_time": 0.16174101829528809 }, { "epoch": 3.03466796875e-05, "model_forward_time": 0.024428367614746094, "step": 19888 }, { "epoch": 3.03466796875e-05, "step": 19888, "training_step_time": 0.21346759796142578 }, { "epoch": 3.034820556640625e-05, "model_forward_time": 0.023756742477416992, "step": 19889 }, { "epoch": 3.034820556640625e-05, "step": 19889, "training_step_time": 0.11511659622192383 }, { "epoch": 3.03497314453125e-05, "grad_norm": 0.1906876266002655, "learning_rate": 2.7965927491490705e-05, "loss": 0.0105, "step": 19890 }, { "epoch": 3.03497314453125e-05, "model_forward_time": 0.02425861358642578, "step": 19890 }, { "epoch": 3.03497314453125e-05, "step": 19890, "training_step_time": 0.10442972183227539 }, { "epoch": 3.035125732421875e-05, "model_forward_time": 0.024774789810180664, "step": 19891 }, { "epoch": 3.035125732421875e-05, "step": 19891, "training_step_time": 0.10831618309020996 }, { "epoch": 3.0352783203125e-05, "model_forward_time": 0.025073528289794922, "step": 19892 }, { "epoch": 3.0352783203125e-05, "step": 19892, "training_step_time": 0.10739636421203613 }, { "epoch": 3.035430908203125e-05, "model_forward_time": 0.02703714370727539, "step": 19893 }, { "epoch": 3.035430908203125e-05, "step": 19893, "training_step_time": 0.10930705070495605 }, { "epoch": 3.03558349609375e-05, "model_forward_time": 0.025168895721435547, "step": 19894 }, { "epoch": 3.03558349609375e-05, "step": 19894, "training_step_time": 0.10419726371765137 }, { "epoch": 3.035736083984375e-05, "model_forward_time": 0.024979591369628906, "step": 19895 }, { "epoch": 3.035736083984375e-05, "step": 19895, "training_step_time": 0.10649728775024414 }, { "epoch": 3.035888671875e-05, "model_forward_time": 0.02600574493408203, "step": 19896 }, { "epoch": 3.035888671875e-05, "step": 19896, "training_step_time": 0.10592317581176758 }, { "epoch": 3.036041259765625e-05, "model_forward_time": 0.025988101959228516, "step": 19897 }, { "epoch": 3.036041259765625e-05, "step": 19897, "training_step_time": 0.11029648780822754 }, { "epoch": 3.03619384765625e-05, "model_forward_time": 0.026530027389526367, "step": 19898 }, { "epoch": 3.03619384765625e-05, "step": 19898, "training_step_time": 0.10671520233154297 }, { "epoch": 3.036346435546875e-05, "model_forward_time": 0.025300979614257812, "step": 19899 }, { "epoch": 3.036346435546875e-05, "step": 19899, "training_step_time": 0.14664077758789062 }, { "epoch": 3.0364990234375e-05, "grad_norm": 0.3329494297504425, "learning_rate": 2.79164655683813e-05, "loss": 0.025, "step": 19900 }, { "epoch": 3.0364990234375e-05, "model_forward_time": 0.02467799186706543, "step": 19900 }, { "epoch": 3.0364990234375e-05, "step": 19900, "training_step_time": 0.1531839370727539 }, { "epoch": 3.036651611328125e-05, "model_forward_time": 0.024692058563232422, "step": 19901 }, { "epoch": 3.036651611328125e-05, "step": 19901, "training_step_time": 0.12768340110778809 }, { "epoch": 3.03680419921875e-05, "model_forward_time": 0.02463817596435547, "step": 19902 }, { "epoch": 3.03680419921875e-05, "step": 19902, "training_step_time": 0.13075494766235352 }, { "epoch": 3.036956787109375e-05, "model_forward_time": 0.025205612182617188, "step": 19903 }, { "epoch": 3.036956787109375e-05, "step": 19903, "training_step_time": 0.16486191749572754 }, { "epoch": 3.037109375e-05, "model_forward_time": 0.024573564529418945, "step": 19904 }, { "epoch": 3.037109375e-05, "step": 19904, "training_step_time": 0.1816844940185547 }, { "epoch": 3.037261962890625e-05, "model_forward_time": 0.024337291717529297, "step": 19905 }, { "epoch": 3.037261962890625e-05, "step": 19905, "training_step_time": 0.18549823760986328 }, { "epoch": 3.03741455078125e-05, "model_forward_time": 0.023953914642333984, "step": 19906 }, { "epoch": 3.03741455078125e-05, "step": 19906, "training_step_time": 0.11421084403991699 }, { "epoch": 3.037567138671875e-05, "model_forward_time": 0.024344682693481445, "step": 19907 }, { "epoch": 3.037567138671875e-05, "step": 19907, "training_step_time": 0.10966300964355469 }, { "epoch": 3.0377197265625e-05, "model_forward_time": 0.024970054626464844, "step": 19908 }, { "epoch": 3.0377197265625e-05, "step": 19908, "training_step_time": 0.1424696445465088 }, { "epoch": 3.037872314453125e-05, "model_forward_time": 0.02541661262512207, "step": 19909 }, { "epoch": 3.037872314453125e-05, "step": 19909, "training_step_time": 0.14164495468139648 }, { "epoch": 3.03802490234375e-05, "grad_norm": 0.27687445282936096, "learning_rate": 2.7867030478848577e-05, "loss": 0.0087, "step": 19910 }, { "epoch": 3.03802490234375e-05, "model_forward_time": 0.02445840835571289, "step": 19910 }, { "epoch": 3.03802490234375e-05, "step": 19910, "training_step_time": 0.11130261421203613 }, { "epoch": 3.038177490234375e-05, "model_forward_time": 0.024960041046142578, "step": 19911 }, { "epoch": 3.038177490234375e-05, "step": 19911, "training_step_time": 0.10811614990234375 }, { "epoch": 3.038330078125e-05, "model_forward_time": 0.025246143341064453, "step": 19912 }, { "epoch": 3.038330078125e-05, "step": 19912, "training_step_time": 0.1452932357788086 }, { "epoch": 3.038482666015625e-05, "model_forward_time": 0.025028705596923828, "step": 19913 }, { "epoch": 3.038482666015625e-05, "step": 19913, "training_step_time": 0.19161510467529297 }, { "epoch": 3.03863525390625e-05, "model_forward_time": 0.0239865779876709, "step": 19914 }, { "epoch": 3.03863525390625e-05, "step": 19914, "training_step_time": 0.1677379608154297 }, { "epoch": 3.038787841796875e-05, "model_forward_time": 0.02416229248046875, "step": 19915 }, { "epoch": 3.038787841796875e-05, "step": 19915, "training_step_time": 0.1450815200805664 }, { "epoch": 3.0389404296875e-05, "model_forward_time": 0.024320125579833984, "step": 19916 }, { "epoch": 3.0389404296875e-05, "step": 19916, "training_step_time": 0.14340710639953613 }, { "epoch": 3.039093017578125e-05, "model_forward_time": 0.024419784545898438, "step": 19917 }, { "epoch": 3.039093017578125e-05, "step": 19917, "training_step_time": 0.1277015209197998 }, { "epoch": 3.03924560546875e-05, "model_forward_time": 0.024507761001586914, "step": 19918 }, { "epoch": 3.03924560546875e-05, "step": 19918, "training_step_time": 0.12740445137023926 }, { "epoch": 3.039398193359375e-05, "model_forward_time": 0.02516627311706543, "step": 19919 }, { "epoch": 3.039398193359375e-05, "step": 19919, "training_step_time": 0.12455558776855469 }, { "epoch": 3.03955078125e-05, "grad_norm": 0.1977100521326065, "learning_rate": 2.7817622282960815e-05, "loss": 0.0089, "step": 19920 }, { "epoch": 3.03955078125e-05, "model_forward_time": 0.024837493896484375, "step": 19920 }, { "epoch": 3.03955078125e-05, "step": 19920, "training_step_time": 0.11716985702514648 }, { "epoch": 3.039703369140625e-05, "model_forward_time": 0.025469541549682617, "step": 19921 }, { "epoch": 3.039703369140625e-05, "step": 19921, "training_step_time": 0.11849093437194824 }, { "epoch": 3.03985595703125e-05, "model_forward_time": 0.02434563636779785, "step": 19922 }, { "epoch": 3.03985595703125e-05, "step": 19922, "training_step_time": 0.1139073371887207 }, { "epoch": 3.040008544921875e-05, "model_forward_time": 0.026251792907714844, "step": 19923 }, { "epoch": 3.040008544921875e-05, "step": 19923, "training_step_time": 0.10900497436523438 }, { "epoch": 3.0401611328125e-05, "model_forward_time": 0.025104045867919922, "step": 19924 }, { "epoch": 3.0401611328125e-05, "step": 19924, "training_step_time": 0.11059975624084473 }, { "epoch": 3.040313720703125e-05, "model_forward_time": 0.025193452835083008, "step": 19925 }, { "epoch": 3.040313720703125e-05, "step": 19925, "training_step_time": 0.10805130004882812 }, { "epoch": 3.04046630859375e-05, "model_forward_time": 0.02548384666442871, "step": 19926 }, { "epoch": 3.04046630859375e-05, "step": 19926, "training_step_time": 0.1086888313293457 }, { "epoch": 3.040618896484375e-05, "model_forward_time": 0.02505779266357422, "step": 19927 }, { "epoch": 3.040618896484375e-05, "step": 19927, "training_step_time": 0.1521008014678955 }, { "epoch": 3.040771484375e-05, "model_forward_time": 0.025010347366333008, "step": 19928 }, { "epoch": 3.040771484375e-05, "step": 19928, "training_step_time": 0.12192797660827637 }, { "epoch": 3.040924072265625e-05, "model_forward_time": 0.02416682243347168, "step": 19929 }, { "epoch": 3.040924072265625e-05, "step": 19929, "training_step_time": 0.10888171195983887 }, { "epoch": 3.04107666015625e-05, "grad_norm": 0.19256380200386047, "learning_rate": 2.776824104075364e-05, "loss": 0.0081, "step": 19930 }, { "epoch": 3.04107666015625e-05, "model_forward_time": 0.024959087371826172, "step": 19930 }, { "epoch": 3.04107666015625e-05, "step": 19930, "training_step_time": 0.11455535888671875 }, { "epoch": 3.041229248046875e-05, "model_forward_time": 0.025134801864624023, "step": 19931 }, { "epoch": 3.041229248046875e-05, "step": 19931, "training_step_time": 0.10491061210632324 }, { "epoch": 3.0413818359375e-05, "model_forward_time": 0.025040864944458008, "step": 19932 }, { "epoch": 3.0413818359375e-05, "step": 19932, "training_step_time": 0.12108254432678223 }, { "epoch": 3.041534423828125e-05, "model_forward_time": 0.02526068687438965, "step": 19933 }, { "epoch": 3.041534423828125e-05, "step": 19933, "training_step_time": 0.10828113555908203 }, { "epoch": 3.04168701171875e-05, "model_forward_time": 0.02504873275756836, "step": 19934 }, { "epoch": 3.04168701171875e-05, "step": 19934, "training_step_time": 0.10734868049621582 }, { "epoch": 3.041839599609375e-05, "model_forward_time": 0.02499246597290039, "step": 19935 }, { "epoch": 3.041839599609375e-05, "step": 19935, "training_step_time": 0.10853242874145508 }, { "epoch": 3.0419921875e-05, "model_forward_time": 0.025500774383544922, "step": 19936 }, { "epoch": 3.0419921875e-05, "step": 19936, "training_step_time": 0.10510516166687012 }, { "epoch": 3.042144775390625e-05, "model_forward_time": 0.024642229080200195, "step": 19937 }, { "epoch": 3.042144775390625e-05, "step": 19937, "training_step_time": 0.10414648056030273 }, { "epoch": 3.04229736328125e-05, "model_forward_time": 0.025037288665771484, "step": 19938 }, { "epoch": 3.04229736328125e-05, "step": 19938, "training_step_time": 0.1120145320892334 }, { "epoch": 3.042449951171875e-05, "model_forward_time": 0.0251767635345459, "step": 19939 }, { "epoch": 3.042449951171875e-05, "step": 19939, "training_step_time": 0.10376644134521484 }, { "epoch": 3.0426025390625e-05, "grad_norm": 0.24935036897659302, "learning_rate": 2.7718886812229907e-05, "loss": 0.0078, "step": 19940 }, { "epoch": 3.0426025390625e-05, "model_forward_time": 0.02485370635986328, "step": 19940 }, { "epoch": 3.0426025390625e-05, "step": 19940, "training_step_time": 0.10911083221435547 }, { "epoch": 3.042755126953125e-05, "model_forward_time": 0.02517533302307129, "step": 19941 }, { "epoch": 3.042755126953125e-05, "step": 19941, "training_step_time": 0.11028742790222168 }, { "epoch": 3.04290771484375e-05, "model_forward_time": 0.02545928955078125, "step": 19942 }, { "epoch": 3.04290771484375e-05, "step": 19942, "training_step_time": 0.10753965377807617 }, { "epoch": 3.043060302734375e-05, "model_forward_time": 0.02494192123413086, "step": 19943 }, { "epoch": 3.043060302734375e-05, "step": 19943, "training_step_time": 0.18908476829528809 }, { "epoch": 3.043212890625e-05, "model_forward_time": 0.024820327758789062, "step": 19944 }, { "epoch": 3.043212890625e-05, "step": 19944, "training_step_time": 0.16127562522888184 }, { "epoch": 3.043365478515625e-05, "model_forward_time": 0.024576187133789062, "step": 19945 }, { "epoch": 3.043365478515625e-05, "step": 19945, "training_step_time": 0.11820197105407715 }, { "epoch": 3.04351806640625e-05, "model_forward_time": 0.025038480758666992, "step": 19946 }, { "epoch": 3.04351806640625e-05, "step": 19946, "training_step_time": 0.18862605094909668 }, { "epoch": 3.043670654296875e-05, "model_forward_time": 0.02382349967956543, "step": 19947 }, { "epoch": 3.043670654296875e-05, "step": 19947, "training_step_time": 0.14104557037353516 }, { "epoch": 3.0438232421875e-05, "model_forward_time": 0.024343252182006836, "step": 19948 }, { "epoch": 3.0438232421875e-05, "step": 19948, "training_step_time": 0.21198773384094238 }, { "epoch": 3.043975830078125e-05, "model_forward_time": 0.024395465850830078, "step": 19949 }, { "epoch": 3.043975830078125e-05, "step": 19949, "training_step_time": 0.1260221004486084 }, { "epoch": 3.04412841796875e-05, "grad_norm": 0.24325866997241974, "learning_rate": 2.766955965735968e-05, "loss": 0.0075, "step": 19950 }, { "epoch": 3.04412841796875e-05, "model_forward_time": 0.024066686630249023, "step": 19950 }, { "epoch": 3.04412841796875e-05, "step": 19950, "training_step_time": 0.11644554138183594 }, { "epoch": 3.044281005859375e-05, "model_forward_time": 0.026113033294677734, "step": 19951 }, { "epoch": 3.044281005859375e-05, "step": 19951, "training_step_time": 0.10965204238891602 }, { "epoch": 3.04443359375e-05, "model_forward_time": 0.025269508361816406, "step": 19952 }, { "epoch": 3.04443359375e-05, "step": 19952, "training_step_time": 0.16093039512634277 }, { "epoch": 3.044586181640625e-05, "model_forward_time": 0.0242764949798584, "step": 19953 }, { "epoch": 3.044586181640625e-05, "step": 19953, "training_step_time": 0.13271808624267578 }, { "epoch": 3.04473876953125e-05, "model_forward_time": 0.02443671226501465, "step": 19954 }, { "epoch": 3.04473876953125e-05, "step": 19954, "training_step_time": 0.18363428115844727 }, { "epoch": 3.044891357421875e-05, "model_forward_time": 0.024775028228759766, "step": 19955 }, { "epoch": 3.044891357421875e-05, "step": 19955, "training_step_time": 0.17467808723449707 }, { "epoch": 3.0450439453125e-05, "model_forward_time": 0.024431705474853516, "step": 19956 }, { "epoch": 3.0450439453125e-05, "step": 19956, "training_step_time": 0.10280227661132812 }, { "epoch": 3.045196533203125e-05, "model_forward_time": 0.024753093719482422, "step": 19957 }, { "epoch": 3.045196533203125e-05, "step": 19957, "training_step_time": 0.1028439998626709 }, { "epoch": 3.04534912109375e-05, "model_forward_time": 0.025008201599121094, "step": 19958 }, { "epoch": 3.04534912109375e-05, "step": 19958, "training_step_time": 0.10391712188720703 }, { "epoch": 3.045501708984375e-05, "model_forward_time": 0.025335311889648438, "step": 19959 }, { "epoch": 3.045501708984375e-05, "step": 19959, "training_step_time": 0.10502505302429199 }, { "epoch": 3.045654296875e-05, "grad_norm": 0.3688250184059143, "learning_rate": 2.762025963608009e-05, "loss": 0.0096, "step": 19960 }, { "epoch": 3.045654296875e-05, "model_forward_time": 0.02461075782775879, "step": 19960 }, { "epoch": 3.045654296875e-05, "step": 19960, "training_step_time": 0.10373973846435547 }, { "epoch": 3.045806884765625e-05, "model_forward_time": 0.02527332305908203, "step": 19961 }, { "epoch": 3.045806884765625e-05, "step": 19961, "training_step_time": 0.10569429397583008 }, { "epoch": 3.04595947265625e-05, "model_forward_time": 0.02544546127319336, "step": 19962 }, { "epoch": 3.04595947265625e-05, "step": 19962, "training_step_time": 0.10693192481994629 }, { "epoch": 3.046112060546875e-05, "model_forward_time": 0.0256040096282959, "step": 19963 }, { "epoch": 3.046112060546875e-05, "step": 19963, "training_step_time": 0.1060795783996582 }, { "epoch": 3.0462646484375e-05, "model_forward_time": 0.025006532669067383, "step": 19964 }, { "epoch": 3.0462646484375e-05, "step": 19964, "training_step_time": 0.10557985305786133 }, { "epoch": 3.046417236328125e-05, "model_forward_time": 0.025159120559692383, "step": 19965 }, { "epoch": 3.046417236328125e-05, "step": 19965, "training_step_time": 0.10832500457763672 }, { "epoch": 3.04656982421875e-05, "model_forward_time": 0.025431394577026367, "step": 19966 }, { "epoch": 3.04656982421875e-05, "step": 19966, "training_step_time": 0.10777664184570312 }, { "epoch": 3.046722412109375e-05, "model_forward_time": 0.02511429786682129, "step": 19967 }, { "epoch": 3.046722412109375e-05, "step": 19967, "training_step_time": 0.10891389846801758 }, { "epoch": 3.046875e-05, "model_forward_time": 0.02514362335205078, "step": 19968 }, { "epoch": 3.046875e-05, "step": 19968, "training_step_time": 0.10719871520996094 }, { "epoch": 3.047027587890625e-05, "model_forward_time": 0.02505946159362793, "step": 19969 }, { "epoch": 3.047027587890625e-05, "step": 19969, "training_step_time": 0.10890316963195801 }, { "epoch": 3.04718017578125e-05, "grad_norm": 0.3474755585193634, "learning_rate": 2.7570986808295324e-05, "loss": 0.0085, "step": 19970 }, { "epoch": 3.04718017578125e-05, "model_forward_time": 0.02526688575744629, "step": 19970 }, { "epoch": 3.04718017578125e-05, "step": 19970, "training_step_time": 0.10830068588256836 }, { "epoch": 3.047332763671875e-05, "model_forward_time": 0.025133371353149414, "step": 19971 }, { "epoch": 3.047332763671875e-05, "step": 19971, "training_step_time": 0.13037681579589844 }, { "epoch": 3.0474853515625e-05, "model_forward_time": 0.02466583251953125, "step": 19972 }, { "epoch": 3.0474853515625e-05, "step": 19972, "training_step_time": 0.148085355758667 }, { "epoch": 3.047637939453125e-05, "model_forward_time": 0.02457594871520996, "step": 19973 }, { "epoch": 3.047637939453125e-05, "step": 19973, "training_step_time": 0.1656947135925293 }, { "epoch": 3.04779052734375e-05, "model_forward_time": 0.02419137954711914, "step": 19974 }, { "epoch": 3.04779052734375e-05, "step": 19974, "training_step_time": 0.14847254753112793 }, { "epoch": 3.047943115234375e-05, "model_forward_time": 0.024055957794189453, "step": 19975 }, { "epoch": 3.047943115234375e-05, "step": 19975, "training_step_time": 0.16578888893127441 }, { "epoch": 3.048095703125e-05, "model_forward_time": 0.024364233016967773, "step": 19976 }, { "epoch": 3.048095703125e-05, "step": 19976, "training_step_time": 0.1230156421661377 }, { "epoch": 3.048248291015625e-05, "model_forward_time": 0.024200439453125, "step": 19977 }, { "epoch": 3.048248291015625e-05, "step": 19977, "training_step_time": 0.1225137710571289 }, { "epoch": 3.04840087890625e-05, "model_forward_time": 0.025120258331298828, "step": 19978 }, { "epoch": 3.04840087890625e-05, "step": 19978, "training_step_time": 0.11369657516479492 }, { "epoch": 3.048553466796875e-05, "model_forward_time": 0.024889707565307617, "step": 19979 }, { "epoch": 3.048553466796875e-05, "step": 19979, "training_step_time": 0.11343550682067871 }, { "epoch": 3.0487060546875e-05, "grad_norm": 0.4701537489891052, "learning_rate": 2.7521741233876496e-05, "loss": 0.0104, "step": 19980 }, { "epoch": 3.0487060546875e-05, "model_forward_time": 0.025390148162841797, "step": 19980 }, { "epoch": 3.0487060546875e-05, "step": 19980, "training_step_time": 0.11195564270019531 }, { "epoch": 3.048858642578125e-05, "model_forward_time": 0.024222850799560547, "step": 19981 }, { "epoch": 3.048858642578125e-05, "step": 19981, "training_step_time": 0.10916447639465332 }, { "epoch": 3.04901123046875e-05, "model_forward_time": 0.02498769760131836, "step": 19982 }, { "epoch": 3.04901123046875e-05, "step": 19982, "training_step_time": 0.10727262496948242 }, { "epoch": 3.049163818359375e-05, "model_forward_time": 0.024943113327026367, "step": 19983 }, { "epoch": 3.049163818359375e-05, "step": 19983, "training_step_time": 0.10680890083312988 }, { "epoch": 3.04931640625e-05, "model_forward_time": 0.025546550750732422, "step": 19984 }, { "epoch": 3.04931640625e-05, "step": 19984, "training_step_time": 0.10891270637512207 }, { "epoch": 3.049468994140625e-05, "model_forward_time": 0.025310993194580078, "step": 19985 }, { "epoch": 3.049468994140625e-05, "step": 19985, "training_step_time": 0.10904097557067871 }, { "epoch": 3.04962158203125e-05, "model_forward_time": 0.025338172912597656, "step": 19986 }, { "epoch": 3.04962158203125e-05, "step": 19986, "training_step_time": 0.10748505592346191 }, { "epoch": 3.049774169921875e-05, "model_forward_time": 0.024489402770996094, "step": 19987 }, { "epoch": 3.049774169921875e-05, "step": 19987, "training_step_time": 0.16831278800964355 }, { "epoch": 3.0499267578125e-05, "model_forward_time": 0.024480342864990234, "step": 19988 }, { "epoch": 3.0499267578125e-05, "step": 19988, "training_step_time": 0.1105961799621582 }, { "epoch": 3.050079345703125e-05, "model_forward_time": 0.024851560592651367, "step": 19989 }, { "epoch": 3.050079345703125e-05, "step": 19989, "training_step_time": 0.19509172439575195 }, { "epoch": 3.05023193359375e-05, "grad_norm": 0.23995867371559143, "learning_rate": 2.747252297266162e-05, "loss": 0.01, "step": 19990 }, { "epoch": 3.05023193359375e-05, "model_forward_time": 0.024470090866088867, "step": 19990 }, { "epoch": 3.05023193359375e-05, "step": 19990, "training_step_time": 0.19739103317260742 }, { "epoch": 3.050384521484375e-05, "model_forward_time": 0.024302959442138672, "step": 19991 }, { "epoch": 3.050384521484375e-05, "step": 19991, "training_step_time": 0.1313340663909912 }, { "epoch": 3.050537109375e-05, "model_forward_time": 0.02444601058959961, "step": 19992 }, { "epoch": 3.050537109375e-05, "step": 19992, "training_step_time": 0.221099853515625 }, { "epoch": 3.050689697265625e-05, "model_forward_time": 0.024151325225830078, "step": 19993 }, { "epoch": 3.050689697265625e-05, "step": 19993, "training_step_time": 0.1308457851409912 }, { "epoch": 3.05084228515625e-05, "model_forward_time": 0.024014711380004883, "step": 19994 }, { "epoch": 3.05084228515625e-05, "step": 19994, "training_step_time": 0.14746379852294922 }, { "epoch": 3.050994873046875e-05, "model_forward_time": 0.024657726287841797, "step": 19995 }, { "epoch": 3.050994873046875e-05, "step": 19995, "training_step_time": 0.17785954475402832 }, { "epoch": 3.0511474609375e-05, "model_forward_time": 0.024407148361206055, "step": 19996 }, { "epoch": 3.0511474609375e-05, "step": 19996, "training_step_time": 0.1804971694946289 }, { "epoch": 3.051300048828125e-05, "model_forward_time": 0.02358102798461914, "step": 19997 }, { "epoch": 3.051300048828125e-05, "step": 19997, "training_step_time": 0.12000608444213867 }, { "epoch": 3.05145263671875e-05, "model_forward_time": 0.02477407455444336, "step": 19998 }, { "epoch": 3.05145263671875e-05, "step": 19998, "training_step_time": 0.10666346549987793 }, { "epoch": 3.051605224609375e-05, "model_forward_time": 0.025479555130004883, "step": 19999 }, { "epoch": 3.051605224609375e-05, "step": 19999, "training_step_time": 0.19030046463012695 }, { "epoch": 3.0517578125e-05, "grad_norm": 0.28397536277770996, "learning_rate": 2.7423332084455544e-05, "loss": 0.0096, "step": 20000 }, { "epoch": 3.0517578125e-05, "model_forward_time": 0.02337193489074707, "step": 20000 }, { "epoch": 3.0517578125e-05, "step": 20000, "training_step_time": 0.09570574760437012 }, { "epoch": 3.051910400390625e-05, "model_forward_time": 0.0223236083984375, "step": 20001 }, { "epoch": 3.051910400390625e-05, "step": 20001, "training_step_time": 0.14720726013183594 }, { "epoch": 3.05206298828125e-05, "model_forward_time": 0.024743318557739258, "step": 20002 }, { "epoch": 3.05206298828125e-05, "step": 20002, "training_step_time": 0.11738300323486328 }, { "epoch": 3.052215576171875e-05, "model_forward_time": 0.024571895599365234, "step": 20003 }, { "epoch": 3.052215576171875e-05, "step": 20003, "training_step_time": 0.1035306453704834 }, { "epoch": 3.0523681640625e-05, "model_forward_time": 0.024451017379760742, "step": 20004 }, { "epoch": 3.0523681640625e-05, "step": 20004, "training_step_time": 0.10531020164489746 }, { "epoch": 3.052520751953125e-05, "model_forward_time": 0.02780914306640625, "step": 20005 }, { "epoch": 3.052520751953125e-05, "step": 20005, "training_step_time": 0.11121726036071777 }, { "epoch": 3.05267333984375e-05, "model_forward_time": 0.024915695190429688, "step": 20006 }, { "epoch": 3.05267333984375e-05, "step": 20006, "training_step_time": 0.10805940628051758 }, { "epoch": 3.052825927734375e-05, "model_forward_time": 0.024686336517333984, "step": 20007 }, { "epoch": 3.052825927734375e-05, "step": 20007, "training_step_time": 0.1971290111541748 }, { "epoch": 3.052978515625e-05, "model_forward_time": 0.023880958557128906, "step": 20008 }, { "epoch": 3.052978515625e-05, "step": 20008, "training_step_time": 0.1034994125366211 }, { "epoch": 3.053131103515625e-05, "model_forward_time": 0.024181842803955078, "step": 20009 }, { "epoch": 3.053131103515625e-05, "step": 20009, "training_step_time": 0.10066819190979004 }, { "epoch": 3.05328369140625e-05, "grad_norm": 0.14979436993598938, "learning_rate": 2.7374168629029813e-05, "loss": 0.007, "step": 20010 }, { "epoch": 3.05328369140625e-05, "model_forward_time": 0.024131298065185547, "step": 20010 }, { "epoch": 3.05328369140625e-05, "step": 20010, "training_step_time": 0.10579872131347656 }, { "epoch": 3.053436279296875e-05, "model_forward_time": 0.024957895278930664, "step": 20011 }, { "epoch": 3.053436279296875e-05, "step": 20011, "training_step_time": 0.10393357276916504 }, { "epoch": 3.0535888671875e-05, "model_forward_time": 0.024820327758789062, "step": 20012 }, { "epoch": 3.0535888671875e-05, "step": 20012, "training_step_time": 0.10638189315795898 }, { "epoch": 3.053741455078125e-05, "model_forward_time": 0.025218725204467773, "step": 20013 }, { "epoch": 3.053741455078125e-05, "step": 20013, "training_step_time": 0.10672807693481445 }, { "epoch": 3.05389404296875e-05, "model_forward_time": 0.02453017234802246, "step": 20014 }, { "epoch": 3.05389404296875e-05, "step": 20014, "training_step_time": 0.17987537384033203 }, { "epoch": 3.054046630859375e-05, "model_forward_time": 0.024103403091430664, "step": 20015 }, { "epoch": 3.054046630859375e-05, "step": 20015, "training_step_time": 0.18902587890625 }, { "epoch": 3.05419921875e-05, "model_forward_time": 0.023686885833740234, "step": 20016 }, { "epoch": 3.05419921875e-05, "step": 20016, "training_step_time": 0.17943453788757324 }, { "epoch": 3.054351806640625e-05, "model_forward_time": 0.023665189743041992, "step": 20017 }, { "epoch": 3.054351806640625e-05, "step": 20017, "training_step_time": 0.16382527351379395 }, { "epoch": 3.05450439453125e-05, "model_forward_time": 0.023807048797607422, "step": 20018 }, { "epoch": 3.05450439453125e-05, "step": 20018, "training_step_time": 0.11962771415710449 }, { "epoch": 3.054656982421875e-05, "model_forward_time": 0.027333974838256836, "step": 20019 }, { "epoch": 3.054656982421875e-05, "step": 20019, "training_step_time": 0.10613679885864258 }, { "epoch": 3.0548095703125e-05, "grad_norm": 0.14195986092090607, "learning_rate": 2.7325032666122686e-05, "loss": 0.0072, "step": 20020 }, { "epoch": 3.0548095703125e-05, "model_forward_time": 0.024239778518676758, "step": 20020 }, { "epoch": 3.0548095703125e-05, "step": 20020, "training_step_time": 0.10755038261413574 }, { "epoch": 3.054962158203125e-05, "model_forward_time": 0.024358034133911133, "step": 20021 }, { "epoch": 3.054962158203125e-05, "step": 20021, "training_step_time": 0.13631629943847656 }, { "epoch": 3.05511474609375e-05, "model_forward_time": 0.024330854415893555, "step": 20022 }, { "epoch": 3.05511474609375e-05, "step": 20022, "training_step_time": 0.11111330986022949 }, { "epoch": 3.055267333984375e-05, "model_forward_time": 0.0246884822845459, "step": 20023 }, { "epoch": 3.055267333984375e-05, "step": 20023, "training_step_time": 0.133697509765625 }, { "epoch": 3.055419921875e-05, "model_forward_time": 0.024876117706298828, "step": 20024 }, { "epoch": 3.055419921875e-05, "step": 20024, "training_step_time": 0.14897513389587402 }, { "epoch": 3.055572509765625e-05, "model_forward_time": 0.023957490921020508, "step": 20025 }, { "epoch": 3.055572509765625e-05, "step": 20025, "training_step_time": 0.11231374740600586 }, { "epoch": 3.05572509765625e-05, "model_forward_time": 0.02454519271850586, "step": 20026 }, { "epoch": 3.05572509765625e-05, "step": 20026, "training_step_time": 0.12746739387512207 }, { "epoch": 3.055877685546875e-05, "model_forward_time": 0.026970386505126953, "step": 20027 }, { "epoch": 3.055877685546875e-05, "step": 20027, "training_step_time": 0.12401461601257324 }, { "epoch": 3.0560302734375e-05, "model_forward_time": 0.024517536163330078, "step": 20028 }, { "epoch": 3.0560302734375e-05, "step": 20028, "training_step_time": 0.10865044593811035 }, { "epoch": 3.056182861328125e-05, "model_forward_time": 0.024792909622192383, "step": 20029 }, { "epoch": 3.056182861328125e-05, "step": 20029, "training_step_time": 0.11149740219116211 }, { "epoch": 3.05633544921875e-05, "grad_norm": 0.20012709498405457, "learning_rate": 2.727592425543899e-05, "loss": 0.0066, "step": 20030 }, { "epoch": 3.05633544921875e-05, "model_forward_time": 0.024751663208007812, "step": 20030 }, { "epoch": 3.05633544921875e-05, "step": 20030, "training_step_time": 0.10913801193237305 }, { "epoch": 3.056488037109375e-05, "model_forward_time": 0.024646997451782227, "step": 20031 }, { "epoch": 3.056488037109375e-05, "step": 20031, "training_step_time": 0.10865902900695801 }, { "epoch": 3.056640625e-05, "model_forward_time": 0.02458953857421875, "step": 20032 }, { "epoch": 3.056640625e-05, "step": 20032, "training_step_time": 0.10888004302978516 }, { "epoch": 3.056793212890625e-05, "model_forward_time": 0.02497553825378418, "step": 20033 }, { "epoch": 3.056793212890625e-05, "step": 20033, "training_step_time": 0.11115121841430664 }, { "epoch": 3.05694580078125e-05, "model_forward_time": 0.024533748626708984, "step": 20034 }, { "epoch": 3.05694580078125e-05, "step": 20034, "training_step_time": 0.11658573150634766 }, { "epoch": 3.057098388671875e-05, "model_forward_time": 0.025025606155395508, "step": 20035 }, { "epoch": 3.057098388671875e-05, "step": 20035, "training_step_time": 0.10935258865356445 }, { "epoch": 3.0572509765625e-05, "model_forward_time": 0.025971174240112305, "step": 20036 }, { "epoch": 3.0572509765625e-05, "step": 20036, "training_step_time": 0.10526800155639648 }, { "epoch": 3.057403564453125e-05, "model_forward_time": 0.024916410446166992, "step": 20037 }, { "epoch": 3.057403564453125e-05, "step": 20037, "training_step_time": 0.12465143203735352 }, { "epoch": 3.05755615234375e-05, "model_forward_time": 0.025053024291992188, "step": 20038 }, { "epoch": 3.05755615234375e-05, "step": 20038, "training_step_time": 0.1185305118560791 }, { "epoch": 3.057708740234375e-05, "model_forward_time": 0.025161266326904297, "step": 20039 }, { "epoch": 3.057708740234375e-05, "step": 20039, "training_step_time": 0.11285591125488281 }, { "epoch": 3.057861328125e-05, "grad_norm": 0.39375030994415283, "learning_rate": 2.7226843456650037e-05, "loss": 0.0204, "step": 20040 }, { "epoch": 3.057861328125e-05, "model_forward_time": 0.025297880172729492, "step": 20040 }, { "epoch": 3.057861328125e-05, "step": 20040, "training_step_time": 0.1164710521697998 }, { "epoch": 3.058013916015625e-05, "model_forward_time": 0.02543926239013672, "step": 20041 }, { "epoch": 3.058013916015625e-05, "step": 20041, "training_step_time": 0.1324000358581543 }, { "epoch": 3.05816650390625e-05, "model_forward_time": 0.027437210083007812, "step": 20042 }, { "epoch": 3.05816650390625e-05, "step": 20042, "training_step_time": 0.20728349685668945 }, { "epoch": 3.058319091796875e-05, "model_forward_time": 0.023511409759521484, "step": 20043 }, { "epoch": 3.058319091796875e-05, "step": 20043, "training_step_time": 0.18297863006591797 }, { "epoch": 3.0584716796875e-05, "model_forward_time": 0.023816347122192383, "step": 20044 }, { "epoch": 3.0584716796875e-05, "step": 20044, "training_step_time": 0.12964749336242676 }, { "epoch": 3.058624267578125e-05, "model_forward_time": 0.023683786392211914, "step": 20045 }, { "epoch": 3.058624267578125e-05, "step": 20045, "training_step_time": 0.13047409057617188 }, { "epoch": 3.05877685546875e-05, "model_forward_time": 0.024052858352661133, "step": 20046 }, { "epoch": 3.05877685546875e-05, "step": 20046, "training_step_time": 0.2108476161956787 }, { "epoch": 3.058929443359375e-05, "model_forward_time": 0.02394723892211914, "step": 20047 }, { "epoch": 3.058929443359375e-05, "step": 20047, "training_step_time": 0.13980579376220703 }, { "epoch": 3.05908203125e-05, "model_forward_time": 0.02375960350036621, "step": 20048 }, { "epoch": 3.05908203125e-05, "step": 20048, "training_step_time": 0.19109320640563965 }, { "epoch": 3.059234619140625e-05, "model_forward_time": 0.0247194766998291, "step": 20049 }, { "epoch": 3.059234619140625e-05, "step": 20049, "training_step_time": 0.10919475555419922 }, { "epoch": 3.05938720703125e-05, "grad_norm": 0.2476683109998703, "learning_rate": 2.717779032939367e-05, "loss": 0.0105, "step": 20050 }, { "epoch": 3.05938720703125e-05, "model_forward_time": 0.023656129837036133, "step": 20050 }, { "epoch": 3.05938720703125e-05, "step": 20050, "training_step_time": 0.18899941444396973 }, { "epoch": 3.059539794921875e-05, "model_forward_time": 0.02353692054748535, "step": 20051 }, { "epoch": 3.059539794921875e-05, "step": 20051, "training_step_time": 0.10837650299072266 }, { "epoch": 3.0596923828125e-05, "model_forward_time": 0.023532390594482422, "step": 20052 }, { "epoch": 3.0596923828125e-05, "step": 20052, "training_step_time": 0.10756468772888184 }, { "epoch": 3.059844970703125e-05, "model_forward_time": 0.0244600772857666, "step": 20053 }, { "epoch": 3.059844970703125e-05, "step": 20053, "training_step_time": 0.10891985893249512 }, { "epoch": 3.05999755859375e-05, "model_forward_time": 0.024141550064086914, "step": 20054 }, { "epoch": 3.05999755859375e-05, "step": 20054, "training_step_time": 0.11549162864685059 }, { "epoch": 3.060150146484375e-05, "model_forward_time": 0.024569034576416016, "step": 20055 }, { "epoch": 3.060150146484375e-05, "step": 20055, "training_step_time": 0.1092219352722168 }, { "epoch": 3.060302734375e-05, "model_forward_time": 0.024714946746826172, "step": 20056 }, { "epoch": 3.060302734375e-05, "step": 20056, "training_step_time": 0.10893487930297852 }, { "epoch": 3.060455322265625e-05, "model_forward_time": 0.02446269989013672, "step": 20057 }, { "epoch": 3.060455322265625e-05, "step": 20057, "training_step_time": 0.10633087158203125 }, { "epoch": 3.06060791015625e-05, "model_forward_time": 0.0245668888092041, "step": 20058 }, { "epoch": 3.06060791015625e-05, "step": 20058, "training_step_time": 0.11205506324768066 }, { "epoch": 3.060760498046875e-05, "model_forward_time": 0.024166345596313477, "step": 20059 }, { "epoch": 3.060760498046875e-05, "step": 20059, "training_step_time": 0.1069033145904541 }, { "epoch": 3.0609130859375e-05, "grad_norm": 0.11443420499563217, "learning_rate": 2.7128764933274052e-05, "loss": 0.0047, "step": 20060 }, { "epoch": 3.0609130859375e-05, "model_forward_time": 0.024232149124145508, "step": 20060 }, { "epoch": 3.0609130859375e-05, "step": 20060, "training_step_time": 0.1068110466003418 }, { "epoch": 3.061065673828125e-05, "model_forward_time": 0.025106191635131836, "step": 20061 }, { "epoch": 3.061065673828125e-05, "step": 20061, "training_step_time": 0.17304205894470215 }, { "epoch": 3.06121826171875e-05, "model_forward_time": 0.024400711059570312, "step": 20062 }, { "epoch": 3.06121826171875e-05, "step": 20062, "training_step_time": 0.19573450088500977 }, { "epoch": 3.061370849609375e-05, "model_forward_time": 0.023867130279541016, "step": 20063 }, { "epoch": 3.061370849609375e-05, "step": 20063, "training_step_time": 0.18181300163269043 }, { "epoch": 3.0615234375e-05, "model_forward_time": 0.02433180809020996, "step": 20064 }, { "epoch": 3.0615234375e-05, "step": 20064, "training_step_time": 0.19516205787658691 }, { "epoch": 3.061676025390625e-05, "model_forward_time": 0.023671865463256836, "step": 20065 }, { "epoch": 3.061676025390625e-05, "step": 20065, "training_step_time": 0.18101906776428223 }, { "epoch": 3.06182861328125e-05, "model_forward_time": 0.024179458618164062, "step": 20066 }, { "epoch": 3.06182861328125e-05, "step": 20066, "training_step_time": 0.14071011543273926 }, { "epoch": 3.061981201171875e-05, "model_forward_time": 0.024497270584106445, "step": 20067 }, { "epoch": 3.061981201171875e-05, "step": 20067, "training_step_time": 0.15570378303527832 }, { "epoch": 3.0621337890625e-05, "model_forward_time": 0.02552008628845215, "step": 20068 }, { "epoch": 3.0621337890625e-05, "step": 20068, "training_step_time": 0.1303102970123291 }, { "epoch": 3.062286376953125e-05, "model_forward_time": 0.024104595184326172, "step": 20069 }, { "epoch": 3.062286376953125e-05, "step": 20069, "training_step_time": 0.11702728271484375 }, { "epoch": 3.06243896484375e-05, "grad_norm": 0.36181890964508057, "learning_rate": 2.707976732786166e-05, "loss": 0.0146, "step": 20070 }, { "epoch": 3.06243896484375e-05, "model_forward_time": 0.02505803108215332, "step": 20070 }, { "epoch": 3.06243896484375e-05, "step": 20070, "training_step_time": 0.10621809959411621 }, { "epoch": 3.062591552734375e-05, "model_forward_time": 0.025150775909423828, "step": 20071 }, { "epoch": 3.062591552734375e-05, "step": 20071, "training_step_time": 0.10820198059082031 }, { "epoch": 3.062744140625e-05, "model_forward_time": 0.025429248809814453, "step": 20072 }, { "epoch": 3.062744140625e-05, "step": 20072, "training_step_time": 0.11041092872619629 }, { "epoch": 3.062896728515625e-05, "model_forward_time": 0.025026798248291016, "step": 20073 }, { "epoch": 3.062896728515625e-05, "step": 20073, "training_step_time": 0.10819387435913086 }, { "epoch": 3.06304931640625e-05, "model_forward_time": 0.025316238403320312, "step": 20074 }, { "epoch": 3.06304931640625e-05, "step": 20074, "training_step_time": 0.10929751396179199 }, { "epoch": 3.063201904296875e-05, "model_forward_time": 0.025164365768432617, "step": 20075 }, { "epoch": 3.063201904296875e-05, "step": 20075, "training_step_time": 0.1129920482635498 }, { "epoch": 3.0633544921875e-05, "model_forward_time": 0.02754378318786621, "step": 20076 }, { "epoch": 3.0633544921875e-05, "step": 20076, "training_step_time": 0.11178350448608398 }, { "epoch": 3.063507080078125e-05, "model_forward_time": 0.024785757064819336, "step": 20077 }, { "epoch": 3.063507080078125e-05, "step": 20077, "training_step_time": 0.11131596565246582 }, { "epoch": 3.06365966796875e-05, "model_forward_time": 0.025014638900756836, "step": 20078 }, { "epoch": 3.06365966796875e-05, "step": 20078, "training_step_time": 0.11169075965881348 }, { "epoch": 3.063812255859375e-05, "model_forward_time": 0.025621652603149414, "step": 20079 }, { "epoch": 3.063812255859375e-05, "step": 20079, "training_step_time": 0.11114239692687988 }, { "epoch": 3.06396484375e-05, "grad_norm": 0.23736076056957245, "learning_rate": 2.703079757269319e-05, "loss": 0.0064, "step": 20080 }, { "epoch": 3.06396484375e-05, "model_forward_time": 0.02550959587097168, "step": 20080 }, { "epoch": 3.06396484375e-05, "step": 20080, "training_step_time": 0.13739943504333496 }, { "epoch": 3.064117431640625e-05, "model_forward_time": 0.024765491485595703, "step": 20081 }, { "epoch": 3.064117431640625e-05, "step": 20081, "training_step_time": 0.10868144035339355 }, { "epoch": 3.06427001953125e-05, "model_forward_time": 0.025235891342163086, "step": 20082 }, { "epoch": 3.06427001953125e-05, "step": 20082, "training_step_time": 0.19417905807495117 }, { "epoch": 3.064422607421875e-05, "model_forward_time": 0.025873422622680664, "step": 20083 }, { "epoch": 3.064422607421875e-05, "step": 20083, "training_step_time": 0.13622450828552246 }, { "epoch": 3.0645751953125e-05, "model_forward_time": 0.02423882484436035, "step": 20084 }, { "epoch": 3.0645751953125e-05, "step": 20084, "training_step_time": 0.19639825820922852 }, { "epoch": 3.064727783203125e-05, "model_forward_time": 0.024513721466064453, "step": 20085 }, { "epoch": 3.064727783203125e-05, "step": 20085, "training_step_time": 0.17439770698547363 }, { "epoch": 3.06488037109375e-05, "model_forward_time": 0.024694442749023438, "step": 20086 }, { "epoch": 3.06488037109375e-05, "step": 20086, "training_step_time": 0.11016201972961426 }, { "epoch": 3.065032958984375e-05, "model_forward_time": 0.024710655212402344, "step": 20087 }, { "epoch": 3.065032958984375e-05, "step": 20087, "training_step_time": 0.10719513893127441 }, { "epoch": 3.065185546875e-05, "model_forward_time": 0.02510523796081543, "step": 20088 }, { "epoch": 3.065185546875e-05, "step": 20088, "training_step_time": 0.18665575981140137 }, { "epoch": 3.065338134765625e-05, "model_forward_time": 0.0243532657623291, "step": 20089 }, { "epoch": 3.065338134765625e-05, "step": 20089, "training_step_time": 0.21629023551940918 }, { "epoch": 3.06549072265625e-05, "grad_norm": 0.1674196422100067, "learning_rate": 2.698185572727151e-05, "loss": 0.0116, "step": 20090 }, { "epoch": 3.06549072265625e-05, "model_forward_time": 0.024390459060668945, "step": 20090 }, { "epoch": 3.06549072265625e-05, "step": 20090, "training_step_time": 0.10967803001403809 }, { "epoch": 3.065643310546875e-05, "model_forward_time": 0.024274110794067383, "step": 20091 }, { "epoch": 3.065643310546875e-05, "step": 20091, "training_step_time": 0.11041402816772461 }, { "epoch": 3.0657958984375e-05, "model_forward_time": 0.02630305290222168, "step": 20092 }, { "epoch": 3.0657958984375e-05, "step": 20092, "training_step_time": 0.11366748809814453 }, { "epoch": 3.065948486328125e-05, "model_forward_time": 0.025084257125854492, "step": 20093 }, { "epoch": 3.065948486328125e-05, "step": 20093, "training_step_time": 0.1096503734588623 }, { "epoch": 3.06610107421875e-05, "model_forward_time": 0.025183916091918945, "step": 20094 }, { "epoch": 3.06610107421875e-05, "step": 20094, "training_step_time": 0.19602465629577637 }, { "epoch": 3.066253662109375e-05, "model_forward_time": 0.02431344985961914, "step": 20095 }, { "epoch": 3.066253662109375e-05, "step": 20095, "training_step_time": 0.1074831485748291 }, { "epoch": 3.06640625e-05, "model_forward_time": 0.024272441864013672, "step": 20096 }, { "epoch": 3.06640625e-05, "step": 20096, "training_step_time": 0.10624408721923828 }, { "epoch": 3.066558837890625e-05, "model_forward_time": 0.025000810623168945, "step": 20097 }, { "epoch": 3.066558837890625e-05, "step": 20097, "training_step_time": 0.10942268371582031 }, { "epoch": 3.06671142578125e-05, "model_forward_time": 0.02573251724243164, "step": 20098 }, { "epoch": 3.06671142578125e-05, "step": 20098, "training_step_time": 0.10874247550964355 }, { "epoch": 3.066864013671875e-05, "model_forward_time": 0.025012493133544922, "step": 20099 }, { "epoch": 3.066864013671875e-05, "step": 20099, "training_step_time": 0.10790872573852539 }, { "epoch": 3.0670166015625e-05, "grad_norm": 0.19107389450073242, "learning_rate": 2.693294185106562e-05, "loss": 0.0079, "step": 20100 }, { "epoch": 3.0670166015625e-05, "model_forward_time": 0.024210214614868164, "step": 20100 }, { "epoch": 3.0670166015625e-05, "step": 20100, "training_step_time": 0.10798215866088867 }, { "epoch": 3.067169189453125e-05, "model_forward_time": 0.023985862731933594, "step": 20101 }, { "epoch": 3.067169189453125e-05, "step": 20101, "training_step_time": 0.10751986503601074 }, { "epoch": 3.06732177734375e-05, "model_forward_time": 0.025285005569458008, "step": 20102 }, { "epoch": 3.06732177734375e-05, "step": 20102, "training_step_time": 0.11167526245117188 }, { "epoch": 3.067474365234375e-05, "model_forward_time": 0.026564836502075195, "step": 20103 }, { "epoch": 3.067474365234375e-05, "step": 20103, "training_step_time": 0.10903596878051758 }, { "epoch": 3.067626953125e-05, "model_forward_time": 0.025388717651367188, "step": 20104 }, { "epoch": 3.067626953125e-05, "step": 20104, "training_step_time": 0.10700821876525879 }, { "epoch": 3.067779541015625e-05, "model_forward_time": 0.025379657745361328, "step": 20105 }, { "epoch": 3.067779541015625e-05, "step": 20105, "training_step_time": 0.11047744750976562 }, { "epoch": 3.06793212890625e-05, "model_forward_time": 0.02525806427001953, "step": 20106 }, { "epoch": 3.06793212890625e-05, "step": 20106, "training_step_time": 0.10770463943481445 }, { "epoch": 3.068084716796875e-05, "model_forward_time": 0.025288820266723633, "step": 20107 }, { "epoch": 3.068084716796875e-05, "step": 20107, "training_step_time": 0.1085062026977539 }, { "epoch": 3.0682373046875e-05, "model_forward_time": 0.025474071502685547, "step": 20108 }, { "epoch": 3.0682373046875e-05, "step": 20108, "training_step_time": 0.1755831241607666 }, { "epoch": 3.068389892578125e-05, "model_forward_time": 0.024499177932739258, "step": 20109 }, { "epoch": 3.068389892578125e-05, "step": 20109, "training_step_time": 0.12106704711914062 }, { "epoch": 3.06854248046875e-05, "grad_norm": 0.19927628338336945, "learning_rate": 2.688405600351045e-05, "loss": 0.0078, "step": 20110 }, { "epoch": 3.06854248046875e-05, "model_forward_time": 0.024152517318725586, "step": 20110 }, { "epoch": 3.06854248046875e-05, "step": 20110, "training_step_time": 0.13187289237976074 }, { "epoch": 3.068695068359375e-05, "model_forward_time": 0.025029659271240234, "step": 20111 }, { "epoch": 3.068695068359375e-05, "step": 20111, "training_step_time": 0.16140413284301758 }, { "epoch": 3.06884765625e-05, "model_forward_time": 0.024066686630249023, "step": 20112 }, { "epoch": 3.06884765625e-05, "step": 20112, "training_step_time": 0.23043465614318848 }, { "epoch": 3.069000244140625e-05, "model_forward_time": 0.023512601852416992, "step": 20113 }, { "epoch": 3.069000244140625e-05, "step": 20113, "training_step_time": 0.12192201614379883 }, { "epoch": 3.06915283203125e-05, "model_forward_time": 0.023984432220458984, "step": 20114 }, { "epoch": 3.06915283203125e-05, "step": 20114, "training_step_time": 0.1234583854675293 }, { "epoch": 3.069305419921875e-05, "model_forward_time": 0.024393320083618164, "step": 20115 }, { "epoch": 3.069305419921875e-05, "step": 20115, "training_step_time": 0.12225008010864258 }, { "epoch": 3.0694580078125e-05, "model_forward_time": 0.024712085723876953, "step": 20116 }, { "epoch": 3.0694580078125e-05, "step": 20116, "training_step_time": 0.12128353118896484 }, { "epoch": 3.069610595703125e-05, "model_forward_time": 0.02452254295349121, "step": 20117 }, { "epoch": 3.069610595703125e-05, "step": 20117, "training_step_time": 0.11886334419250488 }, { "epoch": 3.06976318359375e-05, "model_forward_time": 0.024532318115234375, "step": 20118 }, { "epoch": 3.06976318359375e-05, "step": 20118, "training_step_time": 0.12436580657958984 }, { "epoch": 3.069915771484375e-05, "model_forward_time": 0.025753498077392578, "step": 20119 }, { "epoch": 3.069915771484375e-05, "step": 20119, "training_step_time": 0.11226177215576172 }, { "epoch": 3.070068359375e-05, "grad_norm": 0.2917441725730896, "learning_rate": 2.6835198244006927e-05, "loss": 0.009, "step": 20120 }, { "epoch": 3.070068359375e-05, "model_forward_time": 0.025027751922607422, "step": 20120 }, { "epoch": 3.070068359375e-05, "step": 20120, "training_step_time": 0.11216235160827637 }, { "epoch": 3.070220947265625e-05, "model_forward_time": 0.024962902069091797, "step": 20121 }, { "epoch": 3.070220947265625e-05, "step": 20121, "training_step_time": 0.11505413055419922 }, { "epoch": 3.07037353515625e-05, "model_forward_time": 0.02503514289855957, "step": 20122 }, { "epoch": 3.07037353515625e-05, "step": 20122, "training_step_time": 0.10608220100402832 }, { "epoch": 3.070526123046875e-05, "model_forward_time": 0.024297475814819336, "step": 20123 }, { "epoch": 3.070526123046875e-05, "step": 20123, "training_step_time": 0.14914870262145996 }, { "epoch": 3.0706787109375e-05, "model_forward_time": 0.02433180809020996, "step": 20124 }, { "epoch": 3.0706787109375e-05, "step": 20124, "training_step_time": 0.16347765922546387 }, { "epoch": 3.070831298828125e-05, "model_forward_time": 0.024506092071533203, "step": 20125 }, { "epoch": 3.070831298828125e-05, "step": 20125, "training_step_time": 0.1226353645324707 }, { "epoch": 3.07098388671875e-05, "model_forward_time": 0.024466514587402344, "step": 20126 }, { "epoch": 3.07098388671875e-05, "step": 20126, "training_step_time": 0.2050187587738037 }, { "epoch": 3.071136474609375e-05, "model_forward_time": 0.024794816970825195, "step": 20127 }, { "epoch": 3.071136474609375e-05, "step": 20127, "training_step_time": 0.16788792610168457 }, { "epoch": 3.0712890625e-05, "model_forward_time": 0.024058103561401367, "step": 20128 }, { "epoch": 3.0712890625e-05, "step": 20128, "training_step_time": 0.11533808708190918 }, { "epoch": 3.071441650390625e-05, "model_forward_time": 0.024599552154541016, "step": 20129 }, { "epoch": 3.071441650390625e-05, "step": 20129, "training_step_time": 0.10707664489746094 }, { "epoch": 3.07159423828125e-05, "grad_norm": 0.26147332787513733, "learning_rate": 2.6786368631921836e-05, "loss": 0.0101, "step": 20130 }, { "epoch": 3.07159423828125e-05, "model_forward_time": 0.02516651153564453, "step": 20130 }, { "epoch": 3.07159423828125e-05, "step": 20130, "training_step_time": 0.17835164070129395 }, { "epoch": 3.071746826171875e-05, "model_forward_time": 0.024977684020996094, "step": 20131 }, { "epoch": 3.071746826171875e-05, "step": 20131, "training_step_time": 0.1799161434173584 }, { "epoch": 3.0718994140625e-05, "model_forward_time": 0.02429032325744629, "step": 20132 }, { "epoch": 3.0718994140625e-05, "step": 20132, "training_step_time": 0.1367645263671875 }, { "epoch": 3.072052001953125e-05, "model_forward_time": 0.02455615997314453, "step": 20133 }, { "epoch": 3.072052001953125e-05, "step": 20133, "training_step_time": 0.1148984432220459 }, { "epoch": 3.07220458984375e-05, "model_forward_time": 0.025042057037353516, "step": 20134 }, { "epoch": 3.07220458984375e-05, "step": 20134, "training_step_time": 0.1097564697265625 }, { "epoch": 3.072357177734375e-05, "model_forward_time": 0.025260448455810547, "step": 20135 }, { "epoch": 3.072357177734375e-05, "step": 20135, "training_step_time": 0.10812830924987793 }, { "epoch": 3.072509765625e-05, "model_forward_time": 0.02514958381652832, "step": 20136 }, { "epoch": 3.072509765625e-05, "step": 20136, "training_step_time": 0.11067843437194824 }, { "epoch": 3.072662353515625e-05, "model_forward_time": 0.025125980377197266, "step": 20137 }, { "epoch": 3.072662353515625e-05, "step": 20137, "training_step_time": 0.19734907150268555 }, { "epoch": 3.07281494140625e-05, "model_forward_time": 0.024164676666259766, "step": 20138 }, { "epoch": 3.07281494140625e-05, "step": 20138, "training_step_time": 0.11006879806518555 }, { "epoch": 3.072967529296875e-05, "model_forward_time": 0.02487635612487793, "step": 20139 }, { "epoch": 3.072967529296875e-05, "step": 20139, "training_step_time": 0.10609126091003418 }, { "epoch": 3.0731201171875e-05, "grad_norm": 0.2607508599758148, "learning_rate": 2.6737567226587747e-05, "loss": 0.0057, "step": 20140 }, { "epoch": 3.0731201171875e-05, "model_forward_time": 0.025351762771606445, "step": 20140 }, { "epoch": 3.0731201171875e-05, "step": 20140, "training_step_time": 0.10692882537841797 }, { "epoch": 3.073272705078125e-05, "model_forward_time": 0.025469303131103516, "step": 20141 }, { "epoch": 3.073272705078125e-05, "step": 20141, "training_step_time": 0.10724258422851562 }, { "epoch": 3.07342529296875e-05, "model_forward_time": 0.025270462036132812, "step": 20142 }, { "epoch": 3.07342529296875e-05, "step": 20142, "training_step_time": 0.1138763427734375 }, { "epoch": 3.073577880859375e-05, "model_forward_time": 0.025310754776000977, "step": 20143 }, { "epoch": 3.073577880859375e-05, "step": 20143, "training_step_time": 0.11436271667480469 }, { "epoch": 3.07373046875e-05, "model_forward_time": 0.025474071502685547, "step": 20144 }, { "epoch": 3.07373046875e-05, "step": 20144, "training_step_time": 0.11069488525390625 }, { "epoch": 3.073883056640625e-05, "model_forward_time": 0.0251162052154541, "step": 20145 }, { "epoch": 3.073883056640625e-05, "step": 20145, "training_step_time": 0.11208844184875488 }, { "epoch": 3.07403564453125e-05, "model_forward_time": 0.025007963180541992, "step": 20146 }, { "epoch": 3.07403564453125e-05, "step": 20146, "training_step_time": 0.11350512504577637 }, { "epoch": 3.074188232421875e-05, "model_forward_time": 0.02541518211364746, "step": 20147 }, { "epoch": 3.074188232421875e-05, "step": 20147, "training_step_time": 0.11229419708251953 }, { "epoch": 3.0743408203125e-05, "model_forward_time": 0.02498459815979004, "step": 20148 }, { "epoch": 3.0743408203125e-05, "step": 20148, "training_step_time": 0.10809683799743652 }, { "epoch": 3.074493408203125e-05, "model_forward_time": 0.025279760360717773, "step": 20149 }, { "epoch": 3.074493408203125e-05, "step": 20149, "training_step_time": 0.1080009937286377 }, { "epoch": 3.07464599609375e-05, "grad_norm": 0.13364791870117188, "learning_rate": 2.6688794087302993e-05, "loss": 0.0085, "step": 20150 }, { "epoch": 3.07464599609375e-05, "model_forward_time": 0.025080204010009766, "step": 20150 }, { "epoch": 3.07464599609375e-05, "step": 20150, "training_step_time": 0.11108160018920898 }, { "epoch": 3.074798583984375e-05, "model_forward_time": 0.025337934494018555, "step": 20151 }, { "epoch": 3.074798583984375e-05, "step": 20151, "training_step_time": 0.1914665699005127 }, { "epoch": 3.074951171875e-05, "model_forward_time": 0.024394512176513672, "step": 20152 }, { "epoch": 3.074951171875e-05, "step": 20152, "training_step_time": 0.11838960647583008 }, { "epoch": 3.075103759765625e-05, "model_forward_time": 0.024555206298828125, "step": 20153 }, { "epoch": 3.075103759765625e-05, "step": 20153, "training_step_time": 0.12694668769836426 }, { "epoch": 3.07525634765625e-05, "model_forward_time": 0.025015592575073242, "step": 20154 }, { "epoch": 3.07525634765625e-05, "step": 20154, "training_step_time": 0.1599881649017334 }, { "epoch": 3.075408935546875e-05, "model_forward_time": 0.024646759033203125, "step": 20155 }, { "epoch": 3.075408935546875e-05, "step": 20155, "training_step_time": 0.17630243301391602 }, { "epoch": 3.0755615234375e-05, "model_forward_time": 0.024446725845336914, "step": 20156 }, { "epoch": 3.0755615234375e-05, "step": 20156, "training_step_time": 0.16309022903442383 }, { "epoch": 3.075714111328125e-05, "model_forward_time": 0.02433037757873535, "step": 20157 }, { "epoch": 3.075714111328125e-05, "step": 20157, "training_step_time": 0.10565042495727539 }, { "epoch": 3.07586669921875e-05, "model_forward_time": 0.0246889591217041, "step": 20158 }, { "epoch": 3.07586669921875e-05, "step": 20158, "training_step_time": 0.11262702941894531 }, { "epoch": 3.076019287109375e-05, "model_forward_time": 0.025122880935668945, "step": 20159 }, { "epoch": 3.076019287109375e-05, "step": 20159, "training_step_time": 0.10605311393737793 }, { "epoch": 3.076171875e-05, "grad_norm": 0.1781671792268753, "learning_rate": 2.6640049273331515e-05, "loss": 0.0092, "step": 20160 }, { "epoch": 3.076171875e-05, "model_forward_time": 0.025907516479492188, "step": 20160 }, { "epoch": 3.076171875e-05, "step": 20160, "training_step_time": 0.11122298240661621 }, { "epoch": 3.076324462890625e-05, "model_forward_time": 0.024812698364257812, "step": 20161 }, { "epoch": 3.076324462890625e-05, "step": 20161, "training_step_time": 0.11050534248352051 }, { "epoch": 3.07647705078125e-05, "model_forward_time": 0.02444601058959961, "step": 20162 }, { "epoch": 3.07647705078125e-05, "step": 20162, "training_step_time": 0.1074838638305664 }, { "epoch": 3.076629638671875e-05, "model_forward_time": 0.02477550506591797, "step": 20163 }, { "epoch": 3.076629638671875e-05, "step": 20163, "training_step_time": 0.10726523399353027 }, { "epoch": 3.0767822265625e-05, "model_forward_time": 0.024585485458374023, "step": 20164 }, { "epoch": 3.0767822265625e-05, "step": 20164, "training_step_time": 0.11070656776428223 }, { "epoch": 3.076934814453125e-05, "model_forward_time": 0.024741649627685547, "step": 20165 }, { "epoch": 3.076934814453125e-05, "step": 20165, "training_step_time": 0.10494041442871094 }, { "epoch": 3.07708740234375e-05, "model_forward_time": 0.02466273307800293, "step": 20166 }, { "epoch": 3.07708740234375e-05, "step": 20166, "training_step_time": 0.10512232780456543 }, { "epoch": 3.077239990234375e-05, "model_forward_time": 0.023527860641479492, "step": 20167 }, { "epoch": 3.077239990234375e-05, "step": 20167, "training_step_time": 0.15217900276184082 }, { "epoch": 3.077392578125e-05, "model_forward_time": 0.023688316345214844, "step": 20168 }, { "epoch": 3.077392578125e-05, "step": 20168, "training_step_time": 0.1640167236328125 }, { "epoch": 3.077545166015625e-05, "model_forward_time": 0.02411794662475586, "step": 20169 }, { "epoch": 3.077545166015625e-05, "step": 20169, "training_step_time": 0.11856651306152344 }, { "epoch": 3.07769775390625e-05, "grad_norm": 0.20678578317165375, "learning_rate": 2.6591332843902884e-05, "loss": 0.0126, "step": 20170 }, { "epoch": 3.07769775390625e-05, "model_forward_time": 0.023735523223876953, "step": 20170 }, { "epoch": 3.07769775390625e-05, "step": 20170, "training_step_time": 0.1717686653137207 }, { "epoch": 3.077850341796875e-05, "model_forward_time": 0.023598670959472656, "step": 20171 }, { "epoch": 3.077850341796875e-05, "step": 20171, "training_step_time": 0.19781208038330078 }, { "epoch": 3.0780029296875e-05, "model_forward_time": 0.024016380310058594, "step": 20172 }, { "epoch": 3.0780029296875e-05, "step": 20172, "training_step_time": 0.11575102806091309 }, { "epoch": 3.078155517578125e-05, "model_forward_time": 0.023656845092773438, "step": 20173 }, { "epoch": 3.078155517578125e-05, "step": 20173, "training_step_time": 0.1806182861328125 }, { "epoch": 3.07830810546875e-05, "model_forward_time": 0.023870229721069336, "step": 20174 }, { "epoch": 3.07830810546875e-05, "step": 20174, "training_step_time": 0.11176514625549316 }, { "epoch": 3.078460693359375e-05, "model_forward_time": 0.02383899688720703, "step": 20175 }, { "epoch": 3.078460693359375e-05, "step": 20175, "training_step_time": 0.17157578468322754 }, { "epoch": 3.07861328125e-05, "model_forward_time": 0.02377796173095703, "step": 20176 }, { "epoch": 3.07861328125e-05, "step": 20176, "training_step_time": 0.13790416717529297 }, { "epoch": 3.078765869140625e-05, "model_forward_time": 0.0238645076751709, "step": 20177 }, { "epoch": 3.078765869140625e-05, "step": 20177, "training_step_time": 0.10911083221435547 }, { "epoch": 3.07891845703125e-05, "model_forward_time": 0.02460002899169922, "step": 20178 }, { "epoch": 3.07891845703125e-05, "step": 20178, "training_step_time": 0.11597537994384766 }, { "epoch": 3.079071044921875e-05, "model_forward_time": 0.024309635162353516, "step": 20179 }, { "epoch": 3.079071044921875e-05, "step": 20179, "training_step_time": 0.11704421043395996 }, { "epoch": 3.0792236328125e-05, "grad_norm": 0.24393905699253082, "learning_rate": 2.654264485821214e-05, "loss": 0.0113, "step": 20180 }, { "epoch": 3.0792236328125e-05, "model_forward_time": 0.024632692337036133, "step": 20180 }, { "epoch": 3.0792236328125e-05, "step": 20180, "training_step_time": 0.1798086166381836 }, { "epoch": 3.079376220703125e-05, "model_forward_time": 0.023427724838256836, "step": 20181 }, { "epoch": 3.079376220703125e-05, "step": 20181, "training_step_time": 0.12470054626464844 }, { "epoch": 3.07952880859375e-05, "model_forward_time": 0.02408123016357422, "step": 20182 }, { "epoch": 3.07952880859375e-05, "step": 20182, "training_step_time": 0.10941123962402344 }, { "epoch": 3.079681396484375e-05, "model_forward_time": 0.02460336685180664, "step": 20183 }, { "epoch": 3.079681396484375e-05, "step": 20183, "training_step_time": 0.11217641830444336 }, { "epoch": 3.079833984375e-05, "model_forward_time": 0.024121999740600586, "step": 20184 }, { "epoch": 3.079833984375e-05, "step": 20184, "training_step_time": 0.1081087589263916 }, { "epoch": 3.079986572265625e-05, "model_forward_time": 0.023935556411743164, "step": 20185 }, { "epoch": 3.079986572265625e-05, "step": 20185, "training_step_time": 0.1061856746673584 }, { "epoch": 3.08013916015625e-05, "model_forward_time": 0.024003028869628906, "step": 20186 }, { "epoch": 3.08013916015625e-05, "step": 20186, "training_step_time": 0.1091306209564209 }, { "epoch": 3.080291748046875e-05, "model_forward_time": 0.026261091232299805, "step": 20187 }, { "epoch": 3.080291748046875e-05, "step": 20187, "training_step_time": 0.10797309875488281 }, { "epoch": 3.0804443359375e-05, "model_forward_time": 0.024617671966552734, "step": 20188 }, { "epoch": 3.0804443359375e-05, "step": 20188, "training_step_time": 0.11060404777526855 }, { "epoch": 3.080596923828125e-05, "model_forward_time": 0.023793458938598633, "step": 20189 }, { "epoch": 3.080596923828125e-05, "step": 20189, "training_step_time": 0.11011838912963867 }, { "epoch": 3.08074951171875e-05, "grad_norm": 0.14980293810367584, "learning_rate": 2.6493985375419778e-05, "loss": 0.0057, "step": 20190 }, { "epoch": 3.08074951171875e-05, "model_forward_time": 0.024554729461669922, "step": 20190 }, { "epoch": 3.08074951171875e-05, "step": 20190, "training_step_time": 0.1127471923828125 }, { "epoch": 3.080902099609375e-05, "model_forward_time": 0.024039030075073242, "step": 20191 }, { "epoch": 3.080902099609375e-05, "step": 20191, "training_step_time": 0.11475777626037598 }, { "epoch": 3.0810546875e-05, "model_forward_time": 0.024587631225585938, "step": 20192 }, { "epoch": 3.0810546875e-05, "step": 20192, "training_step_time": 0.1079854965209961 }, { "epoch": 3.081207275390625e-05, "model_forward_time": 0.02412128448486328, "step": 20193 }, { "epoch": 3.081207275390625e-05, "step": 20193, "training_step_time": 0.10799217224121094 }, { "epoch": 3.08135986328125e-05, "model_forward_time": 0.024086713790893555, "step": 20194 }, { "epoch": 3.08135986328125e-05, "step": 20194, "training_step_time": 0.10820508003234863 }, { "epoch": 3.081512451171875e-05, "model_forward_time": 0.024826526641845703, "step": 20195 }, { "epoch": 3.081512451171875e-05, "step": 20195, "training_step_time": 0.2284867763519287 }, { "epoch": 3.0816650390625e-05, "model_forward_time": 0.023824214935302734, "step": 20196 }, { "epoch": 3.0816650390625e-05, "step": 20196, "training_step_time": 0.11643433570861816 }, { "epoch": 3.081817626953125e-05, "model_forward_time": 0.023323774337768555, "step": 20197 }, { "epoch": 3.081817626953125e-05, "step": 20197, "training_step_time": 0.1398162841796875 }, { "epoch": 3.08197021484375e-05, "model_forward_time": 0.024189233779907227, "step": 20198 }, { "epoch": 3.08197021484375e-05, "step": 20198, "training_step_time": 0.161545991897583 }, { "epoch": 3.082122802734375e-05, "model_forward_time": 0.023430824279785156, "step": 20199 }, { "epoch": 3.082122802734375e-05, "step": 20199, "training_step_time": 0.2218492031097412 }, { "epoch": 3.082275390625e-05, "grad_norm": 0.18001802265644073, "learning_rate": 2.644535445465164e-05, "loss": 0.0064, "step": 20200 }, { "epoch": 3.082275390625e-05, "model_forward_time": 0.02357935905456543, "step": 20200 }, { "epoch": 3.082275390625e-05, "step": 20200, "training_step_time": 0.11756253242492676 }, { "epoch": 3.082427978515625e-05, "model_forward_time": 0.023369789123535156, "step": 20201 }, { "epoch": 3.082427978515625e-05, "step": 20201, "training_step_time": 0.11740565299987793 }, { "epoch": 3.08258056640625e-05, "model_forward_time": 0.02399921417236328, "step": 20202 }, { "epoch": 3.08258056640625e-05, "step": 20202, "training_step_time": 0.11145997047424316 }, { "epoch": 3.082733154296875e-05, "model_forward_time": 0.026042938232421875, "step": 20203 }, { "epoch": 3.082733154296875e-05, "step": 20203, "training_step_time": 0.11573982238769531 }, { "epoch": 3.0828857421875e-05, "model_forward_time": 0.024569272994995117, "step": 20204 }, { "epoch": 3.0828857421875e-05, "step": 20204, "training_step_time": 0.10821700096130371 }, { "epoch": 3.083038330078125e-05, "model_forward_time": 0.02445816993713379, "step": 20205 }, { "epoch": 3.083038330078125e-05, "step": 20205, "training_step_time": 0.1122283935546875 }, { "epoch": 3.08319091796875e-05, "model_forward_time": 0.025043249130249023, "step": 20206 }, { "epoch": 3.08319091796875e-05, "step": 20206, "training_step_time": 0.10926246643066406 }, { "epoch": 3.083343505859375e-05, "model_forward_time": 0.024710893630981445, "step": 20207 }, { "epoch": 3.083343505859375e-05, "step": 20207, "training_step_time": 0.10887002944946289 }, { "epoch": 3.08349609375e-05, "model_forward_time": 0.02412724494934082, "step": 20208 }, { "epoch": 3.08349609375e-05, "step": 20208, "training_step_time": 0.1103203296661377 }, { "epoch": 3.083648681640625e-05, "model_forward_time": 0.02427530288696289, "step": 20209 }, { "epoch": 3.083648681640625e-05, "step": 20209, "training_step_time": 0.1092996597290039 }, { "epoch": 3.08380126953125e-05, "grad_norm": 0.0846259742975235, "learning_rate": 2.6396752154998915e-05, "loss": 0.0051, "step": 20210 }, { "epoch": 3.08380126953125e-05, "model_forward_time": 0.024137258529663086, "step": 20210 }, { "epoch": 3.08380126953125e-05, "step": 20210, "training_step_time": 0.10398650169372559 }, { "epoch": 3.083953857421875e-05, "model_forward_time": 0.023733854293823242, "step": 20211 }, { "epoch": 3.083953857421875e-05, "step": 20211, "training_step_time": 0.14608263969421387 }, { "epoch": 3.0841064453125e-05, "model_forward_time": 0.024096012115478516, "step": 20212 }, { "epoch": 3.0841064453125e-05, "step": 20212, "training_step_time": 0.1712636947631836 }, { "epoch": 3.084259033203125e-05, "model_forward_time": 0.0238494873046875, "step": 20213 }, { "epoch": 3.084259033203125e-05, "step": 20213, "training_step_time": 0.1312401294708252 }, { "epoch": 3.08441162109375e-05, "model_forward_time": 0.023639917373657227, "step": 20214 }, { "epoch": 3.08441162109375e-05, "step": 20214, "training_step_time": 0.20663022994995117 }, { "epoch": 3.084564208984375e-05, "model_forward_time": 0.02353048324584961, "step": 20215 }, { "epoch": 3.084564208984375e-05, "step": 20215, "training_step_time": 0.10597443580627441 }, { "epoch": 3.084716796875e-05, "model_forward_time": 0.023611783981323242, "step": 20216 }, { "epoch": 3.084716796875e-05, "step": 20216, "training_step_time": 0.18341851234436035 }, { "epoch": 3.084869384765625e-05, "model_forward_time": 0.023435354232788086, "step": 20217 }, { "epoch": 3.084869384765625e-05, "step": 20217, "training_step_time": 0.10608863830566406 }, { "epoch": 3.08502197265625e-05, "model_forward_time": 0.02347254753112793, "step": 20218 }, { "epoch": 3.08502197265625e-05, "step": 20218, "training_step_time": 0.10412979125976562 }, { "epoch": 3.085174560546875e-05, "model_forward_time": 0.024471044540405273, "step": 20219 }, { "epoch": 3.085174560546875e-05, "step": 20219, "training_step_time": 0.18188953399658203 }, { "epoch": 3.0853271484375e-05, "grad_norm": 0.11704346537590027, "learning_rate": 2.6348178535517966e-05, "loss": 0.0077, "step": 20220 }, { "epoch": 3.0853271484375e-05, "model_forward_time": 0.02349066734313965, "step": 20220 }, { "epoch": 3.0853271484375e-05, "step": 20220, "training_step_time": 0.13672637939453125 }, { "epoch": 3.085479736328125e-05, "model_forward_time": 0.02340984344482422, "step": 20221 }, { "epoch": 3.085479736328125e-05, "step": 20221, "training_step_time": 0.10885834693908691 }, { "epoch": 3.08563232421875e-05, "model_forward_time": 0.024364471435546875, "step": 20222 }, { "epoch": 3.08563232421875e-05, "step": 20222, "training_step_time": 0.1128389835357666 }, { "epoch": 3.085784912109375e-05, "model_forward_time": 0.024009227752685547, "step": 20223 }, { "epoch": 3.085784912109375e-05, "step": 20223, "training_step_time": 0.116302490234375 }, { "epoch": 3.0859375e-05, "model_forward_time": 0.024618864059448242, "step": 20224 }, { "epoch": 3.0859375e-05, "step": 20224, "training_step_time": 0.10491657257080078 }, { "epoch": 3.086090087890625e-05, "model_forward_time": 0.02428150177001953, "step": 20225 }, { "epoch": 3.086090087890625e-05, "step": 20225, "training_step_time": 0.19633841514587402 }, { "epoch": 3.08624267578125e-05, "model_forward_time": 0.023705005645751953, "step": 20226 }, { "epoch": 3.08624267578125e-05, "step": 20226, "training_step_time": 0.1023871898651123 }, { "epoch": 3.086395263671875e-05, "model_forward_time": 0.02349567413330078, "step": 20227 }, { "epoch": 3.086395263671875e-05, "step": 20227, "training_step_time": 0.10074210166931152 }, { "epoch": 3.0865478515625e-05, "model_forward_time": 0.024228811264038086, "step": 20228 }, { "epoch": 3.0865478515625e-05, "step": 20228, "training_step_time": 0.10529589653015137 }, { "epoch": 3.086700439453125e-05, "model_forward_time": 0.024242877960205078, "step": 20229 }, { "epoch": 3.086700439453125e-05, "step": 20229, "training_step_time": 0.10521268844604492 }, { "epoch": 3.08685302734375e-05, "grad_norm": 0.28940412402153015, "learning_rate": 2.629963365523031e-05, "loss": 0.0143, "step": 20230 }, { "epoch": 3.08685302734375e-05, "model_forward_time": 0.024315357208251953, "step": 20230 }, { "epoch": 3.08685302734375e-05, "step": 20230, "training_step_time": 0.10441899299621582 }, { "epoch": 3.087005615234375e-05, "model_forward_time": 0.024458646774291992, "step": 20231 }, { "epoch": 3.087005615234375e-05, "step": 20231, "training_step_time": 0.10368585586547852 }, { "epoch": 3.087158203125e-05, "model_forward_time": 0.02495121955871582, "step": 20232 }, { "epoch": 3.087158203125e-05, "step": 20232, "training_step_time": 0.10915946960449219 }, { "epoch": 3.087310791015625e-05, "model_forward_time": 0.024271488189697266, "step": 20233 }, { "epoch": 3.087310791015625e-05, "step": 20233, "training_step_time": 0.10931921005249023 }, { "epoch": 3.08746337890625e-05, "model_forward_time": 0.024724721908569336, "step": 20234 }, { "epoch": 3.08746337890625e-05, "step": 20234, "training_step_time": 0.10394024848937988 }, { "epoch": 3.087615966796875e-05, "model_forward_time": 0.024185895919799805, "step": 20235 }, { "epoch": 3.087615966796875e-05, "step": 20235, "training_step_time": 0.1086115837097168 }, { "epoch": 3.0877685546875e-05, "model_forward_time": 0.024412870407104492, "step": 20236 }, { "epoch": 3.0877685546875e-05, "step": 20236, "training_step_time": 0.10397005081176758 }, { "epoch": 3.087921142578125e-05, "model_forward_time": 0.024617433547973633, "step": 20237 }, { "epoch": 3.087921142578125e-05, "step": 20237, "training_step_time": 0.10375833511352539 }, { "epoch": 3.08807373046875e-05, "model_forward_time": 0.024225950241088867, "step": 20238 }, { "epoch": 3.08807373046875e-05, "step": 20238, "training_step_time": 0.10857629776000977 }, { "epoch": 3.088226318359375e-05, "model_forward_time": 0.02434706687927246, "step": 20239 }, { "epoch": 3.088226318359375e-05, "step": 20239, "training_step_time": 0.10439443588256836 }, { "epoch": 3.08837890625e-05, "grad_norm": 0.3185952603816986, "learning_rate": 2.6251117573122563e-05, "loss": 0.0082, "step": 20240 }, { "epoch": 3.08837890625e-05, "model_forward_time": 0.02418375015258789, "step": 20240 }, { "epoch": 3.08837890625e-05, "step": 20240, "training_step_time": 0.19379448890686035 }, { "epoch": 3.088531494140625e-05, "model_forward_time": 0.023304224014282227, "step": 20241 }, { "epoch": 3.088531494140625e-05, "step": 20241, "training_step_time": 0.12569713592529297 }, { "epoch": 3.08868408203125e-05, "model_forward_time": 0.023907184600830078, "step": 20242 }, { "epoch": 3.08868408203125e-05, "step": 20242, "training_step_time": 0.13182806968688965 }, { "epoch": 3.088836669921875e-05, "model_forward_time": 0.024083852767944336, "step": 20243 }, { "epoch": 3.088836669921875e-05, "step": 20243, "training_step_time": 0.16073083877563477 }, { "epoch": 3.0889892578125e-05, "model_forward_time": 0.023714780807495117, "step": 20244 }, { "epoch": 3.0889892578125e-05, "step": 20244, "training_step_time": 0.2216653823852539 }, { "epoch": 3.089141845703125e-05, "model_forward_time": 0.022882699966430664, "step": 20245 }, { "epoch": 3.089141845703125e-05, "step": 20245, "training_step_time": 0.12031817436218262 }, { "epoch": 3.08929443359375e-05, "model_forward_time": 0.02342391014099121, "step": 20246 }, { "epoch": 3.08929443359375e-05, "step": 20246, "training_step_time": 0.10500669479370117 }, { "epoch": 3.089447021484375e-05, "model_forward_time": 0.023952245712280273, "step": 20247 }, { "epoch": 3.089447021484375e-05, "step": 20247, "training_step_time": 0.11027407646179199 }, { "epoch": 3.089599609375e-05, "model_forward_time": 0.024573802947998047, "step": 20248 }, { "epoch": 3.089599609375e-05, "step": 20248, "training_step_time": 0.11267662048339844 }, { "epoch": 3.089752197265625e-05, "model_forward_time": 0.024187803268432617, "step": 20249 }, { "epoch": 3.089752197265625e-05, "step": 20249, "training_step_time": 0.10289573669433594 }, { "epoch": 3.08990478515625e-05, "grad_norm": 0.20640796422958374, "learning_rate": 2.6202630348146324e-05, "loss": 0.0072, "step": 20250 }, { "epoch": 3.08990478515625e-05, "model_forward_time": 0.02412557601928711, "step": 20250 }, { "epoch": 3.08990478515625e-05, "step": 20250, "training_step_time": 0.10513067245483398 }, { "epoch": 3.090057373046875e-05, "model_forward_time": 0.024743080139160156, "step": 20251 }, { "epoch": 3.090057373046875e-05, "step": 20251, "training_step_time": 0.10699677467346191 }, { "epoch": 3.0902099609375e-05, "model_forward_time": 0.024134159088134766, "step": 20252 }, { "epoch": 3.0902099609375e-05, "step": 20252, "training_step_time": 0.10776329040527344 }, { "epoch": 3.090362548828125e-05, "model_forward_time": 0.02420806884765625, "step": 20253 }, { "epoch": 3.090362548828125e-05, "step": 20253, "training_step_time": 0.10670280456542969 }, { "epoch": 3.09051513671875e-05, "model_forward_time": 0.02433323860168457, "step": 20254 }, { "epoch": 3.09051513671875e-05, "step": 20254, "training_step_time": 0.1046149730682373 }, { "epoch": 3.090667724609375e-05, "model_forward_time": 0.024295806884765625, "step": 20255 }, { "epoch": 3.090667724609375e-05, "step": 20255, "training_step_time": 0.10667967796325684 }, { "epoch": 3.0908203125e-05, "model_forward_time": 0.023836612701416016, "step": 20256 }, { "epoch": 3.0908203125e-05, "step": 20256, "training_step_time": 0.14601755142211914 }, { "epoch": 3.090972900390625e-05, "model_forward_time": 0.023718595504760742, "step": 20257 }, { "epoch": 3.090972900390625e-05, "step": 20257, "training_step_time": 0.16224145889282227 }, { "epoch": 3.09112548828125e-05, "model_forward_time": 0.02347111701965332, "step": 20258 }, { "epoch": 3.09112548828125e-05, "step": 20258, "training_step_time": 0.15749239921569824 }, { "epoch": 3.091278076171875e-05, "model_forward_time": 0.02343440055847168, "step": 20259 }, { "epoch": 3.091278076171875e-05, "step": 20259, "training_step_time": 0.18209409713745117 }, { "epoch": 3.0914306640625e-05, "grad_norm": 0.1057221069931984, "learning_rate": 2.6154172039218172e-05, "loss": 0.006, "step": 20260 }, { "epoch": 3.0914306640625e-05, "model_forward_time": 0.023080825805664062, "step": 20260 }, { "epoch": 3.0914306640625e-05, "step": 20260, "training_step_time": 0.18787193298339844 }, { "epoch": 3.091583251953125e-05, "model_forward_time": 0.023444414138793945, "step": 20261 }, { "epoch": 3.091583251953125e-05, "step": 20261, "training_step_time": 0.12043380737304688 }, { "epoch": 3.09173583984375e-05, "model_forward_time": 0.023785114288330078, "step": 20262 }, { "epoch": 3.09173583984375e-05, "step": 20262, "training_step_time": 0.10840916633605957 }, { "epoch": 3.091888427734375e-05, "model_forward_time": 0.024281978607177734, "step": 20263 }, { "epoch": 3.091888427734375e-05, "step": 20263, "training_step_time": 0.10541462898254395 }, { "epoch": 3.092041015625e-05, "model_forward_time": 0.024016380310058594, "step": 20264 }, { "epoch": 3.092041015625e-05, "step": 20264, "training_step_time": 0.13516879081726074 }, { "epoch": 3.092193603515625e-05, "model_forward_time": 0.02454376220703125, "step": 20265 }, { "epoch": 3.092193603515625e-05, "step": 20265, "training_step_time": 0.1362764835357666 }, { "epoch": 3.09234619140625e-05, "model_forward_time": 0.024135589599609375, "step": 20266 }, { "epoch": 3.09234619140625e-05, "step": 20266, "training_step_time": 0.11280608177185059 }, { "epoch": 3.092498779296875e-05, "model_forward_time": 0.02664041519165039, "step": 20267 }, { "epoch": 3.092498779296875e-05, "step": 20267, "training_step_time": 0.1115257740020752 }, { "epoch": 3.0926513671875e-05, "model_forward_time": 0.02452826499938965, "step": 20268 }, { "epoch": 3.0926513671875e-05, "step": 20268, "training_step_time": 0.11456084251403809 }, { "epoch": 3.092803955078125e-05, "model_forward_time": 0.02424454689025879, "step": 20269 }, { "epoch": 3.092803955078125e-05, "step": 20269, "training_step_time": 0.11041831970214844 }, { "epoch": 3.09295654296875e-05, "grad_norm": 0.12096409499645233, "learning_rate": 2.6105742705219515e-05, "loss": 0.0067, "step": 20270 }, { "epoch": 3.09295654296875e-05, "model_forward_time": 0.024512767791748047, "step": 20270 }, { "epoch": 3.09295654296875e-05, "step": 20270, "training_step_time": 0.18552136421203613 }, { "epoch": 3.093109130859375e-05, "model_forward_time": 0.02405095100402832, "step": 20271 }, { "epoch": 3.093109130859375e-05, "step": 20271, "training_step_time": 0.14959192276000977 }, { "epoch": 3.09326171875e-05, "model_forward_time": 0.023129701614379883, "step": 20272 }, { "epoch": 3.09326171875e-05, "step": 20272, "training_step_time": 0.1291344165802002 }, { "epoch": 3.093414306640625e-05, "model_forward_time": 0.02367091178894043, "step": 20273 }, { "epoch": 3.093414306640625e-05, "step": 20273, "training_step_time": 0.12607407569885254 }, { "epoch": 3.09356689453125e-05, "model_forward_time": 0.02408885955810547, "step": 20274 }, { "epoch": 3.09356689453125e-05, "step": 20274, "training_step_time": 0.12212395668029785 }, { "epoch": 3.093719482421875e-05, "model_forward_time": 0.024029254913330078, "step": 20275 }, { "epoch": 3.093719482421875e-05, "step": 20275, "training_step_time": 0.1178750991821289 }, { "epoch": 3.0938720703125e-05, "model_forward_time": 0.024106502532958984, "step": 20276 }, { "epoch": 3.0938720703125e-05, "step": 20276, "training_step_time": 0.11334967613220215 }, { "epoch": 3.094024658203125e-05, "model_forward_time": 0.02433490753173828, "step": 20277 }, { "epoch": 3.094024658203125e-05, "step": 20277, "training_step_time": 0.1134939193725586 }, { "epoch": 3.09417724609375e-05, "model_forward_time": 0.024335384368896484, "step": 20278 }, { "epoch": 3.09417724609375e-05, "step": 20278, "training_step_time": 0.11189961433410645 }, { "epoch": 3.094329833984375e-05, "model_forward_time": 0.024118900299072266, "step": 20279 }, { "epoch": 3.094329833984375e-05, "step": 20279, "training_step_time": 0.10966181755065918 }, { "epoch": 3.094482421875e-05, "grad_norm": 0.14801107347011566, "learning_rate": 2.6057342404996522e-05, "loss": 0.005, "step": 20280 }, { "epoch": 3.094482421875e-05, "model_forward_time": 0.02424788475036621, "step": 20280 }, { "epoch": 3.094482421875e-05, "step": 20280, "training_step_time": 0.10901403427124023 }, { "epoch": 3.094635009765625e-05, "model_forward_time": 0.023990631103515625, "step": 20281 }, { "epoch": 3.094635009765625e-05, "step": 20281, "training_step_time": 0.10921907424926758 }, { "epoch": 3.09478759765625e-05, "model_forward_time": 0.024436473846435547, "step": 20282 }, { "epoch": 3.09478759765625e-05, "step": 20282, "training_step_time": 0.10672926902770996 }, { "epoch": 3.094940185546875e-05, "model_forward_time": 0.025330305099487305, "step": 20283 }, { "epoch": 3.094940185546875e-05, "step": 20283, "training_step_time": 0.11105847358703613 }, { "epoch": 3.0950927734375e-05, "model_forward_time": 0.025043725967407227, "step": 20284 }, { "epoch": 3.0950927734375e-05, "step": 20284, "training_step_time": 0.17059707641601562 }, { "epoch": 3.095245361328125e-05, "model_forward_time": 0.024131298065185547, "step": 20285 }, { "epoch": 3.095245361328125e-05, "step": 20285, "training_step_time": 0.11204361915588379 }, { "epoch": 3.09539794921875e-05, "model_forward_time": 0.023813247680664062, "step": 20286 }, { "epoch": 3.09539794921875e-05, "step": 20286, "training_step_time": 0.12683463096618652 }, { "epoch": 3.095550537109375e-05, "model_forward_time": 0.02431654930114746, "step": 20287 }, { "epoch": 3.095550537109375e-05, "step": 20287, "training_step_time": 0.1630098819732666 }, { "epoch": 3.095703125e-05, "model_forward_time": 0.023221254348754883, "step": 20288 }, { "epoch": 3.095703125e-05, "step": 20288, "training_step_time": 0.17503952980041504 }, { "epoch": 3.095855712890625e-05, "model_forward_time": 0.023313283920288086, "step": 20289 }, { "epoch": 3.095855712890625e-05, "step": 20289, "training_step_time": 0.16805672645568848 }, { "epoch": 3.09600830078125e-05, "grad_norm": 0.12908320128917694, "learning_rate": 2.6008971197360176e-05, "loss": 0.0125, "step": 20290 }, { "epoch": 3.09600830078125e-05, "model_forward_time": 0.023643970489501953, "step": 20290 }, { "epoch": 3.09600830078125e-05, "step": 20290, "training_step_time": 0.10753607749938965 }, { "epoch": 3.096160888671875e-05, "model_forward_time": 0.023777484893798828, "step": 20291 }, { "epoch": 3.096160888671875e-05, "step": 20291, "training_step_time": 0.10880208015441895 }, { "epoch": 3.0963134765625e-05, "model_forward_time": 0.02480483055114746, "step": 20292 }, { "epoch": 3.0963134765625e-05, "step": 20292, "training_step_time": 0.10862445831298828 }, { "epoch": 3.096466064453125e-05, "model_forward_time": 0.02412867546081543, "step": 20293 }, { "epoch": 3.096466064453125e-05, "step": 20293, "training_step_time": 0.1114046573638916 }, { "epoch": 3.09661865234375e-05, "model_forward_time": 0.02380204200744629, "step": 20294 }, { "epoch": 3.09661865234375e-05, "step": 20294, "training_step_time": 0.10920524597167969 }, { "epoch": 3.096771240234375e-05, "model_forward_time": 0.02398395538330078, "step": 20295 }, { "epoch": 3.096771240234375e-05, "step": 20295, "training_step_time": 0.10948562622070312 }, { "epoch": 3.096923828125e-05, "model_forward_time": 0.02419734001159668, "step": 20296 }, { "epoch": 3.096923828125e-05, "step": 20296, "training_step_time": 0.11516356468200684 }, { "epoch": 3.097076416015625e-05, "model_forward_time": 0.024021387100219727, "step": 20297 }, { "epoch": 3.097076416015625e-05, "step": 20297, "training_step_time": 0.11346101760864258 }, { "epoch": 3.09722900390625e-05, "model_forward_time": 0.02448582649230957, "step": 20298 }, { "epoch": 3.09722900390625e-05, "step": 20298, "training_step_time": 0.11006927490234375 }, { "epoch": 3.097381591796875e-05, "model_forward_time": 0.024636268615722656, "step": 20299 }, { "epoch": 3.097381591796875e-05, "step": 20299, "training_step_time": 0.10822272300720215 }, { "epoch": 3.0975341796875e-05, "grad_norm": 0.1334718018770218, "learning_rate": 2.5960629141086012e-05, "loss": 0.0058, "step": 20300 }, { "epoch": 3.0975341796875e-05, "model_forward_time": 0.024556636810302734, "step": 20300 }, { "epoch": 3.0975341796875e-05, "step": 20300, "training_step_time": 0.13094854354858398 }, { "epoch": 3.097686767578125e-05, "model_forward_time": 0.02364492416381836, "step": 20301 }, { "epoch": 3.097686767578125e-05, "step": 20301, "training_step_time": 0.11255407333374023 }, { "epoch": 3.09783935546875e-05, "model_forward_time": 0.024955034255981445, "step": 20302 }, { "epoch": 3.09783935546875e-05, "step": 20302, "training_step_time": 0.11995339393615723 }, { "epoch": 3.097991943359375e-05, "model_forward_time": 0.024550676345825195, "step": 20303 }, { "epoch": 3.097991943359375e-05, "step": 20303, "training_step_time": 0.11147499084472656 }, { "epoch": 3.09814453125e-05, "model_forward_time": 0.024528980255126953, "step": 20304 }, { "epoch": 3.09814453125e-05, "step": 20304, "training_step_time": 0.1755213737487793 }, { "epoch": 3.098297119140625e-05, "model_forward_time": 0.023474693298339844, "step": 20305 }, { "epoch": 3.098297119140625e-05, "step": 20305, "training_step_time": 0.18876004219055176 }, { "epoch": 3.09844970703125e-05, "model_forward_time": 0.023133516311645508, "step": 20306 }, { "epoch": 3.09844970703125e-05, "step": 20306, "training_step_time": 0.11075806617736816 }, { "epoch": 3.098602294921875e-05, "model_forward_time": 0.02364802360534668, "step": 20307 }, { "epoch": 3.098602294921875e-05, "step": 20307, "training_step_time": 0.10815691947937012 }, { "epoch": 3.0987548828125e-05, "model_forward_time": 0.02443242073059082, "step": 20308 }, { "epoch": 3.0987548828125e-05, "step": 20308, "training_step_time": 0.11348772048950195 }, { "epoch": 3.098907470703125e-05, "model_forward_time": 0.024419307708740234, "step": 20309 }, { "epoch": 3.098907470703125e-05, "step": 20309, "training_step_time": 0.1411147117614746 }, { "epoch": 3.09906005859375e-05, "grad_norm": 0.18202942609786987, "learning_rate": 2.591231629491423e-05, "loss": 0.0114, "step": 20310 }, { "epoch": 3.09906005859375e-05, "model_forward_time": 0.024049997329711914, "step": 20310 }, { "epoch": 3.09906005859375e-05, "step": 20310, "training_step_time": 0.13256025314331055 }, { "epoch": 3.099212646484375e-05, "model_forward_time": 0.023494243621826172, "step": 20311 }, { "epoch": 3.099212646484375e-05, "step": 20311, "training_step_time": 0.12480521202087402 }, { "epoch": 3.099365234375e-05, "model_forward_time": 0.024001359939575195, "step": 20312 }, { "epoch": 3.099365234375e-05, "step": 20312, "training_step_time": 0.10767269134521484 }, { "epoch": 3.099517822265625e-05, "model_forward_time": 0.024210453033447266, "step": 20313 }, { "epoch": 3.099517822265625e-05, "step": 20313, "training_step_time": 0.13320374488830566 }, { "epoch": 3.09967041015625e-05, "model_forward_time": 0.02465343475341797, "step": 20314 }, { "epoch": 3.09967041015625e-05, "step": 20314, "training_step_time": 0.10839223861694336 }, { "epoch": 3.099822998046875e-05, "model_forward_time": 0.024294137954711914, "step": 20315 }, { "epoch": 3.099822998046875e-05, "step": 20315, "training_step_time": 0.19416213035583496 }, { "epoch": 3.0999755859375e-05, "model_forward_time": 0.023432016372680664, "step": 20316 }, { "epoch": 3.0999755859375e-05, "step": 20316, "training_step_time": 0.10625100135803223 }, { "epoch": 3.100128173828125e-05, "model_forward_time": 0.02371978759765625, "step": 20317 }, { "epoch": 3.100128173828125e-05, "step": 20317, "training_step_time": 0.10913538932800293 }, { "epoch": 3.10028076171875e-05, "model_forward_time": 0.02446293830871582, "step": 20318 }, { "epoch": 3.10028076171875e-05, "step": 20318, "training_step_time": 0.10594367980957031 }, { "epoch": 3.100433349609375e-05, "model_forward_time": 0.024090051651000977, "step": 20319 }, { "epoch": 3.100433349609375e-05, "step": 20319, "training_step_time": 0.10794448852539062 }, { "epoch": 3.1005859375e-05, "grad_norm": 0.280348539352417, "learning_rate": 2.586403271754947e-05, "loss": 0.0094, "step": 20320 }, { "epoch": 3.1005859375e-05, "model_forward_time": 0.02423381805419922, "step": 20320 }, { "epoch": 3.1005859375e-05, "step": 20320, "training_step_time": 0.10675668716430664 }, { "epoch": 3.100738525390625e-05, "model_forward_time": 0.024366140365600586, "step": 20321 }, { "epoch": 3.100738525390625e-05, "step": 20321, "training_step_time": 0.10655331611633301 }, { "epoch": 3.10089111328125e-05, "model_forward_time": 0.024501323699951172, "step": 20322 }, { "epoch": 3.10089111328125e-05, "step": 20322, "training_step_time": 0.10703468322753906 }, { "epoch": 3.101043701171875e-05, "model_forward_time": 0.02444767951965332, "step": 20323 }, { "epoch": 3.101043701171875e-05, "step": 20323, "training_step_time": 0.1436612606048584 }, { "epoch": 3.1011962890625e-05, "model_forward_time": 0.02377605438232422, "step": 20324 }, { "epoch": 3.1011962890625e-05, "step": 20324, "training_step_time": 0.1664588451385498 }, { "epoch": 3.101348876953125e-05, "model_forward_time": 0.023504972457885742, "step": 20325 }, { "epoch": 3.101348876953125e-05, "step": 20325, "training_step_time": 0.14690947532653809 }, { "epoch": 3.10150146484375e-05, "model_forward_time": 0.023774147033691406, "step": 20326 }, { "epoch": 3.10150146484375e-05, "step": 20326, "training_step_time": 0.13970136642456055 }, { "epoch": 3.101654052734375e-05, "model_forward_time": 0.023624181747436523, "step": 20327 }, { "epoch": 3.101654052734375e-05, "step": 20327, "training_step_time": 0.12900924682617188 }, { "epoch": 3.101806640625e-05, "model_forward_time": 0.02335190773010254, "step": 20328 }, { "epoch": 3.101806640625e-05, "step": 20328, "training_step_time": 0.2120654582977295 }, { "epoch": 3.101959228515625e-05, "model_forward_time": 0.023449182510375977, "step": 20329 }, { "epoch": 3.101959228515625e-05, "step": 20329, "training_step_time": 0.12731385231018066 }, { "epoch": 3.10211181640625e-05, "grad_norm": 0.2727183699607849, "learning_rate": 2.5815778467660823e-05, "loss": 0.0072, "step": 20330 }, { "epoch": 3.10211181640625e-05, "model_forward_time": 0.023593664169311523, "step": 20330 }, { "epoch": 3.10211181640625e-05, "step": 20330, "training_step_time": 0.11457300186157227 }, { "epoch": 3.102264404296875e-05, "model_forward_time": 0.02353072166442871, "step": 20331 }, { "epoch": 3.102264404296875e-05, "step": 20331, "training_step_time": 0.11472558975219727 }, { "epoch": 3.1024169921875e-05, "model_forward_time": 0.02430438995361328, "step": 20332 }, { "epoch": 3.1024169921875e-05, "step": 20332, "training_step_time": 0.11632394790649414 }, { "epoch": 3.102569580078125e-05, "model_forward_time": 0.024750709533691406, "step": 20333 }, { "epoch": 3.102569580078125e-05, "step": 20333, "training_step_time": 0.11632966995239258 }, { "epoch": 3.10272216796875e-05, "model_forward_time": 0.024159908294677734, "step": 20334 }, { "epoch": 3.10272216796875e-05, "step": 20334, "training_step_time": 0.12450456619262695 }, { "epoch": 3.102874755859375e-05, "model_forward_time": 0.024199724197387695, "step": 20335 }, { "epoch": 3.102874755859375e-05, "step": 20335, "training_step_time": 0.11366581916809082 }, { "epoch": 3.10302734375e-05, "model_forward_time": 0.024562597274780273, "step": 20336 }, { "epoch": 3.10302734375e-05, "step": 20336, "training_step_time": 0.1057136058807373 }, { "epoch": 3.103179931640625e-05, "model_forward_time": 0.024151325225830078, "step": 20337 }, { "epoch": 3.103179931640625e-05, "step": 20337, "training_step_time": 0.11228084564208984 }, { "epoch": 3.10333251953125e-05, "model_forward_time": 0.024050235748291016, "step": 20338 }, { "epoch": 3.10333251953125e-05, "step": 20338, "training_step_time": 0.11221432685852051 }, { "epoch": 3.103485107421875e-05, "model_forward_time": 0.0242464542388916, "step": 20339 }, { "epoch": 3.103485107421875e-05, "step": 20339, "training_step_time": 0.11094522476196289 }, { "epoch": 3.1036376953125e-05, "grad_norm": 0.2212766855955124, "learning_rate": 2.5767553603881767e-05, "loss": 0.0102, "step": 20340 }, { "epoch": 3.1036376953125e-05, "model_forward_time": 0.024171113967895508, "step": 20340 }, { "epoch": 3.1036376953125e-05, "step": 20340, "training_step_time": 0.10960769653320312 }, { "epoch": 3.103790283203125e-05, "model_forward_time": 0.024101972579956055, "step": 20341 }, { "epoch": 3.103790283203125e-05, "step": 20341, "training_step_time": 0.10882878303527832 }, { "epoch": 3.10394287109375e-05, "model_forward_time": 0.024158954620361328, "step": 20342 }, { "epoch": 3.10394287109375e-05, "step": 20342, "training_step_time": 0.10892081260681152 }, { "epoch": 3.104095458984375e-05, "model_forward_time": 0.024196386337280273, "step": 20343 }, { "epoch": 3.104095458984375e-05, "step": 20343, "training_step_time": 0.11417460441589355 }, { "epoch": 3.104248046875e-05, "model_forward_time": 0.024295806884765625, "step": 20344 }, { "epoch": 3.104248046875e-05, "step": 20344, "training_step_time": 0.1095283031463623 }, { "epoch": 3.104400634765625e-05, "model_forward_time": 0.02423095703125, "step": 20345 }, { "epoch": 3.104400634765625e-05, "step": 20345, "training_step_time": 0.10478639602661133 }, { "epoch": 3.10455322265625e-05, "model_forward_time": 0.023151397705078125, "step": 20346 }, { "epoch": 3.10455322265625e-05, "step": 20346, "training_step_time": 0.14491510391235352 }, { "epoch": 3.104705810546875e-05, "model_forward_time": 0.023964405059814453, "step": 20347 }, { "epoch": 3.104705810546875e-05, "step": 20347, "training_step_time": 0.147871732711792 }, { "epoch": 3.1048583984375e-05, "model_forward_time": 0.024170637130737305, "step": 20348 }, { "epoch": 3.1048583984375e-05, "step": 20348, "training_step_time": 0.1676180362701416 }, { "epoch": 3.105010986328125e-05, "model_forward_time": 0.024091005325317383, "step": 20349 }, { "epoch": 3.105010986328125e-05, "step": 20349, "training_step_time": 0.18203401565551758 }, { "epoch": 3.10516357421875e-05, "grad_norm": 0.1759641170501709, "learning_rate": 2.571935818481005e-05, "loss": 0.0094, "step": 20350 }, { "epoch": 3.10516357421875e-05, "model_forward_time": 0.023740530014038086, "step": 20350 }, { "epoch": 3.10516357421875e-05, "step": 20350, "training_step_time": 0.16592907905578613 }, { "epoch": 3.105316162109375e-05, "model_forward_time": 0.024566650390625, "step": 20351 }, { "epoch": 3.105316162109375e-05, "step": 20351, "training_step_time": 0.1929769515991211 }, { "epoch": 3.10546875e-05, "model_forward_time": 0.02453923225402832, "step": 20352 }, { "epoch": 3.10546875e-05, "step": 20352, "training_step_time": 0.11443471908569336 }, { "epoch": 3.105621337890625e-05, "model_forward_time": 0.024609088897705078, "step": 20353 }, { "epoch": 3.105621337890625e-05, "step": 20353, "training_step_time": 0.1074824333190918 }, { "epoch": 3.10577392578125e-05, "model_forward_time": 0.02525019645690918, "step": 20354 }, { "epoch": 3.10577392578125e-05, "step": 20354, "training_step_time": 0.13378024101257324 }, { "epoch": 3.105926513671875e-05, "model_forward_time": 0.025290727615356445, "step": 20355 }, { "epoch": 3.105926513671875e-05, "step": 20355, "training_step_time": 0.11923718452453613 }, { "epoch": 3.1060791015625e-05, "model_forward_time": 0.024960994720458984, "step": 20356 }, { "epoch": 3.1060791015625e-05, "step": 20356, "training_step_time": 0.11603713035583496 }, { "epoch": 3.106231689453125e-05, "model_forward_time": 0.0251615047454834, "step": 20357 }, { "epoch": 3.106231689453125e-05, "step": 20357, "training_step_time": 0.11896753311157227 }, { "epoch": 3.10638427734375e-05, "model_forward_time": 0.02486395835876465, "step": 20358 }, { "epoch": 3.10638427734375e-05, "step": 20358, "training_step_time": 0.11026811599731445 }, { "epoch": 3.106536865234375e-05, "model_forward_time": 0.025456666946411133, "step": 20359 }, { "epoch": 3.106536865234375e-05, "step": 20359, "training_step_time": 0.19449186325073242 }, { "epoch": 3.106689453125e-05, "grad_norm": 0.13494102656841278, "learning_rate": 2.567119226900764e-05, "loss": 0.0098, "step": 20360 }, { "epoch": 3.106689453125e-05, "model_forward_time": 0.024602890014648438, "step": 20360 }, { "epoch": 3.106689453125e-05, "step": 20360, "training_step_time": 0.10335564613342285 }, { "epoch": 3.106842041015625e-05, "model_forward_time": 0.0241241455078125, "step": 20361 }, { "epoch": 3.106842041015625e-05, "step": 20361, "training_step_time": 0.10336542129516602 }, { "epoch": 3.10699462890625e-05, "model_forward_time": 0.024919986724853516, "step": 20362 }, { "epoch": 3.10699462890625e-05, "step": 20362, "training_step_time": 0.10613608360290527 }, { "epoch": 3.107147216796875e-05, "model_forward_time": 0.02514171600341797, "step": 20363 }, { "epoch": 3.107147216796875e-05, "step": 20363, "training_step_time": 0.10741162300109863 }, { "epoch": 3.1072998046875e-05, "model_forward_time": 0.025303363800048828, "step": 20364 }, { "epoch": 3.1072998046875e-05, "step": 20364, "training_step_time": 0.1091318130493164 }, { "epoch": 3.107452392578125e-05, "model_forward_time": 0.027009248733520508, "step": 20365 }, { "epoch": 3.107452392578125e-05, "step": 20365, "training_step_time": 0.12375521659851074 }, { "epoch": 3.10760498046875e-05, "model_forward_time": 0.02488088607788086, "step": 20366 }, { "epoch": 3.10760498046875e-05, "step": 20366, "training_step_time": 0.12371492385864258 }, { "epoch": 3.107757568359375e-05, "model_forward_time": 0.024896621704101562, "step": 20367 }, { "epoch": 3.107757568359375e-05, "step": 20367, "training_step_time": 0.12636899948120117 }, { "epoch": 3.10791015625e-05, "model_forward_time": 0.0253446102142334, "step": 20368 }, { "epoch": 3.10791015625e-05, "step": 20368, "training_step_time": 0.12727046012878418 }, { "epoch": 3.108062744140625e-05, "model_forward_time": 0.024823427200317383, "step": 20369 }, { "epoch": 3.108062744140625e-05, "step": 20369, "training_step_time": 0.12270164489746094 }, { "epoch": 3.10821533203125e-05, "grad_norm": 0.20362550020217896, "learning_rate": 2.562305591500069e-05, "loss": 0.0126, "step": 20370 }, { "epoch": 3.10821533203125e-05, "model_forward_time": 0.024915695190429688, "step": 20370 }, { "epoch": 3.10821533203125e-05, "step": 20370, "training_step_time": 0.12064719200134277 }, { "epoch": 3.108367919921875e-05, "model_forward_time": 0.024836063385009766, "step": 20371 }, { "epoch": 3.108367919921875e-05, "step": 20371, "training_step_time": 0.11359333992004395 }, { "epoch": 3.1085205078125e-05, "model_forward_time": 0.025261402130126953, "step": 20372 }, { "epoch": 3.1085205078125e-05, "step": 20372, "training_step_time": 0.11461138725280762 }, { "epoch": 3.108673095703125e-05, "model_forward_time": 0.025026798248291016, "step": 20373 }, { "epoch": 3.108673095703125e-05, "step": 20373, "training_step_time": 0.11183285713195801 }, { "epoch": 3.10882568359375e-05, "model_forward_time": 0.025376319885253906, "step": 20374 }, { "epoch": 3.10882568359375e-05, "step": 20374, "training_step_time": 0.18063712120056152 }, { "epoch": 3.108978271484375e-05, "model_forward_time": 0.026760578155517578, "step": 20375 }, { "epoch": 3.108978271484375e-05, "step": 20375, "training_step_time": 0.11977458000183105 }, { "epoch": 3.109130859375e-05, "model_forward_time": 0.02406001091003418, "step": 20376 }, { "epoch": 3.109130859375e-05, "step": 20376, "training_step_time": 0.12819242477416992 }, { "epoch": 3.109283447265625e-05, "model_forward_time": 0.024762392044067383, "step": 20377 }, { "epoch": 3.109283447265625e-05, "step": 20377, "training_step_time": 0.11462664604187012 }, { "epoch": 3.10943603515625e-05, "model_forward_time": 0.025179386138916016, "step": 20378 }, { "epoch": 3.10943603515625e-05, "step": 20378, "training_step_time": 0.17092275619506836 }, { "epoch": 3.109588623046875e-05, "model_forward_time": 0.02478337287902832, "step": 20379 }, { "epoch": 3.109588623046875e-05, "step": 20379, "training_step_time": 0.13606810569763184 }, { "epoch": 3.1097412109375e-05, "grad_norm": 0.22211630642414093, "learning_rate": 2.55749491812794e-05, "loss": 0.0197, "step": 20380 }, { "epoch": 3.1097412109375e-05, "model_forward_time": 0.024234533309936523, "step": 20380 }, { "epoch": 3.1097412109375e-05, "step": 20380, "training_step_time": 0.11030793190002441 }, { "epoch": 3.109893798828125e-05, "model_forward_time": 0.024780750274658203, "step": 20381 }, { "epoch": 3.109893798828125e-05, "step": 20381, "training_step_time": 0.10662150382995605 }, { "epoch": 3.11004638671875e-05, "model_forward_time": 0.025136470794677734, "step": 20382 }, { "epoch": 3.11004638671875e-05, "step": 20382, "training_step_time": 0.10565996170043945 }, { "epoch": 3.110198974609375e-05, "model_forward_time": 0.02466106414794922, "step": 20383 }, { "epoch": 3.110198974609375e-05, "step": 20383, "training_step_time": 0.10759520530700684 }, { "epoch": 3.1103515625e-05, "model_forward_time": 0.024927854537963867, "step": 20384 }, { "epoch": 3.1103515625e-05, "step": 20384, "training_step_time": 0.10775351524353027 }, { "epoch": 3.110504150390625e-05, "model_forward_time": 0.024290084838867188, "step": 20385 }, { "epoch": 3.110504150390625e-05, "step": 20385, "training_step_time": 0.10499739646911621 }, { "epoch": 3.11065673828125e-05, "model_forward_time": 0.02501368522644043, "step": 20386 }, { "epoch": 3.11065673828125e-05, "step": 20386, "training_step_time": 0.10746598243713379 }, { "epoch": 3.110809326171875e-05, "model_forward_time": 0.02521681785583496, "step": 20387 }, { "epoch": 3.110809326171875e-05, "step": 20387, "training_step_time": 0.10686516761779785 }, { "epoch": 3.1109619140625e-05, "model_forward_time": 0.025272130966186523, "step": 20388 }, { "epoch": 3.1109619140625e-05, "step": 20388, "training_step_time": 0.10399365425109863 }, { "epoch": 3.111114501953125e-05, "model_forward_time": 0.024933338165283203, "step": 20389 }, { "epoch": 3.111114501953125e-05, "step": 20389, "training_step_time": 0.10834860801696777 }, { "epoch": 3.11126708984375e-05, "grad_norm": 0.25785398483276367, "learning_rate": 2.552687212629799e-05, "loss": 0.0093, "step": 20390 }, { "epoch": 3.11126708984375e-05, "model_forward_time": 0.02491903305053711, "step": 20390 }, { "epoch": 3.11126708984375e-05, "step": 20390, "training_step_time": 0.11054420471191406 }, { "epoch": 3.111419677734375e-05, "model_forward_time": 0.024378299713134766, "step": 20391 }, { "epoch": 3.111419677734375e-05, "step": 20391, "training_step_time": 0.14413833618164062 }, { "epoch": 3.111572265625e-05, "model_forward_time": 0.026362180709838867, "step": 20392 }, { "epoch": 3.111572265625e-05, "step": 20392, "training_step_time": 0.161116361618042 }, { "epoch": 3.111724853515625e-05, "model_forward_time": 0.025140762329101562, "step": 20393 }, { "epoch": 3.111724853515625e-05, "step": 20393, "training_step_time": 0.11197495460510254 }, { "epoch": 3.11187744140625e-05, "model_forward_time": 0.024820327758789062, "step": 20394 }, { "epoch": 3.11187744140625e-05, "step": 20394, "training_step_time": 0.1340630054473877 }, { "epoch": 3.112030029296875e-05, "model_forward_time": 0.025660037994384766, "step": 20395 }, { "epoch": 3.112030029296875e-05, "step": 20395, "training_step_time": 0.20145702362060547 }, { "epoch": 3.1121826171875e-05, "model_forward_time": 0.024155139923095703, "step": 20396 }, { "epoch": 3.1121826171875e-05, "step": 20396, "training_step_time": 0.13207507133483887 }, { "epoch": 3.112335205078125e-05, "model_forward_time": 0.02453136444091797, "step": 20397 }, { "epoch": 3.112335205078125e-05, "step": 20397, "training_step_time": 0.12000131607055664 }, { "epoch": 3.11248779296875e-05, "model_forward_time": 0.024870634078979492, "step": 20398 }, { "epoch": 3.11248779296875e-05, "step": 20398, "training_step_time": 0.1681993007659912 }, { "epoch": 3.112640380859375e-05, "model_forward_time": 0.025240421295166016, "step": 20399 }, { "epoch": 3.112640380859375e-05, "step": 20399, "training_step_time": 0.1349015235900879 }, { "epoch": 3.11279296875e-05, "grad_norm": 0.2017369121313095, "learning_rate": 2.547882480847461e-05, "loss": 0.0064, "step": 20400 }, { "epoch": 3.11279296875e-05, "model_forward_time": 0.02423548698425293, "step": 20400 }, { "epoch": 3.11279296875e-05, "step": 20400, "training_step_time": 0.11487293243408203 }, { "epoch": 3.112945556640625e-05, "model_forward_time": 0.02504730224609375, "step": 20401 }, { "epoch": 3.112945556640625e-05, "step": 20401, "training_step_time": 0.10878634452819824 }, { "epoch": 3.11309814453125e-05, "model_forward_time": 0.024625062942504883, "step": 20402 }, { "epoch": 3.11309814453125e-05, "step": 20402, "training_step_time": 0.11632490158081055 }, { "epoch": 3.113250732421875e-05, "model_forward_time": 0.025011539459228516, "step": 20403 }, { "epoch": 3.113250732421875e-05, "step": 20403, "training_step_time": 0.1076359748840332 }, { "epoch": 3.1134033203125e-05, "model_forward_time": 0.024991989135742188, "step": 20404 }, { "epoch": 3.1134033203125e-05, "step": 20404, "training_step_time": 0.13332653045654297 }, { "epoch": 3.113555908203125e-05, "model_forward_time": 0.025496721267700195, "step": 20405 }, { "epoch": 3.113555908203125e-05, "step": 20405, "training_step_time": 0.11874866485595703 }, { "epoch": 3.11370849609375e-05, "model_forward_time": 0.02481532096862793, "step": 20406 }, { "epoch": 3.11370849609375e-05, "step": 20406, "training_step_time": 0.10432553291320801 }, { "epoch": 3.113861083984375e-05, "model_forward_time": 0.025310039520263672, "step": 20407 }, { "epoch": 3.113861083984375e-05, "step": 20407, "training_step_time": 0.10714411735534668 }, { "epoch": 3.114013671875e-05, "model_forward_time": 0.025255680084228516, "step": 20408 }, { "epoch": 3.114013671875e-05, "step": 20408, "training_step_time": 0.10868120193481445 }, { "epoch": 3.114166259765625e-05, "model_forward_time": 0.02530050277709961, "step": 20409 }, { "epoch": 3.114166259765625e-05, "step": 20409, "training_step_time": 0.10892844200134277 }, { "epoch": 3.11431884765625e-05, "grad_norm": 0.38811054825782776, "learning_rate": 2.543080728619127e-05, "loss": 0.009, "step": 20410 }, { "epoch": 3.11431884765625e-05, "model_forward_time": 0.02608513832092285, "step": 20410 }, { "epoch": 3.11431884765625e-05, "step": 20410, "training_step_time": 0.1089780330657959 }, { "epoch": 3.114471435546875e-05, "model_forward_time": 0.02552962303161621, "step": 20411 }, { "epoch": 3.114471435546875e-05, "step": 20411, "training_step_time": 0.10712456703186035 }, { "epoch": 3.1146240234375e-05, "model_forward_time": 0.025484800338745117, "step": 20412 }, { "epoch": 3.1146240234375e-05, "step": 20412, "training_step_time": 0.10709404945373535 }, { "epoch": 3.114776611328125e-05, "model_forward_time": 0.025324344635009766, "step": 20413 }, { "epoch": 3.114776611328125e-05, "step": 20413, "training_step_time": 0.10573792457580566 }, { "epoch": 3.11492919921875e-05, "model_forward_time": 0.02557086944580078, "step": 20414 }, { "epoch": 3.11492919921875e-05, "step": 20414, "training_step_time": 0.10743427276611328 }, { "epoch": 3.115081787109375e-05, "model_forward_time": 0.025432109832763672, "step": 20415 }, { "epoch": 3.115081787109375e-05, "step": 20415, "training_step_time": 0.10666823387145996 }, { "epoch": 3.115234375e-05, "model_forward_time": 0.025096893310546875, "step": 20416 }, { "epoch": 3.115234375e-05, "step": 20416, "training_step_time": 0.10606789588928223 }, { "epoch": 3.115386962890625e-05, "model_forward_time": 0.025214433670043945, "step": 20417 }, { "epoch": 3.115386962890625e-05, "step": 20417, "training_step_time": 0.10618472099304199 }, { "epoch": 3.11553955078125e-05, "model_forward_time": 0.02852654457092285, "step": 20418 }, { "epoch": 3.11553955078125e-05, "step": 20418, "training_step_time": 0.10781025886535645 }, { "epoch": 3.115692138671875e-05, "model_forward_time": 0.025450468063354492, "step": 20419 }, { "epoch": 3.115692138671875e-05, "step": 20419, "training_step_time": 0.10933136940002441 }, { "epoch": 3.1158447265625e-05, "grad_norm": 0.14591389894485474, "learning_rate": 2.5382819617793813e-05, "loss": 0.0068, "step": 20420 }, { "epoch": 3.1158447265625e-05, "model_forward_time": 0.025165081024169922, "step": 20420 }, { "epoch": 3.1158447265625e-05, "step": 20420, "training_step_time": 0.10795736312866211 }, { "epoch": 3.115997314453125e-05, "model_forward_time": 0.025066137313842773, "step": 20421 }, { "epoch": 3.115997314453125e-05, "step": 20421, "training_step_time": 0.12075114250183105 }, { "epoch": 3.11614990234375e-05, "model_forward_time": 0.024999618530273438, "step": 20422 }, { "epoch": 3.11614990234375e-05, "step": 20422, "training_step_time": 0.12048649787902832 }, { "epoch": 3.116302490234375e-05, "model_forward_time": 0.025182008743286133, "step": 20423 }, { "epoch": 3.116302490234375e-05, "step": 20423, "training_step_time": 0.13148736953735352 }, { "epoch": 3.116455078125e-05, "model_forward_time": 0.024830341339111328, "step": 20424 }, { "epoch": 3.116455078125e-05, "step": 20424, "training_step_time": 0.15788650512695312 }, { "epoch": 3.116607666015625e-05, "model_forward_time": 0.024180173873901367, "step": 20425 }, { "epoch": 3.116607666015625e-05, "step": 20425, "training_step_time": 0.21845483779907227 }, { "epoch": 3.11676025390625e-05, "model_forward_time": 0.02472400665283203, "step": 20426 }, { "epoch": 3.11676025390625e-05, "step": 20426, "training_step_time": 0.1189262866973877 }, { "epoch": 3.116912841796875e-05, "model_forward_time": 0.024558544158935547, "step": 20427 }, { "epoch": 3.116912841796875e-05, "step": 20427, "training_step_time": 0.10732340812683105 }, { "epoch": 3.1170654296875e-05, "model_forward_time": 0.025431156158447266, "step": 20428 }, { "epoch": 3.1170654296875e-05, "step": 20428, "training_step_time": 0.10600423812866211 }, { "epoch": 3.117218017578125e-05, "model_forward_time": 0.025560617446899414, "step": 20429 }, { "epoch": 3.117218017578125e-05, "step": 20429, "training_step_time": 0.11062955856323242 }, { "epoch": 3.11737060546875e-05, "grad_norm": 0.17885607481002808, "learning_rate": 2.5334861861591753e-05, "loss": 0.0056, "step": 20430 }, { "epoch": 3.11737060546875e-05, "model_forward_time": 0.024947404861450195, "step": 20430 }, { "epoch": 3.11737060546875e-05, "step": 20430, "training_step_time": 0.10522913932800293 }, { "epoch": 3.117523193359375e-05, "model_forward_time": 0.02517414093017578, "step": 20431 }, { "epoch": 3.117523193359375e-05, "step": 20431, "training_step_time": 0.10863375663757324 }, { "epoch": 3.11767578125e-05, "model_forward_time": 0.025266647338867188, "step": 20432 }, { "epoch": 3.11767578125e-05, "step": 20432, "training_step_time": 0.10877370834350586 }, { "epoch": 3.117828369140625e-05, "model_forward_time": 0.026983261108398438, "step": 20433 }, { "epoch": 3.117828369140625e-05, "step": 20433, "training_step_time": 0.1101226806640625 }, { "epoch": 3.11798095703125e-05, "model_forward_time": 0.024824857711791992, "step": 20434 }, { "epoch": 3.11798095703125e-05, "step": 20434, "training_step_time": 0.10632467269897461 }, { "epoch": 3.118133544921875e-05, "model_forward_time": 0.024945974349975586, "step": 20435 }, { "epoch": 3.118133544921875e-05, "step": 20435, "training_step_time": 0.10532402992248535 }, { "epoch": 3.1182861328125e-05, "model_forward_time": 0.02496480941772461, "step": 20436 }, { "epoch": 3.1182861328125e-05, "step": 20436, "training_step_time": 0.10535764694213867 }, { "epoch": 3.118438720703125e-05, "model_forward_time": 0.02482128143310547, "step": 20437 }, { "epoch": 3.118438720703125e-05, "step": 20437, "training_step_time": 0.1461029052734375 }, { "epoch": 3.11859130859375e-05, "model_forward_time": 0.024402379989624023, "step": 20438 }, { "epoch": 3.11859130859375e-05, "step": 20438, "training_step_time": 0.16138124465942383 }, { "epoch": 3.118743896484375e-05, "model_forward_time": 0.024184703826904297, "step": 20439 }, { "epoch": 3.118743896484375e-05, "step": 20439, "training_step_time": 0.16146373748779297 }, { "epoch": 3.118896484375e-05, "grad_norm": 0.37250664830207825, "learning_rate": 2.528693407585832e-05, "loss": 0.0126, "step": 20440 }, { "epoch": 3.118896484375e-05, "model_forward_time": 0.025819063186645508, "step": 20440 }, { "epoch": 3.118896484375e-05, "step": 20440, "training_step_time": 0.17675518989562988 }, { "epoch": 3.119049072265625e-05, "model_forward_time": 0.02501201629638672, "step": 20441 }, { "epoch": 3.119049072265625e-05, "step": 20441, "training_step_time": 0.1863386631011963 }, { "epoch": 3.11920166015625e-05, "model_forward_time": 0.024637460708618164, "step": 20442 }, { "epoch": 3.11920166015625e-05, "step": 20442, "training_step_time": 0.11068177223205566 }, { "epoch": 3.119354248046875e-05, "model_forward_time": 0.0246427059173584, "step": 20443 }, { "epoch": 3.119354248046875e-05, "step": 20443, "training_step_time": 0.11658453941345215 }, { "epoch": 3.1195068359375e-05, "model_forward_time": 0.02465057373046875, "step": 20444 }, { "epoch": 3.1195068359375e-05, "step": 20444, "training_step_time": 0.1710047721862793 }, { "epoch": 3.119659423828125e-05, "model_forward_time": 0.024183273315429688, "step": 20445 }, { "epoch": 3.119659423828125e-05, "step": 20445, "training_step_time": 0.13242077827453613 }, { "epoch": 3.11981201171875e-05, "model_forward_time": 0.02753901481628418, "step": 20446 }, { "epoch": 3.11981201171875e-05, "step": 20446, "training_step_time": 0.1101231575012207 }, { "epoch": 3.119964599609375e-05, "model_forward_time": 0.02529597282409668, "step": 20447 }, { "epoch": 3.119964599609375e-05, "step": 20447, "training_step_time": 0.13007760047912598 }, { "epoch": 3.1201171875e-05, "model_forward_time": 0.024972200393676758, "step": 20448 }, { "epoch": 3.1201171875e-05, "step": 20448, "training_step_time": 0.12068939208984375 }, { "epoch": 3.120269775390625e-05, "model_forward_time": 0.025066137313842773, "step": 20449 }, { "epoch": 3.120269775390625e-05, "step": 20449, "training_step_time": 0.10463428497314453 }, { "epoch": 3.12042236328125e-05, "grad_norm": 0.26953527331352234, "learning_rate": 2.5239036318830278e-05, "loss": 0.0097, "step": 20450 }, { "epoch": 3.12042236328125e-05, "model_forward_time": 0.02538156509399414, "step": 20450 }, { "epoch": 3.12042236328125e-05, "step": 20450, "training_step_time": 0.10718417167663574 }, { "epoch": 3.120574951171875e-05, "model_forward_time": 0.024911880493164062, "step": 20451 }, { "epoch": 3.120574951171875e-05, "step": 20451, "training_step_time": 0.11488485336303711 }, { "epoch": 3.1207275390625e-05, "model_forward_time": 0.025112152099609375, "step": 20452 }, { "epoch": 3.1207275390625e-05, "step": 20452, "training_step_time": 0.10546040534973145 }, { "epoch": 3.120880126953125e-05, "model_forward_time": 0.025464296340942383, "step": 20453 }, { "epoch": 3.120880126953125e-05, "step": 20453, "training_step_time": 0.10883331298828125 }, { "epoch": 3.12103271484375e-05, "model_forward_time": 0.02742290496826172, "step": 20454 }, { "epoch": 3.12103271484375e-05, "step": 20454, "training_step_time": 0.10835576057434082 }, { "epoch": 3.121185302734375e-05, "model_forward_time": 0.02506113052368164, "step": 20455 }, { "epoch": 3.121185302734375e-05, "step": 20455, "training_step_time": 0.10542154312133789 }, { "epoch": 3.121337890625e-05, "model_forward_time": 0.025527477264404297, "step": 20456 }, { "epoch": 3.121337890625e-05, "step": 20456, "training_step_time": 0.10657930374145508 }, { "epoch": 3.121490478515625e-05, "model_forward_time": 0.02516484260559082, "step": 20457 }, { "epoch": 3.121490478515625e-05, "step": 20457, "training_step_time": 0.10913705825805664 }, { "epoch": 3.12164306640625e-05, "model_forward_time": 0.02512335777282715, "step": 20458 }, { "epoch": 3.12164306640625e-05, "step": 20458, "training_step_time": 0.10603690147399902 }, { "epoch": 3.121795654296875e-05, "model_forward_time": 0.025205612182617188, "step": 20459 }, { "epoch": 3.121795654296875e-05, "step": 20459, "training_step_time": 0.10616254806518555 }, { "epoch": 3.1219482421875e-05, "grad_norm": 0.18705344200134277, "learning_rate": 2.5191168648707887e-05, "loss": 0.0297, "step": 20460 }, { "epoch": 3.1219482421875e-05, "model_forward_time": 0.0256044864654541, "step": 20460 }, { "epoch": 3.1219482421875e-05, "step": 20460, "training_step_time": 0.11236572265625 }, { "epoch": 3.122100830078125e-05, "model_forward_time": 0.025088787078857422, "step": 20461 }, { "epoch": 3.122100830078125e-05, "step": 20461, "training_step_time": 0.10549378395080566 }, { "epoch": 3.12225341796875e-05, "model_forward_time": 0.025125503540039062, "step": 20462 }, { "epoch": 3.12225341796875e-05, "step": 20462, "training_step_time": 0.10924959182739258 }, { "epoch": 3.122406005859375e-05, "model_forward_time": 0.02544260025024414, "step": 20463 }, { "epoch": 3.122406005859375e-05, "step": 20463, "training_step_time": 0.10643911361694336 }, { "epoch": 3.12255859375e-05, "model_forward_time": 0.025136232376098633, "step": 20464 }, { "epoch": 3.12255859375e-05, "step": 20464, "training_step_time": 0.10842609405517578 }, { "epoch": 3.122711181640625e-05, "model_forward_time": 0.025003433227539062, "step": 20465 }, { "epoch": 3.122711181640625e-05, "step": 20465, "training_step_time": 0.10810065269470215 }, { "epoch": 3.12286376953125e-05, "model_forward_time": 0.02527785301208496, "step": 20466 }, { "epoch": 3.12286376953125e-05, "step": 20466, "training_step_time": 0.2023637294769287 }, { "epoch": 3.123016357421875e-05, "model_forward_time": 0.024599552154541016, "step": 20467 }, { "epoch": 3.123016357421875e-05, "step": 20467, "training_step_time": 0.11918759346008301 }, { "epoch": 3.1231689453125e-05, "model_forward_time": 0.024722576141357422, "step": 20468 }, { "epoch": 3.1231689453125e-05, "step": 20468, "training_step_time": 0.11128616333007812 }, { "epoch": 3.123321533203125e-05, "model_forward_time": 0.025234222412109375, "step": 20469 }, { "epoch": 3.123321533203125e-05, "step": 20469, "training_step_time": 0.10904908180236816 }, { "epoch": 3.12347412109375e-05, "grad_norm": 0.1729920357465744, "learning_rate": 2.5143331123654933e-05, "loss": 0.0158, "step": 20470 }, { "epoch": 3.12347412109375e-05, "model_forward_time": 0.027698516845703125, "step": 20470 }, { "epoch": 3.12347412109375e-05, "step": 20470, "training_step_time": 0.14938783645629883 }, { "epoch": 3.123626708984375e-05, "model_forward_time": 0.02508854866027832, "step": 20471 }, { "epoch": 3.123626708984375e-05, "step": 20471, "training_step_time": 0.13103365898132324 }, { "epoch": 3.123779296875e-05, "model_forward_time": 0.02626776695251465, "step": 20472 }, { "epoch": 3.123779296875e-05, "step": 20472, "training_step_time": 0.12303757667541504 }, { "epoch": 3.123931884765625e-05, "model_forward_time": 0.024585723876953125, "step": 20473 }, { "epoch": 3.123931884765625e-05, "step": 20473, "training_step_time": 0.1123814582824707 }, { "epoch": 3.12408447265625e-05, "model_forward_time": 0.02526712417602539, "step": 20474 }, { "epoch": 3.12408447265625e-05, "step": 20474, "training_step_time": 0.11015558242797852 }, { "epoch": 3.124237060546875e-05, "model_forward_time": 0.025051116943359375, "step": 20475 }, { "epoch": 3.124237060546875e-05, "step": 20475, "training_step_time": 0.10771322250366211 }, { "epoch": 3.1243896484375e-05, "model_forward_time": 0.02417159080505371, "step": 20476 }, { "epoch": 3.1243896484375e-05, "step": 20476, "training_step_time": 0.10909819602966309 }, { "epoch": 3.124542236328125e-05, "model_forward_time": 0.024007558822631836, "step": 20477 }, { "epoch": 3.124542236328125e-05, "step": 20477, "training_step_time": 0.11355733871459961 }, { "epoch": 3.12469482421875e-05, "model_forward_time": 0.027773380279541016, "step": 20478 }, { "epoch": 3.12469482421875e-05, "step": 20478, "training_step_time": 0.11075711250305176 }, { "epoch": 3.124847412109375e-05, "model_forward_time": 0.024991512298583984, "step": 20479 }, { "epoch": 3.124847412109375e-05, "step": 20479, "training_step_time": 0.10997939109802246 }, { "epoch": 3.125e-05, "grad_norm": 0.1775788962841034, "learning_rate": 2.5095523801798495e-05, "loss": 0.0079, "step": 20480 }, { "epoch": 3.125e-05, "model_forward_time": 0.02524280548095703, "step": 20480 }, { "epoch": 3.125e-05, "step": 20480, "training_step_time": 0.11041545867919922 }, { "epoch": 3.125152587890625e-05, "model_forward_time": 0.025027751922607422, "step": 20481 }, { "epoch": 3.125152587890625e-05, "step": 20481, "training_step_time": 0.10884451866149902 }, { "epoch": 3.12530517578125e-05, "model_forward_time": 0.02544403076171875, "step": 20482 }, { "epoch": 3.12530517578125e-05, "step": 20482, "training_step_time": 0.10685873031616211 }, { "epoch": 3.125457763671875e-05, "model_forward_time": 0.024260759353637695, "step": 20483 }, { "epoch": 3.125457763671875e-05, "step": 20483, "training_step_time": 0.14116597175598145 }, { "epoch": 3.1256103515625e-05, "model_forward_time": 0.025297880172729492, "step": 20484 }, { "epoch": 3.1256103515625e-05, "step": 20484, "training_step_time": 0.1682753562927246 }, { "epoch": 3.125762939453125e-05, "model_forward_time": 0.02643132209777832, "step": 20485 }, { "epoch": 3.125762939453125e-05, "step": 20485, "training_step_time": 0.17473149299621582 }, { "epoch": 3.12591552734375e-05, "model_forward_time": 0.024599313735961914, "step": 20486 }, { "epoch": 3.12591552734375e-05, "step": 20486, "training_step_time": 0.1556859016418457 }, { "epoch": 3.126068115234375e-05, "model_forward_time": 0.024196147918701172, "step": 20487 }, { "epoch": 3.126068115234375e-05, "step": 20487, "training_step_time": 0.20816659927368164 }, { "epoch": 3.126220703125e-05, "model_forward_time": 0.023787260055541992, "step": 20488 }, { "epoch": 3.126220703125e-05, "step": 20488, "training_step_time": 0.11965799331665039 }, { "epoch": 3.126373291015625e-05, "model_forward_time": 0.02520608901977539, "step": 20489 }, { "epoch": 3.126373291015625e-05, "step": 20489, "training_step_time": 0.11709427833557129 }, { "epoch": 3.12652587890625e-05, "grad_norm": 0.41922852396965027, "learning_rate": 2.5047746741228978e-05, "loss": 0.0063, "step": 20490 }, { "epoch": 3.12652587890625e-05, "model_forward_time": 0.02466607093811035, "step": 20490 }, { "epoch": 3.12652587890625e-05, "step": 20490, "training_step_time": 0.17425942420959473 }, { "epoch": 3.126678466796875e-05, "model_forward_time": 0.026747465133666992, "step": 20491 }, { "epoch": 3.126678466796875e-05, "step": 20491, "training_step_time": 0.16351938247680664 }, { "epoch": 3.1268310546875e-05, "model_forward_time": 0.024102210998535156, "step": 20492 }, { "epoch": 3.1268310546875e-05, "step": 20492, "training_step_time": 0.11326384544372559 }, { "epoch": 3.126983642578125e-05, "model_forward_time": 0.02425074577331543, "step": 20493 }, { "epoch": 3.126983642578125e-05, "step": 20493, "training_step_time": 0.12543535232543945 }, { "epoch": 3.12713623046875e-05, "model_forward_time": 0.024608612060546875, "step": 20494 }, { "epoch": 3.12713623046875e-05, "step": 20494, "training_step_time": 0.15553593635559082 }, { "epoch": 3.127288818359375e-05, "model_forward_time": 0.024268150329589844, "step": 20495 }, { "epoch": 3.127288818359375e-05, "step": 20495, "training_step_time": 0.16475629806518555 }, { "epoch": 3.12744140625e-05, "model_forward_time": 0.024024248123168945, "step": 20496 }, { "epoch": 3.12744140625e-05, "step": 20496, "training_step_time": 0.13995671272277832 }, { "epoch": 3.127593994140625e-05, "model_forward_time": 0.024458646774291992, "step": 20497 }, { "epoch": 3.127593994140625e-05, "step": 20497, "training_step_time": 0.13447070121765137 }, { "epoch": 3.12774658203125e-05, "model_forward_time": 0.024914264678955078, "step": 20498 }, { "epoch": 3.12774658203125e-05, "step": 20498, "training_step_time": 0.12515974044799805 }, { "epoch": 3.127899169921875e-05, "model_forward_time": 0.025028467178344727, "step": 20499 }, { "epoch": 3.127899169921875e-05, "step": 20499, "training_step_time": 0.12459230422973633 }, { "epoch": 3.1280517578125e-05, "grad_norm": 0.17294204235076904, "learning_rate": 2.500000000000001e-05, "loss": 0.0074, "step": 20500 }, { "epoch": 3.1280517578125e-05, "model_forward_time": 0.024628162384033203, "step": 20500 }, { "epoch": 3.1280517578125e-05, "step": 20500, "training_step_time": 0.1190495491027832 }, { "epoch": 3.128204345703125e-05, "model_forward_time": 0.02515697479248047, "step": 20501 }, { "epoch": 3.128204345703125e-05, "step": 20501, "training_step_time": 0.11376786231994629 }, { "epoch": 3.12835693359375e-05, "model_forward_time": 0.02513432502746582, "step": 20502 }, { "epoch": 3.12835693359375e-05, "step": 20502, "training_step_time": 0.11221599578857422 }, { "epoch": 3.128509521484375e-05, "model_forward_time": 0.025011539459228516, "step": 20503 }, { "epoch": 3.128509521484375e-05, "step": 20503, "training_step_time": 0.11024188995361328 }, { "epoch": 3.128662109375e-05, "model_forward_time": 0.02504253387451172, "step": 20504 }, { "epoch": 3.128662109375e-05, "step": 20504, "training_step_time": 0.11214470863342285 }, { "epoch": 3.128814697265625e-05, "model_forward_time": 0.025163888931274414, "step": 20505 }, { "epoch": 3.128814697265625e-05, "step": 20505, "training_step_time": 0.11267471313476562 }, { "epoch": 3.12896728515625e-05, "model_forward_time": 0.025268077850341797, "step": 20506 }, { "epoch": 3.12896728515625e-05, "step": 20506, "training_step_time": 0.11237025260925293 }, { "epoch": 3.129119873046875e-05, "model_forward_time": 0.024979352951049805, "step": 20507 }, { "epoch": 3.129119873046875e-05, "step": 20507, "training_step_time": 0.10994982719421387 }, { "epoch": 3.1292724609375e-05, "model_forward_time": 0.025050878524780273, "step": 20508 }, { "epoch": 3.1292724609375e-05, "step": 20508, "training_step_time": 0.10926580429077148 }, { "epoch": 3.129425048828125e-05, "model_forward_time": 0.02486133575439453, "step": 20509 }, { "epoch": 3.129425048828125e-05, "step": 20509, "training_step_time": 0.10756564140319824 }, { "epoch": 3.12957763671875e-05, "grad_norm": 0.17006202042102814, "learning_rate": 2.4952283636128372e-05, "loss": 0.0079, "step": 20510 }, { "epoch": 3.12957763671875e-05, "model_forward_time": 0.025213956832885742, "step": 20510 }, { "epoch": 3.12957763671875e-05, "step": 20510, "training_step_time": 0.16357707977294922 }, { "epoch": 3.129730224609375e-05, "model_forward_time": 0.02446150779724121, "step": 20511 }, { "epoch": 3.129730224609375e-05, "step": 20511, "training_step_time": 0.12177085876464844 }, { "epoch": 3.1298828125e-05, "model_forward_time": 0.02486109733581543, "step": 20512 }, { "epoch": 3.1298828125e-05, "step": 20512, "training_step_time": 0.12957143783569336 }, { "epoch": 3.130035400390625e-05, "model_forward_time": 0.02473735809326172, "step": 20513 }, { "epoch": 3.130035400390625e-05, "step": 20513, "training_step_time": 0.15950775146484375 }, { "epoch": 3.13018798828125e-05, "model_forward_time": 0.02431797981262207, "step": 20514 }, { "epoch": 3.13018798828125e-05, "step": 20514, "training_step_time": 0.2212824821472168 }, { "epoch": 3.130340576171875e-05, "model_forward_time": 0.02412700653076172, "step": 20515 }, { "epoch": 3.130340576171875e-05, "step": 20515, "training_step_time": 0.12479186058044434 }, { "epoch": 3.1304931640625e-05, "model_forward_time": 0.02414107322692871, "step": 20516 }, { "epoch": 3.1304931640625e-05, "step": 20516, "training_step_time": 0.10607624053955078 }, { "epoch": 3.130645751953125e-05, "model_forward_time": 0.025371313095092773, "step": 20517 }, { "epoch": 3.130645751953125e-05, "step": 20517, "training_step_time": 0.11039113998413086 }, { "epoch": 3.13079833984375e-05, "model_forward_time": 0.02474355697631836, "step": 20518 }, { "epoch": 3.13079833984375e-05, "step": 20518, "training_step_time": 0.11175107955932617 }, { "epoch": 3.130950927734375e-05, "model_forward_time": 0.024724721908569336, "step": 20519 }, { "epoch": 3.130950927734375e-05, "step": 20519, "training_step_time": 0.11142158508300781 }, { "epoch": 3.131103515625e-05, "grad_norm": 0.2066083699464798, "learning_rate": 2.490459770759398e-05, "loss": 0.0057, "step": 20520 }, { "epoch": 3.131103515625e-05, "model_forward_time": 0.025858402252197266, "step": 20520 }, { "epoch": 3.131103515625e-05, "step": 20520, "training_step_time": 0.11350607872009277 }, { "epoch": 3.131256103515625e-05, "model_forward_time": 0.02526545524597168, "step": 20521 }, { "epoch": 3.131256103515625e-05, "step": 20521, "training_step_time": 0.10991644859313965 }, { "epoch": 3.13140869140625e-05, "model_forward_time": 0.025092601776123047, "step": 20522 }, { "epoch": 3.13140869140625e-05, "step": 20522, "training_step_time": 0.10931754112243652 }, { "epoch": 3.131561279296875e-05, "model_forward_time": 0.024916410446166992, "step": 20523 }, { "epoch": 3.131561279296875e-05, "step": 20523, "training_step_time": 0.10927438735961914 }, { "epoch": 3.1317138671875e-05, "model_forward_time": 0.02543497085571289, "step": 20524 }, { "epoch": 3.1317138671875e-05, "step": 20524, "training_step_time": 0.10972452163696289 }, { "epoch": 3.131866455078125e-05, "model_forward_time": 0.025475502014160156, "step": 20525 }, { "epoch": 3.131866455078125e-05, "step": 20525, "training_step_time": 0.1106874942779541 }, { "epoch": 3.13201904296875e-05, "model_forward_time": 0.025233030319213867, "step": 20526 }, { "epoch": 3.13201904296875e-05, "step": 20526, "training_step_time": 0.13881492614746094 }, { "epoch": 3.132171630859375e-05, "model_forward_time": 0.025217056274414062, "step": 20527 }, { "epoch": 3.132171630859375e-05, "step": 20527, "training_step_time": 0.17469143867492676 }, { "epoch": 3.13232421875e-05, "model_forward_time": 0.024817705154418945, "step": 20528 }, { "epoch": 3.13232421875e-05, "step": 20528, "training_step_time": 0.12029361724853516 }, { "epoch": 3.132476806640625e-05, "model_forward_time": 0.02449321746826172, "step": 20529 }, { "epoch": 3.132476806640625e-05, "step": 20529, "training_step_time": 0.11452078819274902 }, { "epoch": 3.13262939453125e-05, "grad_norm": 0.2253899872303009, "learning_rate": 2.485694227233971e-05, "loss": 0.0138, "step": 20530 }, { "epoch": 3.13262939453125e-05, "model_forward_time": 0.02542901039123535, "step": 20530 }, { "epoch": 3.13262939453125e-05, "step": 20530, "training_step_time": 0.1992936134338379 }, { "epoch": 3.132781982421875e-05, "model_forward_time": 0.02497410774230957, "step": 20531 }, { "epoch": 3.132781982421875e-05, "step": 20531, "training_step_time": 0.15787363052368164 }, { "epoch": 3.1329345703125e-05, "model_forward_time": 0.025403738021850586, "step": 20532 }, { "epoch": 3.1329345703125e-05, "step": 20532, "training_step_time": 0.10877013206481934 }, { "epoch": 3.133087158203125e-05, "model_forward_time": 0.024821043014526367, "step": 20533 }, { "epoch": 3.133087158203125e-05, "step": 20533, "training_step_time": 0.11808180809020996 }, { "epoch": 3.13323974609375e-05, "model_forward_time": 0.025028705596923828, "step": 20534 }, { "epoch": 3.13323974609375e-05, "step": 20534, "training_step_time": 0.19439148902893066 }, { "epoch": 3.133392333984375e-05, "model_forward_time": 0.024146080017089844, "step": 20535 }, { "epoch": 3.133392333984375e-05, "step": 20535, "training_step_time": 0.13718771934509277 }, { "epoch": 3.133544921875e-05, "model_forward_time": 0.024811744689941406, "step": 20536 }, { "epoch": 3.133544921875e-05, "step": 20536, "training_step_time": 0.1084432601928711 }, { "epoch": 3.133697509765625e-05, "model_forward_time": 0.025049448013305664, "step": 20537 }, { "epoch": 3.133697509765625e-05, "step": 20537, "training_step_time": 0.1129453182220459 }, { "epoch": 3.13385009765625e-05, "model_forward_time": 0.025290250778198242, "step": 20538 }, { "epoch": 3.13385009765625e-05, "step": 20538, "training_step_time": 0.11046242713928223 }, { "epoch": 3.134002685546875e-05, "model_forward_time": 0.025930404663085938, "step": 20539 }, { "epoch": 3.134002685546875e-05, "step": 20539, "training_step_time": 0.10640788078308105 }, { "epoch": 3.1341552734375e-05, "grad_norm": 0.26835936307907104, "learning_rate": 2.4809317388271426e-05, "loss": 0.005, "step": 20540 }, { "epoch": 3.1341552734375e-05, "model_forward_time": 0.02527618408203125, "step": 20540 }, { "epoch": 3.1341552734375e-05, "step": 20540, "training_step_time": 0.19098901748657227 }, { "epoch": 3.134307861328125e-05, "model_forward_time": 0.024337291717529297, "step": 20541 }, { "epoch": 3.134307861328125e-05, "step": 20541, "training_step_time": 0.10726499557495117 }, { "epoch": 3.13446044921875e-05, "model_forward_time": 0.024818897247314453, "step": 20542 }, { "epoch": 3.13446044921875e-05, "step": 20542, "training_step_time": 0.1032404899597168 }, { "epoch": 3.134613037109375e-05, "model_forward_time": 0.02489018440246582, "step": 20543 }, { "epoch": 3.134613037109375e-05, "step": 20543, "training_step_time": 0.10628080368041992 }, { "epoch": 3.134765625e-05, "model_forward_time": 0.02514505386352539, "step": 20544 }, { "epoch": 3.134765625e-05, "step": 20544, "training_step_time": 0.1063683032989502 }, { "epoch": 3.134918212890625e-05, "model_forward_time": 0.025470733642578125, "step": 20545 }, { "epoch": 3.134918212890625e-05, "step": 20545, "training_step_time": 0.10642457008361816 }, { "epoch": 3.13507080078125e-05, "model_forward_time": 0.025505542755126953, "step": 20546 }, { "epoch": 3.13507080078125e-05, "step": 20546, "training_step_time": 0.1062922477722168 }, { "epoch": 3.135223388671875e-05, "model_forward_time": 0.024820327758789062, "step": 20547 }, { "epoch": 3.135223388671875e-05, "step": 20547, "training_step_time": 0.10829019546508789 }, { "epoch": 3.1353759765625e-05, "model_forward_time": 0.02498030662536621, "step": 20548 }, { "epoch": 3.1353759765625e-05, "step": 20548, "training_step_time": 0.10569643974304199 }, { "epoch": 3.135528564453125e-05, "model_forward_time": 0.024504423141479492, "step": 20549 }, { "epoch": 3.135528564453125e-05, "step": 20549, "training_step_time": 0.10878753662109375 }, { "epoch": 3.13568115234375e-05, "grad_norm": 0.21904797852039337, "learning_rate": 2.476172311325783e-05, "loss": 0.0084, "step": 20550 }, { "epoch": 3.13568115234375e-05, "model_forward_time": 0.025470733642578125, "step": 20550 }, { "epoch": 3.13568115234375e-05, "step": 20550, "training_step_time": 0.10930371284484863 }, { "epoch": 3.135833740234375e-05, "model_forward_time": 0.024024248123168945, "step": 20551 }, { "epoch": 3.135833740234375e-05, "step": 20551, "training_step_time": 0.10760855674743652 }, { "epoch": 3.135986328125e-05, "model_forward_time": 0.024329423904418945, "step": 20552 }, { "epoch": 3.135986328125e-05, "step": 20552, "training_step_time": 0.11164093017578125 }, { "epoch": 3.136138916015625e-05, "model_forward_time": 0.025037527084350586, "step": 20553 }, { "epoch": 3.136138916015625e-05, "step": 20553, "training_step_time": 0.1100456714630127 }, { "epoch": 3.13629150390625e-05, "model_forward_time": 0.02472090721130371, "step": 20554 }, { "epoch": 3.13629150390625e-05, "step": 20554, "training_step_time": 0.10709953308105469 }, { "epoch": 3.136444091796875e-05, "model_forward_time": 0.024889707565307617, "step": 20555 }, { "epoch": 3.136444091796875e-05, "step": 20555, "training_step_time": 0.19934821128845215 }, { "epoch": 3.1365966796875e-05, "model_forward_time": 0.023773670196533203, "step": 20556 }, { "epoch": 3.1365966796875e-05, "step": 20556, "training_step_time": 0.12378811836242676 }, { "epoch": 3.136749267578125e-05, "model_forward_time": 0.024443626403808594, "step": 20557 }, { "epoch": 3.136749267578125e-05, "step": 20557, "training_step_time": 0.11337018013000488 }, { "epoch": 3.13690185546875e-05, "model_forward_time": 0.027028560638427734, "step": 20558 }, { "epoch": 3.13690185546875e-05, "step": 20558, "training_step_time": 0.1594407558441162 }, { "epoch": 3.137054443359375e-05, "model_forward_time": 0.02451944351196289, "step": 20559 }, { "epoch": 3.137054443359375e-05, "step": 20559, "training_step_time": 0.17675256729125977 }, { "epoch": 3.13720703125e-05, "grad_norm": 0.3118372857570648, "learning_rate": 2.4714159505130452e-05, "loss": 0.0068, "step": 20560 }, { "epoch": 3.13720703125e-05, "model_forward_time": 0.024271726608276367, "step": 20560 }, { "epoch": 3.13720703125e-05, "step": 20560, "training_step_time": 0.16133379936218262 }, { "epoch": 3.137359619140625e-05, "model_forward_time": 0.02428436279296875, "step": 20561 }, { "epoch": 3.137359619140625e-05, "step": 20561, "training_step_time": 0.11382842063903809 }, { "epoch": 3.13751220703125e-05, "model_forward_time": 0.024514436721801758, "step": 20562 }, { "epoch": 3.13751220703125e-05, "step": 20562, "training_step_time": 0.10971617698669434 }, { "epoch": 3.137664794921875e-05, "model_forward_time": 0.024964332580566406, "step": 20563 }, { "epoch": 3.137664794921875e-05, "step": 20563, "training_step_time": 0.10756063461303711 }, { "epoch": 3.1378173828125e-05, "model_forward_time": 0.025181055068969727, "step": 20564 }, { "epoch": 3.1378173828125e-05, "step": 20564, "training_step_time": 0.10816645622253418 }, { "epoch": 3.137969970703125e-05, "model_forward_time": 0.026098251342773438, "step": 20565 }, { "epoch": 3.137969970703125e-05, "step": 20565, "training_step_time": 0.10790491104125977 }, { "epoch": 3.13812255859375e-05, "model_forward_time": 0.024805068969726562, "step": 20566 }, { "epoch": 3.13812255859375e-05, "step": 20566, "training_step_time": 0.10875105857849121 }, { "epoch": 3.138275146484375e-05, "model_forward_time": 0.02469658851623535, "step": 20567 }, { "epoch": 3.138275146484375e-05, "step": 20567, "training_step_time": 0.11125516891479492 }, { "epoch": 3.138427734375e-05, "model_forward_time": 0.024399280548095703, "step": 20568 }, { "epoch": 3.138427734375e-05, "step": 20568, "training_step_time": 0.1059722900390625 }, { "epoch": 3.138580322265625e-05, "model_forward_time": 0.024599790573120117, "step": 20569 }, { "epoch": 3.138580322265625e-05, "step": 20569, "training_step_time": 0.11080336570739746 }, { "epoch": 3.13873291015625e-05, "grad_norm": 0.11126571148633957, "learning_rate": 2.4666626621683592e-05, "loss": 0.0051, "step": 20570 }, { "epoch": 3.13873291015625e-05, "model_forward_time": 0.024835586547851562, "step": 20570 }, { "epoch": 3.13873291015625e-05, "step": 20570, "training_step_time": 0.10753250122070312 }, { "epoch": 3.138885498046875e-05, "model_forward_time": 0.024793624877929688, "step": 20571 }, { "epoch": 3.138885498046875e-05, "step": 20571, "training_step_time": 0.15090513229370117 }, { "epoch": 3.1390380859375e-05, "model_forward_time": 0.023921728134155273, "step": 20572 }, { "epoch": 3.1390380859375e-05, "step": 20572, "training_step_time": 0.1585249900817871 }, { "epoch": 3.139190673828125e-05, "model_forward_time": 0.02491164207458496, "step": 20573 }, { "epoch": 3.139190673828125e-05, "step": 20573, "training_step_time": 0.11076211929321289 }, { "epoch": 3.13934326171875e-05, "model_forward_time": 0.026210784912109375, "step": 20574 }, { "epoch": 3.13934326171875e-05, "step": 20574, "training_step_time": 0.12554454803466797 }, { "epoch": 3.139495849609375e-05, "model_forward_time": 0.025132417678833008, "step": 20575 }, { "epoch": 3.139495849609375e-05, "step": 20575, "training_step_time": 0.20638251304626465 }, { "epoch": 3.1396484375e-05, "model_forward_time": 0.024350881576538086, "step": 20576 }, { "epoch": 3.1396484375e-05, "step": 20576, "training_step_time": 0.1293320655822754 }, { "epoch": 3.139801025390625e-05, "model_forward_time": 0.02468276023864746, "step": 20577 }, { "epoch": 3.139801025390625e-05, "step": 20577, "training_step_time": 0.11166143417358398 }, { "epoch": 3.13995361328125e-05, "model_forward_time": 0.0249941349029541, "step": 20578 }, { "epoch": 3.13995361328125e-05, "step": 20578, "training_step_time": 0.12047719955444336 }, { "epoch": 3.140106201171875e-05, "model_forward_time": 0.027432680130004883, "step": 20579 }, { "epoch": 3.140106201171875e-05, "step": 20579, "training_step_time": 0.1604154109954834 }, { "epoch": 3.1402587890625e-05, "grad_norm": 0.17350897192955017, "learning_rate": 2.4619124520674146e-05, "loss": 0.0047, "step": 20580 }, { "epoch": 3.1402587890625e-05, "model_forward_time": 0.02452540397644043, "step": 20580 }, { "epoch": 3.1402587890625e-05, "step": 20580, "training_step_time": 0.17169570922851562 }, { "epoch": 3.140411376953125e-05, "model_forward_time": 0.02397894859313965, "step": 20581 }, { "epoch": 3.140411376953125e-05, "step": 20581, "training_step_time": 0.10414290428161621 }, { "epoch": 3.14056396484375e-05, "model_forward_time": 0.024307966232299805, "step": 20582 }, { "epoch": 3.14056396484375e-05, "step": 20582, "training_step_time": 0.11286067962646484 }, { "epoch": 3.140716552734375e-05, "model_forward_time": 0.025502443313598633, "step": 20583 }, { "epoch": 3.140716552734375e-05, "step": 20583, "training_step_time": 0.1166226863861084 }, { "epoch": 3.140869140625e-05, "model_forward_time": 0.025084733963012695, "step": 20584 }, { "epoch": 3.140869140625e-05, "step": 20584, "training_step_time": 0.10914802551269531 }, { "epoch": 3.141021728515625e-05, "model_forward_time": 0.024925947189331055, "step": 20585 }, { "epoch": 3.141021728515625e-05, "step": 20585, "training_step_time": 0.19291996955871582 }, { "epoch": 3.14117431640625e-05, "model_forward_time": 0.025218725204467773, "step": 20586 }, { "epoch": 3.14117431640625e-05, "step": 20586, "training_step_time": 0.11324524879455566 }, { "epoch": 3.141326904296875e-05, "model_forward_time": 0.024022817611694336, "step": 20587 }, { "epoch": 3.141326904296875e-05, "step": 20587, "training_step_time": 0.10542106628417969 }, { "epoch": 3.1414794921875e-05, "model_forward_time": 0.024758100509643555, "step": 20588 }, { "epoch": 3.1414794921875e-05, "step": 20588, "training_step_time": 0.13703036308288574 }, { "epoch": 3.141632080078125e-05, "model_forward_time": 0.02486705780029297, "step": 20589 }, { "epoch": 3.141632080078125e-05, "step": 20589, "training_step_time": 0.18683743476867676 }, { "epoch": 3.14178466796875e-05, "grad_norm": 0.21014504134655, "learning_rate": 2.4571653259821694e-05, "loss": 0.008, "step": 20590 }, { "epoch": 3.14178466796875e-05, "model_forward_time": 0.023346424102783203, "step": 20590 }, { "epoch": 3.14178466796875e-05, "step": 20590, "training_step_time": 0.18623614311218262 }, { "epoch": 3.141937255859375e-05, "model_forward_time": 0.024047374725341797, "step": 20591 }, { "epoch": 3.141937255859375e-05, "step": 20591, "training_step_time": 0.177565336227417 }, { "epoch": 3.14208984375e-05, "model_forward_time": 0.024545907974243164, "step": 20592 }, { "epoch": 3.14208984375e-05, "step": 20592, "training_step_time": 0.15633153915405273 }, { "epoch": 3.142242431640625e-05, "model_forward_time": 0.023782968521118164, "step": 20593 }, { "epoch": 3.142242431640625e-05, "step": 20593, "training_step_time": 0.14544677734375 }, { "epoch": 3.14239501953125e-05, "model_forward_time": 0.024092435836791992, "step": 20594 }, { "epoch": 3.14239501953125e-05, "step": 20594, "training_step_time": 0.12963390350341797 }, { "epoch": 3.142547607421875e-05, "model_forward_time": 0.024512290954589844, "step": 20595 }, { "epoch": 3.142547607421875e-05, "step": 20595, "training_step_time": 0.1268603801727295 }, { "epoch": 3.1427001953125e-05, "model_forward_time": 0.025098323822021484, "step": 20596 }, { "epoch": 3.1427001953125e-05, "step": 20596, "training_step_time": 0.11785292625427246 }, { "epoch": 3.142852783203125e-05, "model_forward_time": 0.02449774742126465, "step": 20597 }, { "epoch": 3.142852783203125e-05, "step": 20597, "training_step_time": 0.11635851860046387 }, { "epoch": 3.14300537109375e-05, "model_forward_time": 0.025457382202148438, "step": 20598 }, { "epoch": 3.14300537109375e-05, "step": 20598, "training_step_time": 0.1170046329498291 }, { "epoch": 3.143157958984375e-05, "model_forward_time": 0.025466442108154297, "step": 20599 }, { "epoch": 3.143157958984375e-05, "step": 20599, "training_step_time": 0.2063913345336914 }, { "epoch": 3.143310546875e-05, "grad_norm": 0.23332205414772034, "learning_rate": 2.4524212896808263e-05, "loss": 0.0097, "step": 20600 }, { "epoch": 3.143310546875e-05, "model_forward_time": 0.024246692657470703, "step": 20600 }, { "epoch": 3.143310546875e-05, "step": 20600, "training_step_time": 0.10812997817993164 }, { "epoch": 3.143463134765625e-05, "model_forward_time": 0.025010347366333008, "step": 20601 }, { "epoch": 3.143463134765625e-05, "step": 20601, "training_step_time": 0.21222186088562012 }, { "epoch": 3.14361572265625e-05, "model_forward_time": 0.024252891540527344, "step": 20602 }, { "epoch": 3.14361572265625e-05, "step": 20602, "training_step_time": 0.11905908584594727 }, { "epoch": 3.143768310546875e-05, "model_forward_time": 0.02419567108154297, "step": 20603 }, { "epoch": 3.143768310546875e-05, "step": 20603, "training_step_time": 0.10321712493896484 }, { "epoch": 3.1439208984375e-05, "model_forward_time": 0.025074005126953125, "step": 20604 }, { "epoch": 3.1439208984375e-05, "step": 20604, "training_step_time": 0.10217881202697754 }, { "epoch": 3.144073486328125e-05, "model_forward_time": 0.02528691291809082, "step": 20605 }, { "epoch": 3.144073486328125e-05, "step": 20605, "training_step_time": 0.11067414283752441 }, { "epoch": 3.14422607421875e-05, "model_forward_time": 0.025298595428466797, "step": 20606 }, { "epoch": 3.14422607421875e-05, "step": 20606, "training_step_time": 0.10805320739746094 }, { "epoch": 3.144378662109375e-05, "model_forward_time": 0.025137901306152344, "step": 20607 }, { "epoch": 3.144378662109375e-05, "step": 20607, "training_step_time": 0.10650444030761719 }, { "epoch": 3.14453125e-05, "model_forward_time": 0.0255892276763916, "step": 20608 }, { "epoch": 3.14453125e-05, "step": 20608, "training_step_time": 0.10478973388671875 }, { "epoch": 3.144683837890625e-05, "model_forward_time": 0.02488112449645996, "step": 20609 }, { "epoch": 3.144683837890625e-05, "step": 20609, "training_step_time": 0.10443401336669922 }, { "epoch": 3.14483642578125e-05, "grad_norm": 0.29580172896385193, "learning_rate": 2.447680348927837e-05, "loss": 0.0138, "step": 20610 }, { "epoch": 3.14483642578125e-05, "model_forward_time": 0.025202035903930664, "step": 20610 }, { "epoch": 3.14483642578125e-05, "step": 20610, "training_step_time": 0.10323429107666016 }, { "epoch": 3.144989013671875e-05, "model_forward_time": 0.024883270263671875, "step": 20611 }, { "epoch": 3.144989013671875e-05, "step": 20611, "training_step_time": 0.10295319557189941 }, { "epoch": 3.1451416015625e-05, "model_forward_time": 0.0256807804107666, "step": 20612 }, { "epoch": 3.1451416015625e-05, "step": 20612, "training_step_time": 0.10535454750061035 }, { "epoch": 3.145294189453125e-05, "model_forward_time": 0.02463674545288086, "step": 20613 }, { "epoch": 3.145294189453125e-05, "step": 20613, "training_step_time": 0.15311002731323242 }, { "epoch": 3.14544677734375e-05, "model_forward_time": 0.024644136428833008, "step": 20614 }, { "epoch": 3.14544677734375e-05, "step": 20614, "training_step_time": 0.16457605361938477 }, { "epoch": 3.145599365234375e-05, "model_forward_time": 0.024947404861450195, "step": 20615 }, { "epoch": 3.145599365234375e-05, "step": 20615, "training_step_time": 0.13673186302185059 }, { "epoch": 3.145751953125e-05, "model_forward_time": 0.024286508560180664, "step": 20616 }, { "epoch": 3.145751953125e-05, "step": 20616, "training_step_time": 0.10977506637573242 }, { "epoch": 3.145904541015625e-05, "model_forward_time": 0.025252819061279297, "step": 20617 }, { "epoch": 3.145904541015625e-05, "step": 20617, "training_step_time": 0.1898491382598877 }, { "epoch": 3.14605712890625e-05, "model_forward_time": 0.02456378936767578, "step": 20618 }, { "epoch": 3.14605712890625e-05, "step": 20618, "training_step_time": 0.17006754875183105 }, { "epoch": 3.146209716796875e-05, "model_forward_time": 0.024471044540405273, "step": 20619 }, { "epoch": 3.146209716796875e-05, "step": 20619, "training_step_time": 0.11527538299560547 }, { "epoch": 3.1463623046875e-05, "grad_norm": 0.1555105447769165, "learning_rate": 2.4429425094838903e-05, "loss": 0.0066, "step": 20620 }, { "epoch": 3.1463623046875e-05, "model_forward_time": 0.0266721248626709, "step": 20620 }, { "epoch": 3.1463623046875e-05, "step": 20620, "training_step_time": 0.10814833641052246 }, { "epoch": 3.146514892578125e-05, "model_forward_time": 0.02454686164855957, "step": 20621 }, { "epoch": 3.146514892578125e-05, "step": 20621, "training_step_time": 0.17139506340026855 }, { "epoch": 3.14666748046875e-05, "model_forward_time": 0.02490687370300293, "step": 20622 }, { "epoch": 3.14666748046875e-05, "step": 20622, "training_step_time": 0.13705754280090332 }, { "epoch": 3.146820068359375e-05, "model_forward_time": 0.024480104446411133, "step": 20623 }, { "epoch": 3.146820068359375e-05, "step": 20623, "training_step_time": 0.1067051887512207 }, { "epoch": 3.14697265625e-05, "model_forward_time": 0.02626800537109375, "step": 20624 }, { "epoch": 3.14697265625e-05, "step": 20624, "training_step_time": 0.11019659042358398 }, { "epoch": 3.147125244140625e-05, "model_forward_time": 0.025109291076660156, "step": 20625 }, { "epoch": 3.147125244140625e-05, "step": 20625, "training_step_time": 0.12220072746276855 }, { "epoch": 3.14727783203125e-05, "model_forward_time": 0.0249173641204834, "step": 20626 }, { "epoch": 3.14727783203125e-05, "step": 20626, "training_step_time": 0.10727715492248535 }, { "epoch": 3.147430419921875e-05, "model_forward_time": 0.0248563289642334, "step": 20627 }, { "epoch": 3.147430419921875e-05, "step": 20627, "training_step_time": 0.1877002716064453 }, { "epoch": 3.1475830078125e-05, "model_forward_time": 0.024515867233276367, "step": 20628 }, { "epoch": 3.1475830078125e-05, "step": 20628, "training_step_time": 0.10427975654602051 }, { "epoch": 3.147735595703125e-05, "model_forward_time": 0.024553298950195312, "step": 20629 }, { "epoch": 3.147735595703125e-05, "step": 20629, "training_step_time": 0.10350561141967773 }, { "epoch": 3.14788818359375e-05, "grad_norm": 0.27353760600090027, "learning_rate": 2.438207777105911e-05, "loss": 0.0079, "step": 20630 }, { "epoch": 3.14788818359375e-05, "model_forward_time": 0.024940013885498047, "step": 20630 }, { "epoch": 3.14788818359375e-05, "step": 20630, "training_step_time": 0.10677170753479004 }, { "epoch": 3.148040771484375e-05, "model_forward_time": 0.025317907333374023, "step": 20631 }, { "epoch": 3.148040771484375e-05, "step": 20631, "training_step_time": 0.10391521453857422 }, { "epoch": 3.148193359375e-05, "model_forward_time": 0.02529740333557129, "step": 20632 }, { "epoch": 3.148193359375e-05, "step": 20632, "training_step_time": 0.10454297065734863 }, { "epoch": 3.148345947265625e-05, "model_forward_time": 0.025235891342163086, "step": 20633 }, { "epoch": 3.148345947265625e-05, "step": 20633, "training_step_time": 0.11024594306945801 }, { "epoch": 3.14849853515625e-05, "model_forward_time": 0.025067806243896484, "step": 20634 }, { "epoch": 3.14849853515625e-05, "step": 20634, "training_step_time": 0.10528302192687988 }, { "epoch": 3.148651123046875e-05, "model_forward_time": 0.025241851806640625, "step": 20635 }, { "epoch": 3.148651123046875e-05, "step": 20635, "training_step_time": 0.10708189010620117 }, { "epoch": 3.1488037109375e-05, "model_forward_time": 0.025351762771606445, "step": 20636 }, { "epoch": 3.1488037109375e-05, "step": 20636, "training_step_time": 0.10694408416748047 }, { "epoch": 3.148956298828125e-05, "model_forward_time": 0.025554895401000977, "step": 20637 }, { "epoch": 3.148956298828125e-05, "step": 20637, "training_step_time": 0.10595417022705078 }, { "epoch": 3.14910888671875e-05, "model_forward_time": 0.02491593360900879, "step": 20638 }, { "epoch": 3.14910888671875e-05, "step": 20638, "training_step_time": 0.10448026657104492 }, { "epoch": 3.149261474609375e-05, "model_forward_time": 0.025191783905029297, "step": 20639 }, { "epoch": 3.149261474609375e-05, "step": 20639, "training_step_time": 0.10908889770507812 }, { "epoch": 3.1494140625e-05, "grad_norm": 0.3734072744846344, "learning_rate": 2.433476157547044e-05, "loss": 0.0076, "step": 20640 }, { "epoch": 3.1494140625e-05, "model_forward_time": 0.025458812713623047, "step": 20640 }, { "epoch": 3.1494140625e-05, "step": 20640, "training_step_time": 0.18061208724975586 }, { "epoch": 3.149566650390625e-05, "model_forward_time": 0.0249783992767334, "step": 20641 }, { "epoch": 3.149566650390625e-05, "step": 20641, "training_step_time": 0.20447015762329102 }, { "epoch": 3.14971923828125e-05, "model_forward_time": 0.024279117584228516, "step": 20642 }, { "epoch": 3.14971923828125e-05, "step": 20642, "training_step_time": 0.2408463954925537 }, { "epoch": 3.149871826171875e-05, "model_forward_time": 0.024524211883544922, "step": 20643 }, { "epoch": 3.149871826171875e-05, "step": 20643, "training_step_time": 0.2088308334350586 }, { "epoch": 3.1500244140625e-05, "model_forward_time": 0.024237394332885742, "step": 20644 }, { "epoch": 3.1500244140625e-05, "step": 20644, "training_step_time": 0.1875467300415039 }, { "epoch": 3.150177001953125e-05, "model_forward_time": 0.02464771270751953, "step": 20645 }, { "epoch": 3.150177001953125e-05, "step": 20645, "training_step_time": 0.1774120330810547 }, { "epoch": 3.15032958984375e-05, "model_forward_time": 0.0245361328125, "step": 20646 }, { "epoch": 3.15032958984375e-05, "step": 20646, "training_step_time": 0.16726207733154297 }, { "epoch": 3.150482177734375e-05, "model_forward_time": 0.024179935455322266, "step": 20647 }, { "epoch": 3.150482177734375e-05, "step": 20647, "training_step_time": 0.10902929306030273 }, { "epoch": 3.150634765625e-05, "model_forward_time": 0.024752378463745117, "step": 20648 }, { "epoch": 3.150634765625e-05, "step": 20648, "training_step_time": 0.10428237915039062 }, { "epoch": 3.150787353515625e-05, "model_forward_time": 0.02612757682800293, "step": 20649 }, { "epoch": 3.150787353515625e-05, "step": 20649, "training_step_time": 0.585674524307251 }, { "epoch": 3.15093994140625e-05, "grad_norm": 0.25616809725761414, "learning_rate": 2.4287476565566527e-05, "loss": 0.0043, "step": 20650 }, { "epoch": 3.15093994140625e-05, "model_forward_time": 0.02333807945251465, "step": 20650 }, { "epoch": 3.15093994140625e-05, "step": 20650, "training_step_time": 0.10599160194396973 }, { "epoch": 3.151092529296875e-05, "model_forward_time": 0.024001598358154297, "step": 20651 }, { "epoch": 3.151092529296875e-05, "step": 20651, "training_step_time": 0.19608545303344727 }, { "epoch": 3.1512451171875e-05, "model_forward_time": 0.024433135986328125, "step": 20652 }, { "epoch": 3.1512451171875e-05, "step": 20652, "training_step_time": 0.15146446228027344 }, { "epoch": 3.151397705078125e-05, "model_forward_time": 0.024441003799438477, "step": 20653 }, { "epoch": 3.151397705078125e-05, "step": 20653, "training_step_time": 0.16575884819030762 }, { "epoch": 3.15155029296875e-05, "model_forward_time": 0.02436518669128418, "step": 20654 }, { "epoch": 3.15155029296875e-05, "step": 20654, "training_step_time": 0.17833542823791504 }, { "epoch": 3.151702880859375e-05, "model_forward_time": 0.024562358856201172, "step": 20655 }, { "epoch": 3.151702880859375e-05, "step": 20655, "training_step_time": 0.16649699211120605 }, { "epoch": 3.15185546875e-05, "model_forward_time": 0.02464604377746582, "step": 20656 }, { "epoch": 3.15185546875e-05, "step": 20656, "training_step_time": 0.13065242767333984 }, { "epoch": 3.152008056640625e-05, "model_forward_time": 0.026152372360229492, "step": 20657 }, { "epoch": 3.152008056640625e-05, "step": 20657, "training_step_time": 0.11854267120361328 }, { "epoch": 3.15216064453125e-05, "model_forward_time": 0.024641990661621094, "step": 20658 }, { "epoch": 3.15216064453125e-05, "step": 20658, "training_step_time": 0.1112678050994873 }, { "epoch": 3.152313232421875e-05, "model_forward_time": 0.02551412582397461, "step": 20659 }, { "epoch": 3.152313232421875e-05, "step": 20659, "training_step_time": 0.1777348518371582 }, { "epoch": 3.1524658203125e-05, "grad_norm": 0.09712230414152145, "learning_rate": 2.424022279880312e-05, "loss": 0.0062, "step": 20660 }, { "epoch": 3.1524658203125e-05, "model_forward_time": 0.024485111236572266, "step": 20660 }, { "epoch": 3.1524658203125e-05, "step": 20660, "training_step_time": 0.13455533981323242 }, { "epoch": 3.152618408203125e-05, "model_forward_time": 0.024139881134033203, "step": 20661 }, { "epoch": 3.152618408203125e-05, "step": 20661, "training_step_time": 0.10887455940246582 }, { "epoch": 3.15277099609375e-05, "model_forward_time": 0.02494955062866211, "step": 20662 }, { "epoch": 3.15277099609375e-05, "step": 20662, "training_step_time": 0.11508631706237793 }, { "epoch": 3.152923583984375e-05, "model_forward_time": 0.025473833084106445, "step": 20663 }, { "epoch": 3.152923583984375e-05, "step": 20663, "training_step_time": 0.11283230781555176 }, { "epoch": 3.153076171875e-05, "model_forward_time": 0.025490999221801758, "step": 20664 }, { "epoch": 3.153076171875e-05, "step": 20664, "training_step_time": 0.11524796485900879 }, { "epoch": 3.153228759765625e-05, "model_forward_time": 0.025023698806762695, "step": 20665 }, { "epoch": 3.153228759765625e-05, "step": 20665, "training_step_time": 0.19072484970092773 }, { "epoch": 3.15338134765625e-05, "model_forward_time": 0.024636030197143555, "step": 20666 }, { "epoch": 3.15338134765625e-05, "step": 20666, "training_step_time": 0.10351252555847168 }, { "epoch": 3.153533935546875e-05, "model_forward_time": 0.024320125579833984, "step": 20667 }, { "epoch": 3.153533935546875e-05, "step": 20667, "training_step_time": 0.10203838348388672 }, { "epoch": 3.1536865234375e-05, "model_forward_time": 0.025452375411987305, "step": 20668 }, { "epoch": 3.1536865234375e-05, "step": 20668, "training_step_time": 0.10771369934082031 }, { "epoch": 3.153839111328125e-05, "model_forward_time": 0.025574207305908203, "step": 20669 }, { "epoch": 3.153839111328125e-05, "step": 20669, "training_step_time": 0.10489010810852051 }, { "epoch": 3.15399169921875e-05, "grad_norm": 0.10359963774681091, "learning_rate": 2.419300033259798e-05, "loss": 0.0039, "step": 20670 }, { "epoch": 3.15399169921875e-05, "model_forward_time": 0.025453805923461914, "step": 20670 }, { "epoch": 3.15399169921875e-05, "step": 20670, "training_step_time": 0.11071538925170898 }, { "epoch": 3.154144287109375e-05, "model_forward_time": 0.025765180587768555, "step": 20671 }, { "epoch": 3.154144287109375e-05, "step": 20671, "training_step_time": 0.10737776756286621 }, { "epoch": 3.154296875e-05, "model_forward_time": 0.024924278259277344, "step": 20672 }, { "epoch": 3.154296875e-05, "step": 20672, "training_step_time": 0.10734248161315918 }, { "epoch": 3.154449462890625e-05, "model_forward_time": 0.025188922882080078, "step": 20673 }, { "epoch": 3.154449462890625e-05, "step": 20673, "training_step_time": 0.10474538803100586 }, { "epoch": 3.15460205078125e-05, "model_forward_time": 0.02511906623840332, "step": 20674 }, { "epoch": 3.15460205078125e-05, "step": 20674, "training_step_time": 0.1050870418548584 }, { "epoch": 3.154754638671875e-05, "model_forward_time": 0.02557992935180664, "step": 20675 }, { "epoch": 3.154754638671875e-05, "step": 20675, "training_step_time": 0.1031033992767334 }, { "epoch": 3.1549072265625e-05, "model_forward_time": 0.02510690689086914, "step": 20676 }, { "epoch": 3.1549072265625e-05, "step": 20676, "training_step_time": 0.10477352142333984 }, { "epoch": 3.155059814453125e-05, "model_forward_time": 0.024929285049438477, "step": 20677 }, { "epoch": 3.155059814453125e-05, "step": 20677, "training_step_time": 0.10323190689086914 }, { "epoch": 3.15521240234375e-05, "model_forward_time": 0.024713993072509766, "step": 20678 }, { "epoch": 3.15521240234375e-05, "step": 20678, "training_step_time": 0.10405468940734863 }, { "epoch": 3.155364990234375e-05, "model_forward_time": 0.0244596004486084, "step": 20679 }, { "epoch": 3.155364990234375e-05, "step": 20679, "training_step_time": 0.10951375961303711 }, { "epoch": 3.155517578125e-05, "grad_norm": 0.1519148200750351, "learning_rate": 2.4145809224330896e-05, "loss": 0.0073, "step": 20680 }, { "epoch": 3.155517578125e-05, "model_forward_time": 0.02497267723083496, "step": 20680 }, { "epoch": 3.155517578125e-05, "step": 20680, "training_step_time": 0.18686270713806152 }, { "epoch": 3.155670166015625e-05, "model_forward_time": 0.02387070655822754, "step": 20681 }, { "epoch": 3.155670166015625e-05, "step": 20681, "training_step_time": 0.21013927459716797 }, { "epoch": 3.15582275390625e-05, "model_forward_time": 0.023865222930908203, "step": 20682 }, { "epoch": 3.15582275390625e-05, "step": 20682, "training_step_time": 0.20511341094970703 }, { "epoch": 3.155975341796875e-05, "model_forward_time": 0.024127483367919922, "step": 20683 }, { "epoch": 3.155975341796875e-05, "step": 20683, "training_step_time": 0.23737788200378418 }, { "epoch": 3.1561279296875e-05, "model_forward_time": 0.02369999885559082, "step": 20684 }, { "epoch": 3.1561279296875e-05, "step": 20684, "training_step_time": 0.2355637550354004 }, { "epoch": 3.156280517578125e-05, "model_forward_time": 0.023929119110107422, "step": 20685 }, { "epoch": 3.156280517578125e-05, "step": 20685, "training_step_time": 0.19212818145751953 }, { "epoch": 3.15643310546875e-05, "model_forward_time": 0.024302005767822266, "step": 20686 }, { "epoch": 3.15643310546875e-05, "step": 20686, "training_step_time": 0.1812591552734375 }, { "epoch": 3.156585693359375e-05, "model_forward_time": 0.02437567710876465, "step": 20687 }, { "epoch": 3.156585693359375e-05, "step": 20687, "training_step_time": 0.16174936294555664 }, { "epoch": 3.15673828125e-05, "model_forward_time": 0.0240023136138916, "step": 20688 }, { "epoch": 3.15673828125e-05, "step": 20688, "training_step_time": 0.10582351684570312 }, { "epoch": 3.156890869140625e-05, "model_forward_time": 0.02437877655029297, "step": 20689 }, { "epoch": 3.156890869140625e-05, "step": 20689, "training_step_time": 0.10163307189941406 }, { "epoch": 3.15704345703125e-05, "grad_norm": 0.33185362815856934, "learning_rate": 2.4098649531343497e-05, "loss": 0.0061, "step": 20690 }, { "epoch": 3.15704345703125e-05, "model_forward_time": 0.024849414825439453, "step": 20690 }, { "epoch": 3.15704345703125e-05, "step": 20690, "training_step_time": 0.10292291641235352 }, { "epoch": 3.157196044921875e-05, "model_forward_time": 0.02507305145263672, "step": 20691 }, { "epoch": 3.157196044921875e-05, "step": 20691, "training_step_time": 0.10837793350219727 }, { "epoch": 3.1573486328125e-05, "model_forward_time": 0.02445054054260254, "step": 20692 }, { "epoch": 3.1573486328125e-05, "step": 20692, "training_step_time": 0.14047598838806152 }, { "epoch": 3.157501220703125e-05, "model_forward_time": 0.025241851806640625, "step": 20693 }, { "epoch": 3.157501220703125e-05, "step": 20693, "training_step_time": 0.16272282600402832 }, { "epoch": 3.15765380859375e-05, "model_forward_time": 0.0245513916015625, "step": 20694 }, { "epoch": 3.15765380859375e-05, "step": 20694, "training_step_time": 0.19331026077270508 }, { "epoch": 3.157806396484375e-05, "model_forward_time": 0.024148941040039062, "step": 20695 }, { "epoch": 3.157806396484375e-05, "step": 20695, "training_step_time": 0.14586973190307617 }, { "epoch": 3.157958984375e-05, "model_forward_time": 0.02462601661682129, "step": 20696 }, { "epoch": 3.157958984375e-05, "step": 20696, "training_step_time": 0.2112882137298584 }, { "epoch": 3.158111572265625e-05, "model_forward_time": 0.024312257766723633, "step": 20697 }, { "epoch": 3.158111572265625e-05, "step": 20697, "training_step_time": 0.12217164039611816 }, { "epoch": 3.15826416015625e-05, "model_forward_time": 0.02426624298095703, "step": 20698 }, { "epoch": 3.15826416015625e-05, "step": 20698, "training_step_time": 0.11878561973571777 }, { "epoch": 3.158416748046875e-05, "model_forward_time": 0.02573680877685547, "step": 20699 }, { "epoch": 3.158416748046875e-05, "step": 20699, "training_step_time": 0.1198875904083252 }, { "epoch": 3.1585693359375e-05, "grad_norm": 0.12606415152549744, "learning_rate": 2.405152131093926e-05, "loss": 0.009, "step": 20700 }, { "epoch": 3.1585693359375e-05, "model_forward_time": 0.025359630584716797, "step": 20700 }, { "epoch": 3.1585693359375e-05, "step": 20700, "training_step_time": 0.13372564315795898 }, { "epoch": 3.158721923828125e-05, "model_forward_time": 0.025069475173950195, "step": 20701 }, { "epoch": 3.158721923828125e-05, "step": 20701, "training_step_time": 0.13780617713928223 }, { "epoch": 3.15887451171875e-05, "model_forward_time": 0.02647089958190918, "step": 20702 }, { "epoch": 3.15887451171875e-05, "step": 20702, "training_step_time": 0.10906100273132324 }, { "epoch": 3.159027099609375e-05, "model_forward_time": 0.025393247604370117, "step": 20703 }, { "epoch": 3.159027099609375e-05, "step": 20703, "training_step_time": 0.10835838317871094 }, { "epoch": 3.1591796875e-05, "model_forward_time": 0.025197267532348633, "step": 20704 }, { "epoch": 3.1591796875e-05, "step": 20704, "training_step_time": 0.11574506759643555 }, { "epoch": 3.159332275390625e-05, "model_forward_time": 0.02531147003173828, "step": 20705 }, { "epoch": 3.159332275390625e-05, "step": 20705, "training_step_time": 0.10699319839477539 }, { "epoch": 3.15948486328125e-05, "model_forward_time": 0.026636362075805664, "step": 20706 }, { "epoch": 3.15948486328125e-05, "step": 20706, "training_step_time": 0.19372963905334473 }, { "epoch": 3.159637451171875e-05, "model_forward_time": 0.024389982223510742, "step": 20707 }, { "epoch": 3.159637451171875e-05, "step": 20707, "training_step_time": 0.10294604301452637 }, { "epoch": 3.1597900390625e-05, "model_forward_time": 0.024550914764404297, "step": 20708 }, { "epoch": 3.1597900390625e-05, "step": 20708, "training_step_time": 0.10239434242248535 }, { "epoch": 3.159942626953125e-05, "model_forward_time": 0.025450468063354492, "step": 20709 }, { "epoch": 3.159942626953125e-05, "step": 20709, "training_step_time": 0.10691118240356445 }, { "epoch": 3.16009521484375e-05, "grad_norm": 0.1511950045824051, "learning_rate": 2.4004424620383386e-05, "loss": 0.0048, "step": 20710 }, { "epoch": 3.16009521484375e-05, "model_forward_time": 0.025044679641723633, "step": 20710 }, { "epoch": 3.16009521484375e-05, "step": 20710, "training_step_time": 0.11053848266601562 }, { "epoch": 3.160247802734375e-05, "model_forward_time": 0.02503657341003418, "step": 20711 }, { "epoch": 3.160247802734375e-05, "step": 20711, "training_step_time": 0.11397504806518555 }, { "epoch": 3.160400390625e-05, "model_forward_time": 0.025638580322265625, "step": 20712 }, { "epoch": 3.160400390625e-05, "step": 20712, "training_step_time": 0.11626815795898438 }, { "epoch": 3.160552978515625e-05, "model_forward_time": 0.025356054306030273, "step": 20713 }, { "epoch": 3.160552978515625e-05, "step": 20713, "training_step_time": 0.1189579963684082 }, { "epoch": 3.16070556640625e-05, "model_forward_time": 0.025310516357421875, "step": 20714 }, { "epoch": 3.16070556640625e-05, "step": 20714, "training_step_time": 0.11876296997070312 }, { "epoch": 3.160858154296875e-05, "model_forward_time": 0.02493119239807129, "step": 20715 }, { "epoch": 3.160858154296875e-05, "step": 20715, "training_step_time": 0.11573910713195801 }, { "epoch": 3.1610107421875e-05, "model_forward_time": 0.025492191314697266, "step": 20716 }, { "epoch": 3.1610107421875e-05, "step": 20716, "training_step_time": 0.11744976043701172 }, { "epoch": 3.161163330078125e-05, "model_forward_time": 0.024964332580566406, "step": 20717 }, { "epoch": 3.161163330078125e-05, "step": 20717, "training_step_time": 0.11475944519042969 }, { "epoch": 3.16131591796875e-05, "model_forward_time": 0.02512955665588379, "step": 20718 }, { "epoch": 3.16131591796875e-05, "step": 20718, "training_step_time": 0.11520218849182129 }, { "epoch": 3.161468505859375e-05, "model_forward_time": 0.025191307067871094, "step": 20719 }, { "epoch": 3.161468505859375e-05, "step": 20719, "training_step_time": 0.11188721656799316 }, { "epoch": 3.16162109375e-05, "grad_norm": 0.11043795943260193, "learning_rate": 2.3957359516902845e-05, "loss": 0.0109, "step": 20720 }, { "epoch": 3.16162109375e-05, "model_forward_time": 0.025226831436157227, "step": 20720 }, { "epoch": 3.16162109375e-05, "step": 20720, "training_step_time": 0.11263847351074219 }, { "epoch": 3.161773681640625e-05, "model_forward_time": 0.025321483612060547, "step": 20721 }, { "epoch": 3.161773681640625e-05, "step": 20721, "training_step_time": 0.18237900733947754 }, { "epoch": 3.16192626953125e-05, "model_forward_time": 0.024611473083496094, "step": 20722 }, { "epoch": 3.16192626953125e-05, "step": 20722, "training_step_time": 0.11699271202087402 }, { "epoch": 3.162078857421875e-05, "model_forward_time": 0.02420520782470703, "step": 20723 }, { "epoch": 3.162078857421875e-05, "step": 20723, "training_step_time": 0.12441778182983398 }, { "epoch": 3.1622314453125e-05, "model_forward_time": 0.02517223358154297, "step": 20724 }, { "epoch": 3.1622314453125e-05, "step": 20724, "training_step_time": 0.14185619354248047 }, { "epoch": 3.162384033203125e-05, "model_forward_time": 0.02467513084411621, "step": 20725 }, { "epoch": 3.162384033203125e-05, "step": 20725, "training_step_time": 0.1175391674041748 }, { "epoch": 3.16253662109375e-05, "model_forward_time": 0.024672746658325195, "step": 20726 }, { "epoch": 3.16253662109375e-05, "step": 20726, "training_step_time": 0.1289510726928711 }, { "epoch": 3.162689208984375e-05, "model_forward_time": 0.025783777236938477, "step": 20727 }, { "epoch": 3.162689208984375e-05, "step": 20727, "training_step_time": 0.11605048179626465 }, { "epoch": 3.162841796875e-05, "model_forward_time": 0.025492429733276367, "step": 20728 }, { "epoch": 3.162841796875e-05, "step": 20728, "training_step_time": 0.1086890697479248 }, { "epoch": 3.162994384765625e-05, "model_forward_time": 0.02489447593688965, "step": 20729 }, { "epoch": 3.162994384765625e-05, "step": 20729, "training_step_time": 0.11183643341064453 }, { "epoch": 3.16314697265625e-05, "grad_norm": 0.16555260121822357, "learning_rate": 2.3910326057686127e-05, "loss": 0.0165, "step": 20730 }, { "epoch": 3.16314697265625e-05, "model_forward_time": 0.02549123764038086, "step": 20730 }, { "epoch": 3.16314697265625e-05, "step": 20730, "training_step_time": 0.11242341995239258 }, { "epoch": 3.163299560546875e-05, "model_forward_time": 0.02541184425354004, "step": 20731 }, { "epoch": 3.163299560546875e-05, "step": 20731, "training_step_time": 0.10867094993591309 }, { "epoch": 3.1634521484375e-05, "model_forward_time": 0.025127172470092773, "step": 20732 }, { "epoch": 3.1634521484375e-05, "step": 20732, "training_step_time": 0.10868978500366211 }, { "epoch": 3.163604736328125e-05, "model_forward_time": 0.025144577026367188, "step": 20733 }, { "epoch": 3.163604736328125e-05, "step": 20733, "training_step_time": 0.11056303977966309 }, { "epoch": 3.16375732421875e-05, "model_forward_time": 0.025198698043823242, "step": 20734 }, { "epoch": 3.16375732421875e-05, "step": 20734, "training_step_time": 0.11150646209716797 }, { "epoch": 3.163909912109375e-05, "model_forward_time": 0.025629520416259766, "step": 20735 }, { "epoch": 3.163909912109375e-05, "step": 20735, "training_step_time": 0.1132807731628418 }, { "epoch": 3.1640625e-05, "model_forward_time": 0.026111602783203125, "step": 20736 }, { "epoch": 3.1640625e-05, "step": 20736, "training_step_time": 0.1094660758972168 }, { "epoch": 3.164215087890625e-05, "model_forward_time": 0.025235652923583984, "step": 20737 }, { "epoch": 3.164215087890625e-05, "step": 20737, "training_step_time": 0.14861845970153809 }, { "epoch": 3.16436767578125e-05, "model_forward_time": 0.025146961212158203, "step": 20738 }, { "epoch": 3.16436767578125e-05, "step": 20738, "training_step_time": 0.16847944259643555 }, { "epoch": 3.164520263671875e-05, "model_forward_time": 0.024373292922973633, "step": 20739 }, { "epoch": 3.164520263671875e-05, "step": 20739, "training_step_time": 0.10924839973449707 }, { "epoch": 3.1646728515625e-05, "grad_norm": 0.11490935832262039, "learning_rate": 2.3863324299883366e-05, "loss": 0.004, "step": 20740 }, { "epoch": 3.1646728515625e-05, "model_forward_time": 0.024765729904174805, "step": 20740 }, { "epoch": 3.1646728515625e-05, "step": 20740, "training_step_time": 0.13894963264465332 }, { "epoch": 3.164825439453125e-05, "model_forward_time": 0.025066375732421875, "step": 20741 }, { "epoch": 3.164825439453125e-05, "step": 20741, "training_step_time": 0.19254350662231445 }, { "epoch": 3.16497802734375e-05, "model_forward_time": 0.024647235870361328, "step": 20742 }, { "epoch": 3.16497802734375e-05, "step": 20742, "training_step_time": 0.15812039375305176 }, { "epoch": 3.165130615234375e-05, "model_forward_time": 0.024166107177734375, "step": 20743 }, { "epoch": 3.165130615234375e-05, "step": 20743, "training_step_time": 0.12479496002197266 }, { "epoch": 3.165283203125e-05, "model_forward_time": 0.024815797805786133, "step": 20744 }, { "epoch": 3.165283203125e-05, "step": 20744, "training_step_time": 0.1082921028137207 }, { "epoch": 3.165435791015625e-05, "model_forward_time": 0.02497696876525879, "step": 20745 }, { "epoch": 3.165435791015625e-05, "step": 20745, "training_step_time": 0.12818360328674316 }, { "epoch": 3.16558837890625e-05, "model_forward_time": 0.024749755859375, "step": 20746 }, { "epoch": 3.16558837890625e-05, "step": 20746, "training_step_time": 0.11229085922241211 }, { "epoch": 3.165740966796875e-05, "model_forward_time": 0.025187253952026367, "step": 20747 }, { "epoch": 3.165740966796875e-05, "step": 20747, "training_step_time": 0.1298367977142334 }, { "epoch": 3.1658935546875e-05, "model_forward_time": 0.025503873825073242, "step": 20748 }, { "epoch": 3.1658935546875e-05, "step": 20748, "training_step_time": 0.11685800552368164 }, { "epoch": 3.166046142578125e-05, "model_forward_time": 0.02421259880065918, "step": 20749 }, { "epoch": 3.166046142578125e-05, "step": 20749, "training_step_time": 0.18486928939819336 }, { "epoch": 3.16619873046875e-05, "grad_norm": 0.2567179799079895, "learning_rate": 2.381635430060611e-05, "loss": 0.0123, "step": 20750 }, { "epoch": 3.16619873046875e-05, "model_forward_time": 0.024132490158081055, "step": 20750 }, { "epoch": 3.16619873046875e-05, "step": 20750, "training_step_time": 0.20169949531555176 }, { "epoch": 3.166351318359375e-05, "model_forward_time": 0.02469801902770996, "step": 20751 }, { "epoch": 3.166351318359375e-05, "step": 20751, "training_step_time": 0.21213150024414062 }, { "epoch": 3.16650390625e-05, "model_forward_time": 0.02429342269897461, "step": 20752 }, { "epoch": 3.16650390625e-05, "step": 20752, "training_step_time": 0.1971750259399414 }, { "epoch": 3.166656494140625e-05, "model_forward_time": 0.02386188507080078, "step": 20753 }, { "epoch": 3.166656494140625e-05, "step": 20753, "training_step_time": 0.19209718704223633 }, { "epoch": 3.16680908203125e-05, "model_forward_time": 0.024041175842285156, "step": 20754 }, { "epoch": 3.16680908203125e-05, "step": 20754, "training_step_time": 0.18244719505310059 }, { "epoch": 3.166961669921875e-05, "model_forward_time": 0.02425408363342285, "step": 20755 }, { "epoch": 3.166961669921875e-05, "step": 20755, "training_step_time": 0.1083681583404541 }, { "epoch": 3.1671142578125e-05, "model_forward_time": 0.024461984634399414, "step": 20756 }, { "epoch": 3.1671142578125e-05, "step": 20756, "training_step_time": 0.1056220531463623 }, { "epoch": 3.167266845703125e-05, "model_forward_time": 0.024915218353271484, "step": 20757 }, { "epoch": 3.167266845703125e-05, "step": 20757, "training_step_time": 0.10996294021606445 }, { "epoch": 3.16741943359375e-05, "model_forward_time": 0.025127887725830078, "step": 20758 }, { "epoch": 3.16741943359375e-05, "step": 20758, "training_step_time": 0.11100053787231445 }, { "epoch": 3.167572021484375e-05, "model_forward_time": 0.025577306747436523, "step": 20759 }, { "epoch": 3.167572021484375e-05, "step": 20759, "training_step_time": 0.11177563667297363 }, { "epoch": 3.167724609375e-05, "grad_norm": 0.24263069033622742, "learning_rate": 2.3769416116927335e-05, "loss": 0.009, "step": 20760 }, { "epoch": 3.167724609375e-05, "model_forward_time": 0.02530074119567871, "step": 20760 }, { "epoch": 3.167724609375e-05, "step": 20760, "training_step_time": 0.10830831527709961 }, { "epoch": 3.167877197265625e-05, "model_forward_time": 0.024767637252807617, "step": 20761 }, { "epoch": 3.167877197265625e-05, "step": 20761, "training_step_time": 0.11713171005249023 }, { "epoch": 3.16802978515625e-05, "model_forward_time": 0.024916887283325195, "step": 20762 }, { "epoch": 3.16802978515625e-05, "step": 20762, "training_step_time": 0.11265373229980469 }, { "epoch": 3.168182373046875e-05, "model_forward_time": 0.025521278381347656, "step": 20763 }, { "epoch": 3.168182373046875e-05, "step": 20763, "training_step_time": 0.11465263366699219 }, { "epoch": 3.1683349609375e-05, "model_forward_time": 0.0250091552734375, "step": 20764 }, { "epoch": 3.1683349609375e-05, "step": 20764, "training_step_time": 0.12122321128845215 }, { "epoch": 3.168487548828125e-05, "model_forward_time": 0.025263547897338867, "step": 20765 }, { "epoch": 3.168487548828125e-05, "step": 20765, "training_step_time": 0.1185762882232666 }, { "epoch": 3.16864013671875e-05, "model_forward_time": 0.02532172203063965, "step": 20766 }, { "epoch": 3.16864013671875e-05, "step": 20766, "training_step_time": 0.13256096839904785 }, { "epoch": 3.168792724609375e-05, "model_forward_time": 0.02491307258605957, "step": 20767 }, { "epoch": 3.168792724609375e-05, "step": 20767, "training_step_time": 0.1627037525177002 }, { "epoch": 3.1689453125e-05, "model_forward_time": 0.024481534957885742, "step": 20768 }, { "epoch": 3.1689453125e-05, "step": 20768, "training_step_time": 0.22065401077270508 }, { "epoch": 3.169097900390625e-05, "model_forward_time": 0.025542020797729492, "step": 20769 }, { "epoch": 3.169097900390625e-05, "step": 20769, "training_step_time": 0.12055444717407227 }, { "epoch": 3.16925048828125e-05, "grad_norm": 0.17910423874855042, "learning_rate": 2.3722509805881356e-05, "loss": 0.0143, "step": 20770 }, { "epoch": 3.16925048828125e-05, "model_forward_time": 0.02405571937561035, "step": 20770 }, { "epoch": 3.16925048828125e-05, "step": 20770, "training_step_time": 0.11540937423706055 }, { "epoch": 3.169403076171875e-05, "model_forward_time": 0.02532505989074707, "step": 20771 }, { "epoch": 3.169403076171875e-05, "step": 20771, "training_step_time": 0.1151125431060791 }, { "epoch": 3.1695556640625e-05, "model_forward_time": 0.02534317970275879, "step": 20772 }, { "epoch": 3.1695556640625e-05, "step": 20772, "training_step_time": 0.1123661994934082 }, { "epoch": 3.169708251953125e-05, "model_forward_time": 0.02692890167236328, "step": 20773 }, { "epoch": 3.169708251953125e-05, "step": 20773, "training_step_time": 0.11022472381591797 }, { "epoch": 3.16986083984375e-05, "model_forward_time": 0.02493143081665039, "step": 20774 }, { "epoch": 3.16986083984375e-05, "step": 20774, "training_step_time": 0.10936379432678223 }, { "epoch": 3.170013427734375e-05, "model_forward_time": 0.027202606201171875, "step": 20775 }, { "epoch": 3.170013427734375e-05, "step": 20775, "training_step_time": 0.10977315902709961 }, { "epoch": 3.170166015625e-05, "model_forward_time": 0.026238441467285156, "step": 20776 }, { "epoch": 3.170166015625e-05, "step": 20776, "training_step_time": 0.10866665840148926 }, { "epoch": 3.170318603515625e-05, "model_forward_time": 0.0288393497467041, "step": 20777 }, { "epoch": 3.170318603515625e-05, "step": 20777, "training_step_time": 0.11051654815673828 }, { "epoch": 3.17047119140625e-05, "model_forward_time": 0.025334596633911133, "step": 20778 }, { "epoch": 3.17047119140625e-05, "step": 20778, "training_step_time": 0.10550570487976074 }, { "epoch": 3.170623779296875e-05, "model_forward_time": 0.02440190315246582, "step": 20779 }, { "epoch": 3.170623779296875e-05, "step": 20779, "training_step_time": 0.13516521453857422 }, { "epoch": 3.1707763671875e-05, "grad_norm": 0.238984152674675, "learning_rate": 2.3675635424463754e-05, "loss": 0.0124, "step": 20780 }, { "epoch": 3.1707763671875e-05, "model_forward_time": 0.025301456451416016, "step": 20780 }, { "epoch": 3.1707763671875e-05, "step": 20780, "training_step_time": 0.11484432220458984 }, { "epoch": 3.170928955078125e-05, "model_forward_time": 0.025111913681030273, "step": 20781 }, { "epoch": 3.170928955078125e-05, "step": 20781, "training_step_time": 0.11349177360534668 }, { "epoch": 3.17108154296875e-05, "model_forward_time": 0.025307655334472656, "step": 20782 }, { "epoch": 3.17108154296875e-05, "step": 20782, "training_step_time": 0.11896395683288574 }, { "epoch": 3.171234130859375e-05, "model_forward_time": 0.024878501892089844, "step": 20783 }, { "epoch": 3.171234130859375e-05, "step": 20783, "training_step_time": 0.10982036590576172 }, { "epoch": 3.17138671875e-05, "model_forward_time": 0.02583789825439453, "step": 20784 }, { "epoch": 3.17138671875e-05, "step": 20784, "training_step_time": 0.12938475608825684 }, { "epoch": 3.171539306640625e-05, "model_forward_time": 0.025821924209594727, "step": 20785 }, { "epoch": 3.171539306640625e-05, "step": 20785, "training_step_time": 0.20596623420715332 }, { "epoch": 3.17169189453125e-05, "model_forward_time": 0.024789810180664062, "step": 20786 }, { "epoch": 3.17169189453125e-05, "step": 20786, "training_step_time": 0.1269512176513672 }, { "epoch": 3.171844482421875e-05, "model_forward_time": 0.024674654006958008, "step": 20787 }, { "epoch": 3.171844482421875e-05, "step": 20787, "training_step_time": 0.1154944896697998 }, { "epoch": 3.1719970703125e-05, "model_forward_time": 0.02520585060119629, "step": 20788 }, { "epoch": 3.1719970703125e-05, "step": 20788, "training_step_time": 0.14835119247436523 }, { "epoch": 3.172149658203125e-05, "model_forward_time": 0.025281190872192383, "step": 20789 }, { "epoch": 3.172149658203125e-05, "step": 20789, "training_step_time": 0.14440512657165527 }, { "epoch": 3.17230224609375e-05, "grad_norm": 0.12975092232227325, "learning_rate": 2.362879302963135e-05, "loss": 0.0047, "step": 20790 }, { "epoch": 3.17230224609375e-05, "model_forward_time": 0.024744033813476562, "step": 20790 }, { "epoch": 3.17230224609375e-05, "step": 20790, "training_step_time": 0.11063838005065918 }, { "epoch": 3.172454833984375e-05, "model_forward_time": 0.026821613311767578, "step": 20791 }, { "epoch": 3.172454833984375e-05, "step": 20791, "training_step_time": 0.11112427711486816 }, { "epoch": 3.172607421875e-05, "model_forward_time": 0.02529740333557129, "step": 20792 }, { "epoch": 3.172607421875e-05, "step": 20792, "training_step_time": 0.10890007019042969 }, { "epoch": 3.172760009765625e-05, "model_forward_time": 0.024281024932861328, "step": 20793 }, { "epoch": 3.172760009765625e-05, "step": 20793, "training_step_time": 0.1810460090637207 }, { "epoch": 3.17291259765625e-05, "model_forward_time": 0.024836301803588867, "step": 20794 }, { "epoch": 3.17291259765625e-05, "step": 20794, "training_step_time": 0.11632943153381348 }, { "epoch": 3.173065185546875e-05, "model_forward_time": 0.024623394012451172, "step": 20795 }, { "epoch": 3.173065185546875e-05, "step": 20795, "training_step_time": 0.11379265785217285 }, { "epoch": 3.1732177734375e-05, "model_forward_time": 0.025310039520263672, "step": 20796 }, { "epoch": 3.1732177734375e-05, "step": 20796, "training_step_time": 0.11301898956298828 }, { "epoch": 3.173370361328125e-05, "model_forward_time": 0.025574922561645508, "step": 20797 }, { "epoch": 3.173370361328125e-05, "step": 20797, "training_step_time": 0.1125340461730957 }, { "epoch": 3.17352294921875e-05, "model_forward_time": 0.025353193283081055, "step": 20798 }, { "epoch": 3.17352294921875e-05, "step": 20798, "training_step_time": 0.1081991195678711 }, { "epoch": 3.173675537109375e-05, "model_forward_time": 0.02575516700744629, "step": 20799 }, { "epoch": 3.173675537109375e-05, "step": 20799, "training_step_time": 0.10750007629394531 }, { "epoch": 3.173828125e-05, "grad_norm": 0.1220598816871643, "learning_rate": 2.3581982678302063e-05, "loss": 0.0123, "step": 20800 }, { "epoch": 3.173828125e-05, "model_forward_time": 0.025641679763793945, "step": 20800 }, { "epoch": 3.173828125e-05, "step": 20800, "training_step_time": 0.11035013198852539 }, { "epoch": 3.173980712890625e-05, "model_forward_time": 0.025315284729003906, "step": 20801 }, { "epoch": 3.173980712890625e-05, "step": 20801, "training_step_time": 0.10729002952575684 }, { "epoch": 3.17413330078125e-05, "model_forward_time": 0.025231361389160156, "step": 20802 }, { "epoch": 3.17413330078125e-05, "step": 20802, "training_step_time": 0.10660552978515625 }, { "epoch": 3.174285888671875e-05, "model_forward_time": 0.0252687931060791, "step": 20803 }, { "epoch": 3.174285888671875e-05, "step": 20803, "training_step_time": 0.10571980476379395 }, { "epoch": 3.1744384765625e-05, "model_forward_time": 0.02560281753540039, "step": 20804 }, { "epoch": 3.1744384765625e-05, "step": 20804, "training_step_time": 0.10579252243041992 }, { "epoch": 3.174591064453125e-05, "model_forward_time": 0.024433612823486328, "step": 20805 }, { "epoch": 3.174591064453125e-05, "step": 20805, "training_step_time": 0.10732126235961914 }, { "epoch": 3.17474365234375e-05, "model_forward_time": 0.025250911712646484, "step": 20806 }, { "epoch": 3.17474365234375e-05, "step": 20806, "training_step_time": 0.10862851142883301 }, { "epoch": 3.174896240234375e-05, "model_forward_time": 0.02541804313659668, "step": 20807 }, { "epoch": 3.174896240234375e-05, "step": 20807, "training_step_time": 0.11018991470336914 }, { "epoch": 3.175048828125e-05, "model_forward_time": 0.025758028030395508, "step": 20808 }, { "epoch": 3.175048828125e-05, "step": 20808, "training_step_time": 0.10549664497375488 }, { "epoch": 3.175201416015625e-05, "model_forward_time": 0.02521538734436035, "step": 20809 }, { "epoch": 3.175201416015625e-05, "step": 20809, "training_step_time": 0.10509085655212402 }, { "epoch": 3.17535400390625e-05, "grad_norm": 0.1310320496559143, "learning_rate": 2.353520442735488e-05, "loss": 0.0043, "step": 20810 }, { "epoch": 3.17535400390625e-05, "model_forward_time": 0.0251462459564209, "step": 20810 }, { "epoch": 3.17535400390625e-05, "step": 20810, "training_step_time": 0.12888407707214355 }, { "epoch": 3.175506591796875e-05, "model_forward_time": 0.02557086944580078, "step": 20811 }, { "epoch": 3.175506591796875e-05, "step": 20811, "training_step_time": 0.11256814002990723 }, { "epoch": 3.1756591796875e-05, "model_forward_time": 0.025347232818603516, "step": 20812 }, { "epoch": 3.1756591796875e-05, "step": 20812, "training_step_time": 0.12863755226135254 }, { "epoch": 3.175811767578125e-05, "model_forward_time": 0.02519989013671875, "step": 20813 }, { "epoch": 3.175811767578125e-05, "step": 20813, "training_step_time": 0.13690567016601562 }, { "epoch": 3.17596435546875e-05, "model_forward_time": 0.02509593963623047, "step": 20814 }, { "epoch": 3.17596435546875e-05, "step": 20814, "training_step_time": 0.11874055862426758 }, { "epoch": 3.176116943359375e-05, "model_forward_time": 0.0266873836517334, "step": 20815 }, { "epoch": 3.176116943359375e-05, "step": 20815, "training_step_time": 0.12831377983093262 }, { "epoch": 3.17626953125e-05, "model_forward_time": 0.02518773078918457, "step": 20816 }, { "epoch": 3.17626953125e-05, "step": 20816, "training_step_time": 0.1165003776550293 }, { "epoch": 3.176422119140625e-05, "model_forward_time": 0.02521657943725586, "step": 20817 }, { "epoch": 3.176422119140625e-05, "step": 20817, "training_step_time": 0.10573840141296387 }, { "epoch": 3.17657470703125e-05, "model_forward_time": 0.025226354598999023, "step": 20818 }, { "epoch": 3.17657470703125e-05, "step": 20818, "training_step_time": 0.11005496978759766 }, { "epoch": 3.176727294921875e-05, "model_forward_time": 0.02493596076965332, "step": 20819 }, { "epoch": 3.176727294921875e-05, "step": 20819, "training_step_time": 0.1055607795715332 }, { "epoch": 3.1768798828125e-05, "grad_norm": 0.12881946563720703, "learning_rate": 2.3488458333629777e-05, "loss": 0.0153, "step": 20820 }, { "epoch": 3.1768798828125e-05, "model_forward_time": 0.024974346160888672, "step": 20820 }, { "epoch": 3.1768798828125e-05, "step": 20820, "training_step_time": 0.10694003105163574 }, { "epoch": 3.177032470703125e-05, "model_forward_time": 0.02532172203063965, "step": 20821 }, { "epoch": 3.177032470703125e-05, "step": 20821, "training_step_time": 0.10769891738891602 }, { "epoch": 3.17718505859375e-05, "model_forward_time": 0.025367259979248047, "step": 20822 }, { "epoch": 3.17718505859375e-05, "step": 20822, "training_step_time": 0.10882449150085449 }, { "epoch": 3.177337646484375e-05, "model_forward_time": 0.02500772476196289, "step": 20823 }, { "epoch": 3.177337646484375e-05, "step": 20823, "training_step_time": 0.10730719566345215 }, { "epoch": 3.177490234375e-05, "model_forward_time": 0.025046586990356445, "step": 20824 }, { "epoch": 3.177490234375e-05, "step": 20824, "training_step_time": 0.1069173812866211 }, { "epoch": 3.177642822265625e-05, "model_forward_time": 0.024770498275756836, "step": 20825 }, { "epoch": 3.177642822265625e-05, "step": 20825, "training_step_time": 0.10652613639831543 }, { "epoch": 3.17779541015625e-05, "model_forward_time": 0.025076627731323242, "step": 20826 }, { "epoch": 3.17779541015625e-05, "step": 20826, "training_step_time": 0.15584397315979004 }, { "epoch": 3.177947998046875e-05, "model_forward_time": 0.024164915084838867, "step": 20827 }, { "epoch": 3.177947998046875e-05, "step": 20827, "training_step_time": 0.19965028762817383 }, { "epoch": 3.1781005859375e-05, "model_forward_time": 0.024282455444335938, "step": 20828 }, { "epoch": 3.1781005859375e-05, "step": 20828, "training_step_time": 0.20502305030822754 }, { "epoch": 3.178253173828125e-05, "model_forward_time": 0.0240018367767334, "step": 20829 }, { "epoch": 3.178253173828125e-05, "step": 20829, "training_step_time": 0.1793513298034668 }, { "epoch": 3.17840576171875e-05, "grad_norm": 0.2658945918083191, "learning_rate": 2.344174445392766e-05, "loss": 0.0046, "step": 20830 }, { "epoch": 3.17840576171875e-05, "model_forward_time": 0.024616003036499023, "step": 20830 }, { "epoch": 3.17840576171875e-05, "step": 20830, "training_step_time": 0.20130038261413574 }, { "epoch": 3.178558349609375e-05, "model_forward_time": 0.023886680603027344, "step": 20831 }, { "epoch": 3.178558349609375e-05, "step": 20831, "training_step_time": 0.21325325965881348 }, { "epoch": 3.1787109375e-05, "model_forward_time": 0.024231433868408203, "step": 20832 }, { "epoch": 3.1787109375e-05, "step": 20832, "training_step_time": 0.14525938034057617 }, { "epoch": 3.178863525390625e-05, "model_forward_time": 0.024719715118408203, "step": 20833 }, { "epoch": 3.178863525390625e-05, "step": 20833, "training_step_time": 0.17214727401733398 }, { "epoch": 3.17901611328125e-05, "model_forward_time": 0.024571895599365234, "step": 20834 }, { "epoch": 3.17901611328125e-05, "step": 20834, "training_step_time": 0.16259336471557617 }, { "epoch": 3.179168701171875e-05, "model_forward_time": 0.024523258209228516, "step": 20835 }, { "epoch": 3.179168701171875e-05, "step": 20835, "training_step_time": 0.10352969169616699 }, { "epoch": 3.1793212890625e-05, "model_forward_time": 0.025072336196899414, "step": 20836 }, { "epoch": 3.1793212890625e-05, "step": 20836, "training_step_time": 0.12267017364501953 }, { "epoch": 3.179473876953125e-05, "model_forward_time": 0.02525019645690918, "step": 20837 }, { "epoch": 3.179473876953125e-05, "step": 20837, "training_step_time": 0.10623574256896973 }, { "epoch": 3.17962646484375e-05, "model_forward_time": 0.025304079055786133, "step": 20838 }, { "epoch": 3.17962646484375e-05, "step": 20838, "training_step_time": 0.19823503494262695 }, { "epoch": 3.179779052734375e-05, "model_forward_time": 0.023879528045654297, "step": 20839 }, { "epoch": 3.179779052734375e-05, "step": 20839, "training_step_time": 0.10929751396179199 }, { "epoch": 3.179931640625e-05, "grad_norm": 0.13971443474292755, "learning_rate": 2.339506284501033e-05, "loss": 0.0087, "step": 20840 }, { "epoch": 3.179931640625e-05, "model_forward_time": 0.02488565444946289, "step": 20840 }, { "epoch": 3.179931640625e-05, "step": 20840, "training_step_time": 0.10435342788696289 }, { "epoch": 3.180084228515625e-05, "model_forward_time": 0.025182008743286133, "step": 20841 }, { "epoch": 3.180084228515625e-05, "step": 20841, "training_step_time": 0.10995030403137207 }, { "epoch": 3.18023681640625e-05, "model_forward_time": 0.026810407638549805, "step": 20842 }, { "epoch": 3.18023681640625e-05, "step": 20842, "training_step_time": 0.10855436325073242 }, { "epoch": 3.180389404296875e-05, "model_forward_time": 0.02527141571044922, "step": 20843 }, { "epoch": 3.180389404296875e-05, "step": 20843, "training_step_time": 0.10493826866149902 }, { "epoch": 3.1805419921875e-05, "model_forward_time": 0.025180339813232422, "step": 20844 }, { "epoch": 3.1805419921875e-05, "step": 20844, "training_step_time": 0.10901880264282227 }, { "epoch": 3.180694580078125e-05, "model_forward_time": 0.02506566047668457, "step": 20845 }, { "epoch": 3.180694580078125e-05, "step": 20845, "training_step_time": 0.10476016998291016 }, { "epoch": 3.18084716796875e-05, "model_forward_time": 0.02523016929626465, "step": 20846 }, { "epoch": 3.18084716796875e-05, "step": 20846, "training_step_time": 0.10540056228637695 }, { "epoch": 3.180999755859375e-05, "model_forward_time": 0.025115013122558594, "step": 20847 }, { "epoch": 3.180999755859375e-05, "step": 20847, "training_step_time": 0.10620665550231934 }, { "epoch": 3.18115234375e-05, "model_forward_time": 0.02496027946472168, "step": 20848 }, { "epoch": 3.18115234375e-05, "step": 20848, "training_step_time": 0.10468721389770508 }, { "epoch": 3.181304931640625e-05, "model_forward_time": 0.024091243743896484, "step": 20849 }, { "epoch": 3.181304931640625e-05, "step": 20849, "training_step_time": 0.10612297058105469 }, { "epoch": 3.18145751953125e-05, "grad_norm": 0.128709614276886, "learning_rate": 2.3348413563600325e-05, "loss": 0.0072, "step": 20850 }, { "epoch": 3.18145751953125e-05, "model_forward_time": 0.0250856876373291, "step": 20850 }, { "epoch": 3.18145751953125e-05, "step": 20850, "training_step_time": 0.11839056015014648 }, { "epoch": 3.181610107421875e-05, "model_forward_time": 0.025763988494873047, "step": 20851 }, { "epoch": 3.181610107421875e-05, "step": 20851, "training_step_time": 0.14472746849060059 }, { "epoch": 3.1817626953125e-05, "model_forward_time": 0.024891376495361328, "step": 20852 }, { "epoch": 3.1817626953125e-05, "step": 20852, "training_step_time": 0.13581252098083496 }, { "epoch": 3.181915283203125e-05, "model_forward_time": 0.02466750144958496, "step": 20853 }, { "epoch": 3.181915283203125e-05, "step": 20853, "training_step_time": 0.22120118141174316 }, { "epoch": 3.18206787109375e-05, "model_forward_time": 0.024425029754638672, "step": 20854 }, { "epoch": 3.18206787109375e-05, "step": 20854, "training_step_time": 0.13273167610168457 }, { "epoch": 3.182220458984375e-05, "model_forward_time": 0.024687767028808594, "step": 20855 }, { "epoch": 3.182220458984375e-05, "step": 20855, "training_step_time": 0.15469598770141602 }, { "epoch": 3.182373046875e-05, "model_forward_time": 0.024582386016845703, "step": 20856 }, { "epoch": 3.182373046875e-05, "step": 20856, "training_step_time": 0.13431477546691895 }, { "epoch": 3.182525634765625e-05, "model_forward_time": 0.026174306869506836, "step": 20857 }, { "epoch": 3.182525634765625e-05, "step": 20857, "training_step_time": 0.21352767944335938 }, { "epoch": 3.18267822265625e-05, "model_forward_time": 0.024483919143676758, "step": 20858 }, { "epoch": 3.18267822265625e-05, "step": 20858, "training_step_time": 0.12007617950439453 }, { "epoch": 3.182830810546875e-05, "model_forward_time": 0.02412128448486328, "step": 20859 }, { "epoch": 3.182830810546875e-05, "step": 20859, "training_step_time": 0.10243535041809082 }, { "epoch": 3.1829833984375e-05, "grad_norm": 0.23636561632156372, "learning_rate": 2.3301796666380898e-05, "loss": 0.0111, "step": 20860 }, { "epoch": 3.1829833984375e-05, "model_forward_time": 0.025049209594726562, "step": 20860 }, { "epoch": 3.1829833984375e-05, "step": 20860, "training_step_time": 0.10692572593688965 }, { "epoch": 3.183135986328125e-05, "model_forward_time": 0.025345325469970703, "step": 20861 }, { "epoch": 3.183135986328125e-05, "step": 20861, "training_step_time": 0.10640907287597656 }, { "epoch": 3.18328857421875e-05, "model_forward_time": 0.025022268295288086, "step": 20862 }, { "epoch": 3.18328857421875e-05, "step": 20862, "training_step_time": 0.10808515548706055 }, { "epoch": 3.183441162109375e-05, "model_forward_time": 0.025142192840576172, "step": 20863 }, { "epoch": 3.183441162109375e-05, "step": 20863, "training_step_time": 0.10917782783508301 }, { "epoch": 3.18359375e-05, "model_forward_time": 0.024912595748901367, "step": 20864 }, { "epoch": 3.18359375e-05, "step": 20864, "training_step_time": 0.10842394828796387 }, { "epoch": 3.183746337890625e-05, "model_forward_time": 0.02498030662536621, "step": 20865 }, { "epoch": 3.183746337890625e-05, "step": 20865, "training_step_time": 0.10465693473815918 }, { "epoch": 3.18389892578125e-05, "model_forward_time": 0.02520918846130371, "step": 20866 }, { "epoch": 3.18389892578125e-05, "step": 20866, "training_step_time": 0.10567617416381836 }, { "epoch": 3.184051513671875e-05, "model_forward_time": 0.02533578872680664, "step": 20867 }, { "epoch": 3.184051513671875e-05, "step": 20867, "training_step_time": 0.10598874092102051 }, { "epoch": 3.1842041015625e-05, "model_forward_time": 0.025208711624145508, "step": 20868 }, { "epoch": 3.1842041015625e-05, "step": 20868, "training_step_time": 0.10860109329223633 }, { "epoch": 3.184356689453125e-05, "model_forward_time": 0.02535724639892578, "step": 20869 }, { "epoch": 3.184356689453125e-05, "step": 20869, "training_step_time": 0.10575580596923828 }, { "epoch": 3.18450927734375e-05, "grad_norm": 0.22090311348438263, "learning_rate": 2.3255212209996025e-05, "loss": 0.0069, "step": 20870 }, { "epoch": 3.18450927734375e-05, "model_forward_time": 0.024407148361206055, "step": 20870 }, { "epoch": 3.18450927734375e-05, "step": 20870, "training_step_time": 0.14859771728515625 }, { "epoch": 3.184661865234375e-05, "model_forward_time": 0.024658679962158203, "step": 20871 }, { "epoch": 3.184661865234375e-05, "step": 20871, "training_step_time": 0.16710686683654785 }, { "epoch": 3.184814453125e-05, "model_forward_time": 0.026210784912109375, "step": 20872 }, { "epoch": 3.184814453125e-05, "step": 20872, "training_step_time": 0.1180417537689209 }, { "epoch": 3.184967041015625e-05, "model_forward_time": 0.024649381637573242, "step": 20873 }, { "epoch": 3.184967041015625e-05, "step": 20873, "training_step_time": 0.11768913269042969 }, { "epoch": 3.18511962890625e-05, "model_forward_time": 0.025539875030517578, "step": 20874 }, { "epoch": 3.18511962890625e-05, "step": 20874, "training_step_time": 0.20099544525146484 }, { "epoch": 3.185272216796875e-05, "model_forward_time": 0.024906158447265625, "step": 20875 }, { "epoch": 3.185272216796875e-05, "step": 20875, "training_step_time": 0.16398024559020996 }, { "epoch": 3.1854248046875e-05, "model_forward_time": 0.0240478515625, "step": 20876 }, { "epoch": 3.1854248046875e-05, "step": 20876, "training_step_time": 0.19109416007995605 }, { "epoch": 3.185577392578125e-05, "model_forward_time": 0.023636579513549805, "step": 20877 }, { "epoch": 3.185577392578125e-05, "step": 20877, "training_step_time": 0.13818693161010742 }, { "epoch": 3.18572998046875e-05, "model_forward_time": 0.023825407028198242, "step": 20878 }, { "epoch": 3.18572998046875e-05, "step": 20878, "training_step_time": 0.11240768432617188 }, { "epoch": 3.185882568359375e-05, "model_forward_time": 0.024588823318481445, "step": 20879 }, { "epoch": 3.185882568359375e-05, "step": 20879, "training_step_time": 0.1066887378692627 }, { "epoch": 3.18603515625e-05, "grad_norm": 0.3050976097583771, "learning_rate": 2.3208660251050158e-05, "loss": 0.0111, "step": 20880 }, { "epoch": 3.18603515625e-05, "model_forward_time": 0.024410724639892578, "step": 20880 }, { "epoch": 3.18603515625e-05, "step": 20880, "training_step_time": 0.1069645881652832 }, { "epoch": 3.186187744140625e-05, "model_forward_time": 0.024531841278076172, "step": 20881 }, { "epoch": 3.186187744140625e-05, "step": 20881, "training_step_time": 0.1074671745300293 }, { "epoch": 3.18634033203125e-05, "model_forward_time": 0.024474620819091797, "step": 20882 }, { "epoch": 3.18634033203125e-05, "step": 20882, "training_step_time": 0.1943802833557129 }, { "epoch": 3.186492919921875e-05, "model_forward_time": 0.023926973342895508, "step": 20883 }, { "epoch": 3.186492919921875e-05, "step": 20883, "training_step_time": 0.11274051666259766 }, { "epoch": 3.1866455078125e-05, "model_forward_time": 0.023557186126708984, "step": 20884 }, { "epoch": 3.1866455078125e-05, "step": 20884, "training_step_time": 0.11159205436706543 }, { "epoch": 3.186798095703125e-05, "model_forward_time": 0.02423262596130371, "step": 20885 }, { "epoch": 3.186798095703125e-05, "step": 20885, "training_step_time": 0.11328935623168945 }, { "epoch": 3.18695068359375e-05, "model_forward_time": 0.02410435676574707, "step": 20886 }, { "epoch": 3.18695068359375e-05, "step": 20886, "training_step_time": 0.11318278312683105 }, { "epoch": 3.187103271484375e-05, "model_forward_time": 0.024291515350341797, "step": 20887 }, { "epoch": 3.187103271484375e-05, "step": 20887, "training_step_time": 0.11020183563232422 }, { "epoch": 3.187255859375e-05, "model_forward_time": 0.02452254295349121, "step": 20888 }, { "epoch": 3.187255859375e-05, "step": 20888, "training_step_time": 0.10756850242614746 }, { "epoch": 3.187408447265625e-05, "model_forward_time": 0.023779630661010742, "step": 20889 }, { "epoch": 3.187408447265625e-05, "step": 20889, "training_step_time": 0.1083061695098877 }, { "epoch": 3.18756103515625e-05, "grad_norm": 0.2992817163467407, "learning_rate": 2.3162140846108366e-05, "loss": 0.0104, "step": 20890 }, { "epoch": 3.18756103515625e-05, "model_forward_time": 0.024184465408325195, "step": 20890 }, { "epoch": 3.18756103515625e-05, "step": 20890, "training_step_time": 0.1102597713470459 }, { "epoch": 3.187713623046875e-05, "model_forward_time": 0.024748802185058594, "step": 20891 }, { "epoch": 3.187713623046875e-05, "step": 20891, "training_step_time": 0.11478614807128906 }, { "epoch": 3.1878662109375e-05, "model_forward_time": 0.024597644805908203, "step": 20892 }, { "epoch": 3.1878662109375e-05, "step": 20892, "training_step_time": 0.11287713050842285 }, { "epoch": 3.188018798828125e-05, "model_forward_time": 0.024138450622558594, "step": 20893 }, { "epoch": 3.188018798828125e-05, "step": 20893, "training_step_time": 0.11603927612304688 }, { "epoch": 3.18817138671875e-05, "model_forward_time": 0.024454832077026367, "step": 20894 }, { "epoch": 3.18817138671875e-05, "step": 20894, "training_step_time": 0.10783600807189941 }, { "epoch": 3.188323974609375e-05, "model_forward_time": 0.02420639991760254, "step": 20895 }, { "epoch": 3.188323974609375e-05, "step": 20895, "training_step_time": 0.11224961280822754 }, { "epoch": 3.1884765625e-05, "model_forward_time": 0.02467942237854004, "step": 20896 }, { "epoch": 3.1884765625e-05, "step": 20896, "training_step_time": 0.10582423210144043 }, { "epoch": 3.188629150390625e-05, "model_forward_time": 0.02448248863220215, "step": 20897 }, { "epoch": 3.188629150390625e-05, "step": 20897, "training_step_time": 0.10831928253173828 }, { "epoch": 3.18878173828125e-05, "model_forward_time": 0.02448272705078125, "step": 20898 }, { "epoch": 3.18878173828125e-05, "step": 20898, "training_step_time": 0.15099310874938965 }, { "epoch": 3.188934326171875e-05, "model_forward_time": 0.0241396427154541, "step": 20899 }, { "epoch": 3.188934326171875e-05, "step": 20899, "training_step_time": 0.11275005340576172 }, { "epoch": 3.1890869140625e-05, "grad_norm": 0.48044052720069885, "learning_rate": 2.3115654051696095e-05, "loss": 0.0097, "step": 20900 }, { "epoch": 3.1890869140625e-05, "model_forward_time": 0.024339914321899414, "step": 20900 }, { "epoch": 3.1890869140625e-05, "step": 20900, "training_step_time": 0.2097020149230957 }, { "epoch": 3.189239501953125e-05, "model_forward_time": 0.02384018898010254, "step": 20901 }, { "epoch": 3.189239501953125e-05, "step": 20901, "training_step_time": 0.1087653636932373 }, { "epoch": 3.18939208984375e-05, "model_forward_time": 0.024075984954833984, "step": 20902 }, { "epoch": 3.18939208984375e-05, "step": 20902, "training_step_time": 0.1195986270904541 }, { "epoch": 3.189544677734375e-05, "model_forward_time": 0.02469944953918457, "step": 20903 }, { "epoch": 3.189544677734375e-05, "step": 20903, "training_step_time": 0.11021018028259277 }, { "epoch": 3.189697265625e-05, "model_forward_time": 0.024339914321899414, "step": 20904 }, { "epoch": 3.189697265625e-05, "step": 20904, "training_step_time": 0.11266660690307617 }, { "epoch": 3.189849853515625e-05, "model_forward_time": 0.024362802505493164, "step": 20905 }, { "epoch": 3.189849853515625e-05, "step": 20905, "training_step_time": 0.10747003555297852 }, { "epoch": 3.19000244140625e-05, "model_forward_time": 0.02451014518737793, "step": 20906 }, { "epoch": 3.19000244140625e-05, "step": 20906, "training_step_time": 0.10698914527893066 }, { "epoch": 3.190155029296875e-05, "model_forward_time": 0.024125337600708008, "step": 20907 }, { "epoch": 3.190155029296875e-05, "step": 20907, "training_step_time": 0.10592007637023926 }, { "epoch": 3.1903076171875e-05, "model_forward_time": 0.0242464542388916, "step": 20908 }, { "epoch": 3.1903076171875e-05, "step": 20908, "training_step_time": 0.11037421226501465 }, { "epoch": 3.190460205078125e-05, "model_forward_time": 0.024834632873535156, "step": 20909 }, { "epoch": 3.190460205078125e-05, "step": 20909, "training_step_time": 0.1159520149230957 }, { "epoch": 3.19061279296875e-05, "grad_norm": 0.33849474787712097, "learning_rate": 2.3069199924299174e-05, "loss": 0.0103, "step": 20910 }, { "epoch": 3.19061279296875e-05, "model_forward_time": 0.026947021484375, "step": 20910 }, { "epoch": 3.19061279296875e-05, "step": 20910, "training_step_time": 0.1456005573272705 }, { "epoch": 3.190765380859375e-05, "model_forward_time": 0.02509140968322754, "step": 20911 }, { "epoch": 3.190765380859375e-05, "step": 20911, "training_step_time": 0.17772555351257324 }, { "epoch": 3.19091796875e-05, "model_forward_time": 0.02367234230041504, "step": 20912 }, { "epoch": 3.19091796875e-05, "step": 20912, "training_step_time": 0.15875506401062012 }, { "epoch": 3.191070556640625e-05, "model_forward_time": 0.023256540298461914, "step": 20913 }, { "epoch": 3.191070556640625e-05, "step": 20913, "training_step_time": 0.15701699256896973 }, { "epoch": 3.19122314453125e-05, "model_forward_time": 0.023736238479614258, "step": 20914 }, { "epoch": 3.19122314453125e-05, "step": 20914, "training_step_time": 0.1899254322052002 }, { "epoch": 3.191375732421875e-05, "model_forward_time": 0.02338862419128418, "step": 20915 }, { "epoch": 3.191375732421875e-05, "step": 20915, "training_step_time": 0.14279985427856445 }, { "epoch": 3.1915283203125e-05, "model_forward_time": 0.02374577522277832, "step": 20916 }, { "epoch": 3.1915283203125e-05, "step": 20916, "training_step_time": 0.21148443222045898 }, { "epoch": 3.191680908203125e-05, "model_forward_time": 0.02332019805908203, "step": 20917 }, { "epoch": 3.191680908203125e-05, "step": 20917, "training_step_time": 0.12008976936340332 }, { "epoch": 3.19183349609375e-05, "model_forward_time": 0.023340225219726562, "step": 20918 }, { "epoch": 3.19183349609375e-05, "step": 20918, "training_step_time": 0.17731785774230957 }, { "epoch": 3.191986083984375e-05, "model_forward_time": 0.02382206916809082, "step": 20919 }, { "epoch": 3.191986083984375e-05, "step": 20919, "training_step_time": 0.17373394966125488 }, { "epoch": 3.192138671875e-05, "grad_norm": 0.2071171998977661, "learning_rate": 2.3022778520363753e-05, "loss": 0.0086, "step": 20920 }, { "epoch": 3.192138671875e-05, "model_forward_time": 0.023595094680786133, "step": 20920 }, { "epoch": 3.192138671875e-05, "step": 20920, "training_step_time": 0.18980860710144043 }, { "epoch": 3.192291259765625e-05, "model_forward_time": 0.023751258850097656, "step": 20921 }, { "epoch": 3.192291259765625e-05, "step": 20921, "training_step_time": 0.14075398445129395 }, { "epoch": 3.19244384765625e-05, "model_forward_time": 0.023923873901367188, "step": 20922 }, { "epoch": 3.19244384765625e-05, "step": 20922, "training_step_time": 0.10462522506713867 }, { "epoch": 3.192596435546875e-05, "model_forward_time": 0.024194955825805664, "step": 20923 }, { "epoch": 3.192596435546875e-05, "step": 20923, "training_step_time": 0.11624288558959961 }, { "epoch": 3.1927490234375e-05, "model_forward_time": 0.024443626403808594, "step": 20924 }, { "epoch": 3.1927490234375e-05, "step": 20924, "training_step_time": 0.10983943939208984 }, { "epoch": 3.192901611328125e-05, "model_forward_time": 0.024003028869628906, "step": 20925 }, { "epoch": 3.192901611328125e-05, "step": 20925, "training_step_time": 0.10519266128540039 }, { "epoch": 3.19305419921875e-05, "model_forward_time": 0.024275779724121094, "step": 20926 }, { "epoch": 3.19305419921875e-05, "step": 20926, "training_step_time": 0.1604933738708496 }, { "epoch": 3.193206787109375e-05, "model_forward_time": 0.024112939834594727, "step": 20927 }, { "epoch": 3.193206787109375e-05, "step": 20927, "training_step_time": 0.11036133766174316 }, { "epoch": 3.193359375e-05, "model_forward_time": 0.023923873901367188, "step": 20928 }, { "epoch": 3.193359375e-05, "step": 20928, "training_step_time": 0.11069965362548828 }, { "epoch": 3.193511962890625e-05, "model_forward_time": 0.024476051330566406, "step": 20929 }, { "epoch": 3.193511962890625e-05, "step": 20929, "training_step_time": 0.1078493595123291 }, { "epoch": 3.19366455078125e-05, "grad_norm": 0.18889905512332916, "learning_rate": 2.2976389896296203e-05, "loss": 0.0048, "step": 20930 }, { "epoch": 3.19366455078125e-05, "model_forward_time": 0.0243380069732666, "step": 20930 }, { "epoch": 3.19366455078125e-05, "step": 20930, "training_step_time": 0.10697484016418457 }, { "epoch": 3.193817138671875e-05, "model_forward_time": 0.02434515953063965, "step": 20931 }, { "epoch": 3.193817138671875e-05, "step": 20931, "training_step_time": 0.10756659507751465 }, { "epoch": 3.1939697265625e-05, "model_forward_time": 0.024018526077270508, "step": 20932 }, { "epoch": 3.1939697265625e-05, "step": 20932, "training_step_time": 0.10716009140014648 }, { "epoch": 3.194122314453125e-05, "model_forward_time": 0.024338960647583008, "step": 20933 }, { "epoch": 3.194122314453125e-05, "step": 20933, "training_step_time": 0.1056208610534668 }, { "epoch": 3.19427490234375e-05, "model_forward_time": 0.02461075782775879, "step": 20934 }, { "epoch": 3.19427490234375e-05, "step": 20934, "training_step_time": 0.1045067310333252 }, { "epoch": 3.194427490234375e-05, "model_forward_time": 0.024304866790771484, "step": 20935 }, { "epoch": 3.194427490234375e-05, "step": 20935, "training_step_time": 0.1055455207824707 }, { "epoch": 3.194580078125e-05, "model_forward_time": 0.024506807327270508, "step": 20936 }, { "epoch": 3.194580078125e-05, "step": 20936, "training_step_time": 0.10832381248474121 }, { "epoch": 3.194732666015625e-05, "model_forward_time": 0.02446126937866211, "step": 20937 }, { "epoch": 3.194732666015625e-05, "step": 20937, "training_step_time": 0.1055595874786377 }, { "epoch": 3.19488525390625e-05, "model_forward_time": 0.02409672737121582, "step": 20938 }, { "epoch": 3.19488525390625e-05, "step": 20938, "training_step_time": 0.10618400573730469 }, { "epoch": 3.195037841796875e-05, "model_forward_time": 0.024507761001586914, "step": 20939 }, { "epoch": 3.195037841796875e-05, "step": 20939, "training_step_time": 0.10516548156738281 }, { "epoch": 3.1951904296875e-05, "grad_norm": 0.16985678672790527, "learning_rate": 2.29300341084631e-05, "loss": 0.0064, "step": 20940 }, { "epoch": 3.1951904296875e-05, "model_forward_time": 0.024466991424560547, "step": 20940 }, { "epoch": 3.1951904296875e-05, "step": 20940, "training_step_time": 0.10623669624328613 }, { "epoch": 3.195343017578125e-05, "model_forward_time": 0.025005340576171875, "step": 20941 }, { "epoch": 3.195343017578125e-05, "step": 20941, "training_step_time": 0.10666084289550781 }, { "epoch": 3.19549560546875e-05, "model_forward_time": 0.024379491806030273, "step": 20942 }, { "epoch": 3.19549560546875e-05, "step": 20942, "training_step_time": 0.19836187362670898 }, { "epoch": 3.195648193359375e-05, "model_forward_time": 0.02278614044189453, "step": 20943 }, { "epoch": 3.195648193359375e-05, "step": 20943, "training_step_time": 0.1080026626586914 }, { "epoch": 3.19580078125e-05, "model_forward_time": 0.023691654205322266, "step": 20944 }, { "epoch": 3.19580078125e-05, "step": 20944, "training_step_time": 0.16800737380981445 }, { "epoch": 3.195953369140625e-05, "model_forward_time": 0.024271249771118164, "step": 20945 }, { "epoch": 3.195953369140625e-05, "step": 20945, "training_step_time": 0.12114119529724121 }, { "epoch": 3.19610595703125e-05, "model_forward_time": 0.023670673370361328, "step": 20946 }, { "epoch": 3.19610595703125e-05, "step": 20946, "training_step_time": 0.10887718200683594 }, { "epoch": 3.196258544921875e-05, "model_forward_time": 0.02483820915222168, "step": 20947 }, { "epoch": 3.196258544921875e-05, "step": 20947, "training_step_time": 0.12133979797363281 }, { "epoch": 3.1964111328125e-05, "model_forward_time": 0.024420976638793945, "step": 20948 }, { "epoch": 3.1964111328125e-05, "step": 20948, "training_step_time": 0.12640786170959473 }, { "epoch": 3.196563720703125e-05, "model_forward_time": 0.024159908294677734, "step": 20949 }, { "epoch": 3.196563720703125e-05, "step": 20949, "training_step_time": 0.10400915145874023 }, { "epoch": 3.19671630859375e-05, "grad_norm": 0.2694411873817444, "learning_rate": 2.288371121319109e-05, "loss": 0.0098, "step": 20950 }, { "epoch": 3.19671630859375e-05, "model_forward_time": 0.024370193481445312, "step": 20950 }, { "epoch": 3.19671630859375e-05, "step": 20950, "training_step_time": 0.10578036308288574 }, { "epoch": 3.196868896484375e-05, "model_forward_time": 0.02436995506286621, "step": 20951 }, { "epoch": 3.196868896484375e-05, "step": 20951, "training_step_time": 0.10386824607849121 }, { "epoch": 3.197021484375e-05, "model_forward_time": 0.02568984031677246, "step": 20952 }, { "epoch": 3.197021484375e-05, "step": 20952, "training_step_time": 0.10805225372314453 }, { "epoch": 3.197174072265625e-05, "model_forward_time": 0.02538466453552246, "step": 20953 }, { "epoch": 3.197174072265625e-05, "step": 20953, "training_step_time": 0.1058800220489502 }, { "epoch": 3.19732666015625e-05, "model_forward_time": 0.024991989135742188, "step": 20954 }, { "epoch": 3.19732666015625e-05, "step": 20954, "training_step_time": 0.10443902015686035 }, { "epoch": 3.197479248046875e-05, "model_forward_time": 0.0249178409576416, "step": 20955 }, { "epoch": 3.197479248046875e-05, "step": 20955, "training_step_time": 0.10324454307556152 }, { "epoch": 3.1976318359375e-05, "model_forward_time": 0.025630712509155273, "step": 20956 }, { "epoch": 3.1976318359375e-05, "step": 20956, "training_step_time": 0.10499811172485352 }, { "epoch": 3.197784423828125e-05, "model_forward_time": 0.02494072914123535, "step": 20957 }, { "epoch": 3.197784423828125e-05, "step": 20957, "training_step_time": 0.10555219650268555 }, { "epoch": 3.19793701171875e-05, "model_forward_time": 0.025252103805541992, "step": 20958 }, { "epoch": 3.19793701171875e-05, "step": 20958, "training_step_time": 0.10450410842895508 }, { "epoch": 3.198089599609375e-05, "model_forward_time": 0.024678945541381836, "step": 20959 }, { "epoch": 3.198089599609375e-05, "step": 20959, "training_step_time": 0.10578131675720215 }, { "epoch": 3.1982421875e-05, "grad_norm": 0.1979796290397644, "learning_rate": 2.2837421266766857e-05, "loss": 0.0085, "step": 20960 }, { "epoch": 3.1982421875e-05, "model_forward_time": 0.02428460121154785, "step": 20960 }, { "epoch": 3.1982421875e-05, "step": 20960, "training_step_time": 0.14855360984802246 }, { "epoch": 3.198394775390625e-05, "model_forward_time": 0.02537226676940918, "step": 20961 }, { "epoch": 3.198394775390625e-05, "step": 20961, "training_step_time": 0.15253806114196777 }, { "epoch": 3.19854736328125e-05, "model_forward_time": 0.02459263801574707, "step": 20962 }, { "epoch": 3.19854736328125e-05, "step": 20962, "training_step_time": 0.1141505241394043 }, { "epoch": 3.198699951171875e-05, "model_forward_time": 0.024610042572021484, "step": 20963 }, { "epoch": 3.198699951171875e-05, "step": 20963, "training_step_time": 0.12652587890625 }, { "epoch": 3.1988525390625e-05, "model_forward_time": 0.024911880493164062, "step": 20964 }, { "epoch": 3.1988525390625e-05, "step": 20964, "training_step_time": 0.16631388664245605 }, { "epoch": 3.199005126953125e-05, "model_forward_time": 0.024394989013671875, "step": 20965 }, { "epoch": 3.199005126953125e-05, "step": 20965, "training_step_time": 0.16535353660583496 }, { "epoch": 3.19915771484375e-05, "model_forward_time": 0.023775100708007812, "step": 20966 }, { "epoch": 3.19915771484375e-05, "step": 20966, "training_step_time": 0.13075518608093262 }, { "epoch": 3.199310302734375e-05, "model_forward_time": 0.024721384048461914, "step": 20967 }, { "epoch": 3.199310302734375e-05, "step": 20967, "training_step_time": 0.11049699783325195 }, { "epoch": 3.199462890625e-05, "model_forward_time": 0.02506113052368164, "step": 20968 }, { "epoch": 3.199462890625e-05, "step": 20968, "training_step_time": 0.1205441951751709 }, { "epoch": 3.199615478515625e-05, "model_forward_time": 0.025595664978027344, "step": 20969 }, { "epoch": 3.199615478515625e-05, "step": 20969, "training_step_time": 0.10684013366699219 }, { "epoch": 3.19976806640625e-05, "grad_norm": 0.1606241762638092, "learning_rate": 2.279116432543705e-05, "loss": 0.0058, "step": 20970 }, { "epoch": 3.19976806640625e-05, "model_forward_time": 0.025014400482177734, "step": 20970 }, { "epoch": 3.19976806640625e-05, "step": 20970, "training_step_time": 0.10595464706420898 }, { "epoch": 3.199920654296875e-05, "model_forward_time": 0.025043249130249023, "step": 20971 }, { "epoch": 3.199920654296875e-05, "step": 20971, "training_step_time": 0.11782336235046387 }, { "epoch": 3.2000732421875e-05, "model_forward_time": 0.025203943252563477, "step": 20972 }, { "epoch": 3.2000732421875e-05, "step": 20972, "training_step_time": 0.10997772216796875 }, { "epoch": 3.200225830078125e-05, "model_forward_time": 0.02482891082763672, "step": 20973 }, { "epoch": 3.200225830078125e-05, "step": 20973, "training_step_time": 0.18892621994018555 }, { "epoch": 3.20037841796875e-05, "model_forward_time": 0.024064302444458008, "step": 20974 }, { "epoch": 3.20037841796875e-05, "step": 20974, "training_step_time": 0.10538268089294434 }, { "epoch": 3.200531005859375e-05, "model_forward_time": 0.024271488189697266, "step": 20975 }, { "epoch": 3.200531005859375e-05, "step": 20975, "training_step_time": 0.10281777381896973 }, { "epoch": 3.20068359375e-05, "model_forward_time": 0.02514481544494629, "step": 20976 }, { "epoch": 3.20068359375e-05, "step": 20976, "training_step_time": 0.10588502883911133 }, { "epoch": 3.200836181640625e-05, "model_forward_time": 0.024989604949951172, "step": 20977 }, { "epoch": 3.200836181640625e-05, "step": 20977, "training_step_time": 0.18580889701843262 }, { "epoch": 3.20098876953125e-05, "model_forward_time": 0.024666309356689453, "step": 20978 }, { "epoch": 3.20098876953125e-05, "step": 20978, "training_step_time": 0.20614242553710938 }, { "epoch": 3.201141357421875e-05, "model_forward_time": 0.024148225784301758, "step": 20979 }, { "epoch": 3.201141357421875e-05, "step": 20979, "training_step_time": 0.20559000968933105 }, { "epoch": 3.2012939453125e-05, "grad_norm": 0.11838044226169586, "learning_rate": 2.2744940445408202e-05, "loss": 0.0042, "step": 20980 }, { "epoch": 3.2012939453125e-05, "model_forward_time": 0.024283409118652344, "step": 20980 }, { "epoch": 3.2012939453125e-05, "step": 20980, "training_step_time": 0.193495512008667 }, { "epoch": 3.201446533203125e-05, "model_forward_time": 0.024239301681518555, "step": 20981 }, { "epoch": 3.201446533203125e-05, "step": 20981, "training_step_time": 0.18219327926635742 }, { "epoch": 3.20159912109375e-05, "model_forward_time": 0.024628639221191406, "step": 20982 }, { "epoch": 3.20159912109375e-05, "step": 20982, "training_step_time": 0.17626619338989258 }, { "epoch": 3.201751708984375e-05, "model_forward_time": 0.024436235427856445, "step": 20983 }, { "epoch": 3.201751708984375e-05, "step": 20983, "training_step_time": 0.16077065467834473 }, { "epoch": 3.201904296875e-05, "model_forward_time": 0.024114370346069336, "step": 20984 }, { "epoch": 3.201904296875e-05, "step": 20984, "training_step_time": 0.11462759971618652 }, { "epoch": 3.202056884765625e-05, "model_forward_time": 0.024523019790649414, "step": 20985 }, { "epoch": 3.202056884765625e-05, "step": 20985, "training_step_time": 0.11400294303894043 }, { "epoch": 3.20220947265625e-05, "model_forward_time": 0.02523493766784668, "step": 20986 }, { "epoch": 3.20220947265625e-05, "step": 20986, "training_step_time": 0.12998533248901367 }, { "epoch": 3.202362060546875e-05, "model_forward_time": 0.025480270385742188, "step": 20987 }, { "epoch": 3.202362060546875e-05, "step": 20987, "training_step_time": 0.12911486625671387 }, { "epoch": 3.2025146484375e-05, "model_forward_time": 0.024914026260375977, "step": 20988 }, { "epoch": 3.2025146484375e-05, "step": 20988, "training_step_time": 0.10532855987548828 }, { "epoch": 3.202667236328125e-05, "model_forward_time": 0.02524280548095703, "step": 20989 }, { "epoch": 3.202667236328125e-05, "step": 20989, "training_step_time": 0.14786529541015625 }, { "epoch": 3.20281982421875e-05, "grad_norm": 0.1383422315120697, "learning_rate": 2.2698749682846687e-05, "loss": 0.0046, "step": 20990 }, { "epoch": 3.20281982421875e-05, "model_forward_time": 0.024940967559814453, "step": 20990 }, { "epoch": 3.20281982421875e-05, "step": 20990, "training_step_time": 0.12484574317932129 }, { "epoch": 3.202972412109375e-05, "model_forward_time": 0.024914264678955078, "step": 20991 }, { "epoch": 3.202972412109375e-05, "step": 20991, "training_step_time": 0.12060928344726562 }, { "epoch": 3.203125e-05, "model_forward_time": 0.025206327438354492, "step": 20992 }, { "epoch": 3.203125e-05, "step": 20992, "training_step_time": 0.10564804077148438 }, { "epoch": 3.203277587890625e-05, "model_forward_time": 0.025262832641601562, "step": 20993 }, { "epoch": 3.203277587890625e-05, "step": 20993, "training_step_time": 0.10614347457885742 }, { "epoch": 3.20343017578125e-05, "model_forward_time": 0.02471613883972168, "step": 20994 }, { "epoch": 3.20343017578125e-05, "step": 20994, "training_step_time": 0.10723352432250977 }, { "epoch": 3.203582763671875e-05, "model_forward_time": 0.025940656661987305, "step": 20995 }, { "epoch": 3.203582763671875e-05, "step": 20995, "training_step_time": 0.10569024085998535 }, { "epoch": 3.2037353515625e-05, "model_forward_time": 0.025329113006591797, "step": 20996 }, { "epoch": 3.2037353515625e-05, "step": 20996, "training_step_time": 0.10524249076843262 }, { "epoch": 3.203887939453125e-05, "model_forward_time": 0.02527451515197754, "step": 20997 }, { "epoch": 3.203887939453125e-05, "step": 20997, "training_step_time": 0.1073911190032959 }, { "epoch": 3.20404052734375e-05, "model_forward_time": 0.024895429611206055, "step": 20998 }, { "epoch": 3.20404052734375e-05, "step": 20998, "training_step_time": 0.10617876052856445 }, { "epoch": 3.204193115234375e-05, "model_forward_time": 0.024880170822143555, "step": 20999 }, { "epoch": 3.204193115234375e-05, "step": 20999, "training_step_time": 0.10453915596008301 }, { "epoch": 3.204345703125e-05, "grad_norm": 0.12018714845180511, "learning_rate": 2.2652592093878666e-05, "loss": 0.0082, "step": 21000 }, { "epoch": 3.204345703125e-05, "model_forward_time": 0.02322530746459961, "step": 21000 }, { "epoch": 3.204345703125e-05, "step": 21000, "training_step_time": 0.09423136711120605 }, { "epoch": 3.204498291015625e-05, "model_forward_time": 0.023467063903808594, "step": 21001 }, { "epoch": 3.204498291015625e-05, "step": 21001, "training_step_time": 0.0988612174987793 }, { "epoch": 3.20465087890625e-05, "model_forward_time": 0.024544477462768555, "step": 21002 }, { "epoch": 3.20465087890625e-05, "step": 21002, "training_step_time": 0.1031191349029541 }, { "epoch": 3.204803466796875e-05, "model_forward_time": 0.024768829345703125, "step": 21003 }, { "epoch": 3.204803466796875e-05, "step": 21003, "training_step_time": 0.10243654251098633 }, { "epoch": 3.2049560546875e-05, "model_forward_time": 0.02525782585144043, "step": 21004 }, { "epoch": 3.2049560546875e-05, "step": 21004, "training_step_time": 0.10604310035705566 }, { "epoch": 3.205108642578125e-05, "model_forward_time": 0.025297164916992188, "step": 21005 }, { "epoch": 3.205108642578125e-05, "step": 21005, "training_step_time": 0.10829520225524902 }, { "epoch": 3.20526123046875e-05, "model_forward_time": 0.02553415298461914, "step": 21006 }, { "epoch": 3.20526123046875e-05, "step": 21006, "training_step_time": 0.10339927673339844 }, { "epoch": 3.205413818359375e-05, "model_forward_time": 0.024680614471435547, "step": 21007 }, { "epoch": 3.205413818359375e-05, "step": 21007, "training_step_time": 0.150040864944458 }, { "epoch": 3.20556640625e-05, "model_forward_time": 0.024448156356811523, "step": 21008 }, { "epoch": 3.20556640625e-05, "step": 21008, "training_step_time": 0.15775251388549805 }, { "epoch": 3.205718994140625e-05, "model_forward_time": 0.024425268173217773, "step": 21009 }, { "epoch": 3.205718994140625e-05, "step": 21009, "training_step_time": 0.1583404541015625 }, { "epoch": 3.20587158203125e-05, "grad_norm": 0.25087517499923706, "learning_rate": 2.2606467734589924e-05, "loss": 0.0118, "step": 21010 }, { "epoch": 3.20587158203125e-05, "model_forward_time": 0.024266481399536133, "step": 21010 }, { "epoch": 3.20587158203125e-05, "step": 21010, "training_step_time": 0.18371105194091797 }, { "epoch": 3.206024169921875e-05, "model_forward_time": 0.0244903564453125, "step": 21011 }, { "epoch": 3.206024169921875e-05, "step": 21011, "training_step_time": 0.17206120491027832 }, { "epoch": 3.2061767578125e-05, "model_forward_time": 0.024683713912963867, "step": 21012 }, { "epoch": 3.2061767578125e-05, "step": 21012, "training_step_time": 0.22068309783935547 }, { "epoch": 3.206329345703125e-05, "model_forward_time": 0.024411439895629883, "step": 21013 }, { "epoch": 3.206329345703125e-05, "step": 21013, "training_step_time": 0.14562129974365234 }, { "epoch": 3.20648193359375e-05, "model_forward_time": 0.024422168731689453, "step": 21014 }, { "epoch": 3.20648193359375e-05, "step": 21014, "training_step_time": 0.19055461883544922 }, { "epoch": 3.206634521484375e-05, "model_forward_time": 0.024055004119873047, "step": 21015 }, { "epoch": 3.206634521484375e-05, "step": 21015, "training_step_time": 0.10859084129333496 }, { "epoch": 3.206787109375e-05, "model_forward_time": 0.026369571685791016, "step": 21016 }, { "epoch": 3.206787109375e-05, "step": 21016, "training_step_time": 0.19722390174865723 }, { "epoch": 3.206939697265625e-05, "model_forward_time": 0.024967670440673828, "step": 21017 }, { "epoch": 3.206939697265625e-05, "step": 21017, "training_step_time": 0.1064293384552002 }, { "epoch": 3.20709228515625e-05, "model_forward_time": 0.02445220947265625, "step": 21018 }, { "epoch": 3.20709228515625e-05, "step": 21018, "training_step_time": 0.10428047180175781 }, { "epoch": 3.207244873046875e-05, "model_forward_time": 0.02541661262512207, "step": 21019 }, { "epoch": 3.207244873046875e-05, "step": 21019, "training_step_time": 0.10465860366821289 }, { "epoch": 3.2073974609375e-05, "grad_norm": 0.19894975423812866, "learning_rate": 2.2560376661025945e-05, "loss": 0.0068, "step": 21020 }, { "epoch": 3.2073974609375e-05, "model_forward_time": 0.025252342224121094, "step": 21020 }, { "epoch": 3.2073974609375e-05, "step": 21020, "training_step_time": 0.11088037490844727 }, { "epoch": 3.207550048828125e-05, "model_forward_time": 0.02655339241027832, "step": 21021 }, { "epoch": 3.207550048828125e-05, "step": 21021, "training_step_time": 0.10682010650634766 }, { "epoch": 3.20770263671875e-05, "model_forward_time": 0.025124073028564453, "step": 21022 }, { "epoch": 3.20770263671875e-05, "step": 21022, "training_step_time": 0.1076803207397461 }, { "epoch": 3.207855224609375e-05, "model_forward_time": 0.025011539459228516, "step": 21023 }, { "epoch": 3.207855224609375e-05, "step": 21023, "training_step_time": 0.10767865180969238 }, { "epoch": 3.2080078125e-05, "model_forward_time": 0.025256633758544922, "step": 21024 }, { "epoch": 3.2080078125e-05, "step": 21024, "training_step_time": 0.10580277442932129 }, { "epoch": 3.208160400390625e-05, "model_forward_time": 0.02512359619140625, "step": 21025 }, { "epoch": 3.208160400390625e-05, "step": 21025, "training_step_time": 0.10624265670776367 }, { "epoch": 3.20831298828125e-05, "model_forward_time": 0.025471210479736328, "step": 21026 }, { "epoch": 3.20831298828125e-05, "step": 21026, "training_step_time": 0.10708022117614746 }, { "epoch": 3.208465576171875e-05, "model_forward_time": 0.025400161743164062, "step": 21027 }, { "epoch": 3.208465576171875e-05, "step": 21027, "training_step_time": 0.1092379093170166 }, { "epoch": 3.2086181640625e-05, "model_forward_time": 0.025461435317993164, "step": 21028 }, { "epoch": 3.2086181640625e-05, "step": 21028, "training_step_time": 0.1041111946105957 }, { "epoch": 3.208770751953125e-05, "model_forward_time": 0.026166200637817383, "step": 21029 }, { "epoch": 3.208770751953125e-05, "step": 21029, "training_step_time": 0.10735678672790527 }, { "epoch": 3.20892333984375e-05, "grad_norm": 0.12291596829891205, "learning_rate": 2.251431892919171e-05, "loss": 0.0102, "step": 21030 }, { "epoch": 3.20892333984375e-05, "model_forward_time": 0.02539205551147461, "step": 21030 }, { "epoch": 3.20892333984375e-05, "step": 21030, "training_step_time": 0.13961172103881836 }, { "epoch": 3.209075927734375e-05, "model_forward_time": 0.025295257568359375, "step": 21031 }, { "epoch": 3.209075927734375e-05, "step": 21031, "training_step_time": 0.15492606163024902 }, { "epoch": 3.209228515625e-05, "model_forward_time": 0.024661779403686523, "step": 21032 }, { "epoch": 3.209228515625e-05, "step": 21032, "training_step_time": 0.1365213394165039 }, { "epoch": 3.209381103515625e-05, "model_forward_time": 0.025146484375, "step": 21033 }, { "epoch": 3.209381103515625e-05, "step": 21033, "training_step_time": 0.15526485443115234 }, { "epoch": 3.20953369140625e-05, "model_forward_time": 0.024460554122924805, "step": 21034 }, { "epoch": 3.20953369140625e-05, "step": 21034, "training_step_time": 0.1095433235168457 }, { "epoch": 3.209686279296875e-05, "model_forward_time": 0.02466750144958496, "step": 21035 }, { "epoch": 3.209686279296875e-05, "step": 21035, "training_step_time": 0.12091970443725586 }, { "epoch": 3.2098388671875e-05, "model_forward_time": 0.02536463737487793, "step": 21036 }, { "epoch": 3.2098388671875e-05, "step": 21036, "training_step_time": 0.11558914184570312 }, { "epoch": 3.209991455078125e-05, "model_forward_time": 0.025399208068847656, "step": 21037 }, { "epoch": 3.209991455078125e-05, "step": 21037, "training_step_time": 0.1633157730102539 }, { "epoch": 3.21014404296875e-05, "model_forward_time": 0.02440190315246582, "step": 21038 }, { "epoch": 3.21014404296875e-05, "step": 21038, "training_step_time": 0.10747718811035156 }, { "epoch": 3.210296630859375e-05, "model_forward_time": 0.02467036247253418, "step": 21039 }, { "epoch": 3.210296630859375e-05, "step": 21039, "training_step_time": 0.1133279800415039 }, { "epoch": 3.21044921875e-05, "grad_norm": 0.15215924382209778, "learning_rate": 2.2468294595051687e-05, "loss": 0.0081, "step": 21040 }, { "epoch": 3.21044921875e-05, "model_forward_time": 0.02494072914123535, "step": 21040 }, { "epoch": 3.21044921875e-05, "step": 21040, "training_step_time": 0.10536432266235352 }, { "epoch": 3.210601806640625e-05, "model_forward_time": 0.02517080307006836, "step": 21041 }, { "epoch": 3.210601806640625e-05, "step": 21041, "training_step_time": 0.10616922378540039 }, { "epoch": 3.21075439453125e-05, "model_forward_time": 0.02533745765686035, "step": 21042 }, { "epoch": 3.21075439453125e-05, "step": 21042, "training_step_time": 0.10864853858947754 }, { "epoch": 3.210906982421875e-05, "model_forward_time": 0.025047779083251953, "step": 21043 }, { "epoch": 3.210906982421875e-05, "step": 21043, "training_step_time": 0.1063694953918457 }, { "epoch": 3.2110595703125e-05, "model_forward_time": 0.025476932525634766, "step": 21044 }, { "epoch": 3.2110595703125e-05, "step": 21044, "training_step_time": 0.10620427131652832 }, { "epoch": 3.211212158203125e-05, "model_forward_time": 0.025572538375854492, "step": 21045 }, { "epoch": 3.211212158203125e-05, "step": 21045, "training_step_time": 0.10952425003051758 }, { "epoch": 3.21136474609375e-05, "model_forward_time": 0.025313854217529297, "step": 21046 }, { "epoch": 3.21136474609375e-05, "step": 21046, "training_step_time": 0.10838055610656738 }, { "epoch": 3.211517333984375e-05, "model_forward_time": 0.0254364013671875, "step": 21047 }, { "epoch": 3.211517333984375e-05, "step": 21047, "training_step_time": 0.10993003845214844 }, { "epoch": 3.211669921875e-05, "model_forward_time": 0.02524876594543457, "step": 21048 }, { "epoch": 3.211669921875e-05, "step": 21048, "training_step_time": 0.12034153938293457 }, { "epoch": 3.211822509765625e-05, "model_forward_time": 0.025159120559692383, "step": 21049 }, { "epoch": 3.211822509765625e-05, "step": 21049, "training_step_time": 0.14071202278137207 }, { "epoch": 3.21197509765625e-05, "grad_norm": 0.2919601798057556, "learning_rate": 2.242230371452982e-05, "loss": 0.0086, "step": 21050 }, { "epoch": 3.21197509765625e-05, "model_forward_time": 0.025293827056884766, "step": 21050 }, { "epoch": 3.21197509765625e-05, "step": 21050, "training_step_time": 0.1121816635131836 }, { "epoch": 3.212127685546875e-05, "model_forward_time": 0.024393320083618164, "step": 21051 }, { "epoch": 3.212127685546875e-05, "step": 21051, "training_step_time": 0.1553206443786621 }, { "epoch": 3.2122802734375e-05, "model_forward_time": 0.024788618087768555, "step": 21052 }, { "epoch": 3.2122802734375e-05, "step": 21052, "training_step_time": 0.1535799503326416 }, { "epoch": 3.212432861328125e-05, "model_forward_time": 0.024834394454956055, "step": 21053 }, { "epoch": 3.212432861328125e-05, "step": 21053, "training_step_time": 0.18761992454528809 }, { "epoch": 3.21258544921875e-05, "model_forward_time": 0.024721622467041016, "step": 21054 }, { "epoch": 3.21258544921875e-05, "step": 21054, "training_step_time": 0.15810585021972656 }, { "epoch": 3.212738037109375e-05, "model_forward_time": 0.027066469192504883, "step": 21055 }, { "epoch": 3.212738037109375e-05, "step": 21055, "training_step_time": 0.19086170196533203 }, { "epoch": 3.212890625e-05, "model_forward_time": 0.024515628814697266, "step": 21056 }, { "epoch": 3.212890625e-05, "step": 21056, "training_step_time": 0.19450640678405762 }, { "epoch": 3.213043212890625e-05, "model_forward_time": 0.02426886558532715, "step": 21057 }, { "epoch": 3.213043212890625e-05, "step": 21057, "training_step_time": 0.1704401969909668 }, { "epoch": 3.21319580078125e-05, "model_forward_time": 0.023818254470825195, "step": 21058 }, { "epoch": 3.21319580078125e-05, "step": 21058, "training_step_time": 0.18068885803222656 }, { "epoch": 3.213348388671875e-05, "model_forward_time": 0.023616790771484375, "step": 21059 }, { "epoch": 3.213348388671875e-05, "step": 21059, "training_step_time": 0.1107473373413086 }, { "epoch": 3.2135009765625e-05, "grad_norm": 0.24266916513442993, "learning_rate": 2.237634634350934e-05, "loss": 0.0133, "step": 21060 }, { "epoch": 3.2135009765625e-05, "model_forward_time": 0.024321556091308594, "step": 21060 }, { "epoch": 3.2135009765625e-05, "step": 21060, "training_step_time": 0.12611031532287598 }, { "epoch": 3.213653564453125e-05, "model_forward_time": 0.026088953018188477, "step": 21061 }, { "epoch": 3.213653564453125e-05, "step": 21061, "training_step_time": 0.11939144134521484 }, { "epoch": 3.21380615234375e-05, "model_forward_time": 0.025325298309326172, "step": 21062 }, { "epoch": 3.21380615234375e-05, "step": 21062, "training_step_time": 0.10639357566833496 }, { "epoch": 3.213958740234375e-05, "model_forward_time": 0.02591872215270996, "step": 21063 }, { "epoch": 3.213958740234375e-05, "step": 21063, "training_step_time": 0.10566163063049316 }, { "epoch": 3.214111328125e-05, "model_forward_time": 0.02567768096923828, "step": 21064 }, { "epoch": 3.214111328125e-05, "step": 21064, "training_step_time": 0.10636162757873535 }, { "epoch": 3.214263916015625e-05, "model_forward_time": 0.027269840240478516, "step": 21065 }, { "epoch": 3.214263916015625e-05, "step": 21065, "training_step_time": 0.10856080055236816 }, { "epoch": 3.21441650390625e-05, "model_forward_time": 0.025417089462280273, "step": 21066 }, { "epoch": 3.21441650390625e-05, "step": 21066, "training_step_time": 0.10489296913146973 }, { "epoch": 3.214569091796875e-05, "model_forward_time": 0.025609254837036133, "step": 21067 }, { "epoch": 3.214569091796875e-05, "step": 21067, "training_step_time": 0.10773634910583496 }, { "epoch": 3.2147216796875e-05, "model_forward_time": 0.025650978088378906, "step": 21068 }, { "epoch": 3.2147216796875e-05, "step": 21068, "training_step_time": 0.10774922370910645 }, { "epoch": 3.214874267578125e-05, "model_forward_time": 0.02536177635192871, "step": 21069 }, { "epoch": 3.214874267578125e-05, "step": 21069, "training_step_time": 0.10801386833190918 }, { "epoch": 3.21502685546875e-05, "grad_norm": 0.23565426468849182, "learning_rate": 2.23304225378328e-05, "loss": 0.0099, "step": 21070 }, { "epoch": 3.21502685546875e-05, "model_forward_time": 0.025113821029663086, "step": 21070 }, { "epoch": 3.21502685546875e-05, "step": 21070, "training_step_time": 0.10464692115783691 }, { "epoch": 3.215179443359375e-05, "model_forward_time": 0.02554941177368164, "step": 21071 }, { "epoch": 3.215179443359375e-05, "step": 21071, "training_step_time": 0.10890722274780273 }, { "epoch": 3.21533203125e-05, "model_forward_time": 0.02564835548400879, "step": 21072 }, { "epoch": 3.21533203125e-05, "step": 21072, "training_step_time": 0.10660314559936523 }, { "epoch": 3.215484619140625e-05, "model_forward_time": 0.02496790885925293, "step": 21073 }, { "epoch": 3.215484619140625e-05, "step": 21073, "training_step_time": 0.10352945327758789 }, { "epoch": 3.21563720703125e-05, "model_forward_time": 0.0258331298828125, "step": 21074 }, { "epoch": 3.21563720703125e-05, "step": 21074, "training_step_time": 0.1051180362701416 }, { "epoch": 3.215789794921875e-05, "model_forward_time": 0.024367570877075195, "step": 21075 }, { "epoch": 3.215789794921875e-05, "step": 21075, "training_step_time": 0.10581111907958984 }, { "epoch": 3.2159423828125e-05, "model_forward_time": 0.02569127082824707, "step": 21076 }, { "epoch": 3.2159423828125e-05, "step": 21076, "training_step_time": 0.10779023170471191 }, { "epoch": 3.216094970703125e-05, "model_forward_time": 0.025057554244995117, "step": 21077 }, { "epoch": 3.216094970703125e-05, "step": 21077, "training_step_time": 0.10608267784118652 }, { "epoch": 3.21624755859375e-05, "model_forward_time": 0.02766108512878418, "step": 21078 }, { "epoch": 3.21624755859375e-05, "step": 21078, "training_step_time": 0.2060239315032959 }, { "epoch": 3.216400146484375e-05, "model_forward_time": 0.02412557601928711, "step": 21079 }, { "epoch": 3.216400146484375e-05, "step": 21079, "training_step_time": 0.11222267150878906 }, { "epoch": 3.216552734375e-05, "grad_norm": 0.18812458217144012, "learning_rate": 2.2284532353301953e-05, "loss": 0.0052, "step": 21080 }, { "epoch": 3.216552734375e-05, "model_forward_time": 0.025162696838378906, "step": 21080 }, { "epoch": 3.216552734375e-05, "step": 21080, "training_step_time": 0.1305561065673828 }, { "epoch": 3.216705322265625e-05, "model_forward_time": 0.024494171142578125, "step": 21081 }, { "epoch": 3.216705322265625e-05, "step": 21081, "training_step_time": 0.12300372123718262 }, { "epoch": 3.21685791015625e-05, "model_forward_time": 0.02466869354248047, "step": 21082 }, { "epoch": 3.21685791015625e-05, "step": 21082, "training_step_time": 0.13306736946105957 }, { "epoch": 3.217010498046875e-05, "model_forward_time": 0.024712324142456055, "step": 21083 }, { "epoch": 3.217010498046875e-05, "step": 21083, "training_step_time": 0.12731099128723145 }, { "epoch": 3.2171630859375e-05, "model_forward_time": 0.024339675903320312, "step": 21084 }, { "epoch": 3.2171630859375e-05, "step": 21084, "training_step_time": 0.12717223167419434 }, { "epoch": 3.217315673828125e-05, "model_forward_time": 0.025300264358520508, "step": 21085 }, { "epoch": 3.217315673828125e-05, "step": 21085, "training_step_time": 0.10240507125854492 }, { "epoch": 3.21746826171875e-05, "model_forward_time": 0.025016307830810547, "step": 21086 }, { "epoch": 3.21746826171875e-05, "step": 21086, "training_step_time": 0.10848498344421387 }, { "epoch": 3.217620849609375e-05, "model_forward_time": 0.02575230598449707, "step": 21087 }, { "epoch": 3.217620849609375e-05, "step": 21087, "training_step_time": 0.11292600631713867 }, { "epoch": 3.2177734375e-05, "model_forward_time": 0.024939298629760742, "step": 21088 }, { "epoch": 3.2177734375e-05, "step": 21088, "training_step_time": 0.10768342018127441 }, { "epoch": 3.217926025390625e-05, "model_forward_time": 0.024736642837524414, "step": 21089 }, { "epoch": 3.217926025390625e-05, "step": 21089, "training_step_time": 0.10599112510681152 }, { "epoch": 3.21807861328125e-05, "grad_norm": 0.12697091698646545, "learning_rate": 2.2238675845677663e-05, "loss": 0.0111, "step": 21090 }, { "epoch": 3.21807861328125e-05, "model_forward_time": 0.025030851364135742, "step": 21090 }, { "epoch": 3.21807861328125e-05, "step": 21090, "training_step_time": 0.1064903736114502 }, { "epoch": 3.218231201171875e-05, "model_forward_time": 0.025336742401123047, "step": 21091 }, { "epoch": 3.218231201171875e-05, "step": 21091, "training_step_time": 0.10695028305053711 }, { "epoch": 3.2183837890625e-05, "model_forward_time": 0.02520155906677246, "step": 21092 }, { "epoch": 3.2183837890625e-05, "step": 21092, "training_step_time": 0.11504364013671875 }, { "epoch": 3.218536376953125e-05, "model_forward_time": 0.02470111846923828, "step": 21093 }, { "epoch": 3.218536376953125e-05, "step": 21093, "training_step_time": 0.11203670501708984 }, { "epoch": 3.21868896484375e-05, "model_forward_time": 0.025255680084228516, "step": 21094 }, { "epoch": 3.21868896484375e-05, "step": 21094, "training_step_time": 0.10781359672546387 }, { "epoch": 3.218841552734375e-05, "model_forward_time": 0.02498912811279297, "step": 21095 }, { "epoch": 3.218841552734375e-05, "step": 21095, "training_step_time": 0.10896944999694824 }, { "epoch": 3.218994140625e-05, "model_forward_time": 0.024546146392822266, "step": 21096 }, { "epoch": 3.218994140625e-05, "step": 21096, "training_step_time": 0.14392447471618652 }, { "epoch": 3.219146728515625e-05, "model_forward_time": 0.0249326229095459, "step": 21097 }, { "epoch": 3.219146728515625e-05, "step": 21097, "training_step_time": 0.15697002410888672 }, { "epoch": 3.21929931640625e-05, "model_forward_time": 0.02424001693725586, "step": 21098 }, { "epoch": 3.21929931640625e-05, "step": 21098, "training_step_time": 0.11417055130004883 }, { "epoch": 3.219451904296875e-05, "model_forward_time": 0.024742841720581055, "step": 21099 }, { "epoch": 3.219451904296875e-05, "step": 21099, "training_step_time": 0.13398480415344238 }, { "epoch": 3.2196044921875e-05, "grad_norm": 0.3559652268886566, "learning_rate": 2.219285307067997e-05, "loss": 0.0113, "step": 21100 }, { "epoch": 3.2196044921875e-05, "model_forward_time": 0.025150775909423828, "step": 21100 }, { "epoch": 3.2196044921875e-05, "step": 21100, "training_step_time": 0.19571614265441895 }, { "epoch": 3.219757080078125e-05, "model_forward_time": 0.024554014205932617, "step": 21101 }, { "epoch": 3.219757080078125e-05, "step": 21101, "training_step_time": 0.12973999977111816 }, { "epoch": 3.21990966796875e-05, "model_forward_time": 0.024652719497680664, "step": 21102 }, { "epoch": 3.21990966796875e-05, "step": 21102, "training_step_time": 0.1428537368774414 }, { "epoch": 3.220062255859375e-05, "model_forward_time": 0.023976802825927734, "step": 21103 }, { "epoch": 3.220062255859375e-05, "step": 21103, "training_step_time": 0.11202001571655273 }, { "epoch": 3.22021484375e-05, "model_forward_time": 0.02476644515991211, "step": 21104 }, { "epoch": 3.22021484375e-05, "step": 21104, "training_step_time": 0.10760617256164551 }, { "epoch": 3.220367431640625e-05, "model_forward_time": 0.025214195251464844, "step": 21105 }, { "epoch": 3.220367431640625e-05, "step": 21105, "training_step_time": 0.13596844673156738 }, { "epoch": 3.22052001953125e-05, "model_forward_time": 0.024297475814819336, "step": 21106 }, { "epoch": 3.22052001953125e-05, "step": 21106, "training_step_time": 0.2098379135131836 }, { "epoch": 3.220672607421875e-05, "model_forward_time": 0.024859189987182617, "step": 21107 }, { "epoch": 3.220672607421875e-05, "step": 21107, "training_step_time": 0.17070388793945312 }, { "epoch": 3.2208251953125e-05, "model_forward_time": 0.02403426170349121, "step": 21108 }, { "epoch": 3.2208251953125e-05, "step": 21108, "training_step_time": 0.16711640357971191 }, { "epoch": 3.220977783203125e-05, "model_forward_time": 0.024085521697998047, "step": 21109 }, { "epoch": 3.220977783203125e-05, "step": 21109, "training_step_time": 0.16875410079956055 }, { "epoch": 3.22113037109375e-05, "grad_norm": 0.21862581372261047, "learning_rate": 2.2147064083987838e-05, "loss": 0.0068, "step": 21110 }, { "epoch": 3.22113037109375e-05, "model_forward_time": 0.024361610412597656, "step": 21110 }, { "epoch": 3.22113037109375e-05, "step": 21110, "training_step_time": 0.15017318725585938 }, { "epoch": 3.221282958984375e-05, "model_forward_time": 0.024413585662841797, "step": 21111 }, { "epoch": 3.221282958984375e-05, "step": 21111, "training_step_time": 0.13892412185668945 }, { "epoch": 3.221435546875e-05, "model_forward_time": 0.024481534957885742, "step": 21112 }, { "epoch": 3.221435546875e-05, "step": 21112, "training_step_time": 0.13253474235534668 }, { "epoch": 3.221588134765625e-05, "model_forward_time": 0.02751469612121582, "step": 21113 }, { "epoch": 3.221588134765625e-05, "step": 21113, "training_step_time": 0.1296827793121338 }, { "epoch": 3.22174072265625e-05, "model_forward_time": 0.02532815933227539, "step": 21114 }, { "epoch": 3.22174072265625e-05, "step": 21114, "training_step_time": 0.11786627769470215 }, { "epoch": 3.221893310546875e-05, "model_forward_time": 0.024986982345581055, "step": 21115 }, { "epoch": 3.221893310546875e-05, "step": 21115, "training_step_time": 0.11551904678344727 }, { "epoch": 3.2220458984375e-05, "model_forward_time": 0.02517223358154297, "step": 21116 }, { "epoch": 3.2220458984375e-05, "step": 21116, "training_step_time": 0.11705493927001953 }, { "epoch": 3.222198486328125e-05, "model_forward_time": 0.024164676666259766, "step": 21117 }, { "epoch": 3.222198486328125e-05, "step": 21117, "training_step_time": 0.11173319816589355 }, { "epoch": 3.22235107421875e-05, "model_forward_time": 0.024269580841064453, "step": 21118 }, { "epoch": 3.22235107421875e-05, "step": 21118, "training_step_time": 0.11076641082763672 }, { "epoch": 3.222503662109375e-05, "model_forward_time": 0.02397918701171875, "step": 21119 }, { "epoch": 3.222503662109375e-05, "step": 21119, "training_step_time": 0.11178779602050781 }, { "epoch": 3.22265625e-05, "grad_norm": 0.10320556908845901, "learning_rate": 2.2101308941239203e-05, "loss": 0.0065, "step": 21120 }, { "epoch": 3.22265625e-05, "model_forward_time": 0.024203062057495117, "step": 21120 }, { "epoch": 3.22265625e-05, "step": 21120, "training_step_time": 0.11410236358642578 }, { "epoch": 3.222808837890625e-05, "model_forward_time": 0.02686333656311035, "step": 21121 }, { "epoch": 3.222808837890625e-05, "step": 21121, "training_step_time": 0.10918474197387695 }, { "epoch": 3.22296142578125e-05, "model_forward_time": 0.025805234909057617, "step": 21122 }, { "epoch": 3.22296142578125e-05, "step": 21122, "training_step_time": 0.19870972633361816 }, { "epoch": 3.223114013671875e-05, "model_forward_time": 0.024204730987548828, "step": 21123 }, { "epoch": 3.223114013671875e-05, "step": 21123, "training_step_time": 0.10590314865112305 }, { "epoch": 3.2232666015625e-05, "model_forward_time": 0.024292707443237305, "step": 21124 }, { "epoch": 3.2232666015625e-05, "step": 21124, "training_step_time": 0.12429237365722656 }, { "epoch": 3.223419189453125e-05, "model_forward_time": 0.025079727172851562, "step": 21125 }, { "epoch": 3.223419189453125e-05, "step": 21125, "training_step_time": 0.13730931282043457 }, { "epoch": 3.22357177734375e-05, "model_forward_time": 0.024784326553344727, "step": 21126 }, { "epoch": 3.22357177734375e-05, "step": 21126, "training_step_time": 0.11589646339416504 }, { "epoch": 3.223724365234375e-05, "model_forward_time": 0.024826526641845703, "step": 21127 }, { "epoch": 3.223724365234375e-05, "step": 21127, "training_step_time": 0.1356487274169922 }, { "epoch": 3.223876953125e-05, "model_forward_time": 0.0282132625579834, "step": 21128 }, { "epoch": 3.223876953125e-05, "step": 21128, "training_step_time": 0.11227989196777344 }, { "epoch": 3.224029541015625e-05, "model_forward_time": 0.0251314640045166, "step": 21129 }, { "epoch": 3.224029541015625e-05, "step": 21129, "training_step_time": 0.10677886009216309 }, { "epoch": 3.22418212890625e-05, "grad_norm": 0.1518149971961975, "learning_rate": 2.2055587698030877e-05, "loss": 0.0143, "step": 21130 }, { "epoch": 3.22418212890625e-05, "model_forward_time": 0.02476978302001953, "step": 21130 }, { "epoch": 3.22418212890625e-05, "step": 21130, "training_step_time": 0.10728788375854492 }, { "epoch": 3.224334716796875e-05, "model_forward_time": 0.02576303482055664, "step": 21131 }, { "epoch": 3.224334716796875e-05, "step": 21131, "training_step_time": 0.1086421012878418 }, { "epoch": 3.2244873046875e-05, "model_forward_time": 0.02473735809326172, "step": 21132 }, { "epoch": 3.2244873046875e-05, "step": 21132, "training_step_time": 0.10787081718444824 }, { "epoch": 3.224639892578125e-05, "model_forward_time": 0.02524590492248535, "step": 21133 }, { "epoch": 3.224639892578125e-05, "step": 21133, "training_step_time": 0.10401725769042969 }, { "epoch": 3.22479248046875e-05, "model_forward_time": 0.024825096130371094, "step": 21134 }, { "epoch": 3.22479248046875e-05, "step": 21134, "training_step_time": 0.10587263107299805 }, { "epoch": 3.224945068359375e-05, "model_forward_time": 0.024815082550048828, "step": 21135 }, { "epoch": 3.224945068359375e-05, "step": 21135, "training_step_time": 0.10649991035461426 }, { "epoch": 3.22509765625e-05, "model_forward_time": 0.025132179260253906, "step": 21136 }, { "epoch": 3.22509765625e-05, "step": 21136, "training_step_time": 0.11367130279541016 }, { "epoch": 3.225250244140625e-05, "model_forward_time": 0.024682283401489258, "step": 21137 }, { "epoch": 3.225250244140625e-05, "step": 21137, "training_step_time": 0.1054527759552002 }, { "epoch": 3.22540283203125e-05, "model_forward_time": 0.025109291076660156, "step": 21138 }, { "epoch": 3.22540283203125e-05, "step": 21138, "training_step_time": 0.1035313606262207 }, { "epoch": 3.225555419921875e-05, "model_forward_time": 0.02533698081970215, "step": 21139 }, { "epoch": 3.225555419921875e-05, "step": 21139, "training_step_time": 0.10422301292419434 }, { "epoch": 3.2257080078125e-05, "grad_norm": 0.2994593679904938, "learning_rate": 2.2009900409918465e-05, "loss": 0.0105, "step": 21140 }, { "epoch": 3.2257080078125e-05, "model_forward_time": 0.024922609329223633, "step": 21140 }, { "epoch": 3.2257080078125e-05, "step": 21140, "training_step_time": 0.14243650436401367 }, { "epoch": 3.225860595703125e-05, "model_forward_time": 0.024197816848754883, "step": 21141 }, { "epoch": 3.225860595703125e-05, "step": 21141, "training_step_time": 0.16670680046081543 }, { "epoch": 3.22601318359375e-05, "model_forward_time": 0.024222612380981445, "step": 21142 }, { "epoch": 3.22601318359375e-05, "step": 21142, "training_step_time": 0.10964512825012207 }, { "epoch": 3.226165771484375e-05, "model_forward_time": 0.02457904815673828, "step": 21143 }, { "epoch": 3.226165771484375e-05, "step": 21143, "training_step_time": 0.15297603607177734 }, { "epoch": 3.226318359375e-05, "model_forward_time": 0.024840116500854492, "step": 21144 }, { "epoch": 3.226318359375e-05, "step": 21144, "training_step_time": 0.17779064178466797 }, { "epoch": 3.226470947265625e-05, "model_forward_time": 0.024689674377441406, "step": 21145 }, { "epoch": 3.226470947265625e-05, "step": 21145, "training_step_time": 0.1655879020690918 }, { "epoch": 3.22662353515625e-05, "model_forward_time": 0.024198293685913086, "step": 21146 }, { "epoch": 3.22662353515625e-05, "step": 21146, "training_step_time": 0.13478851318359375 }, { "epoch": 3.226776123046875e-05, "model_forward_time": 0.02408289909362793, "step": 21147 }, { "epoch": 3.226776123046875e-05, "step": 21147, "training_step_time": 0.11075425148010254 }, { "epoch": 3.2269287109375e-05, "model_forward_time": 0.025351762771606445, "step": 21148 }, { "epoch": 3.2269287109375e-05, "step": 21148, "training_step_time": 0.12004733085632324 }, { "epoch": 3.227081298828125e-05, "model_forward_time": 0.024771451950073242, "step": 21149 }, { "epoch": 3.227081298828125e-05, "step": 21149, "training_step_time": 0.10836434364318848 }, { "epoch": 3.22723388671875e-05, "grad_norm": 0.215509295463562, "learning_rate": 2.196424713241637e-05, "loss": 0.0065, "step": 21150 }, { "epoch": 3.22723388671875e-05, "model_forward_time": 0.025159120559692383, "step": 21150 }, { "epoch": 3.22723388671875e-05, "step": 21150, "training_step_time": 0.11777138710021973 }, { "epoch": 3.227386474609375e-05, "model_forward_time": 0.025344133377075195, "step": 21151 }, { "epoch": 3.227386474609375e-05, "step": 21151, "training_step_time": 0.10483670234680176 }, { "epoch": 3.2275390625e-05, "model_forward_time": 0.024941205978393555, "step": 21152 }, { "epoch": 3.2275390625e-05, "step": 21152, "training_step_time": 0.19800782203674316 }, { "epoch": 3.227691650390625e-05, "model_forward_time": 0.02415943145751953, "step": 21153 }, { "epoch": 3.227691650390625e-05, "step": 21153, "training_step_time": 0.10711956024169922 }, { "epoch": 3.22784423828125e-05, "model_forward_time": 0.024597644805908203, "step": 21154 }, { "epoch": 3.22784423828125e-05, "step": 21154, "training_step_time": 0.10780715942382812 }, { "epoch": 3.227996826171875e-05, "model_forward_time": 0.024873971939086914, "step": 21155 }, { "epoch": 3.227996826171875e-05, "step": 21155, "training_step_time": 0.10795164108276367 }, { "epoch": 3.2281494140625e-05, "model_forward_time": 0.024897336959838867, "step": 21156 }, { "epoch": 3.2281494140625e-05, "step": 21156, "training_step_time": 0.10606122016906738 }, { "epoch": 3.228302001953125e-05, "model_forward_time": 0.024784088134765625, "step": 21157 }, { "epoch": 3.228302001953125e-05, "step": 21157, "training_step_time": 0.10716986656188965 }, { "epoch": 3.22845458984375e-05, "model_forward_time": 0.025293350219726562, "step": 21158 }, { "epoch": 3.22845458984375e-05, "step": 21158, "training_step_time": 0.11139893531799316 }, { "epoch": 3.228607177734375e-05, "model_forward_time": 0.02523207664489746, "step": 21159 }, { "epoch": 3.228607177734375e-05, "step": 21159, "training_step_time": 0.1623075008392334 }, { "epoch": 3.228759765625e-05, "grad_norm": 0.12247727811336517, "learning_rate": 2.1918627920997593e-05, "loss": 0.0095, "step": 21160 }, { "epoch": 3.228759765625e-05, "model_forward_time": 0.02364039421081543, "step": 21160 }, { "epoch": 3.228759765625e-05, "step": 21160, "training_step_time": 0.15630364418029785 }, { "epoch": 3.228912353515625e-05, "model_forward_time": 0.023675918579101562, "step": 21161 }, { "epoch": 3.228912353515625e-05, "step": 21161, "training_step_time": 0.14830517768859863 }, { "epoch": 3.22906494140625e-05, "model_forward_time": 0.023703575134277344, "step": 21162 }, { "epoch": 3.22906494140625e-05, "step": 21162, "training_step_time": 0.13562941551208496 }, { "epoch": 3.229217529296875e-05, "model_forward_time": 0.023604154586791992, "step": 21163 }, { "epoch": 3.229217529296875e-05, "step": 21163, "training_step_time": 0.12831497192382812 }, { "epoch": 3.2293701171875e-05, "model_forward_time": 0.02338719367980957, "step": 21164 }, { "epoch": 3.2293701171875e-05, "step": 21164, "training_step_time": 0.1271049976348877 }, { "epoch": 3.229522705078125e-05, "model_forward_time": 0.026682376861572266, "step": 21165 }, { "epoch": 3.229522705078125e-05, "step": 21165, "training_step_time": 0.12337350845336914 }, { "epoch": 3.22967529296875e-05, "model_forward_time": 0.025015592575073242, "step": 21166 }, { "epoch": 3.22967529296875e-05, "step": 21166, "training_step_time": 0.12014603614807129 }, { "epoch": 3.229827880859375e-05, "model_forward_time": 0.0242922306060791, "step": 21167 }, { "epoch": 3.229827880859375e-05, "step": 21167, "training_step_time": 0.18651151657104492 }, { "epoch": 3.22998046875e-05, "model_forward_time": 0.02480459213256836, "step": 21168 }, { "epoch": 3.22998046875e-05, "step": 21168, "training_step_time": 0.11352658271789551 }, { "epoch": 3.230133056640625e-05, "model_forward_time": 0.02449941635131836, "step": 21169 }, { "epoch": 3.230133056640625e-05, "step": 21169, "training_step_time": 0.11191010475158691 }, { "epoch": 3.23028564453125e-05, "grad_norm": 0.11898034065961838, "learning_rate": 2.1873042831093803e-05, "loss": 0.008, "step": 21170 }, { "epoch": 3.23028564453125e-05, "model_forward_time": 0.025146007537841797, "step": 21170 }, { "epoch": 3.23028564453125e-05, "step": 21170, "training_step_time": 0.11211562156677246 }, { "epoch": 3.230438232421875e-05, "model_forward_time": 0.02483201026916504, "step": 21171 }, { "epoch": 3.230438232421875e-05, "step": 21171, "training_step_time": 0.1365222930908203 }, { "epoch": 3.2305908203125e-05, "model_forward_time": 0.025719165802001953, "step": 21172 }, { "epoch": 3.2305908203125e-05, "step": 21172, "training_step_time": 0.20273923873901367 }, { "epoch": 3.230743408203125e-05, "model_forward_time": 0.024067401885986328, "step": 21173 }, { "epoch": 3.230743408203125e-05, "step": 21173, "training_step_time": 0.11932158470153809 }, { "epoch": 3.23089599609375e-05, "model_forward_time": 0.023897409439086914, "step": 21174 }, { "epoch": 3.23089599609375e-05, "step": 21174, "training_step_time": 0.10608530044555664 }, { "epoch": 3.231048583984375e-05, "model_forward_time": 0.02498912811279297, "step": 21175 }, { "epoch": 3.231048583984375e-05, "step": 21175, "training_step_time": 0.10579419136047363 }, { "epoch": 3.231201171875e-05, "model_forward_time": 0.024982690811157227, "step": 21176 }, { "epoch": 3.231201171875e-05, "step": 21176, "training_step_time": 0.10849666595458984 }, { "epoch": 3.231353759765625e-05, "model_forward_time": 0.027215242385864258, "step": 21177 }, { "epoch": 3.231353759765625e-05, "step": 21177, "training_step_time": 0.11033916473388672 }, { "epoch": 3.23150634765625e-05, "model_forward_time": 0.02533578872680664, "step": 21178 }, { "epoch": 3.23150634765625e-05, "step": 21178, "training_step_time": 0.10845398902893066 }, { "epoch": 3.231658935546875e-05, "model_forward_time": 0.02730274200439453, "step": 21179 }, { "epoch": 3.231658935546875e-05, "step": 21179, "training_step_time": 0.15689373016357422 }, { "epoch": 3.2318115234375e-05, "grad_norm": 0.15386469662189484, "learning_rate": 2.182749191809518e-05, "loss": 0.0053, "step": 21180 }, { "epoch": 3.2318115234375e-05, "model_forward_time": 0.026033878326416016, "step": 21180 }, { "epoch": 3.2318115234375e-05, "step": 21180, "training_step_time": 0.17985224723815918 }, { "epoch": 3.231964111328125e-05, "model_forward_time": 0.024247169494628906, "step": 21181 }, { "epoch": 3.231964111328125e-05, "step": 21181, "training_step_time": 0.18060541152954102 }, { "epoch": 3.23211669921875e-05, "model_forward_time": 0.02530217170715332, "step": 21182 }, { "epoch": 3.23211669921875e-05, "step": 21182, "training_step_time": 0.1416475772857666 }, { "epoch": 3.232269287109375e-05, "model_forward_time": 0.024146080017089844, "step": 21183 }, { "epoch": 3.232269287109375e-05, "step": 21183, "training_step_time": 0.15105557441711426 }, { "epoch": 3.232421875e-05, "model_forward_time": 0.024245500564575195, "step": 21184 }, { "epoch": 3.232421875e-05, "step": 21184, "training_step_time": 0.15365195274353027 }, { "epoch": 3.232574462890625e-05, "model_forward_time": 0.02378249168395996, "step": 21185 }, { "epoch": 3.232574462890625e-05, "step": 21185, "training_step_time": 0.21893095970153809 }, { "epoch": 3.23272705078125e-05, "model_forward_time": 0.024749040603637695, "step": 21186 }, { "epoch": 3.23272705078125e-05, "step": 21186, "training_step_time": 0.201218843460083 }, { "epoch": 3.232879638671875e-05, "model_forward_time": 0.02450847625732422, "step": 21187 }, { "epoch": 3.232879638671875e-05, "step": 21187, "training_step_time": 0.12005424499511719 }, { "epoch": 3.2330322265625e-05, "model_forward_time": 0.024097919464111328, "step": 21188 }, { "epoch": 3.2330322265625e-05, "step": 21188, "training_step_time": 0.13227605819702148 }, { "epoch": 3.233184814453125e-05, "model_forward_time": 0.02524542808532715, "step": 21189 }, { "epoch": 3.233184814453125e-05, "step": 21189, "training_step_time": 0.1397538185119629 }, { "epoch": 3.23333740234375e-05, "grad_norm": 0.4359036087989807, "learning_rate": 2.1781975237350366e-05, "loss": 0.0106, "step": 21190 }, { "epoch": 3.23333740234375e-05, "model_forward_time": 0.024667739868164062, "step": 21190 }, { "epoch": 3.23333740234375e-05, "step": 21190, "training_step_time": 0.18721723556518555 }, { "epoch": 3.233489990234375e-05, "model_forward_time": 0.02505183219909668, "step": 21191 }, { "epoch": 3.233489990234375e-05, "step": 21191, "training_step_time": 0.13910555839538574 }, { "epoch": 3.233642578125e-05, "model_forward_time": 0.024353504180908203, "step": 21192 }, { "epoch": 3.233642578125e-05, "step": 21192, "training_step_time": 0.10639357566833496 }, { "epoch": 3.233795166015625e-05, "model_forward_time": 0.025227785110473633, "step": 21193 }, { "epoch": 3.233795166015625e-05, "step": 21193, "training_step_time": 0.20298981666564941 }, { "epoch": 3.23394775390625e-05, "model_forward_time": 0.02467060089111328, "step": 21194 }, { "epoch": 3.23394775390625e-05, "step": 21194, "training_step_time": 0.10439634323120117 }, { "epoch": 3.234100341796875e-05, "model_forward_time": 0.025470495223999023, "step": 21195 }, { "epoch": 3.234100341796875e-05, "step": 21195, "training_step_time": 0.10763025283813477 }, { "epoch": 3.2342529296875e-05, "model_forward_time": 0.02551746368408203, "step": 21196 }, { "epoch": 3.2342529296875e-05, "step": 21196, "training_step_time": 0.1078188419342041 }, { "epoch": 3.234405517578125e-05, "model_forward_time": 0.025696516036987305, "step": 21197 }, { "epoch": 3.234405517578125e-05, "step": 21197, "training_step_time": 0.11059808731079102 }, { "epoch": 3.23455810546875e-05, "model_forward_time": 0.025440216064453125, "step": 21198 }, { "epoch": 3.23455810546875e-05, "step": 21198, "training_step_time": 0.11473369598388672 }, { "epoch": 3.234710693359375e-05, "model_forward_time": 0.024814367294311523, "step": 21199 }, { "epoch": 3.234710693359375e-05, "step": 21199, "training_step_time": 0.10697698593139648 }, { "epoch": 3.23486328125e-05, "grad_norm": 0.20810121297836304, "learning_rate": 2.1736492844166407e-05, "loss": 0.01, "step": 21200 }, { "epoch": 3.23486328125e-05, "model_forward_time": 0.025196313858032227, "step": 21200 }, { "epoch": 3.23486328125e-05, "step": 21200, "training_step_time": 0.10869431495666504 }, { "epoch": 3.235015869140625e-05, "model_forward_time": 0.024559974670410156, "step": 21201 }, { "epoch": 3.235015869140625e-05, "step": 21201, "training_step_time": 0.10734176635742188 }, { "epoch": 3.23516845703125e-05, "model_forward_time": 0.025150299072265625, "step": 21202 }, { "epoch": 3.23516845703125e-05, "step": 21202, "training_step_time": 0.10800004005432129 }, { "epoch": 3.235321044921875e-05, "model_forward_time": 0.025125980377197266, "step": 21203 }, { "epoch": 3.235321044921875e-05, "step": 21203, "training_step_time": 0.10706353187561035 }, { "epoch": 3.2354736328125e-05, "model_forward_time": 0.026156187057495117, "step": 21204 }, { "epoch": 3.2354736328125e-05, "step": 21204, "training_step_time": 0.11110472679138184 }, { "epoch": 3.235626220703125e-05, "model_forward_time": 0.026600360870361328, "step": 21205 }, { "epoch": 3.235626220703125e-05, "step": 21205, "training_step_time": 0.11316156387329102 }, { "epoch": 3.23577880859375e-05, "model_forward_time": 0.02500176429748535, "step": 21206 }, { "epoch": 3.23577880859375e-05, "step": 21206, "training_step_time": 0.11544585227966309 }, { "epoch": 3.235931396484375e-05, "model_forward_time": 0.028255224227905273, "step": 21207 }, { "epoch": 3.235931396484375e-05, "step": 21207, "training_step_time": 0.11554932594299316 }, { "epoch": 3.236083984375e-05, "model_forward_time": 0.025579214096069336, "step": 21208 }, { "epoch": 3.236083984375e-05, "step": 21208, "training_step_time": 0.1082611083984375 }, { "epoch": 3.236236572265625e-05, "model_forward_time": 0.02570509910583496, "step": 21209 }, { "epoch": 3.236236572265625e-05, "step": 21209, "training_step_time": 0.11014080047607422 }, { "epoch": 3.23638916015625e-05, "grad_norm": 0.19430792331695557, "learning_rate": 2.1691044793808734e-05, "loss": 0.0065, "step": 21210 }, { "epoch": 3.23638916015625e-05, "model_forward_time": 0.02509617805480957, "step": 21210 }, { "epoch": 3.23638916015625e-05, "step": 21210, "training_step_time": 0.1860666275024414 }, { "epoch": 3.236541748046875e-05, "model_forward_time": 0.024563312530517578, "step": 21211 }, { "epoch": 3.236541748046875e-05, "step": 21211, "training_step_time": 0.17884588241577148 }, { "epoch": 3.2366943359375e-05, "model_forward_time": 0.024672508239746094, "step": 21212 }, { "epoch": 3.2366943359375e-05, "step": 21212, "training_step_time": 0.12824034690856934 }, { "epoch": 3.236846923828125e-05, "model_forward_time": 0.024639606475830078, "step": 21213 }, { "epoch": 3.236846923828125e-05, "step": 21213, "training_step_time": 0.10855364799499512 }, { "epoch": 3.23699951171875e-05, "model_forward_time": 0.029419660568237305, "step": 21214 }, { "epoch": 3.23699951171875e-05, "step": 21214, "training_step_time": 0.17448902130126953 }, { "epoch": 3.237152099609375e-05, "model_forward_time": 0.024523019790649414, "step": 21215 }, { "epoch": 3.237152099609375e-05, "step": 21215, "training_step_time": 0.13331341743469238 }, { "epoch": 3.2373046875e-05, "model_forward_time": 0.02500295639038086, "step": 21216 }, { "epoch": 3.2373046875e-05, "step": 21216, "training_step_time": 0.12585711479187012 }, { "epoch": 3.237457275390625e-05, "model_forward_time": 0.025064706802368164, "step": 21217 }, { "epoch": 3.237457275390625e-05, "step": 21217, "training_step_time": 0.10875320434570312 }, { "epoch": 3.23760986328125e-05, "model_forward_time": 0.026226282119750977, "step": 21218 }, { "epoch": 3.23760986328125e-05, "step": 21218, "training_step_time": 0.11018013954162598 }, { "epoch": 3.237762451171875e-05, "model_forward_time": 0.025165081024169922, "step": 21219 }, { "epoch": 3.237762451171875e-05, "step": 21219, "training_step_time": 0.13895702362060547 }, { "epoch": 3.2379150390625e-05, "grad_norm": 0.1496005356311798, "learning_rate": 2.1645631141500994e-05, "loss": 0.0101, "step": 21220 }, { "epoch": 3.2379150390625e-05, "model_forward_time": 0.02419900894165039, "step": 21220 }, { "epoch": 3.2379150390625e-05, "step": 21220, "training_step_time": 0.16323280334472656 }, { "epoch": 3.238067626953125e-05, "model_forward_time": 0.02678084373474121, "step": 21221 }, { "epoch": 3.238067626953125e-05, "step": 21221, "training_step_time": 0.16374731063842773 }, { "epoch": 3.23822021484375e-05, "model_forward_time": 0.02356266975402832, "step": 21222 }, { "epoch": 3.23822021484375e-05, "step": 21222, "training_step_time": 0.1537158489227295 }, { "epoch": 3.238372802734375e-05, "model_forward_time": 0.024825096130371094, "step": 21223 }, { "epoch": 3.238372802734375e-05, "step": 21223, "training_step_time": 0.16394257545471191 }, { "epoch": 3.238525390625e-05, "model_forward_time": 0.02569103240966797, "step": 21224 }, { "epoch": 3.238525390625e-05, "step": 21224, "training_step_time": 0.11689400672912598 }, { "epoch": 3.238677978515625e-05, "model_forward_time": 0.02464008331298828, "step": 21225 }, { "epoch": 3.238677978515625e-05, "step": 21225, "training_step_time": 0.1573953628540039 }, { "epoch": 3.23883056640625e-05, "model_forward_time": 0.024379491806030273, "step": 21226 }, { "epoch": 3.23883056640625e-05, "step": 21226, "training_step_time": 0.14690399169921875 }, { "epoch": 3.238983154296875e-05, "model_forward_time": 0.025241851806640625, "step": 21227 }, { "epoch": 3.238983154296875e-05, "step": 21227, "training_step_time": 0.11838722229003906 }, { "epoch": 3.2391357421875e-05, "model_forward_time": 0.024970054626464844, "step": 21228 }, { "epoch": 3.2391357421875e-05, "step": 21228, "training_step_time": 0.12140345573425293 }, { "epoch": 3.239288330078125e-05, "model_forward_time": 0.026240825653076172, "step": 21229 }, { "epoch": 3.239288330078125e-05, "step": 21229, "training_step_time": 0.20972204208374023 }, { "epoch": 3.23944091796875e-05, "grad_norm": 0.22654598951339722, "learning_rate": 2.1600251942425066e-05, "loss": 0.0076, "step": 21230 }, { "epoch": 3.23944091796875e-05, "model_forward_time": 0.02619457244873047, "step": 21230 }, { "epoch": 3.23944091796875e-05, "step": 21230, "training_step_time": 0.13608717918395996 }, { "epoch": 3.239593505859375e-05, "model_forward_time": 0.027230262756347656, "step": 21231 }, { "epoch": 3.239593505859375e-05, "step": 21231, "training_step_time": 0.22448086738586426 }, { "epoch": 3.23974609375e-05, "model_forward_time": 0.025946617126464844, "step": 21232 }, { "epoch": 3.23974609375e-05, "step": 21232, "training_step_time": 0.15306854248046875 }, { "epoch": 3.239898681640625e-05, "model_forward_time": 0.024697303771972656, "step": 21233 }, { "epoch": 3.239898681640625e-05, "step": 21233, "training_step_time": 0.17835283279418945 }, { "epoch": 3.24005126953125e-05, "model_forward_time": 0.02568984031677246, "step": 21234 }, { "epoch": 3.24005126953125e-05, "step": 21234, "training_step_time": 0.11004757881164551 }, { "epoch": 3.240203857421875e-05, "model_forward_time": 0.025584697723388672, "step": 21235 }, { "epoch": 3.240203857421875e-05, "step": 21235, "training_step_time": 0.18908262252807617 }, { "epoch": 3.2403564453125e-05, "model_forward_time": 0.025524616241455078, "step": 21236 }, { "epoch": 3.2403564453125e-05, "step": 21236, "training_step_time": 0.1049039363861084 }, { "epoch": 3.240509033203125e-05, "model_forward_time": 0.0254819393157959, "step": 21237 }, { "epoch": 3.240509033203125e-05, "step": 21237, "training_step_time": 0.1027688980102539 }, { "epoch": 3.24066162109375e-05, "model_forward_time": 0.025950908660888672, "step": 21238 }, { "epoch": 3.24066162109375e-05, "step": 21238, "training_step_time": 0.10636663436889648 }, { "epoch": 3.240814208984375e-05, "model_forward_time": 0.026631832122802734, "step": 21239 }, { "epoch": 3.240814208984375e-05, "step": 21239, "training_step_time": 0.10644984245300293 }, { "epoch": 3.240966796875e-05, "grad_norm": 0.1778857707977295, "learning_rate": 2.1554907251720945e-05, "loss": 0.0053, "step": 21240 }, { "epoch": 3.240966796875e-05, "model_forward_time": 0.029713869094848633, "step": 21240 }, { "epoch": 3.240966796875e-05, "step": 21240, "training_step_time": 0.10948562622070312 }, { "epoch": 3.241119384765625e-05, "model_forward_time": 0.02584075927734375, "step": 21241 }, { "epoch": 3.241119384765625e-05, "step": 21241, "training_step_time": 0.10787534713745117 }, { "epoch": 3.24127197265625e-05, "model_forward_time": 0.02519845962524414, "step": 21242 }, { "epoch": 3.24127197265625e-05, "step": 21242, "training_step_time": 0.10442304611206055 }, { "epoch": 3.241424560546875e-05, "model_forward_time": 0.025231599807739258, "step": 21243 }, { "epoch": 3.241424560546875e-05, "step": 21243, "training_step_time": 0.10607600212097168 }, { "epoch": 3.2415771484375e-05, "model_forward_time": 0.025231122970581055, "step": 21244 }, { "epoch": 3.2415771484375e-05, "step": 21244, "training_step_time": 0.10566067695617676 }, { "epoch": 3.241729736328125e-05, "model_forward_time": 0.025299072265625, "step": 21245 }, { "epoch": 3.241729736328125e-05, "step": 21245, "training_step_time": 0.10839056968688965 }, { "epoch": 3.24188232421875e-05, "model_forward_time": 0.02610015869140625, "step": 21246 }, { "epoch": 3.24188232421875e-05, "step": 21246, "training_step_time": 0.10614991188049316 }, { "epoch": 3.242034912109375e-05, "model_forward_time": 0.02570366859436035, "step": 21247 }, { "epoch": 3.242034912109375e-05, "step": 21247, "training_step_time": 0.10657525062561035 }, { "epoch": 3.2421875e-05, "model_forward_time": 0.02432560920715332, "step": 21248 }, { "epoch": 3.2421875e-05, "step": 21248, "training_step_time": 0.10465145111083984 }, { "epoch": 3.242340087890625e-05, "model_forward_time": 0.024507999420166016, "step": 21249 }, { "epoch": 3.242340087890625e-05, "step": 21249, "training_step_time": 0.10587787628173828 }, { "epoch": 3.24249267578125e-05, "grad_norm": 0.24440622329711914, "learning_rate": 2.150959712448669e-05, "loss": 0.0055, "step": 21250 }, { "epoch": 3.24249267578125e-05, "model_forward_time": 0.024590253829956055, "step": 21250 }, { "epoch": 3.24249267578125e-05, "step": 21250, "training_step_time": 0.1047675609588623 }, { "epoch": 3.242645263671875e-05, "model_forward_time": 0.02554488182067871, "step": 21251 }, { "epoch": 3.242645263671875e-05, "step": 21251, "training_step_time": 0.13676238059997559 }, { "epoch": 3.2427978515625e-05, "model_forward_time": 0.02427053451538086, "step": 21252 }, { "epoch": 3.2427978515625e-05, "step": 21252, "training_step_time": 0.164292573928833 }, { "epoch": 3.242950439453125e-05, "model_forward_time": 0.02374124526977539, "step": 21253 }, { "epoch": 3.242950439453125e-05, "step": 21253, "training_step_time": 0.16121315956115723 }, { "epoch": 3.24310302734375e-05, "model_forward_time": 0.023733854293823242, "step": 21254 }, { "epoch": 3.24310302734375e-05, "step": 21254, "training_step_time": 0.13312387466430664 }, { "epoch": 3.243255615234375e-05, "model_forward_time": 0.023810148239135742, "step": 21255 }, { "epoch": 3.243255615234375e-05, "step": 21255, "training_step_time": 0.12884521484375 }, { "epoch": 3.243408203125e-05, "model_forward_time": 0.024776697158813477, "step": 21256 }, { "epoch": 3.243408203125e-05, "step": 21256, "training_step_time": 0.13691043853759766 }, { "epoch": 3.243560791015625e-05, "model_forward_time": 0.024858474731445312, "step": 21257 }, { "epoch": 3.243560791015625e-05, "step": 21257, "training_step_time": 0.20888304710388184 }, { "epoch": 3.24371337890625e-05, "model_forward_time": 0.025579452514648438, "step": 21258 }, { "epoch": 3.24371337890625e-05, "step": 21258, "training_step_time": 0.1198573112487793 }, { "epoch": 3.243865966796875e-05, "model_forward_time": 0.024273157119750977, "step": 21259 }, { "epoch": 3.243865966796875e-05, "step": 21259, "training_step_time": 0.10398983955383301 }, { "epoch": 3.2440185546875e-05, "grad_norm": 0.20188677310943604, "learning_rate": 2.1464321615778422e-05, "loss": 0.0104, "step": 21260 }, { "epoch": 3.2440185546875e-05, "model_forward_time": 0.02530837059020996, "step": 21260 }, { "epoch": 3.2440185546875e-05, "step": 21260, "training_step_time": 0.11028385162353516 }, { "epoch": 3.244171142578125e-05, "model_forward_time": 0.02611231803894043, "step": 21261 }, { "epoch": 3.244171142578125e-05, "step": 21261, "training_step_time": 0.11213850975036621 }, { "epoch": 3.24432373046875e-05, "model_forward_time": 0.0264585018157959, "step": 21262 }, { "epoch": 3.24432373046875e-05, "step": 21262, "training_step_time": 0.11168622970581055 }, { "epoch": 3.244476318359375e-05, "model_forward_time": 0.028642892837524414, "step": 21263 }, { "epoch": 3.244476318359375e-05, "step": 21263, "training_step_time": 0.12099194526672363 }, { "epoch": 3.24462890625e-05, "model_forward_time": 0.02540135383605957, "step": 21264 }, { "epoch": 3.24462890625e-05, "step": 21264, "training_step_time": 0.11056232452392578 }, { "epoch": 3.244781494140625e-05, "model_forward_time": 0.025817394256591797, "step": 21265 }, { "epoch": 3.244781494140625e-05, "step": 21265, "training_step_time": 0.11344623565673828 }, { "epoch": 3.24493408203125e-05, "model_forward_time": 0.025115013122558594, "step": 21266 }, { "epoch": 3.24493408203125e-05, "step": 21266, "training_step_time": 0.11269712448120117 }, { "epoch": 3.245086669921875e-05, "model_forward_time": 0.02422499656677246, "step": 21267 }, { "epoch": 3.245086669921875e-05, "step": 21267, "training_step_time": 0.11496448516845703 }, { "epoch": 3.2452392578125e-05, "model_forward_time": 0.025621891021728516, "step": 21268 }, { "epoch": 3.2452392578125e-05, "step": 21268, "training_step_time": 0.10912036895751953 }, { "epoch": 3.245391845703125e-05, "model_forward_time": 0.024749755859375, "step": 21269 }, { "epoch": 3.245391845703125e-05, "step": 21269, "training_step_time": 0.14738202095031738 }, { "epoch": 3.24554443359375e-05, "grad_norm": 0.12327086180448532, "learning_rate": 2.1419080780610123e-05, "loss": 0.0101, "step": 21270 }, { "epoch": 3.24554443359375e-05, "model_forward_time": 0.02468729019165039, "step": 21270 }, { "epoch": 3.24554443359375e-05, "step": 21270, "training_step_time": 0.15407252311706543 }, { "epoch": 3.245697021484375e-05, "model_forward_time": 0.024357318878173828, "step": 21271 }, { "epoch": 3.245697021484375e-05, "step": 21271, "training_step_time": 0.11011791229248047 }, { "epoch": 3.245849609375e-05, "model_forward_time": 0.02506709098815918, "step": 21272 }, { "epoch": 3.245849609375e-05, "step": 21272, "training_step_time": 0.1347203254699707 }, { "epoch": 3.246002197265625e-05, "model_forward_time": 0.02528691291809082, "step": 21273 }, { "epoch": 3.246002197265625e-05, "step": 21273, "training_step_time": 0.19986915588378906 }, { "epoch": 3.24615478515625e-05, "model_forward_time": 0.02436232566833496, "step": 21274 }, { "epoch": 3.24615478515625e-05, "step": 21274, "training_step_time": 0.16452765464782715 }, { "epoch": 3.246307373046875e-05, "model_forward_time": 0.024370670318603516, "step": 21275 }, { "epoch": 3.246307373046875e-05, "step": 21275, "training_step_time": 0.20297479629516602 }, { "epoch": 3.2464599609375e-05, "model_forward_time": 0.024593353271484375, "step": 21276 }, { "epoch": 3.2464599609375e-05, "step": 21276, "training_step_time": 0.14850211143493652 }, { "epoch": 3.246612548828125e-05, "model_forward_time": 0.0241851806640625, "step": 21277 }, { "epoch": 3.246612548828125e-05, "step": 21277, "training_step_time": 0.18338561058044434 }, { "epoch": 3.24676513671875e-05, "model_forward_time": 0.024279356002807617, "step": 21278 }, { "epoch": 3.24676513671875e-05, "step": 21278, "training_step_time": 0.13506317138671875 }, { "epoch": 3.246917724609375e-05, "model_forward_time": 0.02419424057006836, "step": 21279 }, { "epoch": 3.246917724609375e-05, "step": 21279, "training_step_time": 0.1097254753112793 }, { "epoch": 3.2470703125e-05, "grad_norm": 0.12808099389076233, "learning_rate": 2.1373874673953685e-05, "loss": 0.0085, "step": 21280 }, { "epoch": 3.2470703125e-05, "model_forward_time": 0.025443553924560547, "step": 21280 }, { "epoch": 3.2470703125e-05, "step": 21280, "training_step_time": 0.11110568046569824 }, { "epoch": 3.247222900390625e-05, "model_forward_time": 0.025237560272216797, "step": 21281 }, { "epoch": 3.247222900390625e-05, "step": 21281, "training_step_time": 0.10890340805053711 }, { "epoch": 3.24737548828125e-05, "model_forward_time": 0.025832176208496094, "step": 21282 }, { "epoch": 3.24737548828125e-05, "step": 21282, "training_step_time": 0.11000609397888184 }, { "epoch": 3.247528076171875e-05, "model_forward_time": 0.026210784912109375, "step": 21283 }, { "epoch": 3.247528076171875e-05, "step": 21283, "training_step_time": 0.10967636108398438 }, { "epoch": 3.2476806640625e-05, "model_forward_time": 0.02518439292907715, "step": 21284 }, { "epoch": 3.2476806640625e-05, "step": 21284, "training_step_time": 0.1105353832244873 }, { "epoch": 3.247833251953125e-05, "model_forward_time": 0.025671958923339844, "step": 21285 }, { "epoch": 3.247833251953125e-05, "step": 21285, "training_step_time": 0.10935282707214355 }, { "epoch": 3.24798583984375e-05, "model_forward_time": 0.025464296340942383, "step": 21286 }, { "epoch": 3.24798583984375e-05, "step": 21286, "training_step_time": 0.11247420310974121 }, { "epoch": 3.248138427734375e-05, "model_forward_time": 0.025449037551879883, "step": 21287 }, { "epoch": 3.248138427734375e-05, "step": 21287, "training_step_time": 0.10718512535095215 }, { "epoch": 3.248291015625e-05, "model_forward_time": 0.024883031845092773, "step": 21288 }, { "epoch": 3.248291015625e-05, "step": 21288, "training_step_time": 0.11532115936279297 }, { "epoch": 3.248443603515625e-05, "model_forward_time": 0.0251157283782959, "step": 21289 }, { "epoch": 3.248443603515625e-05, "step": 21289, "training_step_time": 0.11313629150390625 }, { "epoch": 3.24859619140625e-05, "grad_norm": 0.40604090690612793, "learning_rate": 2.1328703350738765e-05, "loss": 0.0075, "step": 21290 }, { "epoch": 3.24859619140625e-05, "model_forward_time": 0.02522730827331543, "step": 21290 }, { "epoch": 3.24859619140625e-05, "step": 21290, "training_step_time": 0.11601376533508301 }, { "epoch": 3.248748779296875e-05, "model_forward_time": 0.02581620216369629, "step": 21291 }, { "epoch": 3.248748779296875e-05, "step": 21291, "training_step_time": 0.10946178436279297 }, { "epoch": 3.2489013671875e-05, "model_forward_time": 0.025052547454833984, "step": 21292 }, { "epoch": 3.2489013671875e-05, "step": 21292, "training_step_time": 0.10922431945800781 }, { "epoch": 3.249053955078125e-05, "model_forward_time": 0.025054454803466797, "step": 21293 }, { "epoch": 3.249053955078125e-05, "step": 21293, "training_step_time": 0.1067967414855957 }, { "epoch": 3.24920654296875e-05, "model_forward_time": 0.025393009185791016, "step": 21294 }, { "epoch": 3.24920654296875e-05, "step": 21294, "training_step_time": 0.10677576065063477 }, { "epoch": 3.249359130859375e-05, "model_forward_time": 0.025377511978149414, "step": 21295 }, { "epoch": 3.249359130859375e-05, "step": 21295, "training_step_time": 0.10663557052612305 }, { "epoch": 3.24951171875e-05, "model_forward_time": 0.025272846221923828, "step": 21296 }, { "epoch": 3.24951171875e-05, "step": 21296, "training_step_time": 0.1996934413909912 }, { "epoch": 3.249664306640625e-05, "model_forward_time": 0.02434515953063965, "step": 21297 }, { "epoch": 3.249664306640625e-05, "step": 21297, "training_step_time": 0.12753582000732422 }, { "epoch": 3.24981689453125e-05, "model_forward_time": 0.023917675018310547, "step": 21298 }, { "epoch": 3.24981689453125e-05, "step": 21298, "training_step_time": 0.1106119155883789 }, { "epoch": 3.249969482421875e-05, "model_forward_time": 0.02521204948425293, "step": 21299 }, { "epoch": 3.249969482421875e-05, "step": 21299, "training_step_time": 0.11374855041503906 }, { "epoch": 3.2501220703125e-05, "grad_norm": 0.1312633454799652, "learning_rate": 2.128356686585282e-05, "loss": 0.0057, "step": 21300 }, { "epoch": 3.2501220703125e-05, "model_forward_time": 0.025166034698486328, "step": 21300 }, { "epoch": 3.2501220703125e-05, "step": 21300, "training_step_time": 0.16726255416870117 }, { "epoch": 3.250274658203125e-05, "model_forward_time": 0.024360179901123047, "step": 21301 }, { "epoch": 3.250274658203125e-05, "step": 21301, "training_step_time": 0.15343666076660156 }, { "epoch": 3.25042724609375e-05, "model_forward_time": 0.024587154388427734, "step": 21302 }, { "epoch": 3.25042724609375e-05, "step": 21302, "training_step_time": 0.11338424682617188 }, { "epoch": 3.250579833984375e-05, "model_forward_time": 0.02434992790222168, "step": 21303 }, { "epoch": 3.250579833984375e-05, "step": 21303, "training_step_time": 0.10473442077636719 }, { "epoch": 3.250732421875e-05, "model_forward_time": 0.025153636932373047, "step": 21304 }, { "epoch": 3.250732421875e-05, "step": 21304, "training_step_time": 0.10920405387878418 }, { "epoch": 3.250885009765625e-05, "model_forward_time": 0.02488112449645996, "step": 21305 }, { "epoch": 3.250885009765625e-05, "step": 21305, "training_step_time": 0.11178779602050781 }, { "epoch": 3.25103759765625e-05, "model_forward_time": 0.025148630142211914, "step": 21306 }, { "epoch": 3.25103759765625e-05, "step": 21306, "training_step_time": 0.1092221736907959 }, { "epoch": 3.251190185546875e-05, "model_forward_time": 0.02528095245361328, "step": 21307 }, { "epoch": 3.251190185546875e-05, "step": 21307, "training_step_time": 0.11386346817016602 }, { "epoch": 3.2513427734375e-05, "model_forward_time": 0.025056123733520508, "step": 21308 }, { "epoch": 3.2513427734375e-05, "step": 21308, "training_step_time": 0.1161651611328125 }, { "epoch": 3.251495361328125e-05, "model_forward_time": 0.024778366088867188, "step": 21309 }, { "epoch": 3.251495361328125e-05, "step": 21309, "training_step_time": 0.1216585636138916 }, { "epoch": 3.25164794921875e-05, "grad_norm": 0.2531159818172455, "learning_rate": 2.12384652741409e-05, "loss": 0.0064, "step": 21310 }, { "epoch": 3.25164794921875e-05, "model_forward_time": 0.025117158889770508, "step": 21310 }, { "epoch": 3.25164794921875e-05, "step": 21310, "training_step_time": 0.1188044548034668 }, { "epoch": 3.251800537109375e-05, "model_forward_time": 0.02592182159423828, "step": 21311 }, { "epoch": 3.251800537109375e-05, "step": 21311, "training_step_time": 0.11706972122192383 }, { "epoch": 3.251953125e-05, "model_forward_time": 0.024829387664794922, "step": 21312 }, { "epoch": 3.251953125e-05, "step": 21312, "training_step_time": 0.1062474250793457 }, { "epoch": 3.252105712890625e-05, "model_forward_time": 0.023712873458862305, "step": 21313 }, { "epoch": 3.252105712890625e-05, "step": 21313, "training_step_time": 0.14829206466674805 }, { "epoch": 3.25225830078125e-05, "model_forward_time": 0.024098873138427734, "step": 21314 }, { "epoch": 3.25225830078125e-05, "step": 21314, "training_step_time": 0.15225481986999512 }, { "epoch": 3.252410888671875e-05, "model_forward_time": 0.024515867233276367, "step": 21315 }, { "epoch": 3.252410888671875e-05, "step": 21315, "training_step_time": 0.11450743675231934 }, { "epoch": 3.2525634765625e-05, "model_forward_time": 0.025091886520385742, "step": 21316 }, { "epoch": 3.2525634765625e-05, "step": 21316, "training_step_time": 0.13332843780517578 }, { "epoch": 3.252716064453125e-05, "model_forward_time": 0.02563762664794922, "step": 21317 }, { "epoch": 3.252716064453125e-05, "step": 21317, "training_step_time": 0.20834064483642578 }, { "epoch": 3.25286865234375e-05, "model_forward_time": 0.025037527084350586, "step": 21318 }, { "epoch": 3.25286865234375e-05, "step": 21318, "training_step_time": 0.15656590461730957 }, { "epoch": 3.253021240234375e-05, "model_forward_time": 0.02461862564086914, "step": 21319 }, { "epoch": 3.253021240234375e-05, "step": 21319, "training_step_time": 0.18573975563049316 }, { "epoch": 3.253173828125e-05, "grad_norm": 0.11055553704500198, "learning_rate": 2.1193398630405725e-05, "loss": 0.0063, "step": 21320 }, { "epoch": 3.253173828125e-05, "model_forward_time": 0.02436089515686035, "step": 21320 }, { "epoch": 3.253173828125e-05, "step": 21320, "training_step_time": 0.16140294075012207 }, { "epoch": 3.253326416015625e-05, "model_forward_time": 0.024593114852905273, "step": 21321 }, { "epoch": 3.253326416015625e-05, "step": 21321, "training_step_time": 0.19222140312194824 }, { "epoch": 3.25347900390625e-05, "model_forward_time": 0.025799989700317383, "step": 21322 }, { "epoch": 3.25347900390625e-05, "step": 21322, "training_step_time": 0.12839746475219727 }, { "epoch": 3.253631591796875e-05, "model_forward_time": 0.025096654891967773, "step": 21323 }, { "epoch": 3.253631591796875e-05, "step": 21323, "training_step_time": 0.10694599151611328 }, { "epoch": 3.2537841796875e-05, "model_forward_time": 0.025942564010620117, "step": 21324 }, { "epoch": 3.2537841796875e-05, "step": 21324, "training_step_time": 0.10702133178710938 }, { "epoch": 3.253936767578125e-05, "model_forward_time": 0.026082515716552734, "step": 21325 }, { "epoch": 3.253936767578125e-05, "step": 21325, "training_step_time": 0.10600829124450684 }, { "epoch": 3.25408935546875e-05, "model_forward_time": 0.02938365936279297, "step": 21326 }, { "epoch": 3.25408935546875e-05, "step": 21326, "training_step_time": 0.10964179039001465 }, { "epoch": 3.254241943359375e-05, "model_forward_time": 0.02550220489501953, "step": 21327 }, { "epoch": 3.254241943359375e-05, "step": 21327, "training_step_time": 0.11289072036743164 }, { "epoch": 3.25439453125e-05, "model_forward_time": 0.02670764923095703, "step": 21328 }, { "epoch": 3.25439453125e-05, "step": 21328, "training_step_time": 0.10724067687988281 }, { "epoch": 3.254547119140625e-05, "model_forward_time": 0.02556586265563965, "step": 21329 }, { "epoch": 3.254547119140625e-05, "step": 21329, "training_step_time": 0.10686612129211426 }, { "epoch": 3.25469970703125e-05, "grad_norm": 0.27071115374565125, "learning_rate": 2.1148366989407496e-05, "loss": 0.0147, "step": 21330 }, { "epoch": 3.25469970703125e-05, "model_forward_time": 0.025428056716918945, "step": 21330 }, { "epoch": 3.25469970703125e-05, "step": 21330, "training_step_time": 0.10884976387023926 }, { "epoch": 3.254852294921875e-05, "model_forward_time": 0.02520751953125, "step": 21331 }, { "epoch": 3.254852294921875e-05, "step": 21331, "training_step_time": 0.1066279411315918 }, { "epoch": 3.2550048828125e-05, "model_forward_time": 0.025646448135375977, "step": 21332 }, { "epoch": 3.2550048828125e-05, "step": 21332, "training_step_time": 0.10484194755554199 }, { "epoch": 3.255157470703125e-05, "model_forward_time": 0.02533721923828125, "step": 21333 }, { "epoch": 3.255157470703125e-05, "step": 21333, "training_step_time": 0.10896992683410645 }, { "epoch": 3.25531005859375e-05, "model_forward_time": 0.026538610458374023, "step": 21334 }, { "epoch": 3.25531005859375e-05, "step": 21334, "training_step_time": 0.10889697074890137 }, { "epoch": 3.255462646484375e-05, "model_forward_time": 0.025992631912231445, "step": 21335 }, { "epoch": 3.255462646484375e-05, "step": 21335, "training_step_time": 0.10542559623718262 }, { "epoch": 3.255615234375e-05, "model_forward_time": 0.026656389236450195, "step": 21336 }, { "epoch": 3.255615234375e-05, "step": 21336, "training_step_time": 0.10625886917114258 }, { "epoch": 3.255767822265625e-05, "model_forward_time": 0.02653050422668457, "step": 21337 }, { "epoch": 3.255767822265625e-05, "step": 21337, "training_step_time": 0.10554289817810059 }, { "epoch": 3.25592041015625e-05, "model_forward_time": 0.027411699295043945, "step": 21338 }, { "epoch": 3.25592041015625e-05, "step": 21338, "training_step_time": 0.10778594017028809 }, { "epoch": 3.256072998046875e-05, "model_forward_time": 0.025847911834716797, "step": 21339 }, { "epoch": 3.256072998046875e-05, "step": 21339, "training_step_time": 0.10598349571228027 }, { "epoch": 3.2562255859375e-05, "grad_norm": 0.29129672050476074, "learning_rate": 2.110337040586391e-05, "loss": 0.0071, "step": 21340 }, { "epoch": 3.2562255859375e-05, "model_forward_time": 0.02493000030517578, "step": 21340 }, { "epoch": 3.2562255859375e-05, "step": 21340, "training_step_time": 0.10450482368469238 }, { "epoch": 3.256378173828125e-05, "model_forward_time": 0.024988174438476562, "step": 21341 }, { "epoch": 3.256378173828125e-05, "step": 21341, "training_step_time": 0.1085507869720459 }, { "epoch": 3.25653076171875e-05, "model_forward_time": 0.025181293487548828, "step": 21342 }, { "epoch": 3.25653076171875e-05, "step": 21342, "training_step_time": 0.22105932235717773 }, { "epoch": 3.256683349609375e-05, "model_forward_time": 0.024350404739379883, "step": 21343 }, { "epoch": 3.256683349609375e-05, "step": 21343, "training_step_time": 0.17120885848999023 }, { "epoch": 3.2568359375e-05, "model_forward_time": 0.02548527717590332, "step": 21344 }, { "epoch": 3.2568359375e-05, "step": 21344, "training_step_time": 0.18340635299682617 }, { "epoch": 3.256988525390625e-05, "model_forward_time": 0.02452707290649414, "step": 21345 }, { "epoch": 3.256988525390625e-05, "step": 21345, "training_step_time": 0.1776583194732666 }, { "epoch": 3.25714111328125e-05, "model_forward_time": 0.024905920028686523, "step": 21346 }, { "epoch": 3.25714111328125e-05, "step": 21346, "training_step_time": 0.13362383842468262 }, { "epoch": 3.257293701171875e-05, "model_forward_time": 0.025040864944458008, "step": 21347 }, { "epoch": 3.257293701171875e-05, "step": 21347, "training_step_time": 0.11795878410339355 }, { "epoch": 3.2574462890625e-05, "model_forward_time": 0.025641679763793945, "step": 21348 }, { "epoch": 3.2574462890625e-05, "step": 21348, "training_step_time": 0.10856270790100098 }, { "epoch": 3.257598876953125e-05, "model_forward_time": 0.02622532844543457, "step": 21349 }, { "epoch": 3.257598876953125e-05, "step": 21349, "training_step_time": 0.10607528686523438 }, { "epoch": 3.25775146484375e-05, "grad_norm": 0.28593915700912476, "learning_rate": 2.105840893445005e-05, "loss": 0.0101, "step": 21350 }, { "epoch": 3.25775146484375e-05, "model_forward_time": 0.025183439254760742, "step": 21350 }, { "epoch": 3.25775146484375e-05, "step": 21350, "training_step_time": 0.11247539520263672 }, { "epoch": 3.257904052734375e-05, "model_forward_time": 0.02522754669189453, "step": 21351 }, { "epoch": 3.257904052734375e-05, "step": 21351, "training_step_time": 0.1322019100189209 }, { "epoch": 3.258056640625e-05, "model_forward_time": 0.024935007095336914, "step": 21352 }, { "epoch": 3.258056640625e-05, "step": 21352, "training_step_time": 0.15845584869384766 }, { "epoch": 3.258209228515625e-05, "model_forward_time": 0.024411678314208984, "step": 21353 }, { "epoch": 3.258209228515625e-05, "step": 21353, "training_step_time": 0.1574704647064209 }, { "epoch": 3.25836181640625e-05, "model_forward_time": 0.02443408966064453, "step": 21354 }, { "epoch": 3.25836181640625e-05, "step": 21354, "training_step_time": 0.1367182731628418 }, { "epoch": 3.258514404296875e-05, "model_forward_time": 0.025136709213256836, "step": 21355 }, { "epoch": 3.258514404296875e-05, "step": 21355, "training_step_time": 0.1079854965209961 }, { "epoch": 3.2586669921875e-05, "model_forward_time": 0.02461099624633789, "step": 21356 }, { "epoch": 3.2586669921875e-05, "step": 21356, "training_step_time": 0.14814305305480957 }, { "epoch": 3.258819580078125e-05, "model_forward_time": 0.024613142013549805, "step": 21357 }, { "epoch": 3.258819580078125e-05, "step": 21357, "training_step_time": 0.15858173370361328 }, { "epoch": 3.25897216796875e-05, "model_forward_time": 0.024570941925048828, "step": 21358 }, { "epoch": 3.25897216796875e-05, "step": 21358, "training_step_time": 0.11369824409484863 }, { "epoch": 3.259124755859375e-05, "model_forward_time": 0.026026487350463867, "step": 21359 }, { "epoch": 3.259124755859375e-05, "step": 21359, "training_step_time": 0.13006186485290527 }, { "epoch": 3.25927734375e-05, "grad_norm": 0.20840445160865784, "learning_rate": 2.1013482629798333e-05, "loss": 0.0122, "step": 21360 }, { "epoch": 3.25927734375e-05, "model_forward_time": 0.02498650550842285, "step": 21360 }, { "epoch": 3.25927734375e-05, "step": 21360, "training_step_time": 0.19704747200012207 }, { "epoch": 3.259429931640625e-05, "model_forward_time": 0.024827241897583008, "step": 21361 }, { "epoch": 3.259429931640625e-05, "step": 21361, "training_step_time": 0.15967965126037598 }, { "epoch": 3.25958251953125e-05, "model_forward_time": 0.02453780174255371, "step": 21362 }, { "epoch": 3.25958251953125e-05, "step": 21362, "training_step_time": 0.12873101234436035 }, { "epoch": 3.259735107421875e-05, "model_forward_time": 0.0241241455078125, "step": 21363 }, { "epoch": 3.259735107421875e-05, "step": 21363, "training_step_time": 0.11409163475036621 }, { "epoch": 3.2598876953125e-05, "model_forward_time": 0.026187658309936523, "step": 21364 }, { "epoch": 3.2598876953125e-05, "step": 21364, "training_step_time": 0.12126374244689941 }, { "epoch": 3.260040283203125e-05, "model_forward_time": 0.025059223175048828, "step": 21365 }, { "epoch": 3.260040283203125e-05, "step": 21365, "training_step_time": 0.11343240737915039 }, { "epoch": 3.26019287109375e-05, "model_forward_time": 0.02526068687438965, "step": 21366 }, { "epoch": 3.26019287109375e-05, "step": 21366, "training_step_time": 0.17711997032165527 }, { "epoch": 3.260345458984375e-05, "model_forward_time": 0.024739980697631836, "step": 21367 }, { "epoch": 3.260345458984375e-05, "step": 21367, "training_step_time": 0.11723589897155762 }, { "epoch": 3.260498046875e-05, "model_forward_time": 0.024399280548095703, "step": 21368 }, { "epoch": 3.260498046875e-05, "step": 21368, "training_step_time": 0.10583162307739258 }, { "epoch": 3.260650634765625e-05, "model_forward_time": 0.026609420776367188, "step": 21369 }, { "epoch": 3.260650634765625e-05, "step": 21369, "training_step_time": 0.10903167724609375 }, { "epoch": 3.26080322265625e-05, "grad_norm": 0.19517971575260162, "learning_rate": 2.0968591546498488e-05, "loss": 0.0091, "step": 21370 }, { "epoch": 3.26080322265625e-05, "model_forward_time": 0.02515578269958496, "step": 21370 }, { "epoch": 3.26080322265625e-05, "step": 21370, "training_step_time": 0.10760831832885742 }, { "epoch": 3.260955810546875e-05, "model_forward_time": 0.025435686111450195, "step": 21371 }, { "epoch": 3.260955810546875e-05, "step": 21371, "training_step_time": 0.10597515106201172 }, { "epoch": 3.2611083984375e-05, "model_forward_time": 0.025447845458984375, "step": 21372 }, { "epoch": 3.2611083984375e-05, "step": 21372, "training_step_time": 0.10650849342346191 }, { "epoch": 3.261260986328125e-05, "model_forward_time": 0.0252225399017334, "step": 21373 }, { "epoch": 3.261260986328125e-05, "step": 21373, "training_step_time": 0.10647249221801758 }, { "epoch": 3.26141357421875e-05, "model_forward_time": 0.024986743927001953, "step": 21374 }, { "epoch": 3.26141357421875e-05, "step": 21374, "training_step_time": 0.10435152053833008 }, { "epoch": 3.261566162109375e-05, "model_forward_time": 0.025393009185791016, "step": 21375 }, { "epoch": 3.261566162109375e-05, "step": 21375, "training_step_time": 0.1047968864440918 }, { "epoch": 3.26171875e-05, "model_forward_time": 0.026475906372070312, "step": 21376 }, { "epoch": 3.26171875e-05, "step": 21376, "training_step_time": 0.10819530487060547 }, { "epoch": 3.261871337890625e-05, "model_forward_time": 0.026598691940307617, "step": 21377 }, { "epoch": 3.261871337890625e-05, "step": 21377, "training_step_time": 0.10600662231445312 }, { "epoch": 3.26202392578125e-05, "model_forward_time": 0.026185274124145508, "step": 21378 }, { "epoch": 3.26202392578125e-05, "step": 21378, "training_step_time": 0.10475277900695801 }, { "epoch": 3.262176513671875e-05, "model_forward_time": 0.02539825439453125, "step": 21379 }, { "epoch": 3.262176513671875e-05, "step": 21379, "training_step_time": 0.10705447196960449 }, { "epoch": 3.2623291015625e-05, "grad_norm": 0.16651000082492828, "learning_rate": 2.09237357390974e-05, "loss": 0.0067, "step": 21380 }, { "epoch": 3.2623291015625e-05, "model_forward_time": 0.025920867919921875, "step": 21380 }, { "epoch": 3.2623291015625e-05, "step": 21380, "training_step_time": 0.10423707962036133 }, { "epoch": 3.262481689453125e-05, "model_forward_time": 0.02617025375366211, "step": 21381 }, { "epoch": 3.262481689453125e-05, "step": 21381, "training_step_time": 0.1067051887512207 }, { "epoch": 3.26263427734375e-05, "model_forward_time": 0.0254669189453125, "step": 21382 }, { "epoch": 3.26263427734375e-05, "step": 21382, "training_step_time": 0.10767602920532227 }, { "epoch": 3.262786865234375e-05, "model_forward_time": 0.02643275260925293, "step": 21383 }, { "epoch": 3.262786865234375e-05, "step": 21383, "training_step_time": 0.10576367378234863 }, { "epoch": 3.262939453125e-05, "model_forward_time": 0.02525043487548828, "step": 21384 }, { "epoch": 3.262939453125e-05, "step": 21384, "training_step_time": 0.1068429946899414 }, { "epoch": 3.263092041015625e-05, "model_forward_time": 0.025199174880981445, "step": 21385 }, { "epoch": 3.263092041015625e-05, "step": 21385, "training_step_time": 0.11375880241394043 }, { "epoch": 3.26324462890625e-05, "model_forward_time": 0.024149179458618164, "step": 21386 }, { "epoch": 3.26324462890625e-05, "step": 21386, "training_step_time": 0.18388938903808594 }, { "epoch": 3.263397216796875e-05, "model_forward_time": 0.025084495544433594, "step": 21387 }, { "epoch": 3.263397216796875e-05, "step": 21387, "training_step_time": 0.17194175720214844 }, { "epoch": 3.2635498046875e-05, "model_forward_time": 0.024611234664916992, "step": 21388 }, { "epoch": 3.2635498046875e-05, "step": 21388, "training_step_time": 0.13388395309448242 }, { "epoch": 3.263702392578125e-05, "model_forward_time": 0.024343490600585938, "step": 21389 }, { "epoch": 3.263702392578125e-05, "step": 21389, "training_step_time": 0.1585078239440918 }, { "epoch": 3.26385498046875e-05, "grad_norm": 0.1198514774441719, "learning_rate": 2.0878915262099098e-05, "loss": 0.0066, "step": 21390 }, { "epoch": 3.26385498046875e-05, "model_forward_time": 0.025183439254760742, "step": 21390 }, { "epoch": 3.26385498046875e-05, "step": 21390, "training_step_time": 0.21726727485656738 }, { "epoch": 3.264007568359375e-05, "model_forward_time": 0.024637460708618164, "step": 21391 }, { "epoch": 3.264007568359375e-05, "step": 21391, "training_step_time": 0.1228179931640625 }, { "epoch": 3.26416015625e-05, "model_forward_time": 0.028346776962280273, "step": 21392 }, { "epoch": 3.26416015625e-05, "step": 21392, "training_step_time": 0.11328697204589844 }, { "epoch": 3.264312744140625e-05, "model_forward_time": 0.02584218978881836, "step": 21393 }, { "epoch": 3.264312744140625e-05, "step": 21393, "training_step_time": 0.11167502403259277 }, { "epoch": 3.26446533203125e-05, "model_forward_time": 0.025397062301635742, "step": 21394 }, { "epoch": 3.26446533203125e-05, "step": 21394, "training_step_time": 0.10777449607849121 }, { "epoch": 3.264617919921875e-05, "model_forward_time": 0.025455474853515625, "step": 21395 }, { "epoch": 3.264617919921875e-05, "step": 21395, "training_step_time": 0.10875272750854492 }, { "epoch": 3.2647705078125e-05, "model_forward_time": 0.025969743728637695, "step": 21396 }, { "epoch": 3.2647705078125e-05, "step": 21396, "training_step_time": 0.1058037281036377 }, { "epoch": 3.264923095703125e-05, "model_forward_time": 0.028711557388305664, "step": 21397 }, { "epoch": 3.264923095703125e-05, "step": 21397, "training_step_time": 0.10924124717712402 }, { "epoch": 3.26507568359375e-05, "model_forward_time": 0.026340484619140625, "step": 21398 }, { "epoch": 3.26507568359375e-05, "step": 21398, "training_step_time": 0.10995912551879883 }, { "epoch": 3.265228271484375e-05, "model_forward_time": 0.025932788848876953, "step": 21399 }, { "epoch": 3.265228271484375e-05, "step": 21399, "training_step_time": 0.11035537719726562 }, { "epoch": 3.265380859375e-05, "grad_norm": 0.15694105625152588, "learning_rate": 2.0834130169964692e-05, "loss": 0.0122, "step": 21400 }, { "epoch": 3.265380859375e-05, "model_forward_time": 0.025883197784423828, "step": 21400 }, { "epoch": 3.265380859375e-05, "step": 21400, "training_step_time": 0.10556793212890625 }, { "epoch": 3.265533447265625e-05, "model_forward_time": 0.02433466911315918, "step": 21401 }, { "epoch": 3.265533447265625e-05, "step": 21401, "training_step_time": 0.15153145790100098 }, { "epoch": 3.26568603515625e-05, "model_forward_time": 0.024839401245117188, "step": 21402 }, { "epoch": 3.26568603515625e-05, "step": 21402, "training_step_time": 0.15307855606079102 }, { "epoch": 3.265838623046875e-05, "model_forward_time": 0.024536609649658203, "step": 21403 }, { "epoch": 3.265838623046875e-05, "step": 21403, "training_step_time": 0.10989999771118164 }, { "epoch": 3.2659912109375e-05, "model_forward_time": 0.026547670364379883, "step": 21404 }, { "epoch": 3.2659912109375e-05, "step": 21404, "training_step_time": 0.13157272338867188 }, { "epoch": 3.266143798828125e-05, "model_forward_time": 0.02541351318359375, "step": 21405 }, { "epoch": 3.266143798828125e-05, "step": 21405, "training_step_time": 0.12725067138671875 }, { "epoch": 3.26629638671875e-05, "model_forward_time": 0.026391983032226562, "step": 21406 }, { "epoch": 3.26629638671875e-05, "step": 21406, "training_step_time": 0.12491726875305176 }, { "epoch": 3.266448974609375e-05, "model_forward_time": 0.025368213653564453, "step": 21407 }, { "epoch": 3.266448974609375e-05, "step": 21407, "training_step_time": 0.22081971168518066 }, { "epoch": 3.2666015625e-05, "model_forward_time": 0.02415943145751953, "step": 21408 }, { "epoch": 3.2666015625e-05, "step": 21408, "training_step_time": 0.21675872802734375 }, { "epoch": 3.266754150390625e-05, "model_forward_time": 0.025788545608520508, "step": 21409 }, { "epoch": 3.266754150390625e-05, "step": 21409, "training_step_time": 0.13145160675048828 }, { "epoch": 3.26690673828125e-05, "grad_norm": 0.1490681767463684, "learning_rate": 2.0789380517112272e-05, "loss": 0.014, "step": 21410 }, { "epoch": 3.26690673828125e-05, "model_forward_time": 0.023677825927734375, "step": 21410 }, { "epoch": 3.26690673828125e-05, "step": 21410, "training_step_time": 0.1896672248840332 }, { "epoch": 3.267059326171875e-05, "model_forward_time": 0.024270057678222656, "step": 21411 }, { "epoch": 3.267059326171875e-05, "step": 21411, "training_step_time": 0.13915061950683594 }, { "epoch": 3.2672119140625e-05, "model_forward_time": 0.024865150451660156, "step": 21412 }, { "epoch": 3.2672119140625e-05, "step": 21412, "training_step_time": 0.10713052749633789 }, { "epoch": 3.267364501953125e-05, "model_forward_time": 0.024958133697509766, "step": 21413 }, { "epoch": 3.267364501953125e-05, "step": 21413, "training_step_time": 0.10623764991760254 }, { "epoch": 3.26751708984375e-05, "model_forward_time": 0.025310754776000977, "step": 21414 }, { "epoch": 3.26751708984375e-05, "step": 21414, "training_step_time": 0.10644912719726562 }, { "epoch": 3.267669677734375e-05, "model_forward_time": 0.025640487670898438, "step": 21415 }, { "epoch": 3.267669677734375e-05, "step": 21415, "training_step_time": 0.1128394603729248 }, { "epoch": 3.267822265625e-05, "model_forward_time": 0.025661468505859375, "step": 21416 }, { "epoch": 3.267822265625e-05, "step": 21416, "training_step_time": 0.10508561134338379 }, { "epoch": 3.267974853515625e-05, "model_forward_time": 0.02522444725036621, "step": 21417 }, { "epoch": 3.267974853515625e-05, "step": 21417, "training_step_time": 0.10712575912475586 }, { "epoch": 3.26812744140625e-05, "model_forward_time": 0.025450944900512695, "step": 21418 }, { "epoch": 3.26812744140625e-05, "step": 21418, "training_step_time": 0.17090511322021484 }, { "epoch": 3.268280029296875e-05, "model_forward_time": 0.02460646629333496, "step": 21419 }, { "epoch": 3.268280029296875e-05, "step": 21419, "training_step_time": 0.18145108222961426 }, { "epoch": 3.2684326171875e-05, "grad_norm": 0.2113717496395111, "learning_rate": 2.0744666357916925e-05, "loss": 0.0089, "step": 21420 }, { "epoch": 3.2684326171875e-05, "model_forward_time": 0.024824142456054688, "step": 21420 }, { "epoch": 3.2684326171875e-05, "step": 21420, "training_step_time": 0.16096091270446777 }, { "epoch": 3.268585205078125e-05, "model_forward_time": 0.024448394775390625, "step": 21421 }, { "epoch": 3.268585205078125e-05, "step": 21421, "training_step_time": 0.15123939514160156 }, { "epoch": 3.26873779296875e-05, "model_forward_time": 0.02460789680480957, "step": 21422 }, { "epoch": 3.26873779296875e-05, "step": 21422, "training_step_time": 0.13439440727233887 }, { "epoch": 3.268890380859375e-05, "model_forward_time": 0.02446150779724121, "step": 21423 }, { "epoch": 3.268890380859375e-05, "step": 21423, "training_step_time": 0.1273021697998047 }, { "epoch": 3.26904296875e-05, "model_forward_time": 0.025025367736816406, "step": 21424 }, { "epoch": 3.26904296875e-05, "step": 21424, "training_step_time": 0.12816119194030762 }, { "epoch": 3.269195556640625e-05, "model_forward_time": 0.025610923767089844, "step": 21425 }, { "epoch": 3.269195556640625e-05, "step": 21425, "training_step_time": 0.12122726440429688 }, { "epoch": 3.26934814453125e-05, "model_forward_time": 0.024606943130493164, "step": 21426 }, { "epoch": 3.26934814453125e-05, "step": 21426, "training_step_time": 0.11835193634033203 }, { "epoch": 3.269500732421875e-05, "model_forward_time": 0.025483369827270508, "step": 21427 }, { "epoch": 3.269500732421875e-05, "step": 21427, "training_step_time": 0.10801959037780762 }, { "epoch": 3.2696533203125e-05, "model_forward_time": 0.025312423706054688, "step": 21428 }, { "epoch": 3.2696533203125e-05, "step": 21428, "training_step_time": 0.1354517936706543 }, { "epoch": 3.269805908203125e-05, "model_forward_time": 0.02529454231262207, "step": 21429 }, { "epoch": 3.269805908203125e-05, "step": 21429, "training_step_time": 0.11032629013061523 }, { "epoch": 3.26995849609375e-05, "grad_norm": 0.1630994975566864, "learning_rate": 2.0699987746710554e-05, "loss": 0.0058, "step": 21430 }, { "epoch": 3.26995849609375e-05, "model_forward_time": 0.0262143611907959, "step": 21430 }, { "epoch": 3.26995849609375e-05, "step": 21430, "training_step_time": 0.13472771644592285 }, { "epoch": 3.270111083984375e-05, "model_forward_time": 0.025525808334350586, "step": 21431 }, { "epoch": 3.270111083984375e-05, "step": 21431, "training_step_time": 0.15726399421691895 }, { "epoch": 3.270263671875e-05, "model_forward_time": 0.02477550506591797, "step": 21432 }, { "epoch": 3.270263671875e-05, "step": 21432, "training_step_time": 0.10753846168518066 }, { "epoch": 3.270416259765625e-05, "model_forward_time": 0.02564263343811035, "step": 21433 }, { "epoch": 3.270416259765625e-05, "step": 21433, "training_step_time": 0.11534905433654785 }, { "epoch": 3.27056884765625e-05, "model_forward_time": 0.027423858642578125, "step": 21434 }, { "epoch": 3.27056884765625e-05, "step": 21434, "training_step_time": 0.12099885940551758 }, { "epoch": 3.270721435546875e-05, "model_forward_time": 0.0255887508392334, "step": 21435 }, { "epoch": 3.270721435546875e-05, "step": 21435, "training_step_time": 0.10765242576599121 }, { "epoch": 3.2708740234375e-05, "model_forward_time": 0.025589466094970703, "step": 21436 }, { "epoch": 3.2708740234375e-05, "step": 21436, "training_step_time": 0.10562705993652344 }, { "epoch": 3.271026611328125e-05, "model_forward_time": 0.025283098220825195, "step": 21437 }, { "epoch": 3.271026611328125e-05, "step": 21437, "training_step_time": 0.10678625106811523 }, { "epoch": 3.27117919921875e-05, "model_forward_time": 0.02572464942932129, "step": 21438 }, { "epoch": 3.27117919921875e-05, "step": 21438, "training_step_time": 0.10535931587219238 }, { "epoch": 3.271331787109375e-05, "model_forward_time": 0.025243520736694336, "step": 21439 }, { "epoch": 3.271331787109375e-05, "step": 21439, "training_step_time": 0.10544896125793457 }, { "epoch": 3.271484375e-05, "grad_norm": 0.1422598659992218, "learning_rate": 2.065534473778186e-05, "loss": 0.0074, "step": 21440 }, { "epoch": 3.271484375e-05, "model_forward_time": 0.02503371238708496, "step": 21440 }, { "epoch": 3.271484375e-05, "step": 21440, "training_step_time": 0.10764217376708984 }, { "epoch": 3.271636962890625e-05, "model_forward_time": 0.025027990341186523, "step": 21441 }, { "epoch": 3.271636962890625e-05, "step": 21441, "training_step_time": 0.12452244758605957 }, { "epoch": 3.27178955078125e-05, "model_forward_time": 0.025783777236938477, "step": 21442 }, { "epoch": 3.27178955078125e-05, "step": 21442, "training_step_time": 0.14392852783203125 }, { "epoch": 3.271942138671875e-05, "model_forward_time": 0.02419590950012207, "step": 21443 }, { "epoch": 3.271942138671875e-05, "step": 21443, "training_step_time": 0.10866641998291016 }, { "epoch": 3.2720947265625e-05, "model_forward_time": 0.02461099624633789, "step": 21444 }, { "epoch": 3.2720947265625e-05, "step": 21444, "training_step_time": 0.14781641960144043 }, { "epoch": 3.272247314453125e-05, "model_forward_time": 0.02529597282409668, "step": 21445 }, { "epoch": 3.272247314453125e-05, "step": 21445, "training_step_time": 0.15373706817626953 }, { "epoch": 3.27239990234375e-05, "model_forward_time": 0.02459120750427246, "step": 21446 }, { "epoch": 3.27239990234375e-05, "step": 21446, "training_step_time": 0.1206514835357666 }, { "epoch": 3.272552490234375e-05, "model_forward_time": 0.024644136428833008, "step": 21447 }, { "epoch": 3.272552490234375e-05, "step": 21447, "training_step_time": 0.12737655639648438 }, { "epoch": 3.272705078125e-05, "model_forward_time": 0.025829076766967773, "step": 21448 }, { "epoch": 3.272705078125e-05, "step": 21448, "training_step_time": 0.11434578895568848 }, { "epoch": 3.272857666015625e-05, "model_forward_time": 0.025661230087280273, "step": 21449 }, { "epoch": 3.272857666015625e-05, "step": 21449, "training_step_time": 0.13219356536865234 }, { "epoch": 3.27301025390625e-05, "grad_norm": 0.2459985464811325, "learning_rate": 2.061073738537635e-05, "loss": 0.0158, "step": 21450 }, { "epoch": 3.27301025390625e-05, "model_forward_time": 0.024809837341308594, "step": 21450 }, { "epoch": 3.27301025390625e-05, "step": 21450, "training_step_time": 0.19077396392822266 }, { "epoch": 3.273162841796875e-05, "model_forward_time": 0.024216175079345703, "step": 21451 }, { "epoch": 3.273162841796875e-05, "step": 21451, "training_step_time": 0.16658997535705566 }, { "epoch": 3.2733154296875e-05, "model_forward_time": 0.025172948837280273, "step": 21452 }, { "epoch": 3.2733154296875e-05, "step": 21452, "training_step_time": 0.20341134071350098 }, { "epoch": 3.273468017578125e-05, "model_forward_time": 0.026114940643310547, "step": 21453 }, { "epoch": 3.273468017578125e-05, "step": 21453, "training_step_time": 0.11648964881896973 }, { "epoch": 3.27362060546875e-05, "model_forward_time": 0.025034189224243164, "step": 21454 }, { "epoch": 3.27362060546875e-05, "step": 21454, "training_step_time": 0.11602163314819336 }, { "epoch": 3.273773193359375e-05, "model_forward_time": 0.025682449340820312, "step": 21455 }, { "epoch": 3.273773193359375e-05, "step": 21455, "training_step_time": 0.10506415367126465 }, { "epoch": 3.27392578125e-05, "model_forward_time": 0.025560855865478516, "step": 21456 }, { "epoch": 3.27392578125e-05, "step": 21456, "training_step_time": 0.11066937446594238 }, { "epoch": 3.274078369140625e-05, "model_forward_time": 0.025326967239379883, "step": 21457 }, { "epoch": 3.274078369140625e-05, "step": 21457, "training_step_time": 0.10399842262268066 }, { "epoch": 3.27423095703125e-05, "model_forward_time": 0.0252687931060791, "step": 21458 }, { "epoch": 3.27423095703125e-05, "step": 21458, "training_step_time": 0.1062307357788086 }, { "epoch": 3.274383544921875e-05, "model_forward_time": 0.024988174438476562, "step": 21459 }, { "epoch": 3.274383544921875e-05, "step": 21459, "training_step_time": 0.10452914237976074 }, { "epoch": 3.2745361328125e-05, "grad_norm": 0.22268171608448029, "learning_rate": 2.056616574369612e-05, "loss": 0.0065, "step": 21460 }, { "epoch": 3.2745361328125e-05, "model_forward_time": 0.025077342987060547, "step": 21460 }, { "epoch": 3.2745361328125e-05, "step": 21460, "training_step_time": 0.10692977905273438 }, { "epoch": 3.274688720703125e-05, "model_forward_time": 0.025075912475585938, "step": 21461 }, { "epoch": 3.274688720703125e-05, "step": 21461, "training_step_time": 0.10890984535217285 }, { "epoch": 3.27484130859375e-05, "model_forward_time": 0.025015830993652344, "step": 21462 }, { "epoch": 3.27484130859375e-05, "step": 21462, "training_step_time": 0.10702800750732422 }, { "epoch": 3.274993896484375e-05, "model_forward_time": 0.02605724334716797, "step": 21463 }, { "epoch": 3.274993896484375e-05, "step": 21463, "training_step_time": 0.11018133163452148 }, { "epoch": 3.275146484375e-05, "model_forward_time": 0.025979995727539062, "step": 21464 }, { "epoch": 3.275146484375e-05, "step": 21464, "training_step_time": 0.10545659065246582 }, { "epoch": 3.275299072265625e-05, "model_forward_time": 0.02634453773498535, "step": 21465 }, { "epoch": 3.275299072265625e-05, "step": 21465, "training_step_time": 0.10638213157653809 }, { "epoch": 3.27545166015625e-05, "model_forward_time": 0.026576995849609375, "step": 21466 }, { "epoch": 3.27545166015625e-05, "step": 21466, "training_step_time": 0.10454201698303223 }, { "epoch": 3.275604248046875e-05, "model_forward_time": 0.02646017074584961, "step": 21467 }, { "epoch": 3.275604248046875e-05, "step": 21467, "training_step_time": 0.10399127006530762 }, { "epoch": 3.2757568359375e-05, "model_forward_time": 0.024941205978393555, "step": 21468 }, { "epoch": 3.2757568359375e-05, "step": 21468, "training_step_time": 0.10338759422302246 }, { "epoch": 3.275909423828125e-05, "model_forward_time": 0.0251467227935791, "step": 21469 }, { "epoch": 3.275909423828125e-05, "step": 21469, "training_step_time": 0.10410785675048828 }, { "epoch": 3.27606201171875e-05, "grad_norm": 0.4208851754665375, "learning_rate": 2.0521629866899966e-05, "loss": 0.0148, "step": 21470 }, { "epoch": 3.27606201171875e-05, "model_forward_time": 0.026107072830200195, "step": 21470 }, { "epoch": 3.27606201171875e-05, "step": 21470, "training_step_time": 0.10523295402526855 }, { "epoch": 3.276214599609375e-05, "model_forward_time": 0.02570939064025879, "step": 21471 }, { "epoch": 3.276214599609375e-05, "step": 21471, "training_step_time": 0.1092078685760498 }, { "epoch": 3.2763671875e-05, "model_forward_time": 0.02927708625793457, "step": 21472 }, { "epoch": 3.2763671875e-05, "step": 21472, "training_step_time": 0.11399626731872559 }, { "epoch": 3.276519775390625e-05, "model_forward_time": 0.02492213249206543, "step": 21473 }, { "epoch": 3.276519775390625e-05, "step": 21473, "training_step_time": 0.11025333404541016 }, { "epoch": 3.27667236328125e-05, "model_forward_time": 0.0254666805267334, "step": 21474 }, { "epoch": 3.27667236328125e-05, "step": 21474, "training_step_time": 0.16224908828735352 }, { "epoch": 3.276824951171875e-05, "model_forward_time": 0.025496959686279297, "step": 21475 }, { "epoch": 3.276824951171875e-05, "step": 21475, "training_step_time": 0.12044382095336914 }, { "epoch": 3.2769775390625e-05, "model_forward_time": 0.0246427059173584, "step": 21476 }, { "epoch": 3.2769775390625e-05, "step": 21476, "training_step_time": 0.12644195556640625 }, { "epoch": 3.277130126953125e-05, "model_forward_time": 0.026027917861938477, "step": 21477 }, { "epoch": 3.277130126953125e-05, "step": 21477, "training_step_time": 0.14521265029907227 }, { "epoch": 3.27728271484375e-05, "model_forward_time": 0.02507162094116211, "step": 21478 }, { "epoch": 3.27728271484375e-05, "step": 21478, "training_step_time": 0.12110519409179688 }, { "epoch": 3.277435302734375e-05, "model_forward_time": 0.025676727294921875, "step": 21479 }, { "epoch": 3.277435302734375e-05, "step": 21479, "training_step_time": 0.1261157989501953 }, { "epoch": 3.277587890625e-05, "grad_norm": 0.19327159225940704, "learning_rate": 2.0477129809103147e-05, "loss": 0.0061, "step": 21480 }, { "epoch": 3.277587890625e-05, "model_forward_time": 0.025008201599121094, "step": 21480 }, { "epoch": 3.277587890625e-05, "step": 21480, "training_step_time": 0.11131691932678223 }, { "epoch": 3.277740478515625e-05, "model_forward_time": 0.025173425674438477, "step": 21481 }, { "epoch": 3.277740478515625e-05, "step": 21481, "training_step_time": 0.10843348503112793 }, { "epoch": 3.27789306640625e-05, "model_forward_time": 0.024735450744628906, "step": 21482 }, { "epoch": 3.27789306640625e-05, "step": 21482, "training_step_time": 0.11014866828918457 }, { "epoch": 3.278045654296875e-05, "model_forward_time": 0.025592565536499023, "step": 21483 }, { "epoch": 3.278045654296875e-05, "step": 21483, "training_step_time": 0.1072089672088623 }, { "epoch": 3.2781982421875e-05, "model_forward_time": 0.024317502975463867, "step": 21484 }, { "epoch": 3.2781982421875e-05, "step": 21484, "training_step_time": 0.10916829109191895 }, { "epoch": 3.278350830078125e-05, "model_forward_time": 0.024244070053100586, "step": 21485 }, { "epoch": 3.278350830078125e-05, "step": 21485, "training_step_time": 0.11597180366516113 }, { "epoch": 3.27850341796875e-05, "model_forward_time": 0.025903940200805664, "step": 21486 }, { "epoch": 3.27850341796875e-05, "step": 21486, "training_step_time": 0.11055207252502441 }, { "epoch": 3.278656005859375e-05, "model_forward_time": 0.026667118072509766, "step": 21487 }, { "epoch": 3.278656005859375e-05, "step": 21487, "training_step_time": 0.11065983772277832 }, { "epoch": 3.27880859375e-05, "model_forward_time": 0.025974035263061523, "step": 21488 }, { "epoch": 3.27880859375e-05, "step": 21488, "training_step_time": 0.10856246948242188 }, { "epoch": 3.278961181640625e-05, "model_forward_time": 0.025088787078857422, "step": 21489 }, { "epoch": 3.278961181640625e-05, "step": 21489, "training_step_time": 0.10731220245361328 }, { "epoch": 3.27911376953125e-05, "grad_norm": 0.3131754398345947, "learning_rate": 2.0432665624377434e-05, "loss": 0.0089, "step": 21490 }, { "epoch": 3.27911376953125e-05, "model_forward_time": 0.025774717330932617, "step": 21490 }, { "epoch": 3.27911376953125e-05, "step": 21490, "training_step_time": 0.10640788078308105 }, { "epoch": 3.279266357421875e-05, "model_forward_time": 0.024432897567749023, "step": 21491 }, { "epoch": 3.279266357421875e-05, "step": 21491, "training_step_time": 0.1444835662841797 }, { "epoch": 3.2794189453125e-05, "model_forward_time": 0.024487018585205078, "step": 21492 }, { "epoch": 3.2794189453125e-05, "step": 21492, "training_step_time": 0.15711474418640137 }, { "epoch": 3.279571533203125e-05, "model_forward_time": 0.02386331558227539, "step": 21493 }, { "epoch": 3.279571533203125e-05, "step": 21493, "training_step_time": 0.11394095420837402 }, { "epoch": 3.27972412109375e-05, "model_forward_time": 0.024614810943603516, "step": 21494 }, { "epoch": 3.27972412109375e-05, "step": 21494, "training_step_time": 0.12711286544799805 }, { "epoch": 3.279876708984375e-05, "model_forward_time": 0.025324106216430664, "step": 21495 }, { "epoch": 3.279876708984375e-05, "step": 21495, "training_step_time": 0.20073747634887695 }, { "epoch": 3.280029296875e-05, "model_forward_time": 0.024139881134033203, "step": 21496 }, { "epoch": 3.280029296875e-05, "step": 21496, "training_step_time": 0.17986392974853516 }, { "epoch": 3.280181884765625e-05, "model_forward_time": 0.024300098419189453, "step": 21497 }, { "epoch": 3.280181884765625e-05, "step": 21497, "training_step_time": 0.19476556777954102 }, { "epoch": 3.28033447265625e-05, "model_forward_time": 0.02441716194152832, "step": 21498 }, { "epoch": 3.28033447265625e-05, "step": 21498, "training_step_time": 0.15674662590026855 }, { "epoch": 3.280487060546875e-05, "model_forward_time": 0.023775577545166016, "step": 21499 }, { "epoch": 3.280487060546875e-05, "step": 21499, "training_step_time": 0.17407631874084473 }, { "epoch": 3.2806396484375e-05, "grad_norm": 0.15037991106510162, "learning_rate": 2.0388237366751006e-05, "loss": 0.0075, "step": 21500 }, { "epoch": 3.2806396484375e-05, "model_forward_time": 0.02744579315185547, "step": 21500 }, { "epoch": 3.2806396484375e-05, "step": 21500, "training_step_time": 0.18578815460205078 }, { "epoch": 3.280792236328125e-05, "model_forward_time": 0.02402353286743164, "step": 21501 }, { "epoch": 3.280792236328125e-05, "step": 21501, "training_step_time": 0.10668253898620605 }, { "epoch": 3.28094482421875e-05, "model_forward_time": 0.024646759033203125, "step": 21502 }, { "epoch": 3.28094482421875e-05, "step": 21502, "training_step_time": 0.10440564155578613 }, { "epoch": 3.281097412109375e-05, "model_forward_time": 0.026366710662841797, "step": 21503 }, { "epoch": 3.281097412109375e-05, "step": 21503, "training_step_time": 0.10871315002441406 }, { "epoch": 3.28125e-05, "model_forward_time": 0.025136947631835938, "step": 21504 }, { "epoch": 3.28125e-05, "step": 21504, "training_step_time": 0.10741662979125977 }, { "epoch": 3.281402587890625e-05, "model_forward_time": 0.025295257568359375, "step": 21505 }, { "epoch": 3.281402587890625e-05, "step": 21505, "training_step_time": 0.10708189010620117 }, { "epoch": 3.28155517578125e-05, "model_forward_time": 0.025191307067871094, "step": 21506 }, { "epoch": 3.28155517578125e-05, "step": 21506, "training_step_time": 0.10668087005615234 }, { "epoch": 3.281707763671875e-05, "model_forward_time": 0.025470972061157227, "step": 21507 }, { "epoch": 3.281707763671875e-05, "step": 21507, "training_step_time": 0.11404705047607422 }, { "epoch": 3.2818603515625e-05, "model_forward_time": 0.025552749633789062, "step": 21508 }, { "epoch": 3.2818603515625e-05, "step": 21508, "training_step_time": 0.10682559013366699 }, { "epoch": 3.282012939453125e-05, "model_forward_time": 0.02451491355895996, "step": 21509 }, { "epoch": 3.282012939453125e-05, "step": 21509, "training_step_time": 0.1108553409576416 }, { "epoch": 3.28216552734375e-05, "grad_norm": 0.1571519672870636, "learning_rate": 2.0343845090208368e-05, "loss": 0.0104, "step": 21510 }, { "epoch": 3.28216552734375e-05, "model_forward_time": 0.024941682815551758, "step": 21510 }, { "epoch": 3.28216552734375e-05, "step": 21510, "training_step_time": 0.10613393783569336 }, { "epoch": 3.282318115234375e-05, "model_forward_time": 0.025030851364135742, "step": 21511 }, { "epoch": 3.282318115234375e-05, "step": 21511, "training_step_time": 0.10718464851379395 }, { "epoch": 3.282470703125e-05, "model_forward_time": 0.025182723999023438, "step": 21512 }, { "epoch": 3.282470703125e-05, "step": 21512, "training_step_time": 0.10704946517944336 }, { "epoch": 3.282623291015625e-05, "model_forward_time": 0.024796009063720703, "step": 21513 }, { "epoch": 3.282623291015625e-05, "step": 21513, "training_step_time": 0.1072993278503418 }, { "epoch": 3.28277587890625e-05, "model_forward_time": 0.024898767471313477, "step": 21514 }, { "epoch": 3.28277587890625e-05, "step": 21514, "training_step_time": 0.10870814323425293 }, { "epoch": 3.282928466796875e-05, "model_forward_time": 0.02528858184814453, "step": 21515 }, { "epoch": 3.282928466796875e-05, "step": 21515, "training_step_time": 0.10987639427185059 }, { "epoch": 3.2830810546875e-05, "model_forward_time": 0.02509593963623047, "step": 21516 }, { "epoch": 3.2830810546875e-05, "step": 21516, "training_step_time": 0.10849165916442871 }, { "epoch": 3.283233642578125e-05, "model_forward_time": 0.025395870208740234, "step": 21517 }, { "epoch": 3.283233642578125e-05, "step": 21517, "training_step_time": 0.15361380577087402 }, { "epoch": 3.28338623046875e-05, "model_forward_time": 0.025089025497436523, "step": 21518 }, { "epoch": 3.28338623046875e-05, "step": 21518, "training_step_time": 0.16307711601257324 }, { "epoch": 3.283538818359375e-05, "model_forward_time": 0.024187088012695312, "step": 21519 }, { "epoch": 3.283538818359375e-05, "step": 21519, "training_step_time": 0.1461658477783203 }, { "epoch": 3.28369140625e-05, "grad_norm": 0.13632987439632416, "learning_rate": 2.0299488848690355e-05, "loss": 0.0064, "step": 21520 }, { "epoch": 3.28369140625e-05, "model_forward_time": 0.024236440658569336, "step": 21520 }, { "epoch": 3.28369140625e-05, "step": 21520, "training_step_time": 0.21790504455566406 }, { "epoch": 3.283843994140625e-05, "model_forward_time": 0.02570319175720215, "step": 21521 }, { "epoch": 3.283843994140625e-05, "step": 21521, "training_step_time": 0.1640787124633789 }, { "epoch": 3.28399658203125e-05, "model_forward_time": 0.024812698364257812, "step": 21522 }, { "epoch": 3.28399658203125e-05, "step": 21522, "training_step_time": 0.22839117050170898 }, { "epoch": 3.284149169921875e-05, "model_forward_time": 0.024095535278320312, "step": 21523 }, { "epoch": 3.284149169921875e-05, "step": 21523, "training_step_time": 0.11899065971374512 }, { "epoch": 3.2843017578125e-05, "model_forward_time": 0.025454282760620117, "step": 21524 }, { "epoch": 3.2843017578125e-05, "step": 21524, "training_step_time": 0.11448097229003906 }, { "epoch": 3.284454345703125e-05, "model_forward_time": 0.02493739128112793, "step": 21525 }, { "epoch": 3.284454345703125e-05, "step": 21525, "training_step_time": 0.11047554016113281 }, { "epoch": 3.28460693359375e-05, "model_forward_time": 0.024954557418823242, "step": 21526 }, { "epoch": 3.28460693359375e-05, "step": 21526, "training_step_time": 0.10754919052124023 }, { "epoch": 3.284759521484375e-05, "model_forward_time": 0.02541208267211914, "step": 21527 }, { "epoch": 3.284759521484375e-05, "step": 21527, "training_step_time": 0.10722613334655762 }, { "epoch": 3.284912109375e-05, "model_forward_time": 0.02524256706237793, "step": 21528 }, { "epoch": 3.284912109375e-05, "step": 21528, "training_step_time": 0.10635018348693848 }, { "epoch": 3.285064697265625e-05, "model_forward_time": 0.0256345272064209, "step": 21529 }, { "epoch": 3.285064697265625e-05, "step": 21529, "training_step_time": 0.10648846626281738 }, { "epoch": 3.28521728515625e-05, "grad_norm": 0.1119745522737503, "learning_rate": 2.0255168696093968e-05, "loss": 0.0051, "step": 21530 }, { "epoch": 3.28521728515625e-05, "model_forward_time": 0.025252342224121094, "step": 21530 }, { "epoch": 3.28521728515625e-05, "step": 21530, "training_step_time": 0.10613727569580078 }, { "epoch": 3.285369873046875e-05, "model_forward_time": 0.025050640106201172, "step": 21531 }, { "epoch": 3.285369873046875e-05, "step": 21531, "training_step_time": 0.10724830627441406 }, { "epoch": 3.2855224609375e-05, "model_forward_time": 0.02527904510498047, "step": 21532 }, { "epoch": 3.2855224609375e-05, "step": 21532, "training_step_time": 0.10765910148620605 }, { "epoch": 3.285675048828125e-05, "model_forward_time": 0.025311946868896484, "step": 21533 }, { "epoch": 3.285675048828125e-05, "step": 21533, "training_step_time": 0.10719108581542969 }, { "epoch": 3.28582763671875e-05, "model_forward_time": 0.024045228958129883, "step": 21534 }, { "epoch": 3.28582763671875e-05, "step": 21534, "training_step_time": 0.14964890480041504 }, { "epoch": 3.285980224609375e-05, "model_forward_time": 0.025176048278808594, "step": 21535 }, { "epoch": 3.285980224609375e-05, "step": 21535, "training_step_time": 0.15727472305297852 }, { "epoch": 3.2861328125e-05, "model_forward_time": 0.025552749633789062, "step": 21536 }, { "epoch": 3.2861328125e-05, "step": 21536, "training_step_time": 0.11104750633239746 }, { "epoch": 3.286285400390625e-05, "model_forward_time": 0.027614593505859375, "step": 21537 }, { "epoch": 3.286285400390625e-05, "step": 21537, "training_step_time": 0.132310152053833 }, { "epoch": 3.28643798828125e-05, "model_forward_time": 0.02612757682800293, "step": 21538 }, { "epoch": 3.28643798828125e-05, "step": 21538, "training_step_time": 0.19652366638183594 }, { "epoch": 3.286590576171875e-05, "model_forward_time": 0.024497032165527344, "step": 21539 }, { "epoch": 3.286590576171875e-05, "step": 21539, "training_step_time": 0.13765621185302734 }, { "epoch": 3.2867431640625e-05, "grad_norm": 0.1564493030309677, "learning_rate": 2.0210884686272368e-05, "loss": 0.0067, "step": 21540 }, { "epoch": 3.2867431640625e-05, "model_forward_time": 0.0253298282623291, "step": 21540 }, { "epoch": 3.2867431640625e-05, "step": 21540, "training_step_time": 0.11155390739440918 }, { "epoch": 3.286895751953125e-05, "model_forward_time": 0.025160551071166992, "step": 21541 }, { "epoch": 3.286895751953125e-05, "step": 21541, "training_step_time": 0.10631823539733887 }, { "epoch": 3.28704833984375e-05, "model_forward_time": 0.025925159454345703, "step": 21542 }, { "epoch": 3.28704833984375e-05, "step": 21542, "training_step_time": 0.11331057548522949 }, { "epoch": 3.287200927734375e-05, "model_forward_time": 0.029433250427246094, "step": 21543 }, { "epoch": 3.287200927734375e-05, "step": 21543, "training_step_time": 0.1258692741394043 }, { "epoch": 3.287353515625e-05, "model_forward_time": 0.025684118270874023, "step": 21544 }, { "epoch": 3.287353515625e-05, "step": 21544, "training_step_time": 0.17607998847961426 }, { "epoch": 3.287506103515625e-05, "model_forward_time": 0.02514934539794922, "step": 21545 }, { "epoch": 3.287506103515625e-05, "step": 21545, "training_step_time": 0.1794416904449463 }, { "epoch": 3.28765869140625e-05, "model_forward_time": 0.024749040603637695, "step": 21546 }, { "epoch": 3.28765869140625e-05, "step": 21546, "training_step_time": 0.10444402694702148 }, { "epoch": 3.287811279296875e-05, "model_forward_time": 0.024876117706298828, "step": 21547 }, { "epoch": 3.287811279296875e-05, "step": 21547, "training_step_time": 0.10263228416442871 }, { "epoch": 3.2879638671875e-05, "model_forward_time": 0.02537703514099121, "step": 21548 }, { "epoch": 3.2879638671875e-05, "step": 21548, "training_step_time": 0.10442209243774414 }, { "epoch": 3.288116455078125e-05, "model_forward_time": 0.02559947967529297, "step": 21549 }, { "epoch": 3.288116455078125e-05, "step": 21549, "training_step_time": 0.1045846939086914 }, { "epoch": 3.28826904296875e-05, "grad_norm": 0.2632162868976593, "learning_rate": 2.0166636873034805e-05, "loss": 0.0069, "step": 21550 }, { "epoch": 3.28826904296875e-05, "model_forward_time": 0.024941682815551758, "step": 21550 }, { "epoch": 3.28826904296875e-05, "step": 21550, "training_step_time": 0.10779309272766113 }, { "epoch": 3.288421630859375e-05, "model_forward_time": 0.0252687931060791, "step": 21551 }, { "epoch": 3.288421630859375e-05, "step": 21551, "training_step_time": 0.11011409759521484 }, { "epoch": 3.28857421875e-05, "model_forward_time": 0.02561020851135254, "step": 21552 }, { "epoch": 3.28857421875e-05, "step": 21552, "training_step_time": 0.12099838256835938 }, { "epoch": 3.288726806640625e-05, "model_forward_time": 0.025429248809814453, "step": 21553 }, { "epoch": 3.288726806640625e-05, "step": 21553, "training_step_time": 0.12843966484069824 }, { "epoch": 3.28887939453125e-05, "model_forward_time": 0.025462865829467773, "step": 21554 }, { "epoch": 3.28887939453125e-05, "step": 21554, "training_step_time": 0.13503742218017578 }, { "epoch": 3.289031982421875e-05, "model_forward_time": 0.02446126937866211, "step": 21555 }, { "epoch": 3.289031982421875e-05, "step": 21555, "training_step_time": 0.12807846069335938 }, { "epoch": 3.2891845703125e-05, "model_forward_time": 0.02577686309814453, "step": 21556 }, { "epoch": 3.2891845703125e-05, "step": 21556, "training_step_time": 0.12390518188476562 }, { "epoch": 3.289337158203125e-05, "model_forward_time": 0.025543212890625, "step": 21557 }, { "epoch": 3.289337158203125e-05, "step": 21557, "training_step_time": 0.11790609359741211 }, { "epoch": 3.28948974609375e-05, "model_forward_time": 0.026508808135986328, "step": 21558 }, { "epoch": 3.28948974609375e-05, "step": 21558, "training_step_time": 0.11689615249633789 }, { "epoch": 3.289642333984375e-05, "model_forward_time": 0.025481462478637695, "step": 21559 }, { "epoch": 3.289642333984375e-05, "step": 21559, "training_step_time": 0.11307072639465332 }, { "epoch": 3.289794921875e-05, "grad_norm": 0.10203037410974503, "learning_rate": 2.0122425310146542e-05, "loss": 0.0062, "step": 21560 }, { "epoch": 3.289794921875e-05, "model_forward_time": 0.025548934936523438, "step": 21560 }, { "epoch": 3.289794921875e-05, "step": 21560, "training_step_time": 0.11379742622375488 }, { "epoch": 3.289947509765625e-05, "model_forward_time": 0.02515411376953125, "step": 21561 }, { "epoch": 3.289947509765625e-05, "step": 21561, "training_step_time": 0.10952639579772949 }, { "epoch": 3.29010009765625e-05, "model_forward_time": 0.025330543518066406, "step": 21562 }, { "epoch": 3.29010009765625e-05, "step": 21562, "training_step_time": 0.10773968696594238 }, { "epoch": 3.290252685546875e-05, "model_forward_time": 0.025376319885253906, "step": 21563 }, { "epoch": 3.290252685546875e-05, "step": 21563, "training_step_time": 0.17806482315063477 }, { "epoch": 3.2904052734375e-05, "model_forward_time": 0.02465987205505371, "step": 21564 }, { "epoch": 3.2904052734375e-05, "step": 21564, "training_step_time": 0.11498379707336426 }, { "epoch": 3.290557861328125e-05, "model_forward_time": 0.025045156478881836, "step": 21565 }, { "epoch": 3.290557861328125e-05, "step": 21565, "training_step_time": 0.13141131401062012 }, { "epoch": 3.29071044921875e-05, "model_forward_time": 0.02486419677734375, "step": 21566 }, { "epoch": 3.29071044921875e-05, "step": 21566, "training_step_time": 0.15852618217468262 }, { "epoch": 3.290863037109375e-05, "model_forward_time": 0.023921966552734375, "step": 21567 }, { "epoch": 3.290863037109375e-05, "step": 21567, "training_step_time": 0.22017359733581543 }, { "epoch": 3.291015625e-05, "model_forward_time": 0.024416208267211914, "step": 21568 }, { "epoch": 3.291015625e-05, "step": 21568, "training_step_time": 0.11817550659179688 }, { "epoch": 3.291168212890625e-05, "model_forward_time": 0.02428603172302246, "step": 21569 }, { "epoch": 3.291168212890625e-05, "step": 21569, "training_step_time": 0.10556983947753906 }, { "epoch": 3.29132080078125e-05, "grad_norm": 0.15802134573459625, "learning_rate": 2.0078250051328784e-05, "loss": 0.0063, "step": 21570 }, { "epoch": 3.29132080078125e-05, "model_forward_time": 0.024266958236694336, "step": 21570 }, { "epoch": 3.29132080078125e-05, "step": 21570, "training_step_time": 0.10771489143371582 }, { "epoch": 3.291473388671875e-05, "model_forward_time": 0.02455925941467285, "step": 21571 }, { "epoch": 3.291473388671875e-05, "step": 21571, "training_step_time": 0.10815548896789551 }, { "epoch": 3.2916259765625e-05, "model_forward_time": 0.025177955627441406, "step": 21572 }, { "epoch": 3.2916259765625e-05, "step": 21572, "training_step_time": 0.10911297798156738 }, { "epoch": 3.291778564453125e-05, "model_forward_time": 0.025167226791381836, "step": 21573 }, { "epoch": 3.291778564453125e-05, "step": 21573, "training_step_time": 0.10740137100219727 }, { "epoch": 3.29193115234375e-05, "model_forward_time": 0.025327205657958984, "step": 21574 }, { "epoch": 3.29193115234375e-05, "step": 21574, "training_step_time": 0.10699105262756348 }, { "epoch": 3.292083740234375e-05, "model_forward_time": 0.02555680274963379, "step": 21575 }, { "epoch": 3.292083740234375e-05, "step": 21575, "training_step_time": 0.10927867889404297 }, { "epoch": 3.292236328125e-05, "model_forward_time": 0.02524566650390625, "step": 21576 }, { "epoch": 3.292236328125e-05, "step": 21576, "training_step_time": 0.10761380195617676 }, { "epoch": 3.292388916015625e-05, "model_forward_time": 0.025261640548706055, "step": 21577 }, { "epoch": 3.292388916015625e-05, "step": 21577, "training_step_time": 0.11361861228942871 }, { "epoch": 3.29254150390625e-05, "model_forward_time": 0.025734663009643555, "step": 21578 }, { "epoch": 3.29254150390625e-05, "step": 21578, "training_step_time": 0.10715246200561523 }, { "epoch": 3.292694091796875e-05, "model_forward_time": 0.025177001953125, "step": 21579 }, { "epoch": 3.292694091796875e-05, "step": 21579, "training_step_time": 0.1461353302001953 }, { "epoch": 3.2928466796875e-05, "grad_norm": 0.10607799142599106, "learning_rate": 2.0034111150258666e-05, "loss": 0.005, "step": 21580 }, { "epoch": 3.2928466796875e-05, "model_forward_time": 0.02518153190612793, "step": 21580 }, { "epoch": 3.2928466796875e-05, "step": 21580, "training_step_time": 0.1569075584411621 }, { "epoch": 3.292999267578125e-05, "model_forward_time": 0.02855396270751953, "step": 21581 }, { "epoch": 3.292999267578125e-05, "step": 21581, "training_step_time": 0.11198163032531738 }, { "epoch": 3.29315185546875e-05, "model_forward_time": 0.025858402252197266, "step": 21582 }, { "epoch": 3.29315185546875e-05, "step": 21582, "training_step_time": 0.13446044921875 }, { "epoch": 3.293304443359375e-05, "model_forward_time": 0.02644944190979004, "step": 21583 }, { "epoch": 3.293304443359375e-05, "step": 21583, "training_step_time": 0.20357203483581543 }, { "epoch": 3.29345703125e-05, "model_forward_time": 0.025226831436157227, "step": 21584 }, { "epoch": 3.29345703125e-05, "step": 21584, "training_step_time": 0.13934803009033203 }, { "epoch": 3.293609619140625e-05, "model_forward_time": 0.023853778839111328, "step": 21585 }, { "epoch": 3.293609619140625e-05, "step": 21585, "training_step_time": 0.19888639450073242 }, { "epoch": 3.29376220703125e-05, "model_forward_time": 0.024805545806884766, "step": 21586 }, { "epoch": 3.29376220703125e-05, "step": 21586, "training_step_time": 0.16363954544067383 }, { "epoch": 3.293914794921875e-05, "model_forward_time": 0.02881765365600586, "step": 21587 }, { "epoch": 3.293914794921875e-05, "step": 21587, "training_step_time": 0.17384982109069824 }, { "epoch": 3.2940673828125e-05, "model_forward_time": 0.02469921112060547, "step": 21588 }, { "epoch": 3.2940673828125e-05, "step": 21588, "training_step_time": 0.19984817504882812 }, { "epoch": 3.294219970703125e-05, "model_forward_time": 0.024837255477905273, "step": 21589 }, { "epoch": 3.294219970703125e-05, "step": 21589, "training_step_time": 0.11052584648132324 }, { "epoch": 3.29437255859375e-05, "grad_norm": 0.18321019411087036, "learning_rate": 1.999000866056908e-05, "loss": 0.0109, "step": 21590 }, { "epoch": 3.29437255859375e-05, "model_forward_time": 0.02471613883972168, "step": 21590 }, { "epoch": 3.29437255859375e-05, "step": 21590, "training_step_time": 0.10564446449279785 }, { "epoch": 3.294525146484375e-05, "model_forward_time": 0.0256192684173584, "step": 21591 }, { "epoch": 3.294525146484375e-05, "step": 21591, "training_step_time": 0.10926580429077148 }, { "epoch": 3.294677734375e-05, "model_forward_time": 0.026243209838867188, "step": 21592 }, { "epoch": 3.294677734375e-05, "step": 21592, "training_step_time": 0.10800313949584961 }, { "epoch": 3.294830322265625e-05, "model_forward_time": 0.025821924209594727, "step": 21593 }, { "epoch": 3.294830322265625e-05, "step": 21593, "training_step_time": 0.13608264923095703 }, { "epoch": 3.29498291015625e-05, "model_forward_time": 0.025051355361938477, "step": 21594 }, { "epoch": 3.29498291015625e-05, "step": 21594, "training_step_time": 0.1742253303527832 }, { "epoch": 3.295135498046875e-05, "model_forward_time": 0.027420759201049805, "step": 21595 }, { "epoch": 3.295135498046875e-05, "step": 21595, "training_step_time": 0.16759419441223145 }, { "epoch": 3.2952880859375e-05, "model_forward_time": 0.02770686149597168, "step": 21596 }, { "epoch": 3.2952880859375e-05, "step": 21596, "training_step_time": 0.1587679386138916 }, { "epoch": 3.295440673828125e-05, "model_forward_time": 0.02574634552001953, "step": 21597 }, { "epoch": 3.295440673828125e-05, "step": 21597, "training_step_time": 0.14095616340637207 }, { "epoch": 3.29559326171875e-05, "model_forward_time": 0.025413036346435547, "step": 21598 }, { "epoch": 3.29559326171875e-05, "step": 21598, "training_step_time": 0.12986159324645996 }, { "epoch": 3.295745849609375e-05, "model_forward_time": 0.024929523468017578, "step": 21599 }, { "epoch": 3.295745849609375e-05, "step": 21599, "training_step_time": 0.11888527870178223 }, { "epoch": 3.2958984375e-05, "grad_norm": 0.13645771145820618, "learning_rate": 1.9945942635848748e-05, "loss": 0.0053, "step": 21600 }, { "epoch": 3.2958984375e-05, "model_forward_time": 0.02456521987915039, "step": 21600 }, { "epoch": 3.2958984375e-05, "step": 21600, "training_step_time": 0.12133932113647461 }, { "epoch": 3.296051025390625e-05, "model_forward_time": 0.02410602569580078, "step": 21601 }, { "epoch": 3.296051025390625e-05, "step": 21601, "training_step_time": 0.10464000701904297 }, { "epoch": 3.29620361328125e-05, "model_forward_time": 0.02534961700439453, "step": 21602 }, { "epoch": 3.29620361328125e-05, "step": 21602, "training_step_time": 0.10602521896362305 }, { "epoch": 3.296356201171875e-05, "model_forward_time": 0.025056123733520508, "step": 21603 }, { "epoch": 3.296356201171875e-05, "step": 21603, "training_step_time": 0.1026604175567627 }, { "epoch": 3.2965087890625e-05, "model_forward_time": 0.025554656982421875, "step": 21604 }, { "epoch": 3.2965087890625e-05, "step": 21604, "training_step_time": 0.10693025588989258 }, { "epoch": 3.296661376953125e-05, "model_forward_time": 0.02476644515991211, "step": 21605 }, { "epoch": 3.296661376953125e-05, "step": 21605, "training_step_time": 0.10554242134094238 }, { "epoch": 3.29681396484375e-05, "model_forward_time": 0.025457382202148438, "step": 21606 }, { "epoch": 3.29681396484375e-05, "step": 21606, "training_step_time": 0.15619301795959473 }, { "epoch": 3.296966552734375e-05, "model_forward_time": 0.025591611862182617, "step": 21607 }, { "epoch": 3.296966552734375e-05, "step": 21607, "training_step_time": 0.12072134017944336 }, { "epoch": 3.297119140625e-05, "model_forward_time": 0.025662660598754883, "step": 21608 }, { "epoch": 3.297119140625e-05, "step": 21608, "training_step_time": 0.12965917587280273 }, { "epoch": 3.297271728515625e-05, "model_forward_time": 0.025268077850341797, "step": 21609 }, { "epoch": 3.297271728515625e-05, "step": 21609, "training_step_time": 0.16080689430236816 }, { "epoch": 3.29742431640625e-05, "grad_norm": 0.19638510048389435, "learning_rate": 1.9901913129642024e-05, "loss": 0.011, "step": 21610 }, { "epoch": 3.29742431640625e-05, "model_forward_time": 0.024733543395996094, "step": 21610 }, { "epoch": 3.29742431640625e-05, "step": 21610, "training_step_time": 0.22168231010437012 }, { "epoch": 3.297576904296875e-05, "model_forward_time": 0.02540135383605957, "step": 21611 }, { "epoch": 3.297576904296875e-05, "step": 21611, "training_step_time": 0.11881875991821289 }, { "epoch": 3.2977294921875e-05, "model_forward_time": 0.024294614791870117, "step": 21612 }, { "epoch": 3.2977294921875e-05, "step": 21612, "training_step_time": 0.10375833511352539 }, { "epoch": 3.297882080078125e-05, "model_forward_time": 0.025632381439208984, "step": 21613 }, { "epoch": 3.297882080078125e-05, "step": 21613, "training_step_time": 0.10560297966003418 }, { "epoch": 3.29803466796875e-05, "model_forward_time": 0.024953126907348633, "step": 21614 }, { "epoch": 3.29803466796875e-05, "step": 21614, "training_step_time": 0.1084756851196289 }, { "epoch": 3.298187255859375e-05, "model_forward_time": 0.025233745574951172, "step": 21615 }, { "epoch": 3.298187255859375e-05, "step": 21615, "training_step_time": 0.10884428024291992 }, { "epoch": 3.29833984375e-05, "model_forward_time": 0.02488112449645996, "step": 21616 }, { "epoch": 3.29833984375e-05, "step": 21616, "training_step_time": 0.10658764839172363 }, { "epoch": 3.298492431640625e-05, "model_forward_time": 0.024851322174072266, "step": 21617 }, { "epoch": 3.298492431640625e-05, "step": 21617, "training_step_time": 0.10854411125183105 }, { "epoch": 3.29864501953125e-05, "model_forward_time": 0.02484440803527832, "step": 21618 }, { "epoch": 3.29864501953125e-05, "step": 21618, "training_step_time": 0.10720610618591309 }, { "epoch": 3.298797607421875e-05, "model_forward_time": 0.025374412536621094, "step": 21619 }, { "epoch": 3.298797607421875e-05, "step": 21619, "training_step_time": 0.10998272895812988 }, { "epoch": 3.2989501953125e-05, "grad_norm": 0.0906025692820549, "learning_rate": 1.98579201954489e-05, "loss": 0.005, "step": 21620 }, { "epoch": 3.2989501953125e-05, "model_forward_time": 0.024613380432128906, "step": 21620 }, { "epoch": 3.2989501953125e-05, "step": 21620, "training_step_time": 0.10365676879882812 }, { "epoch": 3.299102783203125e-05, "model_forward_time": 0.02382826805114746, "step": 21621 }, { "epoch": 3.299102783203125e-05, "step": 21621, "training_step_time": 0.15266036987304688 }, { "epoch": 3.29925537109375e-05, "model_forward_time": 0.024474143981933594, "step": 21622 }, { "epoch": 3.29925537109375e-05, "step": 21622, "training_step_time": 0.1589653491973877 }, { "epoch": 3.299407958984375e-05, "model_forward_time": 0.0257568359375, "step": 21623 }, { "epoch": 3.299407958984375e-05, "step": 21623, "training_step_time": 0.10695505142211914 }, { "epoch": 3.299560546875e-05, "model_forward_time": 0.024897336959838867, "step": 21624 }, { "epoch": 3.299560546875e-05, "step": 21624, "training_step_time": 0.13048505783081055 }, { "epoch": 3.299713134765625e-05, "model_forward_time": 0.024859189987182617, "step": 21625 }, { "epoch": 3.299713134765625e-05, "step": 21625, "training_step_time": 0.1951286792755127 }, { "epoch": 3.29986572265625e-05, "model_forward_time": 0.024225473403930664, "step": 21626 }, { "epoch": 3.29986572265625e-05, "step": 21626, "training_step_time": 0.14817070960998535 }, { "epoch": 3.300018310546875e-05, "model_forward_time": 0.024331092834472656, "step": 21627 }, { "epoch": 3.300018310546875e-05, "step": 21627, "training_step_time": 0.10275697708129883 }, { "epoch": 3.3001708984375e-05, "model_forward_time": 0.024472713470458984, "step": 21628 }, { "epoch": 3.3001708984375e-05, "step": 21628, "training_step_time": 0.1320514678955078 }, { "epoch": 3.300323486328125e-05, "model_forward_time": 0.025026559829711914, "step": 21629 }, { "epoch": 3.300323486328125e-05, "step": 21629, "training_step_time": 0.20738911628723145 }, { "epoch": 3.30047607421875e-05, "grad_norm": 0.15329015254974365, "learning_rate": 1.981396388672496e-05, "loss": 0.0084, "step": 21630 }, { "epoch": 3.30047607421875e-05, "model_forward_time": 0.023899316787719727, "step": 21630 }, { "epoch": 3.30047607421875e-05, "step": 21630, "training_step_time": 0.1344316005706787 }, { "epoch": 3.300628662109375e-05, "model_forward_time": 0.024190902709960938, "step": 21631 }, { "epoch": 3.300628662109375e-05, "step": 21631, "training_step_time": 0.12869572639465332 }, { "epoch": 3.30078125e-05, "model_forward_time": 0.0242612361907959, "step": 21632 }, { "epoch": 3.30078125e-05, "step": 21632, "training_step_time": 0.10609555244445801 }, { "epoch": 3.300933837890625e-05, "model_forward_time": 0.025055646896362305, "step": 21633 }, { "epoch": 3.300933837890625e-05, "step": 21633, "training_step_time": 0.1178736686706543 }, { "epoch": 3.30108642578125e-05, "model_forward_time": 0.024854660034179688, "step": 21634 }, { "epoch": 3.30108642578125e-05, "step": 21634, "training_step_time": 0.10793042182922363 }, { "epoch": 3.301239013671875e-05, "model_forward_time": 0.024514436721801758, "step": 21635 }, { "epoch": 3.301239013671875e-05, "step": 21635, "training_step_time": 0.1103982925415039 }, { "epoch": 3.3013916015625e-05, "model_forward_time": 0.02469801902770996, "step": 21636 }, { "epoch": 3.3013916015625e-05, "step": 21636, "training_step_time": 0.11001205444335938 }, { "epoch": 3.301544189453125e-05, "model_forward_time": 0.02520918846130371, "step": 21637 }, { "epoch": 3.301544189453125e-05, "step": 21637, "training_step_time": 0.10795927047729492 }, { "epoch": 3.30169677734375e-05, "model_forward_time": 0.025318145751953125, "step": 21638 }, { "epoch": 3.30169677734375e-05, "step": 21638, "training_step_time": 0.10701751708984375 }, { "epoch": 3.301849365234375e-05, "model_forward_time": 0.025209426879882812, "step": 21639 }, { "epoch": 3.301849365234375e-05, "step": 21639, "training_step_time": 0.10817170143127441 }, { "epoch": 3.302001953125e-05, "grad_norm": 0.2231944501399994, "learning_rate": 1.977004425688126e-05, "loss": 0.0056, "step": 21640 }, { "epoch": 3.302001953125e-05, "model_forward_time": 0.024566173553466797, "step": 21640 }, { "epoch": 3.302001953125e-05, "step": 21640, "training_step_time": 0.10619688034057617 }, { "epoch": 3.302154541015625e-05, "model_forward_time": 0.0248868465423584, "step": 21641 }, { "epoch": 3.302154541015625e-05, "step": 21641, "training_step_time": 0.10500502586364746 }, { "epoch": 3.30230712890625e-05, "model_forward_time": 0.025059223175048828, "step": 21642 }, { "epoch": 3.30230712890625e-05, "step": 21642, "training_step_time": 0.10553121566772461 }, { "epoch": 3.302459716796875e-05, "model_forward_time": 0.02526235580444336, "step": 21643 }, { "epoch": 3.302459716796875e-05, "step": 21643, "training_step_time": 0.11204266548156738 }, { "epoch": 3.3026123046875e-05, "model_forward_time": 0.02559804916381836, "step": 21644 }, { "epoch": 3.3026123046875e-05, "step": 21644, "training_step_time": 0.1065378189086914 }, { "epoch": 3.302764892578125e-05, "model_forward_time": 0.02774357795715332, "step": 21645 }, { "epoch": 3.302764892578125e-05, "step": 21645, "training_step_time": 0.10740327835083008 }, { "epoch": 3.30291748046875e-05, "model_forward_time": 0.02543187141418457, "step": 21646 }, { "epoch": 3.30291748046875e-05, "step": 21646, "training_step_time": 0.10715484619140625 }, { "epoch": 3.303070068359375e-05, "model_forward_time": 0.025083065032958984, "step": 21647 }, { "epoch": 3.303070068359375e-05, "step": 21647, "training_step_time": 0.10512447357177734 }, { "epoch": 3.30322265625e-05, "model_forward_time": 0.025011539459228516, "step": 21648 }, { "epoch": 3.30322265625e-05, "step": 21648, "training_step_time": 0.10540628433227539 }, { "epoch": 3.303375244140625e-05, "model_forward_time": 0.028086423873901367, "step": 21649 }, { "epoch": 3.303375244140625e-05, "step": 21649, "training_step_time": 0.10953235626220703 }, { "epoch": 3.30352783203125e-05, "grad_norm": 0.0979032889008522, "learning_rate": 1.9726161359284286e-05, "loss": 0.0045, "step": 21650 }, { "epoch": 3.30352783203125e-05, "model_forward_time": 0.02527332305908203, "step": 21650 }, { "epoch": 3.30352783203125e-05, "step": 21650, "training_step_time": 0.10857629776000977 }, { "epoch": 3.303680419921875e-05, "model_forward_time": 0.025359153747558594, "step": 21651 }, { "epoch": 3.303680419921875e-05, "step": 21651, "training_step_time": 0.20366668701171875 }, { "epoch": 3.3038330078125e-05, "model_forward_time": 0.024358510971069336, "step": 21652 }, { "epoch": 3.3038330078125e-05, "step": 21652, "training_step_time": 0.11240530014038086 }, { "epoch": 3.303985595703125e-05, "model_forward_time": 0.02442026138305664, "step": 21653 }, { "epoch": 3.303985595703125e-05, "step": 21653, "training_step_time": 0.1396350860595703 }, { "epoch": 3.30413818359375e-05, "model_forward_time": 0.025272607803344727, "step": 21654 }, { "epoch": 3.30413818359375e-05, "step": 21654, "training_step_time": 0.1537952423095703 }, { "epoch": 3.304290771484375e-05, "model_forward_time": 0.024760961532592773, "step": 21655 }, { "epoch": 3.304290771484375e-05, "step": 21655, "training_step_time": 0.17072486877441406 }, { "epoch": 3.304443359375e-05, "model_forward_time": 0.02434086799621582, "step": 21656 }, { "epoch": 3.304443359375e-05, "step": 21656, "training_step_time": 0.17336511611938477 }, { "epoch": 3.304595947265625e-05, "model_forward_time": 0.02432560920715332, "step": 21657 }, { "epoch": 3.304595947265625e-05, "step": 21657, "training_step_time": 0.1010122299194336 }, { "epoch": 3.30474853515625e-05, "model_forward_time": 0.024654150009155273, "step": 21658 }, { "epoch": 3.30474853515625e-05, "step": 21658, "training_step_time": 0.10524225234985352 }, { "epoch": 3.304901123046875e-05, "model_forward_time": 0.025241851806640625, "step": 21659 }, { "epoch": 3.304901123046875e-05, "step": 21659, "training_step_time": 0.10726785659790039 }, { "epoch": 3.3050537109375e-05, "grad_norm": 0.2468523234128952, "learning_rate": 1.9682315247255894e-05, "loss": 0.0074, "step": 21660 }, { "epoch": 3.3050537109375e-05, "model_forward_time": 0.025350093841552734, "step": 21660 }, { "epoch": 3.3050537109375e-05, "step": 21660, "training_step_time": 0.10625147819519043 }, { "epoch": 3.305206298828125e-05, "model_forward_time": 0.0249478816986084, "step": 21661 }, { "epoch": 3.305206298828125e-05, "step": 21661, "training_step_time": 0.10512232780456543 }, { "epoch": 3.30535888671875e-05, "model_forward_time": 0.025229930877685547, "step": 21662 }, { "epoch": 3.30535888671875e-05, "step": 21662, "training_step_time": 0.1049351692199707 }, { "epoch": 3.305511474609375e-05, "model_forward_time": 0.02510356903076172, "step": 21663 }, { "epoch": 3.305511474609375e-05, "step": 21663, "training_step_time": 0.1073756217956543 }, { "epoch": 3.3056640625e-05, "model_forward_time": 0.025348186492919922, "step": 21664 }, { "epoch": 3.3056640625e-05, "step": 21664, "training_step_time": 0.10579681396484375 }, { "epoch": 3.305816650390625e-05, "model_forward_time": 0.024752378463745117, "step": 21665 }, { "epoch": 3.305816650390625e-05, "step": 21665, "training_step_time": 0.10666465759277344 }, { "epoch": 3.30596923828125e-05, "model_forward_time": 0.02475595474243164, "step": 21666 }, { "epoch": 3.30596923828125e-05, "step": 21666, "training_step_time": 0.1146688461303711 }, { "epoch": 3.306121826171875e-05, "model_forward_time": 0.02411794662475586, "step": 21667 }, { "epoch": 3.306121826171875e-05, "step": 21667, "training_step_time": 0.16798686981201172 }, { "epoch": 3.3062744140625e-05, "model_forward_time": 0.02433037757873535, "step": 21668 }, { "epoch": 3.3062744140625e-05, "step": 21668, "training_step_time": 0.1675407886505127 }, { "epoch": 3.306427001953125e-05, "model_forward_time": 0.024592876434326172, "step": 21669 }, { "epoch": 3.306427001953125e-05, "step": 21669, "training_step_time": 0.2050011157989502 }, { "epoch": 3.30657958984375e-05, "grad_norm": 0.33429640531539917, "learning_rate": 1.9638505974073234e-05, "loss": 0.0048, "step": 21670 }, { "epoch": 3.30657958984375e-05, "model_forward_time": 0.02393960952758789, "step": 21670 }, { "epoch": 3.30657958984375e-05, "step": 21670, "training_step_time": 0.19972658157348633 }, { "epoch": 3.306732177734375e-05, "model_forward_time": 0.02443075180053711, "step": 21671 }, { "epoch": 3.306732177734375e-05, "step": 21671, "training_step_time": 0.19254136085510254 }, { "epoch": 3.306884765625e-05, "model_forward_time": 0.02624058723449707, "step": 21672 }, { "epoch": 3.306884765625e-05, "step": 21672, "training_step_time": 0.19963860511779785 }, { "epoch": 3.307037353515625e-05, "model_forward_time": 0.02439260482788086, "step": 21673 }, { "epoch": 3.307037353515625e-05, "step": 21673, "training_step_time": 0.13341093063354492 }, { "epoch": 3.30718994140625e-05, "model_forward_time": 0.024617910385131836, "step": 21674 }, { "epoch": 3.30718994140625e-05, "step": 21674, "training_step_time": 0.1929759979248047 }, { "epoch": 3.307342529296875e-05, "model_forward_time": 0.025384187698364258, "step": 21675 }, { "epoch": 3.307342529296875e-05, "step": 21675, "training_step_time": 0.1546790599822998 }, { "epoch": 3.3074951171875e-05, "model_forward_time": 0.024454355239868164, "step": 21676 }, { "epoch": 3.3074951171875e-05, "step": 21676, "training_step_time": 0.11436963081359863 }, { "epoch": 3.307647705078125e-05, "model_forward_time": 0.025300025939941406, "step": 21677 }, { "epoch": 3.307647705078125e-05, "step": 21677, "training_step_time": 0.11842870712280273 }, { "epoch": 3.30780029296875e-05, "model_forward_time": 0.025342464447021484, "step": 21678 }, { "epoch": 3.30780029296875e-05, "step": 21678, "training_step_time": 0.1099100112915039 }, { "epoch": 3.307952880859375e-05, "model_forward_time": 0.027800321578979492, "step": 21679 }, { "epoch": 3.307952880859375e-05, "step": 21679, "training_step_time": 0.1094508171081543 }, { "epoch": 3.30810546875e-05, "grad_norm": 0.09189315140247345, "learning_rate": 1.9594733592968733e-05, "loss": 0.0047, "step": 21680 }, { "epoch": 3.30810546875e-05, "model_forward_time": 0.024841785430908203, "step": 21680 }, { "epoch": 3.30810546875e-05, "step": 21680, "training_step_time": 0.11111736297607422 }, { "epoch": 3.308258056640625e-05, "model_forward_time": 0.02526068687438965, "step": 21681 }, { "epoch": 3.308258056640625e-05, "step": 21681, "training_step_time": 0.10657525062561035 }, { "epoch": 3.30841064453125e-05, "model_forward_time": 0.024965286254882812, "step": 21682 }, { "epoch": 3.30841064453125e-05, "step": 21682, "training_step_time": 0.10718584060668945 }, { "epoch": 3.308563232421875e-05, "model_forward_time": 0.025114059448242188, "step": 21683 }, { "epoch": 3.308563232421875e-05, "step": 21683, "training_step_time": 0.10442137718200684 }, { "epoch": 3.3087158203125e-05, "model_forward_time": 0.025166034698486328, "step": 21684 }, { "epoch": 3.3087158203125e-05, "step": 21684, "training_step_time": 0.10658693313598633 }, { "epoch": 3.308868408203125e-05, "model_forward_time": 0.02519965171813965, "step": 21685 }, { "epoch": 3.308868408203125e-05, "step": 21685, "training_step_time": 0.10705184936523438 }, { "epoch": 3.30902099609375e-05, "model_forward_time": 0.025802135467529297, "step": 21686 }, { "epoch": 3.30902099609375e-05, "step": 21686, "training_step_time": 0.11495828628540039 }, { "epoch": 3.309173583984375e-05, "model_forward_time": 0.024657249450683594, "step": 21687 }, { "epoch": 3.309173583984375e-05, "step": 21687, "training_step_time": 0.1120753288269043 }, { "epoch": 3.309326171875e-05, "model_forward_time": 0.025519609451293945, "step": 21688 }, { "epoch": 3.309326171875e-05, "step": 21688, "training_step_time": 0.11072683334350586 }, { "epoch": 3.309478759765625e-05, "model_forward_time": 0.02558302879333496, "step": 21689 }, { "epoch": 3.309478759765625e-05, "step": 21689, "training_step_time": 0.10842299461364746 }, { "epoch": 3.30963134765625e-05, "grad_norm": 0.31479647755622864, "learning_rate": 1.9550998157129946e-05, "loss": 0.0064, "step": 21690 }, { "epoch": 3.30963134765625e-05, "model_forward_time": 0.02520442008972168, "step": 21690 }, { "epoch": 3.30963134765625e-05, "step": 21690, "training_step_time": 0.10814619064331055 }, { "epoch": 3.309783935546875e-05, "model_forward_time": 0.02498030662536621, "step": 21691 }, { "epoch": 3.309783935546875e-05, "step": 21691, "training_step_time": 0.10692453384399414 }, { "epoch": 3.3099365234375e-05, "model_forward_time": 0.026551485061645508, "step": 21692 }, { "epoch": 3.3099365234375e-05, "step": 21692, "training_step_time": 0.10693645477294922 }, { "epoch": 3.310089111328125e-05, "model_forward_time": 0.024756669998168945, "step": 21693 }, { "epoch": 3.310089111328125e-05, "step": 21693, "training_step_time": 0.1101675033569336 }, { "epoch": 3.31024169921875e-05, "model_forward_time": 0.02512979507446289, "step": 21694 }, { "epoch": 3.31024169921875e-05, "step": 21694, "training_step_time": 0.17913532257080078 }, { "epoch": 3.310394287109375e-05, "model_forward_time": 0.024075984954833984, "step": 21695 }, { "epoch": 3.310394287109375e-05, "step": 21695, "training_step_time": 0.11275863647460938 }, { "epoch": 3.310546875e-05, "model_forward_time": 0.024477243423461914, "step": 21696 }, { "epoch": 3.310546875e-05, "step": 21696, "training_step_time": 0.12874579429626465 }, { "epoch": 3.310699462890625e-05, "model_forward_time": 0.0252840518951416, "step": 21697 }, { "epoch": 3.310699462890625e-05, "step": 21697, "training_step_time": 0.15785908699035645 }, { "epoch": 3.31085205078125e-05, "model_forward_time": 0.024330854415893555, "step": 21698 }, { "epoch": 3.31085205078125e-05, "step": 21698, "training_step_time": 0.17954611778259277 }, { "epoch": 3.311004638671875e-05, "model_forward_time": 0.0243375301361084, "step": 21699 }, { "epoch": 3.311004638671875e-05, "step": 21699, "training_step_time": 0.17148637771606445 }, { "epoch": 3.3111572265625e-05, "grad_norm": 0.2427108734846115, "learning_rate": 1.950729971969955e-05, "loss": 0.008, "step": 21700 }, { "epoch": 3.3111572265625e-05, "model_forward_time": 0.024054765701293945, "step": 21700 }, { "epoch": 3.3111572265625e-05, "step": 21700, "training_step_time": 0.11722111701965332 }, { "epoch": 3.311309814453125e-05, "model_forward_time": 0.024266958236694336, "step": 21701 }, { "epoch": 3.311309814453125e-05, "step": 21701, "training_step_time": 0.11567187309265137 }, { "epoch": 3.31146240234375e-05, "model_forward_time": 0.027753591537475586, "step": 21702 }, { "epoch": 3.31146240234375e-05, "step": 21702, "training_step_time": 0.11170411109924316 }, { "epoch": 3.311614990234375e-05, "model_forward_time": 0.025176048278808594, "step": 21703 }, { "epoch": 3.311614990234375e-05, "step": 21703, "training_step_time": 0.11543750762939453 }, { "epoch": 3.311767578125e-05, "model_forward_time": 0.02476215362548828, "step": 21704 }, { "epoch": 3.311767578125e-05, "step": 21704, "training_step_time": 0.11217379570007324 }, { "epoch": 3.311920166015625e-05, "model_forward_time": 0.02484726905822754, "step": 21705 }, { "epoch": 3.311920166015625e-05, "step": 21705, "training_step_time": 0.11117172241210938 }, { "epoch": 3.31207275390625e-05, "model_forward_time": 0.02486395835876465, "step": 21706 }, { "epoch": 3.31207275390625e-05, "step": 21706, "training_step_time": 0.11106657981872559 }, { "epoch": 3.312225341796875e-05, "model_forward_time": 0.025017499923706055, "step": 21707 }, { "epoch": 3.312225341796875e-05, "step": 21707, "training_step_time": 0.1107938289642334 }, { "epoch": 3.3123779296875e-05, "model_forward_time": 0.025049924850463867, "step": 21708 }, { "epoch": 3.3123779296875e-05, "step": 21708, "training_step_time": 0.10879778861999512 }, { "epoch": 3.312530517578125e-05, "model_forward_time": 0.024872779846191406, "step": 21709 }, { "epoch": 3.312530517578125e-05, "step": 21709, "training_step_time": 0.10855412483215332 }, { "epoch": 3.31268310546875e-05, "grad_norm": 0.1840215027332306, "learning_rate": 1.9463638333775276e-05, "loss": 0.0047, "step": 21710 }, { "epoch": 3.31268310546875e-05, "model_forward_time": 0.024815082550048828, "step": 21710 }, { "epoch": 3.31268310546875e-05, "step": 21710, "training_step_time": 0.14074158668518066 }, { "epoch": 3.312835693359375e-05, "model_forward_time": 0.02416253089904785, "step": 21711 }, { "epoch": 3.312835693359375e-05, "step": 21711, "training_step_time": 0.15719342231750488 }, { "epoch": 3.31298828125e-05, "model_forward_time": 0.02443075180053711, "step": 21712 }, { "epoch": 3.31298828125e-05, "step": 21712, "training_step_time": 0.10923576354980469 }, { "epoch": 3.313140869140625e-05, "model_forward_time": 0.024692773818969727, "step": 21713 }, { "epoch": 3.313140869140625e-05, "step": 21713, "training_step_time": 0.13438987731933594 }, { "epoch": 3.31329345703125e-05, "model_forward_time": 0.02522134780883789, "step": 21714 }, { "epoch": 3.31329345703125e-05, "step": 21714, "training_step_time": 0.2067551612854004 }, { "epoch": 3.313446044921875e-05, "model_forward_time": 0.024754047393798828, "step": 21715 }, { "epoch": 3.313446044921875e-05, "step": 21715, "training_step_time": 0.16179752349853516 }, { "epoch": 3.3135986328125e-05, "model_forward_time": 0.023836374282836914, "step": 21716 }, { "epoch": 3.3135986328125e-05, "step": 21716, "training_step_time": 0.12055301666259766 }, { "epoch": 3.313751220703125e-05, "model_forward_time": 0.024665117263793945, "step": 21717 }, { "epoch": 3.313751220703125e-05, "step": 21717, "training_step_time": 0.12958717346191406 }, { "epoch": 3.31390380859375e-05, "model_forward_time": 0.025177717208862305, "step": 21718 }, { "epoch": 3.31390380859375e-05, "step": 21718, "training_step_time": 0.17693328857421875 }, { "epoch": 3.314056396484375e-05, "model_forward_time": 0.0240328311920166, "step": 21719 }, { "epoch": 3.314056396484375e-05, "step": 21719, "training_step_time": 0.16917681694030762 }, { "epoch": 3.314208984375e-05, "grad_norm": 0.3955047130584717, "learning_rate": 1.942001405240979e-05, "loss": 0.0116, "step": 21720 }, { "epoch": 3.314208984375e-05, "model_forward_time": 0.0245208740234375, "step": 21720 }, { "epoch": 3.314208984375e-05, "step": 21720, "training_step_time": 0.12347674369812012 }, { "epoch": 3.314361572265625e-05, "model_forward_time": 0.024047136306762695, "step": 21721 }, { "epoch": 3.314361572265625e-05, "step": 21721, "training_step_time": 0.11812472343444824 }, { "epoch": 3.31451416015625e-05, "model_forward_time": 0.026342153549194336, "step": 21722 }, { "epoch": 3.31451416015625e-05, "step": 21722, "training_step_time": 0.11606740951538086 }, { "epoch": 3.314666748046875e-05, "model_forward_time": 0.02803349494934082, "step": 21723 }, { "epoch": 3.314666748046875e-05, "step": 21723, "training_step_time": 0.10847878456115723 }, { "epoch": 3.3148193359375e-05, "model_forward_time": 0.02505350112915039, "step": 21724 }, { "epoch": 3.3148193359375e-05, "step": 21724, "training_step_time": 0.10520052909851074 }, { "epoch": 3.314971923828125e-05, "model_forward_time": 0.02536153793334961, "step": 21725 }, { "epoch": 3.314971923828125e-05, "step": 21725, "training_step_time": 0.10768342018127441 }, { "epoch": 3.31512451171875e-05, "model_forward_time": 0.024854183197021484, "step": 21726 }, { "epoch": 3.31512451171875e-05, "step": 21726, "training_step_time": 0.11243939399719238 }, { "epoch": 3.315277099609375e-05, "model_forward_time": 0.025372743606567383, "step": 21727 }, { "epoch": 3.315277099609375e-05, "step": 21727, "training_step_time": 0.11742782592773438 }, { "epoch": 3.3154296875e-05, "model_forward_time": 0.025104999542236328, "step": 21728 }, { "epoch": 3.3154296875e-05, "step": 21728, "training_step_time": 0.11244535446166992 }, { "epoch": 3.315582275390625e-05, "model_forward_time": 0.024924516677856445, "step": 21729 }, { "epoch": 3.315582275390625e-05, "step": 21729, "training_step_time": 0.10927176475524902 }, { "epoch": 3.31573486328125e-05, "grad_norm": 0.18369890749454498, "learning_rate": 1.937642692861076e-05, "loss": 0.0046, "step": 21730 }, { "epoch": 3.31573486328125e-05, "model_forward_time": 0.02471637725830078, "step": 21730 }, { "epoch": 3.31573486328125e-05, "step": 21730, "training_step_time": 0.10932779312133789 }, { "epoch": 3.315887451171875e-05, "model_forward_time": 0.02537226676940918, "step": 21731 }, { "epoch": 3.315887451171875e-05, "step": 21731, "training_step_time": 0.11021137237548828 }, { "epoch": 3.3160400390625e-05, "model_forward_time": 0.025327444076538086, "step": 21732 }, { "epoch": 3.3160400390625e-05, "step": 21732, "training_step_time": 0.11326432228088379 }, { "epoch": 3.316192626953125e-05, "model_forward_time": 0.0250551700592041, "step": 21733 }, { "epoch": 3.316192626953125e-05, "step": 21733, "training_step_time": 0.10610842704772949 }, { "epoch": 3.31634521484375e-05, "model_forward_time": 0.024434566497802734, "step": 21734 }, { "epoch": 3.31634521484375e-05, "step": 21734, "training_step_time": 0.10514402389526367 }, { "epoch": 3.316497802734375e-05, "model_forward_time": 0.025000810623168945, "step": 21735 }, { "epoch": 3.316497802734375e-05, "step": 21735, "training_step_time": 0.1047670841217041 }, { "epoch": 3.316650390625e-05, "model_forward_time": 0.02536153793334961, "step": 21736 }, { "epoch": 3.316650390625e-05, "step": 21736, "training_step_time": 0.10601592063903809 }, { "epoch": 3.316802978515625e-05, "model_forward_time": 0.02505350112915039, "step": 21737 }, { "epoch": 3.316802978515625e-05, "step": 21737, "training_step_time": 0.10398983955383301 }, { "epoch": 3.31695556640625e-05, "model_forward_time": 0.025296926498413086, "step": 21738 }, { "epoch": 3.31695556640625e-05, "step": 21738, "training_step_time": 0.10823702812194824 }, { "epoch": 3.317108154296875e-05, "model_forward_time": 0.02506732940673828, "step": 21739 }, { "epoch": 3.317108154296875e-05, "step": 21739, "training_step_time": 0.1528947353363037 }, { "epoch": 3.3172607421875e-05, "grad_norm": 0.16225546598434448, "learning_rate": 1.93328770153406e-05, "loss": 0.0068, "step": 21740 }, { "epoch": 3.3172607421875e-05, "model_forward_time": 0.02542734146118164, "step": 21740 }, { "epoch": 3.3172607421875e-05, "step": 21740, "training_step_time": 0.11113405227661133 }, { "epoch": 3.317413330078125e-05, "model_forward_time": 0.0246124267578125, "step": 21741 }, { "epoch": 3.317413330078125e-05, "step": 21741, "training_step_time": 0.1242818832397461 }, { "epoch": 3.31756591796875e-05, "model_forward_time": 0.025226354598999023, "step": 21742 }, { "epoch": 3.31756591796875e-05, "step": 21742, "training_step_time": 0.13660049438476562 }, { "epoch": 3.317718505859375e-05, "model_forward_time": 0.02536463737487793, "step": 21743 }, { "epoch": 3.317718505859375e-05, "step": 21743, "training_step_time": 0.11639690399169922 }, { "epoch": 3.31787109375e-05, "model_forward_time": 0.024898052215576172, "step": 21744 }, { "epoch": 3.31787109375e-05, "step": 21744, "training_step_time": 0.12708806991577148 }, { "epoch": 3.318023681640625e-05, "model_forward_time": 0.0249330997467041, "step": 21745 }, { "epoch": 3.318023681640625e-05, "step": 21745, "training_step_time": 0.12420868873596191 }, { "epoch": 3.31817626953125e-05, "model_forward_time": 0.02484726905822754, "step": 21746 }, { "epoch": 3.31817626953125e-05, "step": 21746, "training_step_time": 0.10887384414672852 }, { "epoch": 3.318328857421875e-05, "model_forward_time": 0.025100231170654297, "step": 21747 }, { "epoch": 3.318328857421875e-05, "step": 21747, "training_step_time": 0.10782909393310547 }, { "epoch": 3.3184814453125e-05, "model_forward_time": 0.025473594665527344, "step": 21748 }, { "epoch": 3.3184814453125e-05, "step": 21748, "training_step_time": 0.10842776298522949 }, { "epoch": 3.318634033203125e-05, "model_forward_time": 0.025015592575073242, "step": 21749 }, { "epoch": 3.318634033203125e-05, "step": 21749, "training_step_time": 0.10453271865844727 }, { "epoch": 3.31878662109375e-05, "grad_norm": 0.14747479557991028, "learning_rate": 1.928936436551661e-05, "loss": 0.0042, "step": 21750 }, { "epoch": 3.31878662109375e-05, "model_forward_time": 0.025118350982666016, "step": 21750 }, { "epoch": 3.31878662109375e-05, "step": 21750, "training_step_time": 0.1117696762084961 }, { "epoch": 3.318939208984375e-05, "model_forward_time": 0.02527308464050293, "step": 21751 }, { "epoch": 3.318939208984375e-05, "step": 21751, "training_step_time": 0.11162710189819336 }, { "epoch": 3.319091796875e-05, "model_forward_time": 0.025176048278808594, "step": 21752 }, { "epoch": 3.319091796875e-05, "step": 21752, "training_step_time": 0.10956978797912598 }, { "epoch": 3.319244384765625e-05, "model_forward_time": 0.02522587776184082, "step": 21753 }, { "epoch": 3.319244384765625e-05, "step": 21753, "training_step_time": 0.10582637786865234 }, { "epoch": 3.31939697265625e-05, "model_forward_time": 0.02535271644592285, "step": 21754 }, { "epoch": 3.31939697265625e-05, "step": 21754, "training_step_time": 0.11062860488891602 }, { "epoch": 3.319549560546875e-05, "model_forward_time": 0.025850772857666016, "step": 21755 }, { "epoch": 3.319549560546875e-05, "step": 21755, "training_step_time": 0.10478568077087402 }, { "epoch": 3.3197021484375e-05, "model_forward_time": 0.02476811408996582, "step": 21756 }, { "epoch": 3.3197021484375e-05, "step": 21756, "training_step_time": 0.14247465133666992 }, { "epoch": 3.319854736328125e-05, "model_forward_time": 0.02482128143310547, "step": 21757 }, { "epoch": 3.319854736328125e-05, "step": 21757, "training_step_time": 0.16814899444580078 }, { "epoch": 3.32000732421875e-05, "model_forward_time": 0.024598121643066406, "step": 21758 }, { "epoch": 3.32000732421875e-05, "step": 21758, "training_step_time": 0.1084432601928711 }, { "epoch": 3.320159912109375e-05, "model_forward_time": 0.0245974063873291, "step": 21759 }, { "epoch": 3.320159912109375e-05, "step": 21759, "training_step_time": 0.13166546821594238 }, { "epoch": 3.3203125e-05, "grad_norm": 0.3442796468734741, "learning_rate": 1.924588903201074e-05, "loss": 0.0155, "step": 21760 }, { "epoch": 3.3203125e-05, "model_forward_time": 0.02515268325805664, "step": 21760 }, { "epoch": 3.3203125e-05, "step": 21760, "training_step_time": 0.2077195644378662 }, { "epoch": 3.320465087890625e-05, "model_forward_time": 0.024456024169921875, "step": 21761 }, { "epoch": 3.320465087890625e-05, "step": 21761, "training_step_time": 0.12128114700317383 }, { "epoch": 3.32061767578125e-05, "model_forward_time": 0.0248410701751709, "step": 21762 }, { "epoch": 3.32061767578125e-05, "step": 21762, "training_step_time": 0.11387801170349121 }, { "epoch": 3.320770263671875e-05, "model_forward_time": 0.02542257308959961, "step": 21763 }, { "epoch": 3.320770263671875e-05, "step": 21763, "training_step_time": 0.206573486328125 }, { "epoch": 3.3209228515625e-05, "model_forward_time": 0.023366451263427734, "step": 21764 }, { "epoch": 3.3209228515625e-05, "step": 21764, "training_step_time": 0.20067834854125977 }, { "epoch": 3.321075439453125e-05, "model_forward_time": 0.024097204208374023, "step": 21765 }, { "epoch": 3.321075439453125e-05, "step": 21765, "training_step_time": 0.14866328239440918 }, { "epoch": 3.32122802734375e-05, "model_forward_time": 0.024332046508789062, "step": 21766 }, { "epoch": 3.32122802734375e-05, "step": 21766, "training_step_time": 0.13687849044799805 }, { "epoch": 3.321380615234375e-05, "model_forward_time": 0.023769855499267578, "step": 21767 }, { "epoch": 3.321380615234375e-05, "step": 21767, "training_step_time": 0.11251091957092285 }, { "epoch": 3.321533203125e-05, "model_forward_time": 0.025343656539916992, "step": 21768 }, { "epoch": 3.321533203125e-05, "step": 21768, "training_step_time": 0.10748910903930664 }, { "epoch": 3.321685791015625e-05, "model_forward_time": 0.025216341018676758, "step": 21769 }, { "epoch": 3.321685791015625e-05, "step": 21769, "training_step_time": 0.10693764686584473 }, { "epoch": 3.32183837890625e-05, "grad_norm": 0.30669525265693665, "learning_rate": 1.920245106764962e-05, "loss": 0.0092, "step": 21770 }, { "epoch": 3.32183837890625e-05, "model_forward_time": 0.02530050277709961, "step": 21770 }, { "epoch": 3.32183837890625e-05, "step": 21770, "training_step_time": 0.10506391525268555 }, { "epoch": 3.321990966796875e-05, "model_forward_time": 0.024978160858154297, "step": 21771 }, { "epoch": 3.321990966796875e-05, "step": 21771, "training_step_time": 0.10431337356567383 }, { "epoch": 3.3221435546875e-05, "model_forward_time": 0.02493429183959961, "step": 21772 }, { "epoch": 3.3221435546875e-05, "step": 21772, "training_step_time": 0.1049191951751709 }, { "epoch": 3.322296142578125e-05, "model_forward_time": 0.02518296241760254, "step": 21773 }, { "epoch": 3.322296142578125e-05, "step": 21773, "training_step_time": 0.10576462745666504 }, { "epoch": 3.32244873046875e-05, "model_forward_time": 0.025162935256958008, "step": 21774 }, { "epoch": 3.32244873046875e-05, "step": 21774, "training_step_time": 0.10674095153808594 }, { "epoch": 3.322601318359375e-05, "model_forward_time": 0.025419235229492188, "step": 21775 }, { "epoch": 3.322601318359375e-05, "step": 21775, "training_step_time": 0.10509443283081055 }, { "epoch": 3.32275390625e-05, "model_forward_time": 0.02595686912536621, "step": 21776 }, { "epoch": 3.32275390625e-05, "step": 21776, "training_step_time": 0.10840940475463867 }, { "epoch": 3.322906494140625e-05, "model_forward_time": 0.02485179901123047, "step": 21777 }, { "epoch": 3.322906494140625e-05, "step": 21777, "training_step_time": 0.1060950756072998 }, { "epoch": 3.32305908203125e-05, "model_forward_time": 0.02488851547241211, "step": 21778 }, { "epoch": 3.32305908203125e-05, "step": 21778, "training_step_time": 0.10925698280334473 }, { "epoch": 3.323211669921875e-05, "model_forward_time": 0.02502894401550293, "step": 21779 }, { "epoch": 3.323211669921875e-05, "step": 21779, "training_step_time": 0.10417723655700684 }, { "epoch": 3.3233642578125e-05, "grad_norm": 0.08915925770998001, "learning_rate": 1.9159050525214452e-05, "loss": 0.0188, "step": 21780 }, { "epoch": 3.3233642578125e-05, "model_forward_time": 0.024925947189331055, "step": 21780 }, { "epoch": 3.3233642578125e-05, "step": 21780, "training_step_time": 0.10466504096984863 }, { "epoch": 3.323516845703125e-05, "model_forward_time": 0.024986982345581055, "step": 21781 }, { "epoch": 3.323516845703125e-05, "step": 21781, "training_step_time": 0.10445022583007812 }, { "epoch": 3.32366943359375e-05, "model_forward_time": 0.024063825607299805, "step": 21782 }, { "epoch": 3.32366943359375e-05, "step": 21782, "training_step_time": 0.10645222663879395 }, { "epoch": 3.323822021484375e-05, "model_forward_time": 0.024182558059692383, "step": 21783 }, { "epoch": 3.323822021484375e-05, "step": 21783, "training_step_time": 0.11365580558776855 }, { "epoch": 3.323974609375e-05, "model_forward_time": 0.025199413299560547, "step": 21784 }, { "epoch": 3.323974609375e-05, "step": 21784, "training_step_time": 0.10591363906860352 }, { "epoch": 3.324127197265625e-05, "model_forward_time": 0.025432825088500977, "step": 21785 }, { "epoch": 3.324127197265625e-05, "step": 21785, "training_step_time": 0.14859294891357422 }, { "epoch": 3.32427978515625e-05, "model_forward_time": 0.024664640426635742, "step": 21786 }, { "epoch": 3.32427978515625e-05, "step": 21786, "training_step_time": 0.11029982566833496 }, { "epoch": 3.324432373046875e-05, "model_forward_time": 0.024808406829833984, "step": 21787 }, { "epoch": 3.324432373046875e-05, "step": 21787, "training_step_time": 0.12851190567016602 }, { "epoch": 3.3245849609375e-05, "model_forward_time": 0.02513599395751953, "step": 21788 }, { "epoch": 3.3245849609375e-05, "step": 21788, "training_step_time": 0.1403498649597168 }, { "epoch": 3.324737548828125e-05, "model_forward_time": 0.024633169174194336, "step": 21789 }, { "epoch": 3.324737548828125e-05, "step": 21789, "training_step_time": 0.11573958396911621 }, { "epoch": 3.32489013671875e-05, "grad_norm": 0.1732388287782669, "learning_rate": 1.9115687457441022e-05, "loss": 0.0065, "step": 21790 }, { "epoch": 3.32489013671875e-05, "model_forward_time": 0.025089502334594727, "step": 21790 }, { "epoch": 3.32489013671875e-05, "step": 21790, "training_step_time": 0.12963008880615234 }, { "epoch": 3.325042724609375e-05, "model_forward_time": 0.025164365768432617, "step": 21791 }, { "epoch": 3.325042724609375e-05, "step": 21791, "training_step_time": 0.12912774085998535 }, { "epoch": 3.3251953125e-05, "model_forward_time": 0.0277101993560791, "step": 21792 }, { "epoch": 3.3251953125e-05, "step": 21792, "training_step_time": 0.1105806827545166 }, { "epoch": 3.325347900390625e-05, "model_forward_time": 0.02494025230407715, "step": 21793 }, { "epoch": 3.325347900390625e-05, "step": 21793, "training_step_time": 0.11237120628356934 }, { "epoch": 3.32550048828125e-05, "model_forward_time": 0.024797439575195312, "step": 21794 }, { "epoch": 3.32550048828125e-05, "step": 21794, "training_step_time": 0.11377096176147461 }, { "epoch": 3.325653076171875e-05, "model_forward_time": 0.024846553802490234, "step": 21795 }, { "epoch": 3.325653076171875e-05, "step": 21795, "training_step_time": 0.11927103996276855 }, { "epoch": 3.3258056640625e-05, "model_forward_time": 0.02591538429260254, "step": 21796 }, { "epoch": 3.3258056640625e-05, "step": 21796, "training_step_time": 0.10949826240539551 }, { "epoch": 3.325958251953125e-05, "model_forward_time": 0.025827407836914062, "step": 21797 }, { "epoch": 3.325958251953125e-05, "step": 21797, "training_step_time": 0.10903382301330566 }, { "epoch": 3.32611083984375e-05, "model_forward_time": 0.025090694427490234, "step": 21798 }, { "epoch": 3.32611083984375e-05, "step": 21798, "training_step_time": 0.10832500457763672 }, { "epoch": 3.326263427734375e-05, "model_forward_time": 0.025237083435058594, "step": 21799 }, { "epoch": 3.326263427734375e-05, "step": 21799, "training_step_time": 0.1168668270111084 }, { "epoch": 3.326416015625e-05, "grad_norm": 0.4488953649997711, "learning_rate": 1.9072361917019536e-05, "loss": 0.0121, "step": 21800 }, { "epoch": 3.326416015625e-05, "model_forward_time": 0.025271177291870117, "step": 21800 }, { "epoch": 3.326416015625e-05, "step": 21800, "training_step_time": 0.11556291580200195 }, { "epoch": 3.326568603515625e-05, "model_forward_time": 0.02516007423400879, "step": 21801 }, { "epoch": 3.326568603515625e-05, "step": 21801, "training_step_time": 0.10564208030700684 }, { "epoch": 3.32672119140625e-05, "model_forward_time": 0.02442002296447754, "step": 21802 }, { "epoch": 3.32672119140625e-05, "step": 21802, "training_step_time": 0.15406036376953125 }, { "epoch": 3.326873779296875e-05, "model_forward_time": 0.025583744049072266, "step": 21803 }, { "epoch": 3.326873779296875e-05, "step": 21803, "training_step_time": 0.15313005447387695 }, { "epoch": 3.3270263671875e-05, "model_forward_time": 0.024120807647705078, "step": 21804 }, { "epoch": 3.3270263671875e-05, "step": 21804, "training_step_time": 0.11084198951721191 }, { "epoch": 3.327178955078125e-05, "model_forward_time": 0.024527549743652344, "step": 21805 }, { "epoch": 3.327178955078125e-05, "step": 21805, "training_step_time": 0.18859076499938965 }, { "epoch": 3.32733154296875e-05, "model_forward_time": 0.024230241775512695, "step": 21806 }, { "epoch": 3.32733154296875e-05, "step": 21806, "training_step_time": 0.14934015274047852 }, { "epoch": 3.327484130859375e-05, "model_forward_time": 0.024615764617919922, "step": 21807 }, { "epoch": 3.327484130859375e-05, "step": 21807, "training_step_time": 0.199371337890625 }, { "epoch": 3.32763671875e-05, "model_forward_time": 0.023859262466430664, "step": 21808 }, { "epoch": 3.32763671875e-05, "step": 21808, "training_step_time": 0.1276547908782959 }, { "epoch": 3.327789306640625e-05, "model_forward_time": 0.023954153060913086, "step": 21809 }, { "epoch": 3.327789306640625e-05, "step": 21809, "training_step_time": 0.15076637268066406 }, { "epoch": 3.32794189453125e-05, "grad_norm": 0.09804453700780869, "learning_rate": 1.9029073956594606e-05, "loss": 0.0041, "step": 21810 }, { "epoch": 3.32794189453125e-05, "model_forward_time": 0.024268388748168945, "step": 21810 }, { "epoch": 3.32794189453125e-05, "step": 21810, "training_step_time": 0.14701604843139648 }, { "epoch": 3.328094482421875e-05, "model_forward_time": 0.02439093589782715, "step": 21811 }, { "epoch": 3.328094482421875e-05, "step": 21811, "training_step_time": 0.20720624923706055 }, { "epoch": 3.3282470703125e-05, "model_forward_time": 0.024606704711914062, "step": 21812 }, { "epoch": 3.3282470703125e-05, "step": 21812, "training_step_time": 0.12167191505432129 }, { "epoch": 3.328399658203125e-05, "model_forward_time": 0.024305105209350586, "step": 21813 }, { "epoch": 3.328399658203125e-05, "step": 21813, "training_step_time": 0.11679887771606445 }, { "epoch": 3.32855224609375e-05, "model_forward_time": 0.025182247161865234, "step": 21814 }, { "epoch": 3.32855224609375e-05, "step": 21814, "training_step_time": 0.11378645896911621 }, { "epoch": 3.328704833984375e-05, "model_forward_time": 0.025525331497192383, "step": 21815 }, { "epoch": 3.328704833984375e-05, "step": 21815, "training_step_time": 0.10745811462402344 }, { "epoch": 3.328857421875e-05, "model_forward_time": 0.024863243103027344, "step": 21816 }, { "epoch": 3.328857421875e-05, "step": 21816, "training_step_time": 0.10639476776123047 }, { "epoch": 3.329010009765625e-05, "model_forward_time": 0.024791717529296875, "step": 21817 }, { "epoch": 3.329010009765625e-05, "step": 21817, "training_step_time": 0.1152498722076416 }, { "epoch": 3.32916259765625e-05, "model_forward_time": 0.024941205978393555, "step": 21818 }, { "epoch": 3.32916259765625e-05, "step": 21818, "training_step_time": 0.10890746116638184 }, { "epoch": 3.329315185546875e-05, "model_forward_time": 0.02552032470703125, "step": 21819 }, { "epoch": 3.329315185546875e-05, "step": 21819, "training_step_time": 0.11397576332092285 }, { "epoch": 3.3294677734375e-05, "grad_norm": 0.17043310403823853, "learning_rate": 1.8985823628765188e-05, "loss": 0.005, "step": 21820 }, { "epoch": 3.3294677734375e-05, "model_forward_time": 0.02506732940673828, "step": 21820 }, { "epoch": 3.3294677734375e-05, "step": 21820, "training_step_time": 0.10653829574584961 }, { "epoch": 3.329620361328125e-05, "model_forward_time": 0.025251150131225586, "step": 21821 }, { "epoch": 3.329620361328125e-05, "step": 21821, "training_step_time": 0.10847806930541992 }, { "epoch": 3.32977294921875e-05, "model_forward_time": 0.02530074119567871, "step": 21822 }, { "epoch": 3.32977294921875e-05, "step": 21822, "training_step_time": 0.1052849292755127 }, { "epoch": 3.329925537109375e-05, "model_forward_time": 0.025892019271850586, "step": 21823 }, { "epoch": 3.329925537109375e-05, "step": 21823, "training_step_time": 0.10567688941955566 }, { "epoch": 3.330078125e-05, "model_forward_time": 0.02531886100769043, "step": 21824 }, { "epoch": 3.330078125e-05, "step": 21824, "training_step_time": 0.10826921463012695 }, { "epoch": 3.330230712890625e-05, "model_forward_time": 0.02545905113220215, "step": 21825 }, { "epoch": 3.330230712890625e-05, "step": 21825, "training_step_time": 0.10414624214172363 }, { "epoch": 3.33038330078125e-05, "model_forward_time": 0.024924278259277344, "step": 21826 }, { "epoch": 3.33038330078125e-05, "step": 21826, "training_step_time": 0.10510563850402832 }, { "epoch": 3.330535888671875e-05, "model_forward_time": 0.024805784225463867, "step": 21827 }, { "epoch": 3.330535888671875e-05, "step": 21827, "training_step_time": 0.1055152416229248 }, { "epoch": 3.3306884765625e-05, "model_forward_time": 0.025597572326660156, "step": 21828 }, { "epoch": 3.3306884765625e-05, "step": 21828, "training_step_time": 0.10855960845947266 }, { "epoch": 3.330841064453125e-05, "model_forward_time": 0.025432825088500977, "step": 21829 }, { "epoch": 3.330841064453125e-05, "step": 21829, "training_step_time": 0.10846161842346191 }, { "epoch": 3.33099365234375e-05, "grad_norm": 0.4265593886375427, "learning_rate": 1.8942610986084486e-05, "loss": 0.0117, "step": 21830 }, { "epoch": 3.33099365234375e-05, "model_forward_time": 0.02542901039123535, "step": 21830 }, { "epoch": 3.33099365234375e-05, "step": 21830, "training_step_time": 0.17316174507141113 }, { "epoch": 3.331146240234375e-05, "model_forward_time": 0.02429676055908203, "step": 21831 }, { "epoch": 3.331146240234375e-05, "step": 21831, "training_step_time": 0.12160825729370117 }, { "epoch": 3.331298828125e-05, "model_forward_time": 0.024595975875854492, "step": 21832 }, { "epoch": 3.331298828125e-05, "step": 21832, "training_step_time": 0.1272883415222168 }, { "epoch": 3.331451416015625e-05, "model_forward_time": 0.02492976188659668, "step": 21833 }, { "epoch": 3.331451416015625e-05, "step": 21833, "training_step_time": 0.15986943244934082 }, { "epoch": 3.33160400390625e-05, "model_forward_time": 0.02434062957763672, "step": 21834 }, { "epoch": 3.33160400390625e-05, "step": 21834, "training_step_time": 0.18533587455749512 }, { "epoch": 3.331756591796875e-05, "model_forward_time": 0.024075031280517578, "step": 21835 }, { "epoch": 3.331756591796875e-05, "step": 21835, "training_step_time": 0.1640181541442871 }, { "epoch": 3.3319091796875e-05, "model_forward_time": 0.024425506591796875, "step": 21836 }, { "epoch": 3.3319091796875e-05, "step": 21836, "training_step_time": 0.11726951599121094 }, { "epoch": 3.332061767578125e-05, "model_forward_time": 0.02438640594482422, "step": 21837 }, { "epoch": 3.332061767578125e-05, "step": 21837, "training_step_time": 0.11077237129211426 }, { "epoch": 3.33221435546875e-05, "model_forward_time": 0.024880647659301758, "step": 21838 }, { "epoch": 3.33221435546875e-05, "step": 21838, "training_step_time": 0.11326217651367188 }, { "epoch": 3.332366943359375e-05, "model_forward_time": 0.0251467227935791, "step": 21839 }, { "epoch": 3.332366943359375e-05, "step": 21839, "training_step_time": 0.1108860969543457 }, { "epoch": 3.33251953125e-05, "grad_norm": 0.12453766167163849, "learning_rate": 1.8899436081059975e-05, "loss": 0.0061, "step": 21840 }, { "epoch": 3.33251953125e-05, "model_forward_time": 0.02491450309753418, "step": 21840 }, { "epoch": 3.33251953125e-05, "step": 21840, "training_step_time": 0.11063313484191895 }, { "epoch": 3.332672119140625e-05, "model_forward_time": 0.02473902702331543, "step": 21841 }, { "epoch": 3.332672119140625e-05, "step": 21841, "training_step_time": 0.10833239555358887 }, { "epoch": 3.33282470703125e-05, "model_forward_time": 0.02484273910522461, "step": 21842 }, { "epoch": 3.33282470703125e-05, "step": 21842, "training_step_time": 0.11195898056030273 }, { "epoch": 3.332977294921875e-05, "model_forward_time": 0.02499222755432129, "step": 21843 }, { "epoch": 3.332977294921875e-05, "step": 21843, "training_step_time": 0.10643482208251953 }, { "epoch": 3.3331298828125e-05, "model_forward_time": 0.02524256706237793, "step": 21844 }, { "epoch": 3.3331298828125e-05, "step": 21844, "training_step_time": 0.10648369789123535 }, { "epoch": 3.333282470703125e-05, "model_forward_time": 0.02487492561340332, "step": 21845 }, { "epoch": 3.333282470703125e-05, "step": 21845, "training_step_time": 0.10500550270080566 }, { "epoch": 3.33343505859375e-05, "model_forward_time": 0.024409770965576172, "step": 21846 }, { "epoch": 3.33343505859375e-05, "step": 21846, "training_step_time": 0.15057015419006348 }, { "epoch": 3.333587646484375e-05, "model_forward_time": 0.024445056915283203, "step": 21847 }, { "epoch": 3.333587646484375e-05, "step": 21847, "training_step_time": 0.1541757583618164 }, { "epoch": 3.333740234375e-05, "model_forward_time": 0.02426743507385254, "step": 21848 }, { "epoch": 3.333740234375e-05, "step": 21848, "training_step_time": 0.11188340187072754 }, { "epoch": 3.333892822265625e-05, "model_forward_time": 0.024758577346801758, "step": 21849 }, { "epoch": 3.333892822265625e-05, "step": 21849, "training_step_time": 0.1560688018798828 }, { "epoch": 3.33404541015625e-05, "grad_norm": 0.1800937056541443, "learning_rate": 1.8856298966153212e-05, "loss": 0.0065, "step": 21850 }, { "epoch": 3.33404541015625e-05, "model_forward_time": 0.024729251861572266, "step": 21850 }, { "epoch": 3.33404541015625e-05, "step": 21850, "training_step_time": 0.17142367362976074 }, { "epoch": 3.334197998046875e-05, "model_forward_time": 0.02407979965209961, "step": 21851 }, { "epoch": 3.334197998046875e-05, "step": 21851, "training_step_time": 0.17992830276489258 }, { "epoch": 3.3343505859375e-05, "model_forward_time": 0.02410149574279785, "step": 21852 }, { "epoch": 3.3343505859375e-05, "step": 21852, "training_step_time": 0.12167000770568848 }, { "epoch": 3.334503173828125e-05, "model_forward_time": 0.024135828018188477, "step": 21853 }, { "epoch": 3.334503173828125e-05, "step": 21853, "training_step_time": 0.10448479652404785 }, { "epoch": 3.33465576171875e-05, "model_forward_time": 0.025197267532348633, "step": 21854 }, { "epoch": 3.33465576171875e-05, "step": 21854, "training_step_time": 0.19836997985839844 }, { "epoch": 3.334808349609375e-05, "model_forward_time": 0.02428603172302246, "step": 21855 }, { "epoch": 3.334808349609375e-05, "step": 21855, "training_step_time": 0.16509294509887695 }, { "epoch": 3.3349609375e-05, "model_forward_time": 0.0242156982421875, "step": 21856 }, { "epoch": 3.3349609375e-05, "step": 21856, "training_step_time": 0.13178753852844238 }, { "epoch": 3.335113525390625e-05, "model_forward_time": 0.024380922317504883, "step": 21857 }, { "epoch": 3.335113525390625e-05, "step": 21857, "training_step_time": 0.12881207466125488 }, { "epoch": 3.33526611328125e-05, "model_forward_time": 0.024967670440673828, "step": 21858 }, { "epoch": 3.33526611328125e-05, "step": 21858, "training_step_time": 0.12106752395629883 }, { "epoch": 3.335418701171875e-05, "model_forward_time": 0.024941444396972656, "step": 21859 }, { "epoch": 3.335418701171875e-05, "step": 21859, "training_step_time": 0.11635017395019531 }, { "epoch": 3.3355712890625e-05, "grad_norm": 0.24301566183567047, "learning_rate": 1.881319969377987e-05, "loss": 0.0121, "step": 21860 }, { "epoch": 3.3355712890625e-05, "model_forward_time": 0.02521491050720215, "step": 21860 }, { "epoch": 3.3355712890625e-05, "step": 21860, "training_step_time": 0.1148219108581543 }, { "epoch": 3.335723876953125e-05, "model_forward_time": 0.024951696395874023, "step": 21861 }, { "epoch": 3.335723876953125e-05, "step": 21861, "training_step_time": 0.11199665069580078 }, { "epoch": 3.33587646484375e-05, "model_forward_time": 0.02507495880126953, "step": 21862 }, { "epoch": 3.33587646484375e-05, "step": 21862, "training_step_time": 0.10988974571228027 }, { "epoch": 3.336029052734375e-05, "model_forward_time": 0.024872303009033203, "step": 21863 }, { "epoch": 3.336029052734375e-05, "step": 21863, "training_step_time": 0.1070561408996582 }, { "epoch": 3.336181640625e-05, "model_forward_time": 0.026783227920532227, "step": 21864 }, { "epoch": 3.336181640625e-05, "step": 21864, "training_step_time": 0.1087348461151123 }, { "epoch": 3.336334228515625e-05, "model_forward_time": 0.024792909622192383, "step": 21865 }, { "epoch": 3.336334228515625e-05, "step": 21865, "training_step_time": 0.10808944702148438 }, { "epoch": 3.33648681640625e-05, "model_forward_time": 0.023998260498046875, "step": 21866 }, { "epoch": 3.33648681640625e-05, "step": 21866, "training_step_time": 0.10508894920349121 }, { "epoch": 3.336639404296875e-05, "model_forward_time": 0.024866342544555664, "step": 21867 }, { "epoch": 3.336639404296875e-05, "step": 21867, "training_step_time": 0.10407638549804688 }, { "epoch": 3.3367919921875e-05, "model_forward_time": 0.025317668914794922, "step": 21868 }, { "epoch": 3.3367919921875e-05, "step": 21868, "training_step_time": 0.10530734062194824 }, { "epoch": 3.336944580078125e-05, "model_forward_time": 0.025447845458984375, "step": 21869 }, { "epoch": 3.336944580078125e-05, "step": 21869, "training_step_time": 0.10706901550292969 }, { "epoch": 3.33709716796875e-05, "grad_norm": 0.10550834983587265, "learning_rate": 1.877013831630961e-05, "loss": 0.0075, "step": 21870 }, { "epoch": 3.33709716796875e-05, "model_forward_time": 0.024760007858276367, "step": 21870 }, { "epoch": 3.33709716796875e-05, "step": 21870, "training_step_time": 0.11013126373291016 }, { "epoch": 3.337249755859375e-05, "model_forward_time": 0.025163888931274414, "step": 21871 }, { "epoch": 3.337249755859375e-05, "step": 21871, "training_step_time": 0.10971283912658691 }, { "epoch": 3.33740234375e-05, "model_forward_time": 0.02463698387145996, "step": 21872 }, { "epoch": 3.33740234375e-05, "step": 21872, "training_step_time": 0.1077272891998291 }, { "epoch": 3.337554931640625e-05, "model_forward_time": 0.0252532958984375, "step": 21873 }, { "epoch": 3.337554931640625e-05, "step": 21873, "training_step_time": 0.10522007942199707 }, { "epoch": 3.33770751953125e-05, "model_forward_time": 0.025236129760742188, "step": 21874 }, { "epoch": 3.33770751953125e-05, "step": 21874, "training_step_time": 0.17871475219726562 }, { "epoch": 3.337860107421875e-05, "model_forward_time": 0.024168968200683594, "step": 21875 }, { "epoch": 3.337860107421875e-05, "step": 21875, "training_step_time": 0.12221479415893555 }, { "epoch": 3.3380126953125e-05, "model_forward_time": 0.02402472496032715, "step": 21876 }, { "epoch": 3.3380126953125e-05, "step": 21876, "training_step_time": 0.10711050033569336 }, { "epoch": 3.338165283203125e-05, "model_forward_time": 0.02476668357849121, "step": 21877 }, { "epoch": 3.338165283203125e-05, "step": 21877, "training_step_time": 0.10479950904846191 }, { "epoch": 3.33831787109375e-05, "model_forward_time": 0.02499079704284668, "step": 21878 }, { "epoch": 3.33831787109375e-05, "step": 21878, "training_step_time": 0.21601033210754395 }, { "epoch": 3.338470458984375e-05, "model_forward_time": 0.02414846420288086, "step": 21879 }, { "epoch": 3.338470458984375e-05, "step": 21879, "training_step_time": 0.12385129928588867 }, { "epoch": 3.338623046875e-05, "grad_norm": 0.17147311568260193, "learning_rate": 1.872711488606609e-05, "loss": 0.004, "step": 21880 }, { "epoch": 3.338623046875e-05, "model_forward_time": 0.023589611053466797, "step": 21880 }, { "epoch": 3.338623046875e-05, "step": 21880, "training_step_time": 0.11432909965515137 }, { "epoch": 3.338775634765625e-05, "model_forward_time": 0.024756669998168945, "step": 21881 }, { "epoch": 3.338775634765625e-05, "step": 21881, "training_step_time": 0.10818362236022949 }, { "epoch": 3.33892822265625e-05, "model_forward_time": 0.02466726303100586, "step": 21882 }, { "epoch": 3.33892822265625e-05, "step": 21882, "training_step_time": 0.10753607749938965 }, { "epoch": 3.339080810546875e-05, "model_forward_time": 0.024960994720458984, "step": 21883 }, { "epoch": 3.339080810546875e-05, "step": 21883, "training_step_time": 0.10529494285583496 }, { "epoch": 3.3392333984375e-05, "model_forward_time": 0.024820804595947266, "step": 21884 }, { "epoch": 3.3392333984375e-05, "step": 21884, "training_step_time": 0.1078195571899414 }, { "epoch": 3.339385986328125e-05, "model_forward_time": 0.025045156478881836, "step": 21885 }, { "epoch": 3.339385986328125e-05, "step": 21885, "training_step_time": 0.10800671577453613 }, { "epoch": 3.33953857421875e-05, "model_forward_time": 0.025008201599121094, "step": 21886 }, { "epoch": 3.33953857421875e-05, "step": 21886, "training_step_time": 0.10856294631958008 }, { "epoch": 3.339691162109375e-05, "model_forward_time": 0.024991273880004883, "step": 21887 }, { "epoch": 3.339691162109375e-05, "step": 21887, "training_step_time": 0.10384631156921387 }, { "epoch": 3.33984375e-05, "model_forward_time": 0.02796316146850586, "step": 21888 }, { "epoch": 3.33984375e-05, "step": 21888, "training_step_time": 0.10702967643737793 }, { "epoch": 3.339996337890625e-05, "model_forward_time": 0.02507638931274414, "step": 21889 }, { "epoch": 3.339996337890625e-05, "step": 21889, "training_step_time": 0.10507607460021973 }, { "epoch": 3.34014892578125e-05, "grad_norm": 0.19023002684116364, "learning_rate": 1.868412945532681e-05, "loss": 0.0048, "step": 21890 }, { "epoch": 3.34014892578125e-05, "model_forward_time": 0.02390909194946289, "step": 21890 }, { "epoch": 3.34014892578125e-05, "step": 21890, "training_step_time": 0.10432052612304688 }, { "epoch": 3.340301513671875e-05, "model_forward_time": 0.02433633804321289, "step": 21891 }, { "epoch": 3.340301513671875e-05, "step": 21891, "training_step_time": 0.103302001953125 }, { "epoch": 3.3404541015625e-05, "model_forward_time": 0.02447199821472168, "step": 21892 }, { "epoch": 3.3404541015625e-05, "step": 21892, "training_step_time": 0.14989686012268066 }, { "epoch": 3.340606689453125e-05, "model_forward_time": 0.02468252182006836, "step": 21893 }, { "epoch": 3.340606689453125e-05, "step": 21893, "training_step_time": 0.15548014640808105 }, { "epoch": 3.34075927734375e-05, "model_forward_time": 0.024575471878051758, "step": 21894 }, { "epoch": 3.34075927734375e-05, "step": 21894, "training_step_time": 0.1885085105895996 }, { "epoch": 3.340911865234375e-05, "model_forward_time": 0.024600505828857422, "step": 21895 }, { "epoch": 3.340911865234375e-05, "step": 21895, "training_step_time": 0.15296506881713867 }, { "epoch": 3.341064453125e-05, "model_forward_time": 0.024071216583251953, "step": 21896 }, { "epoch": 3.341064453125e-05, "step": 21896, "training_step_time": 0.10956358909606934 }, { "epoch": 3.341217041015625e-05, "model_forward_time": 0.025008678436279297, "step": 21897 }, { "epoch": 3.341217041015625e-05, "step": 21897, "training_step_time": 0.10748910903930664 }, { "epoch": 3.34136962890625e-05, "model_forward_time": 0.025221824645996094, "step": 21898 }, { "epoch": 3.34136962890625e-05, "step": 21898, "training_step_time": 0.11350536346435547 }, { "epoch": 3.341522216796875e-05, "model_forward_time": 0.025104999542236328, "step": 21899 }, { "epoch": 3.341522216796875e-05, "step": 21899, "training_step_time": 0.1551041603088379 }, { "epoch": 3.3416748046875e-05, "grad_norm": 0.1172272339463234, "learning_rate": 1.8641182076323148e-05, "loss": 0.0062, "step": 21900 }, { "epoch": 3.3416748046875e-05, "model_forward_time": 0.024877071380615234, "step": 21900 }, { "epoch": 3.3416748046875e-05, "step": 21900, "training_step_time": 0.1443798542022705 }, { "epoch": 3.341827392578125e-05, "model_forward_time": 0.02397322654724121, "step": 21901 }, { "epoch": 3.341827392578125e-05, "step": 21901, "training_step_time": 0.11443853378295898 }, { "epoch": 3.34197998046875e-05, "model_forward_time": 0.025059223175048828, "step": 21902 }, { "epoch": 3.34197998046875e-05, "step": 21902, "training_step_time": 0.11507773399353027 }, { "epoch": 3.342132568359375e-05, "model_forward_time": 0.02497720718383789, "step": 21903 }, { "epoch": 3.342132568359375e-05, "step": 21903, "training_step_time": 0.11725187301635742 }, { "epoch": 3.34228515625e-05, "model_forward_time": 0.02496790885925293, "step": 21904 }, { "epoch": 3.34228515625e-05, "step": 21904, "training_step_time": 0.12517571449279785 }, { "epoch": 3.342437744140625e-05, "model_forward_time": 0.025188684463500977, "step": 21905 }, { "epoch": 3.342437744140625e-05, "step": 21905, "training_step_time": 0.1178278923034668 }, { "epoch": 3.34259033203125e-05, "model_forward_time": 0.024924516677856445, "step": 21906 }, { "epoch": 3.34259033203125e-05, "step": 21906, "training_step_time": 0.11422872543334961 }, { "epoch": 3.342742919921875e-05, "model_forward_time": 0.024988412857055664, "step": 21907 }, { "epoch": 3.342742919921875e-05, "step": 21907, "training_step_time": 0.11598563194274902 }, { "epoch": 3.3428955078125e-05, "model_forward_time": 0.026479005813598633, "step": 21908 }, { "epoch": 3.3428955078125e-05, "step": 21908, "training_step_time": 0.11613059043884277 }, { "epoch": 3.343048095703125e-05, "model_forward_time": 0.025203227996826172, "step": 21909 }, { "epoch": 3.343048095703125e-05, "step": 21909, "training_step_time": 0.11009955406188965 }, { "epoch": 3.34320068359375e-05, "grad_norm": 0.2895066738128662, "learning_rate": 1.8598272801240213e-05, "loss": 0.0104, "step": 21910 }, { "epoch": 3.34320068359375e-05, "model_forward_time": 0.024789094924926758, "step": 21910 }, { "epoch": 3.34320068359375e-05, "step": 21910, "training_step_time": 0.10646462440490723 }, { "epoch": 3.343353271484375e-05, "model_forward_time": 0.025174856185913086, "step": 21911 }, { "epoch": 3.343353271484375e-05, "step": 21911, "training_step_time": 0.10925126075744629 }, { "epoch": 3.343505859375e-05, "model_forward_time": 0.024848461151123047, "step": 21912 }, { "epoch": 3.343505859375e-05, "step": 21912, "training_step_time": 0.10745668411254883 }, { "epoch": 3.343658447265625e-05, "model_forward_time": 0.024905681610107422, "step": 21913 }, { "epoch": 3.343658447265625e-05, "step": 21913, "training_step_time": 0.1065225601196289 }, { "epoch": 3.34381103515625e-05, "model_forward_time": 0.025066852569580078, "step": 21914 }, { "epoch": 3.34381103515625e-05, "step": 21914, "training_step_time": 0.10826826095581055 }, { "epoch": 3.343963623046875e-05, "model_forward_time": 0.024744749069213867, "step": 21915 }, { "epoch": 3.343963623046875e-05, "step": 21915, "training_step_time": 0.10559749603271484 }, { "epoch": 3.3441162109375e-05, "model_forward_time": 0.024999141693115234, "step": 21916 }, { "epoch": 3.3441162109375e-05, "step": 21916, "training_step_time": 0.10587930679321289 }, { "epoch": 3.344268798828125e-05, "model_forward_time": 0.02464127540588379, "step": 21917 }, { "epoch": 3.344268798828125e-05, "step": 21917, "training_step_time": 0.10648727416992188 }, { "epoch": 3.34442138671875e-05, "model_forward_time": 0.028539419174194336, "step": 21918 }, { "epoch": 3.34442138671875e-05, "step": 21918, "training_step_time": 0.10886812210083008 }, { "epoch": 3.344573974609375e-05, "model_forward_time": 0.02498149871826172, "step": 21919 }, { "epoch": 3.344573974609375e-05, "step": 21919, "training_step_time": 0.10808062553405762 }, { "epoch": 3.3447265625e-05, "grad_norm": 0.15119513869285583, "learning_rate": 1.855540168221681e-05, "loss": 0.0073, "step": 21920 }, { "epoch": 3.3447265625e-05, "model_forward_time": 0.024871349334716797, "step": 21920 }, { "epoch": 3.3447265625e-05, "step": 21920, "training_step_time": 0.10555005073547363 }, { "epoch": 3.344879150390625e-05, "model_forward_time": 0.02491450309753418, "step": 21921 }, { "epoch": 3.344879150390625e-05, "step": 21921, "training_step_time": 0.13134169578552246 }, { "epoch": 3.34503173828125e-05, "model_forward_time": 0.0251920223236084, "step": 21922 }, { "epoch": 3.34503173828125e-05, "step": 21922, "training_step_time": 0.11861729621887207 }, { "epoch": 3.345184326171875e-05, "model_forward_time": 0.02498602867126465, "step": 21923 }, { "epoch": 3.345184326171875e-05, "step": 21923, "training_step_time": 0.13724589347839355 }, { "epoch": 3.3453369140625e-05, "model_forward_time": 0.024552345275878906, "step": 21924 }, { "epoch": 3.3453369140625e-05, "step": 21924, "training_step_time": 0.10592198371887207 }, { "epoch": 3.345489501953125e-05, "model_forward_time": 0.024903297424316406, "step": 21925 }, { "epoch": 3.345489501953125e-05, "step": 21925, "training_step_time": 0.1693730354309082 }, { "epoch": 3.34564208984375e-05, "model_forward_time": 0.02442336082458496, "step": 21926 }, { "epoch": 3.34564208984375e-05, "step": 21926, "training_step_time": 0.13880157470703125 }, { "epoch": 3.345794677734375e-05, "model_forward_time": 0.023960590362548828, "step": 21927 }, { "epoch": 3.345794677734375e-05, "step": 21927, "training_step_time": 0.11413073539733887 }, { "epoch": 3.345947265625e-05, "model_forward_time": 0.02466297149658203, "step": 21928 }, { "epoch": 3.345947265625e-05, "step": 21928, "training_step_time": 0.10674023628234863 }, { "epoch": 3.346099853515625e-05, "model_forward_time": 0.0251309871673584, "step": 21929 }, { "epoch": 3.346099853515625e-05, "step": 21929, "training_step_time": 0.1786787509918213 }, { "epoch": 3.34625244140625e-05, "grad_norm": 0.16395282745361328, "learning_rate": 1.851256877134538e-05, "loss": 0.0047, "step": 21930 }, { "epoch": 3.34625244140625e-05, "model_forward_time": 0.02409839630126953, "step": 21930 }, { "epoch": 3.34625244140625e-05, "step": 21930, "training_step_time": 0.20003819465637207 }, { "epoch": 3.346405029296875e-05, "model_forward_time": 0.024295806884765625, "step": 21931 }, { "epoch": 3.346405029296875e-05, "step": 21931, "training_step_time": 0.19412827491760254 }, { "epoch": 3.3465576171875e-05, "model_forward_time": 0.024642229080200195, "step": 21932 }, { "epoch": 3.3465576171875e-05, "step": 21932, "training_step_time": 0.18524527549743652 }, { "epoch": 3.346710205078125e-05, "model_forward_time": 0.02411627769470215, "step": 21933 }, { "epoch": 3.346710205078125e-05, "step": 21933, "training_step_time": 0.1680307388305664 }, { "epoch": 3.34686279296875e-05, "model_forward_time": 0.023997068405151367, "step": 21934 }, { "epoch": 3.34686279296875e-05, "step": 21934, "training_step_time": 0.11161065101623535 }, { "epoch": 3.347015380859375e-05, "model_forward_time": 0.025363445281982422, "step": 21935 }, { "epoch": 3.347015380859375e-05, "step": 21935, "training_step_time": 0.10446786880493164 }, { "epoch": 3.34716796875e-05, "model_forward_time": 0.025089025497436523, "step": 21936 }, { "epoch": 3.34716796875e-05, "step": 21936, "training_step_time": 0.14411163330078125 }, { "epoch": 3.347320556640625e-05, "model_forward_time": 0.024474143981933594, "step": 21937 }, { "epoch": 3.347320556640625e-05, "step": 21937, "training_step_time": 0.15804171562194824 }, { "epoch": 3.34747314453125e-05, "model_forward_time": 0.026667356491088867, "step": 21938 }, { "epoch": 3.34747314453125e-05, "step": 21938, "training_step_time": 0.16880297660827637 }, { "epoch": 3.347625732421875e-05, "model_forward_time": 0.024512529373168945, "step": 21939 }, { "epoch": 3.347625732421875e-05, "step": 21939, "training_step_time": 0.16932439804077148 }, { "epoch": 3.3477783203125e-05, "grad_norm": 0.41240185499191284, "learning_rate": 1.846977412067198e-05, "loss": 0.0073, "step": 21940 }, { "epoch": 3.3477783203125e-05, "model_forward_time": 0.024316072463989258, "step": 21940 }, { "epoch": 3.3477783203125e-05, "step": 21940, "training_step_time": 0.17650890350341797 }, { "epoch": 3.347930908203125e-05, "model_forward_time": 0.024280548095703125, "step": 21941 }, { "epoch": 3.347930908203125e-05, "step": 21941, "training_step_time": 0.11753559112548828 }, { "epoch": 3.34808349609375e-05, "model_forward_time": 0.02404046058654785, "step": 21942 }, { "epoch": 3.34808349609375e-05, "step": 21942, "training_step_time": 0.10599541664123535 }, { "epoch": 3.348236083984375e-05, "model_forward_time": 0.024616241455078125, "step": 21943 }, { "epoch": 3.348236083984375e-05, "step": 21943, "training_step_time": 0.1569383144378662 }, { "epoch": 3.348388671875e-05, "model_forward_time": 0.024314165115356445, "step": 21944 }, { "epoch": 3.348388671875e-05, "step": 21944, "training_step_time": 0.14064669609069824 }, { "epoch": 3.348541259765625e-05, "model_forward_time": 0.02416396141052246, "step": 21945 }, { "epoch": 3.348541259765625e-05, "step": 21945, "training_step_time": 0.18384146690368652 }, { "epoch": 3.34869384765625e-05, "model_forward_time": 0.0240323543548584, "step": 21946 }, { "epoch": 3.34869384765625e-05, "step": 21946, "training_step_time": 0.15659189224243164 }, { "epoch": 3.348846435546875e-05, "model_forward_time": 0.023503780364990234, "step": 21947 }, { "epoch": 3.348846435546875e-05, "step": 21947, "training_step_time": 0.12787342071533203 }, { "epoch": 3.3489990234375e-05, "model_forward_time": 0.024297714233398438, "step": 21948 }, { "epoch": 3.3489990234375e-05, "step": 21948, "training_step_time": 0.12018895149230957 }, { "epoch": 3.349151611328125e-05, "model_forward_time": 0.026633262634277344, "step": 21949 }, { "epoch": 3.349151611328125e-05, "step": 21949, "training_step_time": 0.11145448684692383 }, { "epoch": 3.34930419921875e-05, "grad_norm": 0.11369265615940094, "learning_rate": 1.8427017782196127e-05, "loss": 0.0037, "step": 21950 }, { "epoch": 3.34930419921875e-05, "model_forward_time": 0.02529144287109375, "step": 21950 }, { "epoch": 3.34930419921875e-05, "step": 21950, "training_step_time": 0.1411571502685547 }, { "epoch": 3.349456787109375e-05, "model_forward_time": 0.028241634368896484, "step": 21951 }, { "epoch": 3.349456787109375e-05, "step": 21951, "training_step_time": 0.1082770824432373 }, { "epoch": 3.349609375e-05, "model_forward_time": 0.026320695877075195, "step": 21952 }, { "epoch": 3.349609375e-05, "step": 21952, "training_step_time": 0.1068868637084961 }, { "epoch": 3.349761962890625e-05, "model_forward_time": 0.02522420883178711, "step": 21953 }, { "epoch": 3.349761962890625e-05, "step": 21953, "training_step_time": 0.10429120063781738 }, { "epoch": 3.34991455078125e-05, "model_forward_time": 0.02511906623840332, "step": 21954 }, { "epoch": 3.34991455078125e-05, "step": 21954, "training_step_time": 0.10689425468444824 }, { "epoch": 3.350067138671875e-05, "model_forward_time": 0.02586531639099121, "step": 21955 }, { "epoch": 3.350067138671875e-05, "step": 21955, "training_step_time": 0.10547304153442383 }, { "epoch": 3.3502197265625e-05, "model_forward_time": 0.025156736373901367, "step": 21956 }, { "epoch": 3.3502197265625e-05, "step": 21956, "training_step_time": 0.10576033592224121 }, { "epoch": 3.350372314453125e-05, "model_forward_time": 0.024840354919433594, "step": 21957 }, { "epoch": 3.350372314453125e-05, "step": 21957, "training_step_time": 0.10561513900756836 }, { "epoch": 3.35052490234375e-05, "model_forward_time": 0.025061845779418945, "step": 21958 }, { "epoch": 3.35052490234375e-05, "step": 21958, "training_step_time": 0.10975503921508789 }, { "epoch": 3.350677490234375e-05, "model_forward_time": 0.02515101432800293, "step": 21959 }, { "epoch": 3.350677490234375e-05, "step": 21959, "training_step_time": 0.1128835678100586 }, { "epoch": 3.350830078125e-05, "grad_norm": 0.18088452517986298, "learning_rate": 1.838429980787081e-05, "loss": 0.0065, "step": 21960 }, { "epoch": 3.350830078125e-05, "model_forward_time": 0.025295734405517578, "step": 21960 }, { "epoch": 3.350830078125e-05, "step": 21960, "training_step_time": 0.1054234504699707 }, { "epoch": 3.350982666015625e-05, "model_forward_time": 0.025385379791259766, "step": 21961 }, { "epoch": 3.350982666015625e-05, "step": 21961, "training_step_time": 0.1065981388092041 }, { "epoch": 3.35113525390625e-05, "model_forward_time": 0.02478647232055664, "step": 21962 }, { "epoch": 3.35113525390625e-05, "step": 21962, "training_step_time": 0.10497903823852539 }, { "epoch": 3.351287841796875e-05, "model_forward_time": 0.025162935256958008, "step": 21963 }, { "epoch": 3.351287841796875e-05, "step": 21963, "training_step_time": 0.10491132736206055 }, { "epoch": 3.3514404296875e-05, "model_forward_time": 0.024976730346679688, "step": 21964 }, { "epoch": 3.3514404296875e-05, "step": 21964, "training_step_time": 0.14592242240905762 }, { "epoch": 3.351593017578125e-05, "model_forward_time": 0.024506092071533203, "step": 21965 }, { "epoch": 3.351593017578125e-05, "step": 21965, "training_step_time": 0.12034487724304199 }, { "epoch": 3.35174560546875e-05, "model_forward_time": 0.02522444725036621, "step": 21966 }, { "epoch": 3.35174560546875e-05, "step": 21966, "training_step_time": 0.12931394577026367 }, { "epoch": 3.351898193359375e-05, "model_forward_time": 0.024785518646240234, "step": 21967 }, { "epoch": 3.351898193359375e-05, "step": 21967, "training_step_time": 0.15756988525390625 }, { "epoch": 3.35205078125e-05, "model_forward_time": 0.024558305740356445, "step": 21968 }, { "epoch": 3.35205078125e-05, "step": 21968, "training_step_time": 0.10171723365783691 }, { "epoch": 3.352203369140625e-05, "model_forward_time": 0.025147676467895508, "step": 21969 }, { "epoch": 3.352203369140625e-05, "step": 21969, "training_step_time": 0.12040376663208008 }, { "epoch": 3.35235595703125e-05, "grad_norm": 0.10777976363897324, "learning_rate": 1.8341620249602387e-05, "loss": 0.0073, "step": 21970 }, { "epoch": 3.35235595703125e-05, "model_forward_time": 0.02538776397705078, "step": 21970 }, { "epoch": 3.35235595703125e-05, "step": 21970, "training_step_time": 0.11500930786132812 }, { "epoch": 3.352508544921875e-05, "model_forward_time": 0.02500295639038086, "step": 21971 }, { "epoch": 3.352508544921875e-05, "step": 21971, "training_step_time": 0.10777401924133301 }, { "epoch": 3.3526611328125e-05, "model_forward_time": 0.024872303009033203, "step": 21972 }, { "epoch": 3.3526611328125e-05, "step": 21972, "training_step_time": 0.1077277660369873 }, { "epoch": 3.352813720703125e-05, "model_forward_time": 0.025223493576049805, "step": 21973 }, { "epoch": 3.352813720703125e-05, "step": 21973, "training_step_time": 0.10329580307006836 }, { "epoch": 3.35296630859375e-05, "model_forward_time": 0.025937795639038086, "step": 21974 }, { "epoch": 3.35296630859375e-05, "step": 21974, "training_step_time": 0.10796785354614258 }, { "epoch": 3.353118896484375e-05, "model_forward_time": 0.02512836456298828, "step": 21975 }, { "epoch": 3.353118896484375e-05, "step": 21975, "training_step_time": 0.10573673248291016 }, { "epoch": 3.353271484375e-05, "model_forward_time": 0.02514505386352539, "step": 21976 }, { "epoch": 3.353271484375e-05, "step": 21976, "training_step_time": 0.10509777069091797 }, { "epoch": 3.353424072265625e-05, "model_forward_time": 0.02508687973022461, "step": 21977 }, { "epoch": 3.353424072265625e-05, "step": 21977, "training_step_time": 0.1089315414428711 }, { "epoch": 3.35357666015625e-05, "model_forward_time": 0.024575471878051758, "step": 21978 }, { "epoch": 3.35357666015625e-05, "step": 21978, "training_step_time": 0.10536813735961914 }, { "epoch": 3.353729248046875e-05, "model_forward_time": 0.025023698806762695, "step": 21979 }, { "epoch": 3.353729248046875e-05, "step": 21979, "training_step_time": 0.10579252243041992 }, { "epoch": 3.3538818359375e-05, "grad_norm": 0.1644177883863449, "learning_rate": 1.8298979159250557e-05, "loss": 0.0055, "step": 21980 }, { "epoch": 3.3538818359375e-05, "model_forward_time": 0.024699926376342773, "step": 21980 }, { "epoch": 3.3538818359375e-05, "step": 21980, "training_step_time": 0.10270261764526367 }, { "epoch": 3.354034423828125e-05, "model_forward_time": 0.024265289306640625, "step": 21981 }, { "epoch": 3.354034423828125e-05, "step": 21981, "training_step_time": 0.15401196479797363 }, { "epoch": 3.35418701171875e-05, "model_forward_time": 0.025017499923706055, "step": 21982 }, { "epoch": 3.35418701171875e-05, "step": 21982, "training_step_time": 0.15387392044067383 }, { "epoch": 3.354339599609375e-05, "model_forward_time": 0.024634122848510742, "step": 21983 }, { "epoch": 3.354339599609375e-05, "step": 21983, "training_step_time": 0.1511986255645752 }, { "epoch": 3.3544921875e-05, "model_forward_time": 0.025026559829711914, "step": 21984 }, { "epoch": 3.3544921875e-05, "step": 21984, "training_step_time": 0.17862486839294434 }, { "epoch": 3.354644775390625e-05, "model_forward_time": 0.02434086799621582, "step": 21985 }, { "epoch": 3.354644775390625e-05, "step": 21985, "training_step_time": 0.1838853359222412 }, { "epoch": 3.35479736328125e-05, "model_forward_time": 0.02460789680480957, "step": 21986 }, { "epoch": 3.35479736328125e-05, "step": 21986, "training_step_time": 0.12901043891906738 }, { "epoch": 3.354949951171875e-05, "model_forward_time": 0.024167776107788086, "step": 21987 }, { "epoch": 3.354949951171875e-05, "step": 21987, "training_step_time": 0.17749977111816406 }, { "epoch": 3.3551025390625e-05, "model_forward_time": 0.024352312088012695, "step": 21988 }, { "epoch": 3.3551025390625e-05, "step": 21988, "training_step_time": 0.11972546577453613 }, { "epoch": 3.355255126953125e-05, "model_forward_time": 0.02433180809020996, "step": 21989 }, { "epoch": 3.355255126953125e-05, "step": 21989, "training_step_time": 0.17307186126708984 }, { "epoch": 3.35540771484375e-05, "grad_norm": 0.13825196027755737, "learning_rate": 1.8256376588628238e-05, "loss": 0.006, "step": 21990 }, { "epoch": 3.35540771484375e-05, "model_forward_time": 0.024563074111938477, "step": 21990 }, { "epoch": 3.35540771484375e-05, "step": 21990, "training_step_time": 0.12290668487548828 }, { "epoch": 3.355560302734375e-05, "model_forward_time": 0.024152517318725586, "step": 21991 }, { "epoch": 3.355560302734375e-05, "step": 21991, "training_step_time": 0.13365983963012695 }, { "epoch": 3.355712890625e-05, "model_forward_time": 0.024890899658203125, "step": 21992 }, { "epoch": 3.355712890625e-05, "step": 21992, "training_step_time": 0.1719675064086914 }, { "epoch": 3.355865478515625e-05, "model_forward_time": 0.026370763778686523, "step": 21993 }, { "epoch": 3.355865478515625e-05, "step": 21993, "training_step_time": 0.1783008575439453 }, { "epoch": 3.35601806640625e-05, "model_forward_time": 0.02379441261291504, "step": 21994 }, { "epoch": 3.35601806640625e-05, "step": 21994, "training_step_time": 0.12861299514770508 }, { "epoch": 3.356170654296875e-05, "model_forward_time": 0.024871826171875, "step": 21995 }, { "epoch": 3.356170654296875e-05, "step": 21995, "training_step_time": 0.11438393592834473 }, { "epoch": 3.3563232421875e-05, "model_forward_time": 0.024910926818847656, "step": 21996 }, { "epoch": 3.3563232421875e-05, "step": 21996, "training_step_time": 0.10733199119567871 }, { "epoch": 3.356475830078125e-05, "model_forward_time": 0.025028705596923828, "step": 21997 }, { "epoch": 3.356475830078125e-05, "step": 21997, "training_step_time": 0.10549330711364746 }, { "epoch": 3.35662841796875e-05, "model_forward_time": 0.024802446365356445, "step": 21998 }, { "epoch": 3.35662841796875e-05, "step": 21998, "training_step_time": 0.10600852966308594 }, { "epoch": 3.356781005859375e-05, "model_forward_time": 0.025032520294189453, "step": 21999 }, { "epoch": 3.356781005859375e-05, "step": 21999, "training_step_time": 0.10514092445373535 }, { "epoch": 3.35693359375e-05, "grad_norm": 0.15530382096767426, "learning_rate": 1.821381258950161e-05, "loss": 0.0055, "step": 22000 }, { "epoch": 3.35693359375e-05, "model_forward_time": 0.02475452423095703, "step": 22000 }, { "epoch": 3.35693359375e-05, "step": 22000, "training_step_time": 0.09588789939880371 }, { "epoch": 3.357086181640625e-05, "model_forward_time": 0.022913455963134766, "step": 22001 }, { "epoch": 3.357086181640625e-05, "step": 22001, "training_step_time": 0.09594488143920898 }, { "epoch": 3.35723876953125e-05, "model_forward_time": 0.024338483810424805, "step": 22002 }, { "epoch": 3.35723876953125e-05, "step": 22002, "training_step_time": 0.1024024486541748 }, { "epoch": 3.357391357421875e-05, "model_forward_time": 0.024985790252685547, "step": 22003 }, { "epoch": 3.357391357421875e-05, "step": 22003, "training_step_time": 0.1040964126586914 }, { "epoch": 3.3575439453125e-05, "model_forward_time": 0.0244293212890625, "step": 22004 }, { "epoch": 3.3575439453125e-05, "step": 22004, "training_step_time": 0.10966968536376953 }, { "epoch": 3.357696533203125e-05, "model_forward_time": 0.02476024627685547, "step": 22005 }, { "epoch": 3.357696533203125e-05, "step": 22005, "training_step_time": 0.10764956474304199 }, { "epoch": 3.35784912109375e-05, "model_forward_time": 0.02488541603088379, "step": 22006 }, { "epoch": 3.35784912109375e-05, "step": 22006, "training_step_time": 0.10936117172241211 }, { "epoch": 3.358001708984375e-05, "model_forward_time": 0.024812936782836914, "step": 22007 }, { "epoch": 3.358001708984375e-05, "step": 22007, "training_step_time": 0.10627460479736328 }, { "epoch": 3.358154296875e-05, "model_forward_time": 0.02492046356201172, "step": 22008 }, { "epoch": 3.358154296875e-05, "step": 22008, "training_step_time": 0.10655641555786133 }, { "epoch": 3.358306884765625e-05, "model_forward_time": 0.023703575134277344, "step": 22009 }, { "epoch": 3.358306884765625e-05, "step": 22009, "training_step_time": 0.10750484466552734 }, { "epoch": 3.35845947265625e-05, "grad_norm": 0.16007855534553528, "learning_rate": 1.817128721358991e-05, "loss": 0.0042, "step": 22010 }, { "epoch": 3.35845947265625e-05, "model_forward_time": 0.024477481842041016, "step": 22010 }, { "epoch": 3.35845947265625e-05, "step": 22010, "training_step_time": 0.19192862510681152 }, { "epoch": 3.358612060546875e-05, "model_forward_time": 0.024020671844482422, "step": 22011 }, { "epoch": 3.358612060546875e-05, "step": 22011, "training_step_time": 0.12665152549743652 }, { "epoch": 3.3587646484375e-05, "model_forward_time": 0.02410888671875, "step": 22012 }, { "epoch": 3.3587646484375e-05, "step": 22012, "training_step_time": 0.1696009635925293 }, { "epoch": 3.358917236328125e-05, "model_forward_time": 0.02480459213256836, "step": 22013 }, { "epoch": 3.358917236328125e-05, "step": 22013, "training_step_time": 0.1442399024963379 }, { "epoch": 3.35906982421875e-05, "model_forward_time": 0.024608135223388672, "step": 22014 }, { "epoch": 3.35906982421875e-05, "step": 22014, "training_step_time": 0.1261286735534668 }, { "epoch": 3.359222412109375e-05, "model_forward_time": 0.024415969848632812, "step": 22015 }, { "epoch": 3.359222412109375e-05, "step": 22015, "training_step_time": 0.15874719619750977 }, { "epoch": 3.359375e-05, "model_forward_time": 0.024023771286010742, "step": 22016 }, { "epoch": 3.359375e-05, "step": 22016, "training_step_time": 0.10609292984008789 }, { "epoch": 3.359527587890625e-05, "model_forward_time": 0.024616241455078125, "step": 22017 }, { "epoch": 3.359527587890625e-05, "step": 22017, "training_step_time": 0.10281562805175781 }, { "epoch": 3.35968017578125e-05, "model_forward_time": 0.025092601776123047, "step": 22018 }, { "epoch": 3.35968017578125e-05, "step": 22018, "training_step_time": 0.10621118545532227 }, { "epoch": 3.359832763671875e-05, "model_forward_time": 0.025038957595825195, "step": 22019 }, { "epoch": 3.359832763671875e-05, "step": 22019, "training_step_time": 0.10753583908081055 }, { "epoch": 3.3599853515625e-05, "grad_norm": 0.12033859640359879, "learning_rate": 1.8128800512565513e-05, "loss": 0.0081, "step": 22020 }, { "epoch": 3.3599853515625e-05, "model_forward_time": 0.025561809539794922, "step": 22020 }, { "epoch": 3.3599853515625e-05, "step": 22020, "training_step_time": 0.1090993881225586 }, { "epoch": 3.360137939453125e-05, "model_forward_time": 0.024938344955444336, "step": 22021 }, { "epoch": 3.360137939453125e-05, "step": 22021, "training_step_time": 0.11868143081665039 }, { "epoch": 3.36029052734375e-05, "model_forward_time": 0.025056123733520508, "step": 22022 }, { "epoch": 3.36029052734375e-05, "step": 22022, "training_step_time": 0.11347270011901855 }, { "epoch": 3.360443115234375e-05, "model_forward_time": 0.024766206741333008, "step": 22023 }, { "epoch": 3.360443115234375e-05, "step": 22023, "training_step_time": 0.11670064926147461 }, { "epoch": 3.360595703125e-05, "model_forward_time": 0.024721145629882812, "step": 22024 }, { "epoch": 3.360595703125e-05, "step": 22024, "training_step_time": 0.1120598316192627 }, { "epoch": 3.360748291015625e-05, "model_forward_time": 0.024762868881225586, "step": 22025 }, { "epoch": 3.360748291015625e-05, "step": 22025, "training_step_time": 0.10821819305419922 }, { "epoch": 3.36090087890625e-05, "model_forward_time": 0.02478957176208496, "step": 22026 }, { "epoch": 3.36090087890625e-05, "step": 22026, "training_step_time": 0.1033179759979248 }, { "epoch": 3.361053466796875e-05, "model_forward_time": 0.0240328311920166, "step": 22027 }, { "epoch": 3.361053466796875e-05, "step": 22027, "training_step_time": 0.15194106101989746 }, { "epoch": 3.3612060546875e-05, "model_forward_time": 0.024971961975097656, "step": 22028 }, { "epoch": 3.3612060546875e-05, "step": 22028, "training_step_time": 0.16945433616638184 }, { "epoch": 3.361358642578125e-05, "model_forward_time": 0.0243375301361084, "step": 22029 }, { "epoch": 3.361358642578125e-05, "step": 22029, "training_step_time": 0.17234539985656738 }, { "epoch": 3.36151123046875e-05, "grad_norm": 0.19749611616134644, "learning_rate": 1.808635253805376e-05, "loss": 0.0095, "step": 22030 }, { "epoch": 3.36151123046875e-05, "model_forward_time": 0.024533748626708984, "step": 22030 }, { "epoch": 3.36151123046875e-05, "step": 22030, "training_step_time": 0.17444086074829102 }, { "epoch": 3.361663818359375e-05, "model_forward_time": 0.024257183074951172, "step": 22031 }, { "epoch": 3.361663818359375e-05, "step": 22031, "training_step_time": 0.11185789108276367 }, { "epoch": 3.36181640625e-05, "model_forward_time": 0.024164676666259766, "step": 22032 }, { "epoch": 3.36181640625e-05, "step": 22032, "training_step_time": 0.11118507385253906 }, { "epoch": 3.361968994140625e-05, "model_forward_time": 0.024956941604614258, "step": 22033 }, { "epoch": 3.361968994140625e-05, "step": 22033, "training_step_time": 0.16123604774475098 }, { "epoch": 3.36212158203125e-05, "model_forward_time": 0.024712800979614258, "step": 22034 }, { "epoch": 3.36212158203125e-05, "step": 22034, "training_step_time": 0.14887022972106934 }, { "epoch": 3.362274169921875e-05, "model_forward_time": 0.023926258087158203, "step": 22035 }, { "epoch": 3.362274169921875e-05, "step": 22035, "training_step_time": 0.10612106323242188 }, { "epoch": 3.3624267578125e-05, "model_forward_time": 0.02471637725830078, "step": 22036 }, { "epoch": 3.3624267578125e-05, "step": 22036, "training_step_time": 0.10458660125732422 }, { "epoch": 3.362579345703125e-05, "model_forward_time": 0.024910926818847656, "step": 22037 }, { "epoch": 3.362579345703125e-05, "step": 22037, "training_step_time": 0.12972354888916016 }, { "epoch": 3.36273193359375e-05, "model_forward_time": 0.02482914924621582, "step": 22038 }, { "epoch": 3.36273193359375e-05, "step": 22038, "training_step_time": 0.10700535774230957 }, { "epoch": 3.362884521484375e-05, "model_forward_time": 0.024960756301879883, "step": 22039 }, { "epoch": 3.362884521484375e-05, "step": 22039, "training_step_time": 0.10965728759765625 }, { "epoch": 3.363037109375e-05, "grad_norm": 0.17156188189983368, "learning_rate": 1.8043943341632907e-05, "loss": 0.0088, "step": 22040 }, { "epoch": 3.363037109375e-05, "model_forward_time": 0.025137662887573242, "step": 22040 }, { "epoch": 3.363037109375e-05, "step": 22040, "training_step_time": 0.12963414192199707 }, { "epoch": 3.363189697265625e-05, "model_forward_time": 0.02479410171508789, "step": 22041 }, { "epoch": 3.363189697265625e-05, "step": 22041, "training_step_time": 0.12388968467712402 }, { "epoch": 3.36334228515625e-05, "model_forward_time": 0.02495741844177246, "step": 22042 }, { "epoch": 3.36334228515625e-05, "step": 22042, "training_step_time": 0.11787271499633789 }, { "epoch": 3.363494873046875e-05, "model_forward_time": 0.02500152587890625, "step": 22043 }, { "epoch": 3.363494873046875e-05, "step": 22043, "training_step_time": 0.11094880104064941 }, { "epoch": 3.3636474609375e-05, "model_forward_time": 0.024794340133666992, "step": 22044 }, { "epoch": 3.3636474609375e-05, "step": 22044, "training_step_time": 0.10430240631103516 }, { "epoch": 3.363800048828125e-05, "model_forward_time": 0.024932861328125, "step": 22045 }, { "epoch": 3.363800048828125e-05, "step": 22045, "training_step_time": 0.10292458534240723 }, { "epoch": 3.36395263671875e-05, "model_forward_time": 0.024818897247314453, "step": 22046 }, { "epoch": 3.36395263671875e-05, "step": 22046, "training_step_time": 0.10722613334655762 }, { "epoch": 3.364105224609375e-05, "model_forward_time": 0.025091886520385742, "step": 22047 }, { "epoch": 3.364105224609375e-05, "step": 22047, "training_step_time": 0.10452675819396973 }, { "epoch": 3.3642578125e-05, "model_forward_time": 0.024979829788208008, "step": 22048 }, { "epoch": 3.3642578125e-05, "step": 22048, "training_step_time": 0.10580730438232422 }, { "epoch": 3.364410400390625e-05, "model_forward_time": 0.024852514266967773, "step": 22049 }, { "epoch": 3.364410400390625e-05, "step": 22049, "training_step_time": 0.10592484474182129 }, { "epoch": 3.36456298828125e-05, "grad_norm": 0.09379428625106812, "learning_rate": 1.800157297483417e-05, "loss": 0.006, "step": 22050 }, { "epoch": 3.36456298828125e-05, "model_forward_time": 0.02552485466003418, "step": 22050 }, { "epoch": 3.36456298828125e-05, "step": 22050, "training_step_time": 0.10558485984802246 }, { "epoch": 3.364715576171875e-05, "model_forward_time": 0.02476644515991211, "step": 22051 }, { "epoch": 3.364715576171875e-05, "step": 22051, "training_step_time": 0.11562156677246094 }, { "epoch": 3.3648681640625e-05, "model_forward_time": 0.02506279945373535, "step": 22052 }, { "epoch": 3.3648681640625e-05, "step": 22052, "training_step_time": 0.11258411407470703 }, { "epoch": 3.365020751953125e-05, "model_forward_time": 0.025385141372680664, "step": 22053 }, { "epoch": 3.365020751953125e-05, "step": 22053, "training_step_time": 0.1081690788269043 }, { "epoch": 3.36517333984375e-05, "model_forward_time": 0.02518296241760254, "step": 22054 }, { "epoch": 3.36517333984375e-05, "step": 22054, "training_step_time": 0.10996127128601074 }, { "epoch": 3.365325927734375e-05, "model_forward_time": 0.024766206741333008, "step": 22055 }, { "epoch": 3.365325927734375e-05, "step": 22055, "training_step_time": 0.107208251953125 }, { "epoch": 3.365478515625e-05, "model_forward_time": 0.025093555450439453, "step": 22056 }, { "epoch": 3.365478515625e-05, "step": 22056, "training_step_time": 0.12782049179077148 }, { "epoch": 3.365631103515625e-05, "model_forward_time": 0.025124073028564453, "step": 22057 }, { "epoch": 3.365631103515625e-05, "step": 22057, "training_step_time": 0.12386012077331543 }, { "epoch": 3.36578369140625e-05, "model_forward_time": 0.02506732940673828, "step": 22058 }, { "epoch": 3.36578369140625e-05, "step": 22058, "training_step_time": 0.132612943649292 }, { "epoch": 3.365936279296875e-05, "model_forward_time": 0.0253298282623291, "step": 22059 }, { "epoch": 3.365936279296875e-05, "step": 22059, "training_step_time": 0.11371159553527832 }, { "epoch": 3.3660888671875e-05, "grad_norm": 0.11976824700832367, "learning_rate": 1.7959241489141525e-05, "loss": 0.0043, "step": 22060 }, { "epoch": 3.3660888671875e-05, "model_forward_time": 0.024948596954345703, "step": 22060 }, { "epoch": 3.3660888671875e-05, "step": 22060, "training_step_time": 0.17679500579833984 }, { "epoch": 3.366241455078125e-05, "model_forward_time": 0.024548053741455078, "step": 22061 }, { "epoch": 3.366241455078125e-05, "step": 22061, "training_step_time": 0.12395381927490234 }, { "epoch": 3.36639404296875e-05, "model_forward_time": 0.02429509162902832, "step": 22062 }, { "epoch": 3.36639404296875e-05, "step": 22062, "training_step_time": 0.12175154685974121 }, { "epoch": 3.366546630859375e-05, "model_forward_time": 0.025143861770629883, "step": 22063 }, { "epoch": 3.366546630859375e-05, "step": 22063, "training_step_time": 0.10907435417175293 }, { "epoch": 3.36669921875e-05, "model_forward_time": 0.02524590492248535, "step": 22064 }, { "epoch": 3.36669921875e-05, "step": 22064, "training_step_time": 0.11278891563415527 }, { "epoch": 3.366851806640625e-05, "model_forward_time": 0.025288105010986328, "step": 22065 }, { "epoch": 3.366851806640625e-05, "step": 22065, "training_step_time": 0.11050915718078613 }, { "epoch": 3.36700439453125e-05, "model_forward_time": 0.025485754013061523, "step": 22066 }, { "epoch": 3.36700439453125e-05, "step": 22066, "training_step_time": 0.11541104316711426 }, { "epoch": 3.367156982421875e-05, "model_forward_time": 0.025388002395629883, "step": 22067 }, { "epoch": 3.367156982421875e-05, "step": 22067, "training_step_time": 0.10773277282714844 }, { "epoch": 3.3673095703125e-05, "model_forward_time": 0.024590253829956055, "step": 22068 }, { "epoch": 3.3673095703125e-05, "step": 22068, "training_step_time": 0.11530232429504395 }, { "epoch": 3.367462158203125e-05, "model_forward_time": 0.024728775024414062, "step": 22069 }, { "epoch": 3.367462158203125e-05, "step": 22069, "training_step_time": 0.10981535911560059 }, { "epoch": 3.36761474609375e-05, "grad_norm": 0.27646952867507935, "learning_rate": 1.7916948935991718e-05, "loss": 0.0067, "step": 22070 }, { "epoch": 3.36761474609375e-05, "model_forward_time": 0.02508258819580078, "step": 22070 }, { "epoch": 3.36761474609375e-05, "step": 22070, "training_step_time": 0.11499714851379395 }, { "epoch": 3.367767333984375e-05, "model_forward_time": 0.02522873878479004, "step": 22071 }, { "epoch": 3.367767333984375e-05, "step": 22071, "training_step_time": 0.11368536949157715 }, { "epoch": 3.367919921875e-05, "model_forward_time": 0.02501082420349121, "step": 22072 }, { "epoch": 3.367919921875e-05, "step": 22072, "training_step_time": 0.10786843299865723 }, { "epoch": 3.368072509765625e-05, "model_forward_time": 0.0254364013671875, "step": 22073 }, { "epoch": 3.368072509765625e-05, "step": 22073, "training_step_time": 0.10866641998291016 }, { "epoch": 3.36822509765625e-05, "model_forward_time": 0.024925947189331055, "step": 22074 }, { "epoch": 3.36822509765625e-05, "step": 22074, "training_step_time": 0.15185117721557617 }, { "epoch": 3.368377685546875e-05, "model_forward_time": 0.024457693099975586, "step": 22075 }, { "epoch": 3.368377685546875e-05, "step": 22075, "training_step_time": 0.16089320182800293 }, { "epoch": 3.3685302734375e-05, "model_forward_time": 0.02427530288696289, "step": 22076 }, { "epoch": 3.3685302734375e-05, "step": 22076, "training_step_time": 0.13083410263061523 }, { "epoch": 3.368682861328125e-05, "model_forward_time": 0.024447202682495117, "step": 22077 }, { "epoch": 3.368682861328125e-05, "step": 22077, "training_step_time": 0.1368699073791504 }, { "epoch": 3.36883544921875e-05, "model_forward_time": 0.026310443878173828, "step": 22078 }, { "epoch": 3.36883544921875e-05, "step": 22078, "training_step_time": 0.17287015914916992 }, { "epoch": 3.368988037109375e-05, "model_forward_time": 0.024475812911987305, "step": 22079 }, { "epoch": 3.368988037109375e-05, "step": 22079, "training_step_time": 0.1528306007385254 }, { "epoch": 3.369140625e-05, "grad_norm": 0.20525725185871124, "learning_rate": 1.787469536677419e-05, "loss": 0.0057, "step": 22080 }, { "epoch": 3.369140625e-05, "model_forward_time": 0.02459096908569336, "step": 22080 }, { "epoch": 3.369140625e-05, "step": 22080, "training_step_time": 0.10769009590148926 }, { "epoch": 3.369293212890625e-05, "model_forward_time": 0.027686119079589844, "step": 22081 }, { "epoch": 3.369293212890625e-05, "step": 22081, "training_step_time": 0.19274330139160156 }, { "epoch": 3.36944580078125e-05, "model_forward_time": 0.02444601058959961, "step": 22082 }, { "epoch": 3.36944580078125e-05, "step": 22082, "training_step_time": 0.10213184356689453 }, { "epoch": 3.369598388671875e-05, "model_forward_time": 0.025621891021728516, "step": 22083 }, { "epoch": 3.369598388671875e-05, "step": 22083, "training_step_time": 0.10543990135192871 }, { "epoch": 3.3697509765625e-05, "model_forward_time": 0.025042295455932617, "step": 22084 }, { "epoch": 3.3697509765625e-05, "step": 22084, "training_step_time": 0.14552044868469238 }, { "epoch": 3.369903564453125e-05, "model_forward_time": 0.024957895278930664, "step": 22085 }, { "epoch": 3.369903564453125e-05, "step": 22085, "training_step_time": 0.11055850982666016 }, { "epoch": 3.37005615234375e-05, "model_forward_time": 0.02511310577392578, "step": 22086 }, { "epoch": 3.37005615234375e-05, "step": 22086, "training_step_time": 0.11157870292663574 }, { "epoch": 3.370208740234375e-05, "model_forward_time": 0.02510523796081543, "step": 22087 }, { "epoch": 3.370208740234375e-05, "step": 22087, "training_step_time": 0.12030315399169922 }, { "epoch": 3.370361328125e-05, "model_forward_time": 0.024698972702026367, "step": 22088 }, { "epoch": 3.370361328125e-05, "step": 22088, "training_step_time": 0.16036152839660645 }, { "epoch": 3.370513916015625e-05, "model_forward_time": 0.024704933166503906, "step": 22089 }, { "epoch": 3.370513916015625e-05, "step": 22089, "training_step_time": 0.1086881160736084 }, { "epoch": 3.37066650390625e-05, "grad_norm": 0.16050368547439575, "learning_rate": 1.7832480832830987e-05, "loss": 0.0106, "step": 22090 }, { "epoch": 3.37066650390625e-05, "model_forward_time": 0.02433037757873535, "step": 22090 }, { "epoch": 3.37066650390625e-05, "step": 22090, "training_step_time": 0.10866379737854004 }, { "epoch": 3.370819091796875e-05, "model_forward_time": 0.024655580520629883, "step": 22091 }, { "epoch": 3.370819091796875e-05, "step": 22091, "training_step_time": 0.10538935661315918 }, { "epoch": 3.3709716796875e-05, "model_forward_time": 0.025115489959716797, "step": 22092 }, { "epoch": 3.3709716796875e-05, "step": 22092, "training_step_time": 0.10508275032043457 }, { "epoch": 3.371124267578125e-05, "model_forward_time": 0.024903297424316406, "step": 22093 }, { "epoch": 3.371124267578125e-05, "step": 22093, "training_step_time": 0.1112513542175293 }, { "epoch": 3.37127685546875e-05, "model_forward_time": 0.0251009464263916, "step": 22094 }, { "epoch": 3.37127685546875e-05, "step": 22094, "training_step_time": 0.11023330688476562 }, { "epoch": 3.371429443359375e-05, "model_forward_time": 0.02552628517150879, "step": 22095 }, { "epoch": 3.371429443359375e-05, "step": 22095, "training_step_time": 0.1098787784576416 }, { "epoch": 3.37158203125e-05, "model_forward_time": 0.025022506713867188, "step": 22096 }, { "epoch": 3.37158203125e-05, "step": 22096, "training_step_time": 0.10639142990112305 }, { "epoch": 3.371734619140625e-05, "model_forward_time": 0.024855852127075195, "step": 22097 }, { "epoch": 3.371734619140625e-05, "step": 22097, "training_step_time": 0.10494542121887207 }, { "epoch": 3.37188720703125e-05, "model_forward_time": 0.0251312255859375, "step": 22098 }, { "epoch": 3.37188720703125e-05, "step": 22098, "training_step_time": 0.10486745834350586 }, { "epoch": 3.372039794921875e-05, "model_forward_time": 0.024608135223388672, "step": 22099 }, { "epoch": 3.372039794921875e-05, "step": 22099, "training_step_time": 0.10403752326965332 }, { "epoch": 3.3721923828125e-05, "grad_norm": 0.1448470950126648, "learning_rate": 1.7790305385456795e-05, "loss": 0.0055, "step": 22100 }, { "epoch": 3.3721923828125e-05, "model_forward_time": 0.024161338806152344, "step": 22100 }, { "epoch": 3.3721923828125e-05, "step": 22100, "training_step_time": 0.10615205764770508 }, { "epoch": 3.372344970703125e-05, "model_forward_time": 0.025004148483276367, "step": 22101 }, { "epoch": 3.372344970703125e-05, "step": 22101, "training_step_time": 0.1066277027130127 }, { "epoch": 3.37249755859375e-05, "model_forward_time": 0.024811983108520508, "step": 22102 }, { "epoch": 3.37249755859375e-05, "step": 22102, "training_step_time": 0.18333148956298828 }, { "epoch": 3.372650146484375e-05, "model_forward_time": 0.0244905948638916, "step": 22103 }, { "epoch": 3.372650146484375e-05, "step": 22103, "training_step_time": 0.1650533676147461 }, { "epoch": 3.372802734375e-05, "model_forward_time": 0.02401876449584961, "step": 22104 }, { "epoch": 3.372802734375e-05, "step": 22104, "training_step_time": 0.1381528377532959 }, { "epoch": 3.372955322265625e-05, "model_forward_time": 0.024289369583129883, "step": 22105 }, { "epoch": 3.372955322265625e-05, "step": 22105, "training_step_time": 0.1543292999267578 }, { "epoch": 3.37310791015625e-05, "model_forward_time": 0.024353981018066406, "step": 22106 }, { "epoch": 3.37310791015625e-05, "step": 22106, "training_step_time": 0.10492968559265137 }, { "epoch": 3.373260498046875e-05, "model_forward_time": 0.0246884822845459, "step": 22107 }, { "epoch": 3.373260498046875e-05, "step": 22107, "training_step_time": 0.12307620048522949 }, { "epoch": 3.3734130859375e-05, "model_forward_time": 0.024834394454956055, "step": 22108 }, { "epoch": 3.3734130859375e-05, "step": 22108, "training_step_time": 0.10532760620117188 }, { "epoch": 3.373565673828125e-05, "model_forward_time": 0.02520751953125, "step": 22109 }, { "epoch": 3.373565673828125e-05, "step": 22109, "training_step_time": 0.10733604431152344 }, { "epoch": 3.37371826171875e-05, "grad_norm": 0.20625461637973785, "learning_rate": 1.774816907589873e-05, "loss": 0.0171, "step": 22110 }, { "epoch": 3.37371826171875e-05, "model_forward_time": 0.02508378028869629, "step": 22110 }, { "epoch": 3.37371826171875e-05, "step": 22110, "training_step_time": 0.10889363288879395 }, { "epoch": 3.373870849609375e-05, "model_forward_time": 0.025385379791259766, "step": 22111 }, { "epoch": 3.373870849609375e-05, "step": 22111, "training_step_time": 0.10715579986572266 }, { "epoch": 3.3740234375e-05, "model_forward_time": 0.025226116180419922, "step": 22112 }, { "epoch": 3.3740234375e-05, "step": 22112, "training_step_time": 0.10509681701660156 }, { "epoch": 3.374176025390625e-05, "model_forward_time": 0.024886369705200195, "step": 22113 }, { "epoch": 3.374176025390625e-05, "step": 22113, "training_step_time": 0.11054396629333496 }, { "epoch": 3.37432861328125e-05, "model_forward_time": 0.02523016929626465, "step": 22114 }, { "epoch": 3.37432861328125e-05, "step": 22114, "training_step_time": 0.10825610160827637 }, { "epoch": 3.374481201171875e-05, "model_forward_time": 0.024632692337036133, "step": 22115 }, { "epoch": 3.374481201171875e-05, "step": 22115, "training_step_time": 0.10953164100646973 }, { "epoch": 3.3746337890625e-05, "model_forward_time": 0.024777650833129883, "step": 22116 }, { "epoch": 3.3746337890625e-05, "step": 22116, "training_step_time": 0.10456180572509766 }, { "epoch": 3.374786376953125e-05, "model_forward_time": 0.02474236488342285, "step": 22117 }, { "epoch": 3.374786376953125e-05, "step": 22117, "training_step_time": 0.10730457305908203 }, { "epoch": 3.37493896484375e-05, "model_forward_time": 0.025014400482177734, "step": 22118 }, { "epoch": 3.37493896484375e-05, "step": 22118, "training_step_time": 0.10668706893920898 }, { "epoch": 3.375091552734375e-05, "model_forward_time": 0.025732755661010742, "step": 22119 }, { "epoch": 3.375091552734375e-05, "step": 22119, "training_step_time": 0.10856461524963379 }, { "epoch": 3.375244140625e-05, "grad_norm": 0.1136520728468895, "learning_rate": 1.770607195535639e-05, "loss": 0.0163, "step": 22120 }, { "epoch": 3.375244140625e-05, "model_forward_time": 0.0243377685546875, "step": 22120 }, { "epoch": 3.375244140625e-05, "step": 22120, "training_step_time": 0.13463807106018066 }, { "epoch": 3.375396728515625e-05, "model_forward_time": 0.024973392486572266, "step": 22121 }, { "epoch": 3.375396728515625e-05, "step": 22121, "training_step_time": 0.16428232192993164 }, { "epoch": 3.37554931640625e-05, "model_forward_time": 0.02422809600830078, "step": 22122 }, { "epoch": 3.37554931640625e-05, "step": 22122, "training_step_time": 0.11235189437866211 }, { "epoch": 3.375701904296875e-05, "model_forward_time": 0.02428913116455078, "step": 22123 }, { "epoch": 3.375701904296875e-05, "step": 22123, "training_step_time": 0.13874268531799316 }, { "epoch": 3.3758544921875e-05, "model_forward_time": 0.02481818199157715, "step": 22124 }, { "epoch": 3.3758544921875e-05, "step": 22124, "training_step_time": 0.18659186363220215 }, { "epoch": 3.376007080078125e-05, "model_forward_time": 0.023975610733032227, "step": 22125 }, { "epoch": 3.376007080078125e-05, "step": 22125, "training_step_time": 0.14794087409973145 }, { "epoch": 3.37615966796875e-05, "model_forward_time": 0.024412155151367188, "step": 22126 }, { "epoch": 3.37615966796875e-05, "step": 22126, "training_step_time": 0.10832095146179199 }, { "epoch": 3.376312255859375e-05, "model_forward_time": 0.02550482749938965, "step": 22127 }, { "epoch": 3.376312255859375e-05, "step": 22127, "training_step_time": 0.10826539993286133 }, { "epoch": 3.37646484375e-05, "model_forward_time": 0.02492499351501465, "step": 22128 }, { "epoch": 3.37646484375e-05, "step": 22128, "training_step_time": 0.10705804824829102 }, { "epoch": 3.376617431640625e-05, "model_forward_time": 0.025075674057006836, "step": 22129 }, { "epoch": 3.376617431640625e-05, "step": 22129, "training_step_time": 0.1069955825805664 }, { "epoch": 3.37677001953125e-05, "grad_norm": 0.10590667277574539, "learning_rate": 1.7664014074981742e-05, "loss": 0.0085, "step": 22130 }, { "epoch": 3.37677001953125e-05, "model_forward_time": 0.025167226791381836, "step": 22130 }, { "epoch": 3.37677001953125e-05, "step": 22130, "training_step_time": 0.10798859596252441 }, { "epoch": 3.376922607421875e-05, "model_forward_time": 0.024977684020996094, "step": 22131 }, { "epoch": 3.376922607421875e-05, "step": 22131, "training_step_time": 0.20845770835876465 }, { "epoch": 3.3770751953125e-05, "model_forward_time": 0.02465224266052246, "step": 22132 }, { "epoch": 3.3770751953125e-05, "step": 22132, "training_step_time": 0.10904312133789062 }, { "epoch": 3.377227783203125e-05, "model_forward_time": 0.024756193161010742, "step": 22133 }, { "epoch": 3.377227783203125e-05, "step": 22133, "training_step_time": 0.10879039764404297 }, { "epoch": 3.37738037109375e-05, "model_forward_time": 0.024891376495361328, "step": 22134 }, { "epoch": 3.37738037109375e-05, "step": 22134, "training_step_time": 0.1254711151123047 }, { "epoch": 3.377532958984375e-05, "model_forward_time": 0.024827003479003906, "step": 22135 }, { "epoch": 3.377532958984375e-05, "step": 22135, "training_step_time": 0.12429451942443848 }, { "epoch": 3.377685546875e-05, "model_forward_time": 0.025003433227539062, "step": 22136 }, { "epoch": 3.377685546875e-05, "step": 22136, "training_step_time": 0.15282225608825684 }, { "epoch": 3.377838134765625e-05, "model_forward_time": 0.024444580078125, "step": 22137 }, { "epoch": 3.377838134765625e-05, "step": 22137, "training_step_time": 0.1221153736114502 }, { "epoch": 3.37799072265625e-05, "model_forward_time": 0.024462461471557617, "step": 22138 }, { "epoch": 3.37799072265625e-05, "step": 22138, "training_step_time": 0.10831356048583984 }, { "epoch": 3.378143310546875e-05, "model_forward_time": 0.025004148483276367, "step": 22139 }, { "epoch": 3.378143310546875e-05, "step": 22139, "training_step_time": 0.11420869827270508 }, { "epoch": 3.3782958984375e-05, "grad_norm": 0.11597984284162521, "learning_rate": 1.7621995485879062e-05, "loss": 0.006, "step": 22140 }, { "epoch": 3.3782958984375e-05, "model_forward_time": 0.02406454086303711, "step": 22140 }, { "epoch": 3.3782958984375e-05, "step": 22140, "training_step_time": 0.11019039154052734 }, { "epoch": 3.378448486328125e-05, "model_forward_time": 0.025536537170410156, "step": 22141 }, { "epoch": 3.378448486328125e-05, "step": 22141, "training_step_time": 0.1114048957824707 }, { "epoch": 3.37860107421875e-05, "model_forward_time": 0.025068998336791992, "step": 22142 }, { "epoch": 3.37860107421875e-05, "step": 22142, "training_step_time": 0.10976409912109375 }, { "epoch": 3.378753662109375e-05, "model_forward_time": 0.02652883529663086, "step": 22143 }, { "epoch": 3.378753662109375e-05, "step": 22143, "training_step_time": 0.11104369163513184 }, { "epoch": 3.37890625e-05, "model_forward_time": 0.02503228187561035, "step": 22144 }, { "epoch": 3.37890625e-05, "step": 22144, "training_step_time": 0.10775947570800781 }, { "epoch": 3.379058837890625e-05, "model_forward_time": 0.02527451515197754, "step": 22145 }, { "epoch": 3.379058837890625e-05, "step": 22145, "training_step_time": 0.10788321495056152 }, { "epoch": 3.37921142578125e-05, "model_forward_time": 0.02521204948425293, "step": 22146 }, { "epoch": 3.37921142578125e-05, "step": 22146, "training_step_time": 0.11015009880065918 }, { "epoch": 3.379364013671875e-05, "model_forward_time": 0.025229454040527344, "step": 22147 }, { "epoch": 3.379364013671875e-05, "step": 22147, "training_step_time": 0.1063072681427002 }, { "epoch": 3.3795166015625e-05, "model_forward_time": 0.025147676467895508, "step": 22148 }, { "epoch": 3.3795166015625e-05, "step": 22148, "training_step_time": 0.20652055740356445 }, { "epoch": 3.379669189453125e-05, "model_forward_time": 0.0242154598236084, "step": 22149 }, { "epoch": 3.379669189453125e-05, "step": 22149, "training_step_time": 0.12357640266418457 }, { "epoch": 3.37982177734375e-05, "grad_norm": 0.08215171098709106, "learning_rate": 1.7580016239104924e-05, "loss": 0.0066, "step": 22150 }, { "epoch": 3.37982177734375e-05, "model_forward_time": 0.0243072509765625, "step": 22150 }, { "epoch": 3.37982177734375e-05, "step": 22150, "training_step_time": 0.10963940620422363 }, { "epoch": 3.379974365234375e-05, "model_forward_time": 0.024663448333740234, "step": 22151 }, { "epoch": 3.379974365234375e-05, "step": 22151, "training_step_time": 0.1157689094543457 }, { "epoch": 3.380126953125e-05, "model_forward_time": 0.02483534812927246, "step": 22152 }, { "epoch": 3.380126953125e-05, "step": 22152, "training_step_time": 0.11003780364990234 }, { "epoch": 3.380279541015625e-05, "model_forward_time": 0.024890899658203125, "step": 22153 }, { "epoch": 3.380279541015625e-05, "step": 22153, "training_step_time": 0.10624408721923828 }, { "epoch": 3.38043212890625e-05, "model_forward_time": 0.025231122970581055, "step": 22154 }, { "epoch": 3.38043212890625e-05, "step": 22154, "training_step_time": 0.1752769947052002 }, { "epoch": 3.380584716796875e-05, "model_forward_time": 0.02527904510498047, "step": 22155 }, { "epoch": 3.380584716796875e-05, "step": 22155, "training_step_time": 0.10510063171386719 }, { "epoch": 3.3807373046875e-05, "model_forward_time": 0.026553869247436523, "step": 22156 }, { "epoch": 3.3807373046875e-05, "step": 22156, "training_step_time": 0.10563945770263672 }, { "epoch": 3.380889892578125e-05, "model_forward_time": 0.02507495880126953, "step": 22157 }, { "epoch": 3.380889892578125e-05, "step": 22157, "training_step_time": 0.1085667610168457 }, { "epoch": 3.38104248046875e-05, "model_forward_time": 0.024009227752685547, "step": 22158 }, { "epoch": 3.38104248046875e-05, "step": 22158, "training_step_time": 0.10533285140991211 }, { "epoch": 3.381195068359375e-05, "model_forward_time": 0.023781776428222656, "step": 22159 }, { "epoch": 3.381195068359375e-05, "step": 22159, "training_step_time": 0.10631918907165527 }, { "epoch": 3.38134765625e-05, "grad_norm": 0.10888959467411041, "learning_rate": 1.753807638566805e-05, "loss": 0.0047, "step": 22160 }, { "epoch": 3.38134765625e-05, "model_forward_time": 0.02486896514892578, "step": 22160 }, { "epoch": 3.38134765625e-05, "step": 22160, "training_step_time": 0.10628581047058105 }, { "epoch": 3.381500244140625e-05, "model_forward_time": 0.024901151657104492, "step": 22161 }, { "epoch": 3.381500244140625e-05, "step": 22161, "training_step_time": 0.1091153621673584 }, { "epoch": 3.38165283203125e-05, "model_forward_time": 0.025144100189208984, "step": 22162 }, { "epoch": 3.38165283203125e-05, "step": 22162, "training_step_time": 0.1085355281829834 }, { "epoch": 3.381805419921875e-05, "model_forward_time": 0.024973392486572266, "step": 22163 }, { "epoch": 3.381805419921875e-05, "step": 22163, "training_step_time": 0.17773723602294922 }, { "epoch": 3.3819580078125e-05, "model_forward_time": 0.024677515029907227, "step": 22164 }, { "epoch": 3.3819580078125e-05, "step": 22164, "training_step_time": 0.20174765586853027 }, { "epoch": 3.382110595703125e-05, "model_forward_time": 0.024209976196289062, "step": 22165 }, { "epoch": 3.382110595703125e-05, "step": 22165, "training_step_time": 0.2161257266998291 }, { "epoch": 3.38226318359375e-05, "model_forward_time": 0.024022579193115234, "step": 22166 }, { "epoch": 3.38226318359375e-05, "step": 22166, "training_step_time": 0.18233847618103027 }, { "epoch": 3.382415771484375e-05, "model_forward_time": 0.024090290069580078, "step": 22167 }, { "epoch": 3.382415771484375e-05, "step": 22167, "training_step_time": 0.1870427131652832 }, { "epoch": 3.382568359375e-05, "model_forward_time": 0.024547338485717773, "step": 22168 }, { "epoch": 3.382568359375e-05, "step": 22168, "training_step_time": 0.17144465446472168 }, { "epoch": 3.382720947265625e-05, "model_forward_time": 0.024591684341430664, "step": 22169 }, { "epoch": 3.382720947265625e-05, "step": 22169, "training_step_time": 0.17792367935180664 }, { "epoch": 3.38287353515625e-05, "grad_norm": 0.09951245784759521, "learning_rate": 1.749617597652934e-05, "loss": 0.0044, "step": 22170 }, { "epoch": 3.38287353515625e-05, "model_forward_time": 0.024028539657592773, "step": 22170 }, { "epoch": 3.38287353515625e-05, "step": 22170, "training_step_time": 0.10448408126831055 }, { "epoch": 3.383026123046875e-05, "model_forward_time": 0.024554967880249023, "step": 22171 }, { "epoch": 3.383026123046875e-05, "step": 22171, "training_step_time": 0.12530231475830078 }, { "epoch": 3.3831787109375e-05, "model_forward_time": 0.024995088577270508, "step": 22172 }, { "epoch": 3.3831787109375e-05, "step": 22172, "training_step_time": 0.12049150466918945 }, { "epoch": 3.383331298828125e-05, "model_forward_time": 0.02483987808227539, "step": 22173 }, { "epoch": 3.383331298828125e-05, "step": 22173, "training_step_time": 0.10395693778991699 }, { "epoch": 3.38348388671875e-05, "model_forward_time": 0.02515554428100586, "step": 22174 }, { "epoch": 3.38348388671875e-05, "step": 22174, "training_step_time": 0.10426592826843262 }, { "epoch": 3.383636474609375e-05, "model_forward_time": 0.025090694427490234, "step": 22175 }, { "epoch": 3.383636474609375e-05, "step": 22175, "training_step_time": 0.18278145790100098 }, { "epoch": 3.3837890625e-05, "model_forward_time": 0.02492356300354004, "step": 22176 }, { "epoch": 3.3837890625e-05, "step": 22176, "training_step_time": 0.1110689640045166 }, { "epoch": 3.383941650390625e-05, "model_forward_time": 0.024241209030151367, "step": 22177 }, { "epoch": 3.383941650390625e-05, "step": 22177, "training_step_time": 0.11284708976745605 }, { "epoch": 3.38409423828125e-05, "model_forward_time": 0.024901151657104492, "step": 22178 }, { "epoch": 3.38409423828125e-05, "step": 22178, "training_step_time": 0.128889799118042 }, { "epoch": 3.384246826171875e-05, "model_forward_time": 0.024940967559814453, "step": 22179 }, { "epoch": 3.384246826171875e-05, "step": 22179, "training_step_time": 0.12832355499267578 }, { "epoch": 3.3843994140625e-05, "grad_norm": 0.16980668902397156, "learning_rate": 1.745431506260173e-05, "loss": 0.0047, "step": 22180 }, { "epoch": 3.3843994140625e-05, "model_forward_time": 0.025179624557495117, "step": 22180 }, { "epoch": 3.3843994140625e-05, "step": 22180, "training_step_time": 0.11362218856811523 }, { "epoch": 3.384552001953125e-05, "model_forward_time": 0.025287151336669922, "step": 22181 }, { "epoch": 3.384552001953125e-05, "step": 22181, "training_step_time": 0.11606049537658691 }, { "epoch": 3.38470458984375e-05, "model_forward_time": 0.02514481544494629, "step": 22182 }, { "epoch": 3.38470458984375e-05, "step": 22182, "training_step_time": 0.10805630683898926 }, { "epoch": 3.384857177734375e-05, "model_forward_time": 0.0252840518951416, "step": 22183 }, { "epoch": 3.384857177734375e-05, "step": 22183, "training_step_time": 0.10616350173950195 }, { "epoch": 3.385009765625e-05, "model_forward_time": 0.025262117385864258, "step": 22184 }, { "epoch": 3.385009765625e-05, "step": 22184, "training_step_time": 0.10661983489990234 }, { "epoch": 3.385162353515625e-05, "model_forward_time": 0.024952173233032227, "step": 22185 }, { "epoch": 3.385162353515625e-05, "step": 22185, "training_step_time": 0.11705470085144043 }, { "epoch": 3.38531494140625e-05, "model_forward_time": 0.025152206420898438, "step": 22186 }, { "epoch": 3.38531494140625e-05, "step": 22186, "training_step_time": 0.11028861999511719 }, { "epoch": 3.385467529296875e-05, "model_forward_time": 0.025119543075561523, "step": 22187 }, { "epoch": 3.385467529296875e-05, "step": 22187, "training_step_time": 0.11005067825317383 }, { "epoch": 3.3856201171875e-05, "model_forward_time": 0.025055885314941406, "step": 22188 }, { "epoch": 3.3856201171875e-05, "step": 22188, "training_step_time": 0.11140322685241699 }, { "epoch": 3.385772705078125e-05, "model_forward_time": 0.02509784698486328, "step": 22189 }, { "epoch": 3.385772705078125e-05, "step": 22189, "training_step_time": 0.10689234733581543 }, { "epoch": 3.38592529296875e-05, "grad_norm": 0.10665614902973175, "learning_rate": 1.7412493694750176e-05, "loss": 0.0057, "step": 22190 }, { "epoch": 3.38592529296875e-05, "model_forward_time": 0.02534031867980957, "step": 22190 }, { "epoch": 3.38592529296875e-05, "step": 22190, "training_step_time": 0.11113286018371582 }, { "epoch": 3.386077880859375e-05, "model_forward_time": 0.02509284019470215, "step": 22191 }, { "epoch": 3.386077880859375e-05, "step": 22191, "training_step_time": 0.11188507080078125 }, { "epoch": 3.38623046875e-05, "model_forward_time": 0.0254514217376709, "step": 22192 }, { "epoch": 3.38623046875e-05, "step": 22192, "training_step_time": 0.11149263381958008 }, { "epoch": 3.386383056640625e-05, "model_forward_time": 0.02530813217163086, "step": 22193 }, { "epoch": 3.386383056640625e-05, "step": 22193, "training_step_time": 0.10808205604553223 }, { "epoch": 3.38653564453125e-05, "model_forward_time": 0.025127172470092773, "step": 22194 }, { "epoch": 3.38653564453125e-05, "step": 22194, "training_step_time": 0.11192512512207031 }, { "epoch": 3.386688232421875e-05, "model_forward_time": 0.024953603744506836, "step": 22195 }, { "epoch": 3.386688232421875e-05, "step": 22195, "training_step_time": 0.2112903594970703 }, { "epoch": 3.3868408203125e-05, "model_forward_time": 0.02407050132751465, "step": 22196 }, { "epoch": 3.3868408203125e-05, "step": 22196, "training_step_time": 0.10657572746276855 }, { "epoch": 3.386993408203125e-05, "model_forward_time": 0.024010896682739258, "step": 22197 }, { "epoch": 3.386993408203125e-05, "step": 22197, "training_step_time": 0.11416506767272949 }, { "epoch": 3.38714599609375e-05, "model_forward_time": 0.025293827056884766, "step": 22198 }, { "epoch": 3.38714599609375e-05, "step": 22198, "training_step_time": 0.16232728958129883 }, { "epoch": 3.387298583984375e-05, "model_forward_time": 0.024297714233398438, "step": 22199 }, { "epoch": 3.387298583984375e-05, "step": 22199, "training_step_time": 0.10534906387329102 }, { "epoch": 3.387451171875e-05, "grad_norm": 0.07339908927679062, "learning_rate": 1.7370711923791567e-05, "loss": 0.0067, "step": 22200 }, { "epoch": 3.387451171875e-05, "model_forward_time": 0.02508687973022461, "step": 22200 }, { "epoch": 3.387451171875e-05, "step": 22200, "training_step_time": 0.10323643684387207 }, { "epoch": 3.387603759765625e-05, "model_forward_time": 0.026215553283691406, "step": 22201 }, { "epoch": 3.387603759765625e-05, "step": 22201, "training_step_time": 0.10805249214172363 }, { "epoch": 3.38775634765625e-05, "model_forward_time": 0.025249481201171875, "step": 22202 }, { "epoch": 3.38775634765625e-05, "step": 22202, "training_step_time": 0.10402250289916992 }, { "epoch": 3.387908935546875e-05, "model_forward_time": 0.02498483657836914, "step": 22203 }, { "epoch": 3.387908935546875e-05, "step": 22203, "training_step_time": 0.10381770133972168 }, { "epoch": 3.3880615234375e-05, "model_forward_time": 0.02507328987121582, "step": 22204 }, { "epoch": 3.3880615234375e-05, "step": 22204, "training_step_time": 0.10397219657897949 }, { "epoch": 3.388214111328125e-05, "model_forward_time": 0.025532007217407227, "step": 22205 }, { "epoch": 3.388214111328125e-05, "step": 22205, "training_step_time": 0.10472702980041504 }, { "epoch": 3.38836669921875e-05, "model_forward_time": 0.025216102600097656, "step": 22206 }, { "epoch": 3.38836669921875e-05, "step": 22206, "training_step_time": 0.10619211196899414 }, { "epoch": 3.388519287109375e-05, "model_forward_time": 0.025568723678588867, "step": 22207 }, { "epoch": 3.388519287109375e-05, "step": 22207, "training_step_time": 0.10539555549621582 }, { "epoch": 3.388671875e-05, "model_forward_time": 0.025257349014282227, "step": 22208 }, { "epoch": 3.388671875e-05, "step": 22208, "training_step_time": 0.10530948638916016 }, { "epoch": 3.388824462890625e-05, "model_forward_time": 0.02499103546142578, "step": 22209 }, { "epoch": 3.388824462890625e-05, "step": 22209, "training_step_time": 0.10581517219543457 }, { "epoch": 3.38897705078125e-05, "grad_norm": 0.28890460729599, "learning_rate": 1.7328969800494726e-05, "loss": 0.0072, "step": 22210 }, { "epoch": 3.38897705078125e-05, "model_forward_time": 0.0258481502532959, "step": 22210 }, { "epoch": 3.38897705078125e-05, "step": 22210, "training_step_time": 0.10332131385803223 }, { "epoch": 3.389129638671875e-05, "model_forward_time": 0.02541518211364746, "step": 22211 }, { "epoch": 3.389129638671875e-05, "step": 22211, "training_step_time": 0.14841604232788086 }, { "epoch": 3.3892822265625e-05, "model_forward_time": 0.024779319763183594, "step": 22212 }, { "epoch": 3.3892822265625e-05, "step": 22212, "training_step_time": 0.15446972846984863 }, { "epoch": 3.389434814453125e-05, "model_forward_time": 0.024474620819091797, "step": 22213 }, { "epoch": 3.389434814453125e-05, "step": 22213, "training_step_time": 0.22321629524230957 }, { "epoch": 3.38958740234375e-05, "model_forward_time": 0.024509906768798828, "step": 22214 }, { "epoch": 3.38958740234375e-05, "step": 22214, "training_step_time": 0.1200108528137207 }, { "epoch": 3.389739990234375e-05, "model_forward_time": 0.02464771270751953, "step": 22215 }, { "epoch": 3.389739990234375e-05, "step": 22215, "training_step_time": 0.11345171928405762 }, { "epoch": 3.389892578125e-05, "model_forward_time": 0.02524542808532715, "step": 22216 }, { "epoch": 3.389892578125e-05, "step": 22216, "training_step_time": 0.11906933784484863 }, { "epoch": 3.390045166015625e-05, "model_forward_time": 0.025357723236083984, "step": 22217 }, { "epoch": 3.390045166015625e-05, "step": 22217, "training_step_time": 0.11003661155700684 }, { "epoch": 3.39019775390625e-05, "model_forward_time": 0.02523183822631836, "step": 22218 }, { "epoch": 3.39019775390625e-05, "step": 22218, "training_step_time": 0.10879087448120117 }, { "epoch": 3.390350341796875e-05, "model_forward_time": 0.025473594665527344, "step": 22219 }, { "epoch": 3.390350341796875e-05, "step": 22219, "training_step_time": 0.11221528053283691 }, { "epoch": 3.3905029296875e-05, "grad_norm": 0.12778474390506744, "learning_rate": 1.7287267375580256e-05, "loss": 0.0056, "step": 22220 }, { "epoch": 3.3905029296875e-05, "model_forward_time": 0.025042295455932617, "step": 22220 }, { "epoch": 3.3905029296875e-05, "step": 22220, "training_step_time": 0.10469317436218262 }, { "epoch": 3.390655517578125e-05, "model_forward_time": 0.02385544776916504, "step": 22221 }, { "epoch": 3.390655517578125e-05, "step": 22221, "training_step_time": 0.10703158378601074 }, { "epoch": 3.39080810546875e-05, "model_forward_time": 0.02529287338256836, "step": 22222 }, { "epoch": 3.39080810546875e-05, "step": 22222, "training_step_time": 0.16131997108459473 }, { "epoch": 3.390960693359375e-05, "model_forward_time": 0.024844884872436523, "step": 22223 }, { "epoch": 3.390960693359375e-05, "step": 22223, "training_step_time": 0.11198925971984863 }, { "epoch": 3.39111328125e-05, "model_forward_time": 0.024639368057250977, "step": 22224 }, { "epoch": 3.39111328125e-05, "step": 22224, "training_step_time": 0.1094520092010498 }, { "epoch": 3.391265869140625e-05, "model_forward_time": 0.024764060974121094, "step": 22225 }, { "epoch": 3.391265869140625e-05, "step": 22225, "training_step_time": 0.1083519458770752 }, { "epoch": 3.39141845703125e-05, "model_forward_time": 0.025512218475341797, "step": 22226 }, { "epoch": 3.39141845703125e-05, "step": 22226, "training_step_time": 0.12638640403747559 }, { "epoch": 3.391571044921875e-05, "model_forward_time": 0.02500152587890625, "step": 22227 }, { "epoch": 3.391571044921875e-05, "step": 22227, "training_step_time": 0.1152350902557373 }, { "epoch": 3.3917236328125e-05, "model_forward_time": 0.02442145347595215, "step": 22228 }, { "epoch": 3.3917236328125e-05, "step": 22228, "training_step_time": 0.11396646499633789 }, { "epoch": 3.391876220703125e-05, "model_forward_time": 0.02481985092163086, "step": 22229 }, { "epoch": 3.391876220703125e-05, "step": 22229, "training_step_time": 0.10477828979492188 }, { "epoch": 3.39202880859375e-05, "grad_norm": 0.1929740309715271, "learning_rate": 1.7245604699720535e-05, "loss": 0.0037, "step": 22230 }, { "epoch": 3.39202880859375e-05, "model_forward_time": 0.025355100631713867, "step": 22230 }, { "epoch": 3.39202880859375e-05, "step": 22230, "training_step_time": 0.10429716110229492 }, { "epoch": 3.392181396484375e-05, "model_forward_time": 0.025200366973876953, "step": 22231 }, { "epoch": 3.392181396484375e-05, "step": 22231, "training_step_time": 0.10840916633605957 }, { "epoch": 3.392333984375e-05, "model_forward_time": 0.02507781982421875, "step": 22232 }, { "epoch": 3.392333984375e-05, "step": 22232, "training_step_time": 0.10611605644226074 }, { "epoch": 3.392486572265625e-05, "model_forward_time": 0.024096965789794922, "step": 22233 }, { "epoch": 3.392486572265625e-05, "step": 22233, "training_step_time": 0.10585379600524902 }, { "epoch": 3.39263916015625e-05, "model_forward_time": 0.025354385375976562, "step": 22234 }, { "epoch": 3.39263916015625e-05, "step": 22234, "training_step_time": 0.1062319278717041 }, { "epoch": 3.392791748046875e-05, "model_forward_time": 0.02522587776184082, "step": 22235 }, { "epoch": 3.392791748046875e-05, "step": 22235, "training_step_time": 0.10551214218139648 }, { "epoch": 3.3929443359375e-05, "model_forward_time": 0.024923324584960938, "step": 22236 }, { "epoch": 3.3929443359375e-05, "step": 22236, "training_step_time": 0.10493135452270508 }, { "epoch": 3.393096923828125e-05, "model_forward_time": 0.02538466453552246, "step": 22237 }, { "epoch": 3.393096923828125e-05, "step": 22237, "training_step_time": 0.11011934280395508 }, { "epoch": 3.39324951171875e-05, "model_forward_time": 0.025057315826416016, "step": 22238 }, { "epoch": 3.39324951171875e-05, "step": 22238, "training_step_time": 0.11074638366699219 }, { "epoch": 3.393402099609375e-05, "model_forward_time": 0.025115489959716797, "step": 22239 }, { "epoch": 3.393402099609375e-05, "step": 22239, "training_step_time": 0.10809516906738281 }, { "epoch": 3.3935546875e-05, "grad_norm": 0.43977484107017517, "learning_rate": 1.7203981823539643e-05, "loss": 0.0115, "step": 22240 }, { "epoch": 3.3935546875e-05, "model_forward_time": 0.02527141571044922, "step": 22240 }, { "epoch": 3.3935546875e-05, "step": 22240, "training_step_time": 0.18401718139648438 }, { "epoch": 3.393707275390625e-05, "model_forward_time": 0.024701356887817383, "step": 22241 }, { "epoch": 3.393707275390625e-05, "step": 22241, "training_step_time": 0.18105816841125488 }, { "epoch": 3.39385986328125e-05, "model_forward_time": 0.024062395095825195, "step": 22242 }, { "epoch": 3.39385986328125e-05, "step": 22242, "training_step_time": 0.19898295402526855 }, { "epoch": 3.394012451171875e-05, "model_forward_time": 0.0249788761138916, "step": 22243 }, { "epoch": 3.394012451171875e-05, "step": 22243, "training_step_time": 0.1036984920501709 }, { "epoch": 3.3941650390625e-05, "model_forward_time": 0.023246288299560547, "step": 22244 }, { "epoch": 3.3941650390625e-05, "step": 22244, "training_step_time": 0.10319852828979492 }, { "epoch": 3.394317626953125e-05, "model_forward_time": 0.025272130966186523, "step": 22245 }, { "epoch": 3.394317626953125e-05, "step": 22245, "training_step_time": 0.10411477088928223 }, { "epoch": 3.39447021484375e-05, "model_forward_time": 0.024898290634155273, "step": 22246 }, { "epoch": 3.39447021484375e-05, "step": 22246, "training_step_time": 0.10325217247009277 }, { "epoch": 3.394622802734375e-05, "model_forward_time": 0.024779558181762695, "step": 22247 }, { "epoch": 3.394622802734375e-05, "step": 22247, "training_step_time": 0.10794520378112793 }, { "epoch": 3.394775390625e-05, "model_forward_time": 0.025064468383789062, "step": 22248 }, { "epoch": 3.394775390625e-05, "step": 22248, "training_step_time": 0.10546875 }, { "epoch": 3.394927978515625e-05, "model_forward_time": 0.02550220489501953, "step": 22249 }, { "epoch": 3.394927978515625e-05, "step": 22249, "training_step_time": 0.10567975044250488 }, { "epoch": 3.39508056640625e-05, "grad_norm": 0.3760296404361725, "learning_rate": 1.7162398797613282e-05, "loss": 0.0036, "step": 22250 }, { "epoch": 3.39508056640625e-05, "model_forward_time": 0.02500438690185547, "step": 22250 }, { "epoch": 3.39508056640625e-05, "step": 22250, "training_step_time": 0.10545969009399414 }, { "epoch": 3.395233154296875e-05, "model_forward_time": 0.02496790885925293, "step": 22251 }, { "epoch": 3.395233154296875e-05, "step": 22251, "training_step_time": 0.10519289970397949 }, { "epoch": 3.3953857421875e-05, "model_forward_time": 0.024667978286743164, "step": 22252 }, { "epoch": 3.3953857421875e-05, "step": 22252, "training_step_time": 0.10599684715270996 }, { "epoch": 3.395538330078125e-05, "model_forward_time": 0.024966955184936523, "step": 22253 }, { "epoch": 3.395538330078125e-05, "step": 22253, "training_step_time": 0.10933423042297363 }, { "epoch": 3.39569091796875e-05, "model_forward_time": 0.025258779525756836, "step": 22254 }, { "epoch": 3.39569091796875e-05, "step": 22254, "training_step_time": 0.10359883308410645 }, { "epoch": 3.395843505859375e-05, "model_forward_time": 0.02504587173461914, "step": 22255 }, { "epoch": 3.395843505859375e-05, "step": 22255, "training_step_time": 0.10813450813293457 }, { "epoch": 3.39599609375e-05, "model_forward_time": 0.024923086166381836, "step": 22256 }, { "epoch": 3.39599609375e-05, "step": 22256, "training_step_time": 0.10460448265075684 }, { "epoch": 3.396148681640625e-05, "model_forward_time": 0.024698972702026367, "step": 22257 }, { "epoch": 3.396148681640625e-05, "step": 22257, "training_step_time": 0.10447359085083008 }, { "epoch": 3.39630126953125e-05, "model_forward_time": 0.02630758285522461, "step": 22258 }, { "epoch": 3.39630126953125e-05, "step": 22258, "training_step_time": 0.11006546020507812 }, { "epoch": 3.396453857421875e-05, "model_forward_time": 0.025466203689575195, "step": 22259 }, { "epoch": 3.396453857421875e-05, "step": 22259, "training_step_time": 0.10492157936096191 }, { "epoch": 3.3966064453125e-05, "grad_norm": 0.24082809686660767, "learning_rate": 1.712085567246878e-05, "loss": 0.0083, "step": 22260 }, { "epoch": 3.3966064453125e-05, "model_forward_time": 0.02491903305053711, "step": 22260 }, { "epoch": 3.3966064453125e-05, "step": 22260, "training_step_time": 0.13901996612548828 }, { "epoch": 3.396759033203125e-05, "model_forward_time": 0.02578139305114746, "step": 22261 }, { "epoch": 3.396759033203125e-05, "step": 22261, "training_step_time": 0.16664409637451172 }, { "epoch": 3.39691162109375e-05, "model_forward_time": 0.02461981773376465, "step": 22262 }, { "epoch": 3.39691162109375e-05, "step": 22262, "training_step_time": 0.17667698860168457 }, { "epoch": 3.397064208984375e-05, "model_forward_time": 0.0245206356048584, "step": 22263 }, { "epoch": 3.397064208984375e-05, "step": 22263, "training_step_time": 0.15718841552734375 }, { "epoch": 3.397216796875e-05, "model_forward_time": 0.02418828010559082, "step": 22264 }, { "epoch": 3.397216796875e-05, "step": 22264, "training_step_time": 0.16265416145324707 }, { "epoch": 3.397369384765625e-05, "model_forward_time": 0.024004697799682617, "step": 22265 }, { "epoch": 3.397369384765625e-05, "step": 22265, "training_step_time": 0.11006522178649902 }, { "epoch": 3.39752197265625e-05, "model_forward_time": 0.024549484252929688, "step": 22266 }, { "epoch": 3.39752197265625e-05, "step": 22266, "training_step_time": 0.13738775253295898 }, { "epoch": 3.397674560546875e-05, "model_forward_time": 0.024753808975219727, "step": 22267 }, { "epoch": 3.397674560546875e-05, "step": 22267, "training_step_time": 0.13679766654968262 }, { "epoch": 3.3978271484375e-05, "model_forward_time": 0.024460315704345703, "step": 22268 }, { "epoch": 3.3978271484375e-05, "step": 22268, "training_step_time": 0.1369616985321045 }, { "epoch": 3.397979736328125e-05, "model_forward_time": 0.023296117782592773, "step": 22269 }, { "epoch": 3.397979736328125e-05, "step": 22269, "training_step_time": 0.17188215255737305 }, { "epoch": 3.39813232421875e-05, "grad_norm": 0.12613904476165771, "learning_rate": 1.7079352498584934e-05, "loss": 0.0044, "step": 22270 }, { "epoch": 3.39813232421875e-05, "model_forward_time": 0.024170875549316406, "step": 22270 }, { "epoch": 3.39813232421875e-05, "step": 22270, "training_step_time": 0.13015198707580566 }, { "epoch": 3.398284912109375e-05, "model_forward_time": 0.023164987564086914, "step": 22271 }, { "epoch": 3.398284912109375e-05, "step": 22271, "training_step_time": 0.20395278930664062 }, { "epoch": 3.3984375e-05, "model_forward_time": 0.024168014526367188, "step": 22272 }, { "epoch": 3.3984375e-05, "step": 22272, "training_step_time": 0.1381852626800537 }, { "epoch": 3.398590087890625e-05, "model_forward_time": 0.024350643157958984, "step": 22273 }, { "epoch": 3.398590087890625e-05, "step": 22273, "training_step_time": 0.11548519134521484 }, { "epoch": 3.39874267578125e-05, "model_forward_time": 0.023637771606445312, "step": 22274 }, { "epoch": 3.39874267578125e-05, "step": 22274, "training_step_time": 0.11224865913391113 }, { "epoch": 3.398895263671875e-05, "model_forward_time": 0.025110721588134766, "step": 22275 }, { "epoch": 3.398895263671875e-05, "step": 22275, "training_step_time": 0.10746049880981445 }, { "epoch": 3.3990478515625e-05, "model_forward_time": 0.0253140926361084, "step": 22276 }, { "epoch": 3.3990478515625e-05, "step": 22276, "training_step_time": 0.10890531539916992 }, { "epoch": 3.399200439453125e-05, "model_forward_time": 0.024935007095336914, "step": 22277 }, { "epoch": 3.399200439453125e-05, "step": 22277, "training_step_time": 0.10553598403930664 }, { "epoch": 3.39935302734375e-05, "model_forward_time": 0.02522110939025879, "step": 22278 }, { "epoch": 3.39935302734375e-05, "step": 22278, "training_step_time": 0.10856199264526367 }, { "epoch": 3.399505615234375e-05, "model_forward_time": 0.025410175323486328, "step": 22279 }, { "epoch": 3.399505615234375e-05, "step": 22279, "training_step_time": 0.10534143447875977 }, { "epoch": 3.399658203125e-05, "grad_norm": 0.10447924584150314, "learning_rate": 1.703788932639202e-05, "loss": 0.0071, "step": 22280 }, { "epoch": 3.399658203125e-05, "model_forward_time": 0.025168180465698242, "step": 22280 }, { "epoch": 3.399658203125e-05, "step": 22280, "training_step_time": 0.10451126098632812 }, { "epoch": 3.399810791015625e-05, "model_forward_time": 0.025101900100708008, "step": 22281 }, { "epoch": 3.399810791015625e-05, "step": 22281, "training_step_time": 0.10871267318725586 }, { "epoch": 3.39996337890625e-05, "model_forward_time": 0.0251007080078125, "step": 22282 }, { "epoch": 3.39996337890625e-05, "step": 22282, "training_step_time": 0.1053922176361084 }, { "epoch": 3.400115966796875e-05, "model_forward_time": 0.025167226791381836, "step": 22283 }, { "epoch": 3.400115966796875e-05, "step": 22283, "training_step_time": 0.10477089881896973 }, { "epoch": 3.4002685546875e-05, "model_forward_time": 0.025130510330200195, "step": 22284 }, { "epoch": 3.4002685546875e-05, "step": 22284, "training_step_time": 0.12216544151306152 }, { "epoch": 3.400421142578125e-05, "model_forward_time": 0.025210142135620117, "step": 22285 }, { "epoch": 3.400421142578125e-05, "step": 22285, "training_step_time": 0.10521864891052246 }, { "epoch": 3.40057373046875e-05, "model_forward_time": 0.024967193603515625, "step": 22286 }, { "epoch": 3.40057373046875e-05, "step": 22286, "training_step_time": 0.21430444717407227 }, { "epoch": 3.400726318359375e-05, "model_forward_time": 0.02446722984313965, "step": 22287 }, { "epoch": 3.400726318359375e-05, "step": 22287, "training_step_time": 0.10526037216186523 }, { "epoch": 3.40087890625e-05, "model_forward_time": 0.02478623390197754, "step": 22288 }, { "epoch": 3.40087890625e-05, "step": 22288, "training_step_time": 0.11983728408813477 }, { "epoch": 3.401031494140625e-05, "model_forward_time": 0.024987459182739258, "step": 22289 }, { "epoch": 3.401031494140625e-05, "step": 22289, "training_step_time": 0.15848779678344727 }, { "epoch": 3.40118408203125e-05, "grad_norm": 0.08365624397993088, "learning_rate": 1.699646620627168e-05, "loss": 0.0059, "step": 22290 }, { "epoch": 3.40118408203125e-05, "model_forward_time": 0.02446770668029785, "step": 22290 }, { "epoch": 3.40118408203125e-05, "step": 22290, "training_step_time": 0.1025381088256836 }, { "epoch": 3.401336669921875e-05, "model_forward_time": 0.024798154830932617, "step": 22291 }, { "epoch": 3.401336669921875e-05, "step": 22291, "training_step_time": 0.10355854034423828 }, { "epoch": 3.4014892578125e-05, "model_forward_time": 0.024913787841796875, "step": 22292 }, { "epoch": 3.4014892578125e-05, "step": 22292, "training_step_time": 0.10693526268005371 }, { "epoch": 3.401641845703125e-05, "model_forward_time": 0.025327444076538086, "step": 22293 }, { "epoch": 3.401641845703125e-05, "step": 22293, "training_step_time": 0.10769486427307129 }, { "epoch": 3.40179443359375e-05, "model_forward_time": 0.02492380142211914, "step": 22294 }, { "epoch": 3.40179443359375e-05, "step": 22294, "training_step_time": 0.10181808471679688 }, { "epoch": 3.401947021484375e-05, "model_forward_time": 0.025163650512695312, "step": 22295 }, { "epoch": 3.401947021484375e-05, "step": 22295, "training_step_time": 0.1043705940246582 }, { "epoch": 3.402099609375e-05, "model_forward_time": 0.02514958381652832, "step": 22296 }, { "epoch": 3.402099609375e-05, "step": 22296, "training_step_time": 0.1081244945526123 }, { "epoch": 3.402252197265625e-05, "model_forward_time": 0.025298357009887695, "step": 22297 }, { "epoch": 3.402252197265625e-05, "step": 22297, "training_step_time": 0.10935306549072266 }, { "epoch": 3.40240478515625e-05, "model_forward_time": 0.0252687931060791, "step": 22298 }, { "epoch": 3.40240478515625e-05, "step": 22298, "training_step_time": 0.10853719711303711 }, { "epoch": 3.402557373046875e-05, "model_forward_time": 0.02474188804626465, "step": 22299 }, { "epoch": 3.402557373046875e-05, "step": 22299, "training_step_time": 0.10465073585510254 }, { "epoch": 3.4027099609375e-05, "grad_norm": 0.15075549483299255, "learning_rate": 1.6955083188556947e-05, "loss": 0.0055, "step": 22300 }, { "epoch": 3.4027099609375e-05, "model_forward_time": 0.02494645118713379, "step": 22300 }, { "epoch": 3.4027099609375e-05, "step": 22300, "training_step_time": 0.10527801513671875 }, { "epoch": 3.402862548828125e-05, "model_forward_time": 0.02557063102722168, "step": 22301 }, { "epoch": 3.402862548828125e-05, "step": 22301, "training_step_time": 0.10534882545471191 }, { "epoch": 3.40301513671875e-05, "model_forward_time": 0.02717757225036621, "step": 22302 }, { "epoch": 3.40301513671875e-05, "step": 22302, "training_step_time": 0.10916256904602051 }, { "epoch": 3.403167724609375e-05, "model_forward_time": 0.024744749069213867, "step": 22303 }, { "epoch": 3.403167724609375e-05, "step": 22303, "training_step_time": 0.1046910285949707 }, { "epoch": 3.4033203125e-05, "model_forward_time": 0.02456951141357422, "step": 22304 }, { "epoch": 3.4033203125e-05, "step": 22304, "training_step_time": 0.10333991050720215 }, { "epoch": 3.403472900390625e-05, "model_forward_time": 0.024039745330810547, "step": 22305 }, { "epoch": 3.403472900390625e-05, "step": 22305, "training_step_time": 0.14157509803771973 }, { "epoch": 3.40362548828125e-05, "model_forward_time": 0.02763652801513672, "step": 22306 }, { "epoch": 3.40362548828125e-05, "step": 22306, "training_step_time": 0.1618332862854004 }, { "epoch": 3.403778076171875e-05, "model_forward_time": 0.024574756622314453, "step": 22307 }, { "epoch": 3.403778076171875e-05, "step": 22307, "training_step_time": 0.18008875846862793 }, { "epoch": 3.4039306640625e-05, "model_forward_time": 0.024396181106567383, "step": 22308 }, { "epoch": 3.4039306640625e-05, "step": 22308, "training_step_time": 0.1524195671081543 }, { "epoch": 3.404083251953125e-05, "model_forward_time": 0.024350881576538086, "step": 22309 }, { "epoch": 3.404083251953125e-05, "step": 22309, "training_step_time": 0.17176151275634766 }, { "epoch": 3.40423583984375e-05, "grad_norm": 0.2075805515050888, "learning_rate": 1.691374032353205e-05, "loss": 0.0075, "step": 22310 }, { "epoch": 3.40423583984375e-05, "model_forward_time": 0.024282217025756836, "step": 22310 }, { "epoch": 3.40423583984375e-05, "step": 22310, "training_step_time": 0.10498762130737305 }, { "epoch": 3.404388427734375e-05, "model_forward_time": 0.024506330490112305, "step": 22311 }, { "epoch": 3.404388427734375e-05, "step": 22311, "training_step_time": 0.1050260066986084 }, { "epoch": 3.404541015625e-05, "model_forward_time": 0.02545166015625, "step": 22312 }, { "epoch": 3.404541015625e-05, "step": 22312, "training_step_time": 0.10775494575500488 }, { "epoch": 3.404693603515625e-05, "model_forward_time": 0.025567054748535156, "step": 22313 }, { "epoch": 3.404693603515625e-05, "step": 22313, "training_step_time": 0.1628279685974121 }, { "epoch": 3.40484619140625e-05, "model_forward_time": 0.026265859603881836, "step": 22314 }, { "epoch": 3.40484619140625e-05, "step": 22314, "training_step_time": 0.16417717933654785 }, { "epoch": 3.404998779296875e-05, "model_forward_time": 0.0243680477142334, "step": 22315 }, { "epoch": 3.404998779296875e-05, "step": 22315, "training_step_time": 0.16736769676208496 }, { "epoch": 3.4051513671875e-05, "model_forward_time": 0.024075984954833984, "step": 22316 }, { "epoch": 3.4051513671875e-05, "step": 22316, "training_step_time": 0.16303777694702148 }, { "epoch": 3.405303955078125e-05, "model_forward_time": 0.023956298828125, "step": 22317 }, { "epoch": 3.405303955078125e-05, "step": 22317, "training_step_time": 0.13863325119018555 }, { "epoch": 3.40545654296875e-05, "model_forward_time": 0.024230241775512695, "step": 22318 }, { "epoch": 3.40545654296875e-05, "step": 22318, "training_step_time": 0.13014554977416992 }, { "epoch": 3.405609130859375e-05, "model_forward_time": 0.024568796157836914, "step": 22319 }, { "epoch": 3.405609130859375e-05, "step": 22319, "training_step_time": 0.16027235984802246 }, { "epoch": 3.40576171875e-05, "grad_norm": 0.3397980034351349, "learning_rate": 1.6872437661432517e-05, "loss": 0.0098, "step": 22320 }, { "epoch": 3.40576171875e-05, "model_forward_time": 0.02552008628845215, "step": 22320 }, { "epoch": 3.40576171875e-05, "step": 22320, "training_step_time": 0.1410675048828125 }, { "epoch": 3.405914306640625e-05, "model_forward_time": 0.024914979934692383, "step": 22321 }, { "epoch": 3.405914306640625e-05, "step": 22321, "training_step_time": 0.10501766204833984 }, { "epoch": 3.40606689453125e-05, "model_forward_time": 0.025391101837158203, "step": 22322 }, { "epoch": 3.40606689453125e-05, "step": 22322, "training_step_time": 0.10269570350646973 }, { "epoch": 3.406219482421875e-05, "model_forward_time": 0.02498149871826172, "step": 22323 }, { "epoch": 3.406219482421875e-05, "step": 22323, "training_step_time": 0.10476851463317871 }, { "epoch": 3.4063720703125e-05, "model_forward_time": 0.0253751277923584, "step": 22324 }, { "epoch": 3.4063720703125e-05, "step": 22324, "training_step_time": 0.10556697845458984 }, { "epoch": 3.406524658203125e-05, "model_forward_time": 0.02503514289855957, "step": 22325 }, { "epoch": 3.406524658203125e-05, "step": 22325, "training_step_time": 0.10655093193054199 }, { "epoch": 3.40667724609375e-05, "model_forward_time": 0.025315284729003906, "step": 22326 }, { "epoch": 3.40667724609375e-05, "step": 22326, "training_step_time": 0.10497641563415527 }, { "epoch": 3.406829833984375e-05, "model_forward_time": 0.025534629821777344, "step": 22327 }, { "epoch": 3.406829833984375e-05, "step": 22327, "training_step_time": 0.10580945014953613 }, { "epoch": 3.406982421875e-05, "model_forward_time": 0.02538013458251953, "step": 22328 }, { "epoch": 3.406982421875e-05, "step": 22328, "training_step_time": 0.11018967628479004 }, { "epoch": 3.407135009765625e-05, "model_forward_time": 0.02521347999572754, "step": 22329 }, { "epoch": 3.407135009765625e-05, "step": 22329, "training_step_time": 0.1338033676147461 }, { "epoch": 3.40728759765625e-05, "grad_norm": 0.21587461233139038, "learning_rate": 1.6831175252444943e-05, "loss": 0.0048, "step": 22330 }, { "epoch": 3.40728759765625e-05, "model_forward_time": 0.025536060333251953, "step": 22330 }, { "epoch": 3.40728759765625e-05, "step": 22330, "training_step_time": 0.10819196701049805 }, { "epoch": 3.407440185546875e-05, "model_forward_time": 0.024944782257080078, "step": 22331 }, { "epoch": 3.407440185546875e-05, "step": 22331, "training_step_time": 0.21061944961547852 }, { "epoch": 3.4075927734375e-05, "model_forward_time": 0.024202585220336914, "step": 22332 }, { "epoch": 3.4075927734375e-05, "step": 22332, "training_step_time": 0.10684037208557129 }, { "epoch": 3.407745361328125e-05, "model_forward_time": 0.024291038513183594, "step": 22333 }, { "epoch": 3.407745361328125e-05, "step": 22333, "training_step_time": 0.11098670959472656 }, { "epoch": 3.40789794921875e-05, "model_forward_time": 0.025118350982666016, "step": 22334 }, { "epoch": 3.40789794921875e-05, "step": 22334, "training_step_time": 0.10969853401184082 }, { "epoch": 3.408050537109375e-05, "model_forward_time": 0.025268077850341797, "step": 22335 }, { "epoch": 3.408050537109375e-05, "step": 22335, "training_step_time": 0.10558390617370605 }, { "epoch": 3.408203125e-05, "model_forward_time": 0.025547266006469727, "step": 22336 }, { "epoch": 3.408203125e-05, "step": 22336, "training_step_time": 0.1079864501953125 }, { "epoch": 3.408355712890625e-05, "model_forward_time": 0.024543046951293945, "step": 22337 }, { "epoch": 3.408355712890625e-05, "step": 22337, "training_step_time": 0.10457801818847656 }, { "epoch": 3.40850830078125e-05, "model_forward_time": 0.025679588317871094, "step": 22338 }, { "epoch": 3.40850830078125e-05, "step": 22338, "training_step_time": 0.1058967113494873 }, { "epoch": 3.408660888671875e-05, "model_forward_time": 0.025058984756469727, "step": 22339 }, { "epoch": 3.408660888671875e-05, "step": 22339, "training_step_time": 0.10463333129882812 }, { "epoch": 3.4088134765625e-05, "grad_norm": 0.08035118132829666, "learning_rate": 1.6789953146707053e-05, "loss": 0.0121, "step": 22340 }, { "epoch": 3.4088134765625e-05, "model_forward_time": 0.02535414695739746, "step": 22340 }, { "epoch": 3.4088134765625e-05, "step": 22340, "training_step_time": 0.10564303398132324 }, { "epoch": 3.408966064453125e-05, "model_forward_time": 0.02519965171813965, "step": 22341 }, { "epoch": 3.408966064453125e-05, "step": 22341, "training_step_time": 0.10824823379516602 }, { "epoch": 3.40911865234375e-05, "model_forward_time": 0.02512216567993164, "step": 22342 }, { "epoch": 3.40911865234375e-05, "step": 22342, "training_step_time": 0.10604286193847656 }, { "epoch": 3.409271240234375e-05, "model_forward_time": 0.025490760803222656, "step": 22343 }, { "epoch": 3.409271240234375e-05, "step": 22343, "training_step_time": 0.10480117797851562 }, { "epoch": 3.409423828125e-05, "model_forward_time": 0.0247952938079834, "step": 22344 }, { "epoch": 3.409423828125e-05, "step": 22344, "training_step_time": 0.10836601257324219 }, { "epoch": 3.409576416015625e-05, "model_forward_time": 0.025383710861206055, "step": 22345 }, { "epoch": 3.409576416015625e-05, "step": 22345, "training_step_time": 0.10429072380065918 }, { "epoch": 3.40972900390625e-05, "model_forward_time": 0.025011062622070312, "step": 22346 }, { "epoch": 3.40972900390625e-05, "step": 22346, "training_step_time": 0.10444331169128418 }, { "epoch": 3.409881591796875e-05, "model_forward_time": 0.02515435218811035, "step": 22347 }, { "epoch": 3.409881591796875e-05, "step": 22347, "training_step_time": 0.10367679595947266 }, { "epoch": 3.4100341796875e-05, "model_forward_time": 0.02533411979675293, "step": 22348 }, { "epoch": 3.4100341796875e-05, "step": 22348, "training_step_time": 0.10473203659057617 }, { "epoch": 3.410186767578125e-05, "model_forward_time": 0.02552628517150879, "step": 22349 }, { "epoch": 3.410186767578125e-05, "step": 22349, "training_step_time": 0.1061854362487793 }, { "epoch": 3.41033935546875e-05, "grad_norm": 0.15022511780261993, "learning_rate": 1.6748771394307585e-05, "loss": 0.0102, "step": 22350 }, { "epoch": 3.41033935546875e-05, "model_forward_time": 0.02396869659423828, "step": 22350 }, { "epoch": 3.41033935546875e-05, "step": 22350, "training_step_time": 0.14788818359375 }, { "epoch": 3.410491943359375e-05, "model_forward_time": 0.025060176849365234, "step": 22351 }, { "epoch": 3.410491943359375e-05, "step": 22351, "training_step_time": 0.15220975875854492 }, { "epoch": 3.41064453125e-05, "model_forward_time": 0.024693012237548828, "step": 22352 }, { "epoch": 3.41064453125e-05, "step": 22352, "training_step_time": 0.11061310768127441 }, { "epoch": 3.410797119140625e-05, "model_forward_time": 0.024750471115112305, "step": 22353 }, { "epoch": 3.410797119140625e-05, "step": 22353, "training_step_time": 0.13216876983642578 }, { "epoch": 3.41094970703125e-05, "model_forward_time": 0.025804758071899414, "step": 22354 }, { "epoch": 3.41094970703125e-05, "step": 22354, "training_step_time": 0.20363235473632812 }, { "epoch": 3.411102294921875e-05, "model_forward_time": 0.023818254470825195, "step": 22355 }, { "epoch": 3.411102294921875e-05, "step": 22355, "training_step_time": 0.10530424118041992 }, { "epoch": 3.4112548828125e-05, "model_forward_time": 0.024637699127197266, "step": 22356 }, { "epoch": 3.4112548828125e-05, "step": 22356, "training_step_time": 0.10890626907348633 }, { "epoch": 3.411407470703125e-05, "model_forward_time": 0.025214672088623047, "step": 22357 }, { "epoch": 3.411407470703125e-05, "step": 22357, "training_step_time": 0.11024284362792969 }, { "epoch": 3.41156005859375e-05, "model_forward_time": 0.024851322174072266, "step": 22358 }, { "epoch": 3.41156005859375e-05, "step": 22358, "training_step_time": 0.10738706588745117 }, { "epoch": 3.411712646484375e-05, "model_forward_time": 0.024818897247314453, "step": 22359 }, { "epoch": 3.411712646484375e-05, "step": 22359, "training_step_time": 0.11810970306396484 }, { "epoch": 3.411865234375e-05, "grad_norm": 0.11675149947404861, "learning_rate": 1.6707630045286265e-05, "loss": 0.0043, "step": 22360 }, { "epoch": 3.411865234375e-05, "model_forward_time": 0.025145292282104492, "step": 22360 }, { "epoch": 3.411865234375e-05, "step": 22360, "training_step_time": 0.10600924491882324 }, { "epoch": 3.412017822265625e-05, "model_forward_time": 0.025168180465698242, "step": 22361 }, { "epoch": 3.412017822265625e-05, "step": 22361, "training_step_time": 0.19519591331481934 }, { "epoch": 3.41217041015625e-05, "model_forward_time": 0.024242877960205078, "step": 22362 }, { "epoch": 3.41217041015625e-05, "step": 22362, "training_step_time": 0.11031818389892578 }, { "epoch": 3.412322998046875e-05, "model_forward_time": 0.024910688400268555, "step": 22363 }, { "epoch": 3.412322998046875e-05, "step": 22363, "training_step_time": 0.1074976921081543 }, { "epoch": 3.4124755859375e-05, "model_forward_time": 0.025353431701660156, "step": 22364 }, { "epoch": 3.4124755859375e-05, "step": 22364, "training_step_time": 0.13373827934265137 }, { "epoch": 3.412628173828125e-05, "model_forward_time": 0.02839493751525879, "step": 22365 }, { "epoch": 3.412628173828125e-05, "step": 22365, "training_step_time": 0.12445473670959473 }, { "epoch": 3.41278076171875e-05, "model_forward_time": 0.024983882904052734, "step": 22366 }, { "epoch": 3.41278076171875e-05, "step": 22366, "training_step_time": 0.11909723281860352 }, { "epoch": 3.412933349609375e-05, "model_forward_time": 0.023677825927734375, "step": 22367 }, { "epoch": 3.412933349609375e-05, "step": 22367, "training_step_time": 0.1439499855041504 }, { "epoch": 3.4130859375e-05, "model_forward_time": 0.023617267608642578, "step": 22368 }, { "epoch": 3.4130859375e-05, "step": 22368, "training_step_time": 0.14217066764831543 }, { "epoch": 3.413238525390625e-05, "model_forward_time": 0.024296998977661133, "step": 22369 }, { "epoch": 3.413238525390625e-05, "step": 22369, "training_step_time": 0.1460132598876953 }, { "epoch": 3.41339111328125e-05, "grad_norm": 0.12481307238340378, "learning_rate": 1.666652914963371e-05, "loss": 0.0178, "step": 22370 }, { "epoch": 3.41339111328125e-05, "model_forward_time": 0.024140357971191406, "step": 22370 }, { "epoch": 3.41339111328125e-05, "step": 22370, "training_step_time": 0.14366579055786133 }, { "epoch": 3.413543701171875e-05, "model_forward_time": 0.024472713470458984, "step": 22371 }, { "epoch": 3.413543701171875e-05, "step": 22371, "training_step_time": 0.13518762588500977 }, { "epoch": 3.4136962890625e-05, "model_forward_time": 0.024513721466064453, "step": 22372 }, { "epoch": 3.4136962890625e-05, "step": 22372, "training_step_time": 0.12340426445007324 }, { "epoch": 3.413848876953125e-05, "model_forward_time": 0.024428129196166992, "step": 22373 }, { "epoch": 3.413848876953125e-05, "step": 22373, "training_step_time": 0.12000799179077148 }, { "epoch": 3.41400146484375e-05, "model_forward_time": 0.024433374404907227, "step": 22374 }, { "epoch": 3.41400146484375e-05, "step": 22374, "training_step_time": 0.18062996864318848 }, { "epoch": 3.414154052734375e-05, "model_forward_time": 0.024607419967651367, "step": 22375 }, { "epoch": 3.414154052734375e-05, "step": 22375, "training_step_time": 0.11812496185302734 }, { "epoch": 3.414306640625e-05, "model_forward_time": 0.024066448211669922, "step": 22376 }, { "epoch": 3.414306640625e-05, "step": 22376, "training_step_time": 0.20033907890319824 }, { "epoch": 3.414459228515625e-05, "model_forward_time": 0.02404165267944336, "step": 22377 }, { "epoch": 3.414459228515625e-05, "step": 22377, "training_step_time": 0.19022870063781738 }, { "epoch": 3.41461181640625e-05, "model_forward_time": 0.027565479278564453, "step": 22378 }, { "epoch": 3.41461181640625e-05, "step": 22378, "training_step_time": 0.13741850852966309 }, { "epoch": 3.414764404296875e-05, "model_forward_time": 0.024247407913208008, "step": 22379 }, { "epoch": 3.414764404296875e-05, "step": 22379, "training_step_time": 0.11807107925415039 }, { "epoch": 3.4149169921875e-05, "grad_norm": 0.11971784383058548, "learning_rate": 1.662546875729138e-05, "loss": 0.0072, "step": 22380 }, { "epoch": 3.4149169921875e-05, "model_forward_time": 0.02432847023010254, "step": 22380 }, { "epoch": 3.4149169921875e-05, "step": 22380, "training_step_time": 0.10497665405273438 }, { "epoch": 3.415069580078125e-05, "model_forward_time": 0.024884462356567383, "step": 22381 }, { "epoch": 3.415069580078125e-05, "step": 22381, "training_step_time": 0.10737729072570801 }, { "epoch": 3.41522216796875e-05, "model_forward_time": 0.024908065795898438, "step": 22382 }, { "epoch": 3.41522216796875e-05, "step": 22382, "training_step_time": 0.10604476928710938 }, { "epoch": 3.415374755859375e-05, "model_forward_time": 0.024859189987182617, "step": 22383 }, { "epoch": 3.415374755859375e-05, "step": 22383, "training_step_time": 0.10712456703186035 }, { "epoch": 3.41552734375e-05, "model_forward_time": 0.025123119354248047, "step": 22384 }, { "epoch": 3.41552734375e-05, "step": 22384, "training_step_time": 0.1079111099243164 }, { "epoch": 3.415679931640625e-05, "model_forward_time": 0.024837970733642578, "step": 22385 }, { "epoch": 3.415679931640625e-05, "step": 22385, "training_step_time": 0.10713815689086914 }, { "epoch": 3.41583251953125e-05, "model_forward_time": 0.024560213088989258, "step": 22386 }, { "epoch": 3.41583251953125e-05, "step": 22386, "training_step_time": 0.10690021514892578 }, { "epoch": 3.415985107421875e-05, "model_forward_time": 0.02489185333251953, "step": 22387 }, { "epoch": 3.415985107421875e-05, "step": 22387, "training_step_time": 0.10455060005187988 }, { "epoch": 3.4161376953125e-05, "model_forward_time": 0.024939775466918945, "step": 22388 }, { "epoch": 3.4161376953125e-05, "step": 22388, "training_step_time": 0.10802173614501953 }, { "epoch": 3.416290283203125e-05, "model_forward_time": 0.025206565856933594, "step": 22389 }, { "epoch": 3.416290283203125e-05, "step": 22389, "training_step_time": 0.10504388809204102 }, { "epoch": 3.41644287109375e-05, "grad_norm": 0.09075043350458145, "learning_rate": 1.658444891815152e-05, "loss": 0.0035, "step": 22390 }, { "epoch": 3.41644287109375e-05, "model_forward_time": 0.024889469146728516, "step": 22390 }, { "epoch": 3.41644287109375e-05, "step": 22390, "training_step_time": 0.10416698455810547 }, { "epoch": 3.416595458984375e-05, "model_forward_time": 0.024668216705322266, "step": 22391 }, { "epoch": 3.416595458984375e-05, "step": 22391, "training_step_time": 0.10533714294433594 }, { "epoch": 3.416748046875e-05, "model_forward_time": 0.024922609329223633, "step": 22392 }, { "epoch": 3.416748046875e-05, "step": 22392, "training_step_time": 0.10623502731323242 }, { "epoch": 3.416900634765625e-05, "model_forward_time": 0.024741411209106445, "step": 22393 }, { "epoch": 3.416900634765625e-05, "step": 22393, "training_step_time": 0.10268163681030273 }, { "epoch": 3.41705322265625e-05, "model_forward_time": 0.02620410919189453, "step": 22394 }, { "epoch": 3.41705322265625e-05, "step": 22394, "training_step_time": 0.12650251388549805 }, { "epoch": 3.417205810546875e-05, "model_forward_time": 0.025969266891479492, "step": 22395 }, { "epoch": 3.417205810546875e-05, "step": 22395, "training_step_time": 0.16384100914001465 }, { "epoch": 3.4173583984375e-05, "model_forward_time": 0.02457404136657715, "step": 22396 }, { "epoch": 3.4173583984375e-05, "step": 22396, "training_step_time": 0.10990381240844727 }, { "epoch": 3.417510986328125e-05, "model_forward_time": 0.0243833065032959, "step": 22397 }, { "epoch": 3.417510986328125e-05, "step": 22397, "training_step_time": 0.13201379776000977 }, { "epoch": 3.41766357421875e-05, "model_forward_time": 0.025101661682128906, "step": 22398 }, { "epoch": 3.41766357421875e-05, "step": 22398, "training_step_time": 0.19716978073120117 }, { "epoch": 3.417816162109375e-05, "model_forward_time": 0.024075984954833984, "step": 22399 }, { "epoch": 3.417816162109375e-05, "step": 22399, "training_step_time": 0.11251068115234375 }, { "epoch": 3.41796875e-05, "grad_norm": 0.396480917930603, "learning_rate": 1.6543469682057106e-05, "loss": 0.0116, "step": 22400 }, { "epoch": 3.41796875e-05, "model_forward_time": 0.02446126937866211, "step": 22400 }, { "epoch": 3.41796875e-05, "step": 22400, "training_step_time": 0.1990652084350586 }, { "epoch": 3.418121337890625e-05, "model_forward_time": 0.024676084518432617, "step": 22401 }, { "epoch": 3.418121337890625e-05, "step": 22401, "training_step_time": 0.10377097129821777 }, { "epoch": 3.41827392578125e-05, "model_forward_time": 0.025716066360473633, "step": 22402 }, { "epoch": 3.41827392578125e-05, "step": 22402, "training_step_time": 0.14365172386169434 }, { "epoch": 3.418426513671875e-05, "model_forward_time": 0.02464747428894043, "step": 22403 }, { "epoch": 3.418426513671875e-05, "step": 22403, "training_step_time": 0.17332959175109863 }, { "epoch": 3.4185791015625e-05, "model_forward_time": 0.024372339248657227, "step": 22404 }, { "epoch": 3.4185791015625e-05, "step": 22404, "training_step_time": 0.19342589378356934 }, { "epoch": 3.418731689453125e-05, "model_forward_time": 0.02457118034362793, "step": 22405 }, { "epoch": 3.418731689453125e-05, "step": 22405, "training_step_time": 0.15049076080322266 }, { "epoch": 3.41888427734375e-05, "model_forward_time": 0.023029565811157227, "step": 22406 }, { "epoch": 3.41888427734375e-05, "step": 22406, "training_step_time": 0.21095061302185059 }, { "epoch": 3.419036865234375e-05, "model_forward_time": 0.024020671844482422, "step": 22407 }, { "epoch": 3.419036865234375e-05, "step": 22407, "training_step_time": 0.13986468315124512 }, { "epoch": 3.419189453125e-05, "model_forward_time": 0.023830890655517578, "step": 22408 }, { "epoch": 3.419189453125e-05, "step": 22408, "training_step_time": 0.10308551788330078 }, { "epoch": 3.419342041015625e-05, "model_forward_time": 0.024825096130371094, "step": 22409 }, { "epoch": 3.419342041015625e-05, "step": 22409, "training_step_time": 0.11868810653686523 }, { "epoch": 3.41949462890625e-05, "grad_norm": 0.10397058725357056, "learning_rate": 1.6502531098801753e-05, "loss": 0.0186, "step": 22410 }, { "epoch": 3.41949462890625e-05, "model_forward_time": 0.025275468826293945, "step": 22410 }, { "epoch": 3.41949462890625e-05, "step": 22410, "training_step_time": 0.10603189468383789 }, { "epoch": 3.419647216796875e-05, "model_forward_time": 0.024883270263671875, "step": 22411 }, { "epoch": 3.419647216796875e-05, "step": 22411, "training_step_time": 0.10431623458862305 }, { "epoch": 3.4197998046875e-05, "model_forward_time": 0.02496027946472168, "step": 22412 }, { "epoch": 3.4197998046875e-05, "step": 22412, "training_step_time": 0.10762619972229004 }, { "epoch": 3.419952392578125e-05, "model_forward_time": 0.024773120880126953, "step": 22413 }, { "epoch": 3.419952392578125e-05, "step": 22413, "training_step_time": 0.1041421890258789 }, { "epoch": 3.42010498046875e-05, "model_forward_time": 0.027117490768432617, "step": 22414 }, { "epoch": 3.42010498046875e-05, "step": 22414, "training_step_time": 0.10729074478149414 }, { "epoch": 3.420257568359375e-05, "model_forward_time": 0.024976491928100586, "step": 22415 }, { "epoch": 3.420257568359375e-05, "step": 22415, "training_step_time": 0.10467028617858887 }, { "epoch": 3.42041015625e-05, "model_forward_time": 0.02530527114868164, "step": 22416 }, { "epoch": 3.42041015625e-05, "step": 22416, "training_step_time": 0.13192224502563477 }, { "epoch": 3.420562744140625e-05, "model_forward_time": 0.024147987365722656, "step": 22417 }, { "epoch": 3.420562744140625e-05, "step": 22417, "training_step_time": 0.14472723007202148 }, { "epoch": 3.42071533203125e-05, "model_forward_time": 0.023494720458984375, "step": 22418 }, { "epoch": 3.42071533203125e-05, "step": 22418, "training_step_time": 0.13454699516296387 }, { "epoch": 3.420867919921875e-05, "model_forward_time": 0.023492097854614258, "step": 22419 }, { "epoch": 3.420867919921875e-05, "step": 22419, "training_step_time": 0.21616315841674805 }, { "epoch": 3.4210205078125e-05, "grad_norm": 0.276035875082016, "learning_rate": 1.646163321812974e-05, "loss": 0.0079, "step": 22420 }, { "epoch": 3.4210205078125e-05, "model_forward_time": 0.023929595947265625, "step": 22420 }, { "epoch": 3.4210205078125e-05, "step": 22420, "training_step_time": 0.14091014862060547 }, { "epoch": 3.421173095703125e-05, "model_forward_time": 0.02567601203918457, "step": 22421 }, { "epoch": 3.421173095703125e-05, "step": 22421, "training_step_time": 0.11544156074523926 }, { "epoch": 3.42132568359375e-05, "model_forward_time": 0.02512836456298828, "step": 22422 }, { "epoch": 3.42132568359375e-05, "step": 22422, "training_step_time": 0.12088990211486816 }, { "epoch": 3.421478271484375e-05, "model_forward_time": 0.0251615047454834, "step": 22423 }, { "epoch": 3.421478271484375e-05, "step": 22423, "training_step_time": 0.111785888671875 }, { "epoch": 3.421630859375e-05, "model_forward_time": 0.025393009185791016, "step": 22424 }, { "epoch": 3.421630859375e-05, "step": 22424, "training_step_time": 0.10827946662902832 }, { "epoch": 3.421783447265625e-05, "model_forward_time": 0.024881362915039062, "step": 22425 }, { "epoch": 3.421783447265625e-05, "step": 22425, "training_step_time": 0.10597825050354004 }, { "epoch": 3.42193603515625e-05, "model_forward_time": 0.025358915328979492, "step": 22426 }, { "epoch": 3.42193603515625e-05, "step": 22426, "training_step_time": 0.10537934303283691 }, { "epoch": 3.422088623046875e-05, "model_forward_time": 0.0249631404876709, "step": 22427 }, { "epoch": 3.422088623046875e-05, "step": 22427, "training_step_time": 0.11023807525634766 }, { "epoch": 3.4222412109375e-05, "model_forward_time": 0.02502751350402832, "step": 22428 }, { "epoch": 3.4222412109375e-05, "step": 22428, "training_step_time": 0.10494303703308105 }, { "epoch": 3.422393798828125e-05, "model_forward_time": 0.02562713623046875, "step": 22429 }, { "epoch": 3.422393798828125e-05, "step": 22429, "training_step_time": 0.11208915710449219 }, { "epoch": 3.42254638671875e-05, "grad_norm": 0.3813806474208832, "learning_rate": 1.6420776089735827e-05, "loss": 0.0051, "step": 22430 }, { "epoch": 3.42254638671875e-05, "model_forward_time": 0.02512073516845703, "step": 22430 }, { "epoch": 3.42254638671875e-05, "step": 22430, "training_step_time": 0.11412310600280762 }, { "epoch": 3.422698974609375e-05, "model_forward_time": 0.024928569793701172, "step": 22431 }, { "epoch": 3.422698974609375e-05, "step": 22431, "training_step_time": 0.10455942153930664 }, { "epoch": 3.4228515625e-05, "model_forward_time": 0.025096416473388672, "step": 22432 }, { "epoch": 3.4228515625e-05, "step": 22432, "training_step_time": 0.10565042495727539 }, { "epoch": 3.423004150390625e-05, "model_forward_time": 0.025542736053466797, "step": 22433 }, { "epoch": 3.423004150390625e-05, "step": 22433, "training_step_time": 0.10500717163085938 }, { "epoch": 3.42315673828125e-05, "model_forward_time": 0.0256502628326416, "step": 22434 }, { "epoch": 3.42315673828125e-05, "step": 22434, "training_step_time": 0.10578346252441406 }, { "epoch": 3.423309326171875e-05, "model_forward_time": 0.025389671325683594, "step": 22435 }, { "epoch": 3.423309326171875e-05, "step": 22435, "training_step_time": 0.10383963584899902 }, { "epoch": 3.4234619140625e-05, "model_forward_time": 0.025395631790161133, "step": 22436 }, { "epoch": 3.4234619140625e-05, "step": 22436, "training_step_time": 0.11177444458007812 }, { "epoch": 3.423614501953125e-05, "model_forward_time": 0.02590489387512207, "step": 22437 }, { "epoch": 3.423614501953125e-05, "step": 22437, "training_step_time": 0.10516667366027832 }, { "epoch": 3.42376708984375e-05, "model_forward_time": 0.024885177612304688, "step": 22438 }, { "epoch": 3.42376708984375e-05, "step": 22438, "training_step_time": 0.15067505836486816 }, { "epoch": 3.423919677734375e-05, "model_forward_time": 0.02453756332397461, "step": 22439 }, { "epoch": 3.423919677734375e-05, "step": 22439, "training_step_time": 0.21863007545471191 }, { "epoch": 3.424072265625e-05, "grad_norm": 0.1491369605064392, "learning_rate": 1.637995976326527e-05, "loss": 0.0054, "step": 22440 }, { "epoch": 3.424072265625e-05, "model_forward_time": 0.02414107322692871, "step": 22440 }, { "epoch": 3.424072265625e-05, "step": 22440, "training_step_time": 0.1854255199432373 }, { "epoch": 3.424224853515625e-05, "model_forward_time": 0.024416685104370117, "step": 22441 }, { "epoch": 3.424224853515625e-05, "step": 22441, "training_step_time": 0.1526956558227539 }, { "epoch": 3.42437744140625e-05, "model_forward_time": 0.02459430694580078, "step": 22442 }, { "epoch": 3.42437744140625e-05, "step": 22442, "training_step_time": 0.10424470901489258 }, { "epoch": 3.424530029296875e-05, "model_forward_time": 0.027668476104736328, "step": 22443 }, { "epoch": 3.424530029296875e-05, "step": 22443, "training_step_time": 0.19573497772216797 }, { "epoch": 3.4246826171875e-05, "model_forward_time": 0.02541494369506836, "step": 22444 }, { "epoch": 3.4246826171875e-05, "step": 22444, "training_step_time": 0.10525894165039062 }, { "epoch": 3.424835205078125e-05, "model_forward_time": 0.02418828010559082, "step": 22445 }, { "epoch": 3.424835205078125e-05, "step": 22445, "training_step_time": 0.10398197174072266 }, { "epoch": 3.42498779296875e-05, "model_forward_time": 0.025159597396850586, "step": 22446 }, { "epoch": 3.42498779296875e-05, "step": 22446, "training_step_time": 0.15280818939208984 }, { "epoch": 3.425140380859375e-05, "model_forward_time": 0.024649858474731445, "step": 22447 }, { "epoch": 3.425140380859375e-05, "step": 22447, "training_step_time": 0.18122506141662598 }, { "epoch": 3.42529296875e-05, "model_forward_time": 0.023980140686035156, "step": 22448 }, { "epoch": 3.42529296875e-05, "step": 22448, "training_step_time": 0.20926713943481445 }, { "epoch": 3.425445556640625e-05, "model_forward_time": 0.027878522872924805, "step": 22449 }, { "epoch": 3.425445556640625e-05, "step": 22449, "training_step_time": 0.1589667797088623 }, { "epoch": 3.42559814453125e-05, "grad_norm": 0.31565800309181213, "learning_rate": 1.633918428831377e-05, "loss": 0.0089, "step": 22450 }, { "epoch": 3.42559814453125e-05, "model_forward_time": 0.023316144943237305, "step": 22450 }, { "epoch": 3.42559814453125e-05, "step": 22450, "training_step_time": 0.17870235443115234 }, { "epoch": 3.425750732421875e-05, "model_forward_time": 0.02413201332092285, "step": 22451 }, { "epoch": 3.425750732421875e-05, "step": 22451, "training_step_time": 0.1248466968536377 }, { "epoch": 3.4259033203125e-05, "model_forward_time": 0.02429342269897461, "step": 22452 }, { "epoch": 3.4259033203125e-05, "step": 22452, "training_step_time": 0.11592984199523926 }, { "epoch": 3.426055908203125e-05, "model_forward_time": 0.025311946868896484, "step": 22453 }, { "epoch": 3.426055908203125e-05, "step": 22453, "training_step_time": 0.11795568466186523 }, { "epoch": 3.42620849609375e-05, "model_forward_time": 0.025185585021972656, "step": 22454 }, { "epoch": 3.42620849609375e-05, "step": 22454, "training_step_time": 0.10916757583618164 }, { "epoch": 3.426361083984375e-05, "model_forward_time": 0.024597644805908203, "step": 22455 }, { "epoch": 3.426361083984375e-05, "step": 22455, "training_step_time": 0.10643172264099121 }, { "epoch": 3.426513671875e-05, "model_forward_time": 0.026105642318725586, "step": 22456 }, { "epoch": 3.426513671875e-05, "step": 22456, "training_step_time": 0.1096343994140625 }, { "epoch": 3.426666259765625e-05, "model_forward_time": 0.025262832641601562, "step": 22457 }, { "epoch": 3.426666259765625e-05, "step": 22457, "training_step_time": 0.10553240776062012 }, { "epoch": 3.42681884765625e-05, "model_forward_time": 0.025252342224121094, "step": 22458 }, { "epoch": 3.42681884765625e-05, "step": 22458, "training_step_time": 0.10580205917358398 }, { "epoch": 3.426971435546875e-05, "model_forward_time": 0.02510356903076172, "step": 22459 }, { "epoch": 3.426971435546875e-05, "step": 22459, "training_step_time": 0.10562634468078613 }, { "epoch": 3.4271240234375e-05, "grad_norm": 0.38021135330200195, "learning_rate": 1.6298449714427355e-05, "loss": 0.0081, "step": 22460 }, { "epoch": 3.4271240234375e-05, "model_forward_time": 0.02519512176513672, "step": 22460 }, { "epoch": 3.4271240234375e-05, "step": 22460, "training_step_time": 0.10873627662658691 }, { "epoch": 3.427276611328125e-05, "model_forward_time": 0.024911880493164062, "step": 22461 }, { "epoch": 3.427276611328125e-05, "step": 22461, "training_step_time": 0.15426111221313477 }, { "epoch": 3.42742919921875e-05, "model_forward_time": 0.024502992630004883, "step": 22462 }, { "epoch": 3.42742919921875e-05, "step": 22462, "training_step_time": 0.154313325881958 }, { "epoch": 3.427581787109375e-05, "model_forward_time": 0.024194717407226562, "step": 22463 }, { "epoch": 3.427581787109375e-05, "step": 22463, "training_step_time": 0.10732030868530273 }, { "epoch": 3.427734375e-05, "model_forward_time": 0.02803325653076172, "step": 22464 }, { "epoch": 3.427734375e-05, "step": 22464, "training_step_time": 0.14589881896972656 }, { "epoch": 3.427886962890625e-05, "model_forward_time": 0.024923086166381836, "step": 22465 }, { "epoch": 3.427886962890625e-05, "step": 22465, "training_step_time": 0.1748206615447998 }, { "epoch": 3.42803955078125e-05, "model_forward_time": 0.024135112762451172, "step": 22466 }, { "epoch": 3.42803955078125e-05, "step": 22466, "training_step_time": 0.1683053970336914 }, { "epoch": 3.428192138671875e-05, "model_forward_time": 0.0239715576171875, "step": 22467 }, { "epoch": 3.428192138671875e-05, "step": 22467, "training_step_time": 0.10049223899841309 }, { "epoch": 3.4283447265625e-05, "model_forward_time": 0.024181842803955078, "step": 22468 }, { "epoch": 3.4283447265625e-05, "step": 22468, "training_step_time": 0.10140013694763184 }, { "epoch": 3.428497314453125e-05, "model_forward_time": 0.02485942840576172, "step": 22469 }, { "epoch": 3.428497314453125e-05, "step": 22469, "training_step_time": 0.1057901382446289 }, { "epoch": 3.42864990234375e-05, "grad_norm": 0.16420020163059235, "learning_rate": 1.62577560911024e-05, "loss": 0.008, "step": 22470 }, { "epoch": 3.42864990234375e-05, "model_forward_time": 0.025231122970581055, "step": 22470 }, { "epoch": 3.42864990234375e-05, "step": 22470, "training_step_time": 0.10350680351257324 }, { "epoch": 3.428802490234375e-05, "model_forward_time": 0.025012969970703125, "step": 22471 }, { "epoch": 3.428802490234375e-05, "step": 22471, "training_step_time": 0.10654830932617188 }, { "epoch": 3.428955078125e-05, "model_forward_time": 0.025200366973876953, "step": 22472 }, { "epoch": 3.428955078125e-05, "step": 22472, "training_step_time": 0.10405397415161133 }, { "epoch": 3.429107666015625e-05, "model_forward_time": 0.024828672409057617, "step": 22473 }, { "epoch": 3.429107666015625e-05, "step": 22473, "training_step_time": 0.10475754737854004 }, { "epoch": 3.42926025390625e-05, "model_forward_time": 0.024593353271484375, "step": 22474 }, { "epoch": 3.42926025390625e-05, "step": 22474, "training_step_time": 0.10272073745727539 }, { "epoch": 3.429412841796875e-05, "model_forward_time": 0.025143146514892578, "step": 22475 }, { "epoch": 3.429412841796875e-05, "step": 22475, "training_step_time": 0.10408949851989746 }, { "epoch": 3.4295654296875e-05, "model_forward_time": 0.02501535415649414, "step": 22476 }, { "epoch": 3.4295654296875e-05, "step": 22476, "training_step_time": 0.10512495040893555 }, { "epoch": 3.429718017578125e-05, "model_forward_time": 0.024796485900878906, "step": 22477 }, { "epoch": 3.429718017578125e-05, "step": 22477, "training_step_time": 0.10399150848388672 }, { "epoch": 3.42987060546875e-05, "model_forward_time": 0.02478957176208496, "step": 22478 }, { "epoch": 3.42987060546875e-05, "step": 22478, "training_step_time": 0.10568785667419434 }, { "epoch": 3.430023193359375e-05, "model_forward_time": 0.024737834930419922, "step": 22479 }, { "epoch": 3.430023193359375e-05, "step": 22479, "training_step_time": 0.1050727367401123 }, { "epoch": 3.43017578125e-05, "grad_norm": 0.4871219992637634, "learning_rate": 1.6217103467785484e-05, "loss": 0.0077, "step": 22480 }, { "epoch": 3.43017578125e-05, "model_forward_time": 0.02924799919128418, "step": 22480 }, { "epoch": 3.43017578125e-05, "step": 22480, "training_step_time": 0.11047172546386719 }, { "epoch": 3.430328369140625e-05, "model_forward_time": 0.02570343017578125, "step": 22481 }, { "epoch": 3.430328369140625e-05, "step": 22481, "training_step_time": 0.11348581314086914 }, { "epoch": 3.43048095703125e-05, "model_forward_time": 0.024979352951049805, "step": 22482 }, { "epoch": 3.43048095703125e-05, "step": 22482, "training_step_time": 0.13289809226989746 }, { "epoch": 3.430633544921875e-05, "model_forward_time": 0.024374961853027344, "step": 22483 }, { "epoch": 3.430633544921875e-05, "step": 22483, "training_step_time": 0.17105555534362793 }, { "epoch": 3.4307861328125e-05, "model_forward_time": 0.02447962760925293, "step": 22484 }, { "epoch": 3.4307861328125e-05, "step": 22484, "training_step_time": 0.2177739143371582 }, { "epoch": 3.430938720703125e-05, "model_forward_time": 0.024221420288085938, "step": 22485 }, { "epoch": 3.430938720703125e-05, "step": 22485, "training_step_time": 0.2430107593536377 }, { "epoch": 3.43109130859375e-05, "model_forward_time": 0.024776458740234375, "step": 22486 }, { "epoch": 3.43109130859375e-05, "step": 22486, "training_step_time": 0.2290935516357422 }, { "epoch": 3.431243896484375e-05, "model_forward_time": 0.02419757843017578, "step": 22487 }, { "epoch": 3.431243896484375e-05, "step": 22487, "training_step_time": 0.1952812671661377 }, { "epoch": 3.431396484375e-05, "model_forward_time": 0.024350881576538086, "step": 22488 }, { "epoch": 3.431396484375e-05, "step": 22488, "training_step_time": 0.17877912521362305 }, { "epoch": 3.431549072265625e-05, "model_forward_time": 0.026100635528564453, "step": 22489 }, { "epoch": 3.431549072265625e-05, "step": 22489, "training_step_time": 0.17620015144348145 }, { "epoch": 3.43170166015625e-05, "grad_norm": 0.41838160157203674, "learning_rate": 1.617649189387337e-05, "loss": 0.0065, "step": 22490 }, { "epoch": 3.43170166015625e-05, "model_forward_time": 0.02673935890197754, "step": 22490 }, { "epoch": 3.43170166015625e-05, "step": 22490, "training_step_time": 0.1646428108215332 }, { "epoch": 3.431854248046875e-05, "model_forward_time": 0.027776718139648438, "step": 22491 }, { "epoch": 3.431854248046875e-05, "step": 22491, "training_step_time": 0.19440221786499023 }, { "epoch": 3.4320068359375e-05, "model_forward_time": 0.02987527847290039, "step": 22492 }, { "epoch": 3.4320068359375e-05, "step": 22492, "training_step_time": 0.2585277557373047 }, { "epoch": 3.432159423828125e-05, "model_forward_time": 0.027833938598632812, "step": 22493 }, { "epoch": 3.432159423828125e-05, "step": 22493, "training_step_time": 0.3719778060913086 }, { "epoch": 3.43231201171875e-05, "model_forward_time": 0.031086444854736328, "step": 22494 }, { "epoch": 3.43231201171875e-05, "step": 22494, "training_step_time": 0.2861180305480957 }, { "epoch": 3.432464599609375e-05, "model_forward_time": 0.0289461612701416, "step": 22495 }, { "epoch": 3.432464599609375e-05, "step": 22495, "training_step_time": 0.3135216236114502 }, { "epoch": 3.4326171875e-05, "model_forward_time": 0.031242847442626953, "step": 22496 }, { "epoch": 3.4326171875e-05, "step": 22496, "training_step_time": 0.2987210750579834 }, { "epoch": 3.432769775390625e-05, "model_forward_time": 0.030518770217895508, "step": 22497 }, { "epoch": 3.432769775390625e-05, "step": 22497, "training_step_time": 0.3424403667449951 }, { "epoch": 3.43292236328125e-05, "model_forward_time": 0.030771732330322266, "step": 22498 }, { "epoch": 3.43292236328125e-05, "step": 22498, "training_step_time": 0.3419816493988037 }, { "epoch": 3.433074951171875e-05, "model_forward_time": 0.034188032150268555, "step": 22499 }, { "epoch": 3.433074951171875e-05, "step": 22499, "training_step_time": 0.33868932723999023 }, { "epoch": 3.4332275390625e-05, "grad_norm": 0.1720426231622696, "learning_rate": 1.6135921418712956e-05, "loss": 0.0051, "step": 22500 }, { "epoch": 3.4332275390625e-05, "model_forward_time": 0.029853343963623047, "step": 22500 }, { "epoch": 3.4332275390625e-05, "step": 22500, "training_step_time": 0.20296096801757812 }, { "epoch": 3.433380126953125e-05, "model_forward_time": 0.030093908309936523, "step": 22501 }, { "epoch": 3.433380126953125e-05, "step": 22501, "training_step_time": 0.27199268341064453 }, { "epoch": 3.43353271484375e-05, "model_forward_time": 0.030057668685913086, "step": 22502 }, { "epoch": 3.43353271484375e-05, "step": 22502, "training_step_time": 0.2082653045654297 }, { "epoch": 3.433685302734375e-05, "model_forward_time": 0.0322413444519043, "step": 22503 }, { "epoch": 3.433685302734375e-05, "step": 22503, "training_step_time": 0.17333030700683594 }, { "epoch": 3.433837890625e-05, "model_forward_time": 0.03046393394470215, "step": 22504 }, { "epoch": 3.433837890625e-05, "step": 22504, "training_step_time": 0.17946076393127441 }, { "epoch": 3.433990478515625e-05, "model_forward_time": 0.033746957778930664, "step": 22505 }, { "epoch": 3.433990478515625e-05, "step": 22505, "training_step_time": 0.1766035556793213 }, { "epoch": 3.43414306640625e-05, "model_forward_time": 0.02998948097229004, "step": 22506 }, { "epoch": 3.43414306640625e-05, "step": 22506, "training_step_time": 0.12865447998046875 }, { "epoch": 3.434295654296875e-05, "model_forward_time": 0.028602123260498047, "step": 22507 }, { "epoch": 3.434295654296875e-05, "step": 22507, "training_step_time": 0.1841588020324707 }, { "epoch": 3.4344482421875e-05, "model_forward_time": 0.02686476707458496, "step": 22508 }, { "epoch": 3.4344482421875e-05, "step": 22508, "training_step_time": 0.12012243270874023 }, { "epoch": 3.434600830078125e-05, "model_forward_time": 0.026732444763183594, "step": 22509 }, { "epoch": 3.434600830078125e-05, "step": 22509, "training_step_time": 0.20315885543823242 }, { "epoch": 3.43475341796875e-05, "grad_norm": 0.08240101486444473, "learning_rate": 1.6095392091601175e-05, "loss": 0.0032, "step": 22510 }, { "epoch": 3.43475341796875e-05, "model_forward_time": 0.025716781616210938, "step": 22510 }, { "epoch": 3.43475341796875e-05, "step": 22510, "training_step_time": 0.14647555351257324 }, { "epoch": 3.434906005859375e-05, "model_forward_time": 0.02506089210510254, "step": 22511 }, { "epoch": 3.434906005859375e-05, "step": 22511, "training_step_time": 0.2005445957183838 }, { "epoch": 3.43505859375e-05, "model_forward_time": 0.023823022842407227, "step": 22512 }, { "epoch": 3.43505859375e-05, "step": 22512, "training_step_time": 0.11275124549865723 }, { "epoch": 3.435211181640625e-05, "model_forward_time": 0.02328014373779297, "step": 22513 }, { "epoch": 3.435211181640625e-05, "step": 22513, "training_step_time": 0.12714266777038574 }, { "epoch": 3.43536376953125e-05, "model_forward_time": 0.024764299392700195, "step": 22514 }, { "epoch": 3.43536376953125e-05, "step": 22514, "training_step_time": 0.1269221305847168 }, { "epoch": 3.435516357421875e-05, "model_forward_time": 0.02464151382446289, "step": 22515 }, { "epoch": 3.435516357421875e-05, "step": 22515, "training_step_time": 0.12345552444458008 }, { "epoch": 3.4356689453125e-05, "model_forward_time": 0.025038957595825195, "step": 22516 }, { "epoch": 3.4356689453125e-05, "step": 22516, "training_step_time": 0.11983561515808105 }, { "epoch": 3.435821533203125e-05, "model_forward_time": 0.024979591369628906, "step": 22517 }, { "epoch": 3.435821533203125e-05, "step": 22517, "training_step_time": 0.11545276641845703 }, { "epoch": 3.43597412109375e-05, "model_forward_time": 0.02479720115661621, "step": 22518 }, { "epoch": 3.43597412109375e-05, "step": 22518, "training_step_time": 0.11314558982849121 }, { "epoch": 3.436126708984375e-05, "model_forward_time": 0.025104284286499023, "step": 22519 }, { "epoch": 3.436126708984375e-05, "step": 22519, "training_step_time": 0.11342072486877441 }, { "epoch": 3.436279296875e-05, "grad_norm": 0.09012985974550247, "learning_rate": 1.6054903961785013e-05, "loss": 0.0099, "step": 22520 }, { "epoch": 3.436279296875e-05, "model_forward_time": 0.0248565673828125, "step": 22520 }, { "epoch": 3.436279296875e-05, "step": 22520, "training_step_time": 0.12018513679504395 }, { "epoch": 3.436431884765625e-05, "model_forward_time": 0.024838924407958984, "step": 22521 }, { "epoch": 3.436431884765625e-05, "step": 22521, "training_step_time": 0.23431944847106934 }, { "epoch": 3.43658447265625e-05, "model_forward_time": 0.024327754974365234, "step": 22522 }, { "epoch": 3.43658447265625e-05, "step": 22522, "training_step_time": 0.10803937911987305 }, { "epoch": 3.436737060546875e-05, "model_forward_time": 0.024684667587280273, "step": 22523 }, { "epoch": 3.436737060546875e-05, "step": 22523, "training_step_time": 0.12930750846862793 }, { "epoch": 3.4368896484375e-05, "model_forward_time": 0.02517104148864746, "step": 22524 }, { "epoch": 3.4368896484375e-05, "step": 22524, "training_step_time": 0.11920332908630371 }, { "epoch": 3.437042236328125e-05, "model_forward_time": 0.024694442749023438, "step": 22525 }, { "epoch": 3.437042236328125e-05, "step": 22525, "training_step_time": 0.11605215072631836 }, { "epoch": 3.43719482421875e-05, "model_forward_time": 0.024904966354370117, "step": 22526 }, { "epoch": 3.43719482421875e-05, "step": 22526, "training_step_time": 0.14576005935668945 }, { "epoch": 3.437347412109375e-05, "model_forward_time": 0.024609804153442383, "step": 22527 }, { "epoch": 3.437347412109375e-05, "step": 22527, "training_step_time": 0.10378193855285645 }, { "epoch": 3.4375e-05, "model_forward_time": 0.02496051788330078, "step": 22528 }, { "epoch": 3.4375e-05, "step": 22528, "training_step_time": 0.10738110542297363 }, { "epoch": 3.437652587890625e-05, "model_forward_time": 0.025020360946655273, "step": 22529 }, { "epoch": 3.437652587890625e-05, "step": 22529, "training_step_time": 0.10481619834899902 }, { "epoch": 3.43780517578125e-05, "grad_norm": 0.2845122218132019, "learning_rate": 1.6014457078461353e-05, "loss": 0.0111, "step": 22530 }, { "epoch": 3.43780517578125e-05, "model_forward_time": 0.024991750717163086, "step": 22530 }, { "epoch": 3.43780517578125e-05, "step": 22530, "training_step_time": 0.10554981231689453 }, { "epoch": 3.437957763671875e-05, "model_forward_time": 0.025631189346313477, "step": 22531 }, { "epoch": 3.437957763671875e-05, "step": 22531, "training_step_time": 0.10950636863708496 }, { "epoch": 3.4381103515625e-05, "model_forward_time": 0.025404930114746094, "step": 22532 }, { "epoch": 3.4381103515625e-05, "step": 22532, "training_step_time": 0.10576534271240234 }, { "epoch": 3.438262939453125e-05, "model_forward_time": 0.026144027709960938, "step": 22533 }, { "epoch": 3.438262939453125e-05, "step": 22533, "training_step_time": 0.1797025203704834 }, { "epoch": 3.43841552734375e-05, "model_forward_time": 0.025594234466552734, "step": 22534 }, { "epoch": 3.43841552734375e-05, "step": 22534, "training_step_time": 0.11345505714416504 }, { "epoch": 3.438568115234375e-05, "model_forward_time": 0.024806976318359375, "step": 22535 }, { "epoch": 3.438568115234375e-05, "step": 22535, "training_step_time": 0.1372518539428711 }, { "epoch": 3.438720703125e-05, "model_forward_time": 0.02469778060913086, "step": 22536 }, { "epoch": 3.438720703125e-05, "step": 22536, "training_step_time": 0.156052827835083 }, { "epoch": 3.438873291015625e-05, "model_forward_time": 0.02478194236755371, "step": 22537 }, { "epoch": 3.438873291015625e-05, "step": 22537, "training_step_time": 0.10473942756652832 }, { "epoch": 3.43902587890625e-05, "model_forward_time": 0.024973630905151367, "step": 22538 }, { "epoch": 3.43902587890625e-05, "step": 22538, "training_step_time": 0.11530303955078125 }, { "epoch": 3.439178466796875e-05, "model_forward_time": 0.02603745460510254, "step": 22539 }, { "epoch": 3.439178466796875e-05, "step": 22539, "training_step_time": 0.10576319694519043 }, { "epoch": 3.4393310546875e-05, "grad_norm": 0.1552228033542633, "learning_rate": 1.597405149077697e-05, "loss": 0.0033, "step": 22540 }, { "epoch": 3.4393310546875e-05, "model_forward_time": 0.024698257446289062, "step": 22540 }, { "epoch": 3.4393310546875e-05, "step": 22540, "training_step_time": 0.10421323776245117 }, { "epoch": 3.439483642578125e-05, "model_forward_time": 0.024886608123779297, "step": 22541 }, { "epoch": 3.439483642578125e-05, "step": 22541, "training_step_time": 0.10962653160095215 }, { "epoch": 3.43963623046875e-05, "model_forward_time": 0.02501535415649414, "step": 22542 }, { "epoch": 3.43963623046875e-05, "step": 22542, "training_step_time": 0.11786580085754395 }, { "epoch": 3.439788818359375e-05, "model_forward_time": 0.024881601333618164, "step": 22543 }, { "epoch": 3.439788818359375e-05, "step": 22543, "training_step_time": 0.11621427536010742 }, { "epoch": 3.43994140625e-05, "model_forward_time": 0.025180578231811523, "step": 22544 }, { "epoch": 3.43994140625e-05, "step": 22544, "training_step_time": 0.11540102958679199 }, { "epoch": 3.440093994140625e-05, "model_forward_time": 0.02502751350402832, "step": 22545 }, { "epoch": 3.440093994140625e-05, "step": 22545, "training_step_time": 0.10991787910461426 }, { "epoch": 3.44024658203125e-05, "model_forward_time": 0.025009632110595703, "step": 22546 }, { "epoch": 3.44024658203125e-05, "step": 22546, "training_step_time": 0.11142349243164062 }, { "epoch": 3.440399169921875e-05, "model_forward_time": 0.02531123161315918, "step": 22547 }, { "epoch": 3.440399169921875e-05, "step": 22547, "training_step_time": 0.10947823524475098 }, { "epoch": 3.4405517578125e-05, "model_forward_time": 0.02483391761779785, "step": 22548 }, { "epoch": 3.4405517578125e-05, "step": 22548, "training_step_time": 0.1094655990600586 }, { "epoch": 3.440704345703125e-05, "model_forward_time": 0.025200605392456055, "step": 22549 }, { "epoch": 3.440704345703125e-05, "step": 22549, "training_step_time": 0.10857582092285156 }, { "epoch": 3.44085693359375e-05, "grad_norm": 0.07073287665843964, "learning_rate": 1.593368724782846e-05, "loss": 0.0123, "step": 22550 }, { "epoch": 3.44085693359375e-05, "model_forward_time": 0.02495121955871582, "step": 22550 }, { "epoch": 3.44085693359375e-05, "step": 22550, "training_step_time": 0.10446429252624512 }, { "epoch": 3.441009521484375e-05, "model_forward_time": 0.025267601013183594, "step": 22551 }, { "epoch": 3.441009521484375e-05, "step": 22551, "training_step_time": 0.10826683044433594 }, { "epoch": 3.441162109375e-05, "model_forward_time": 0.025191068649291992, "step": 22552 }, { "epoch": 3.441162109375e-05, "step": 22552, "training_step_time": 0.1363527774810791 }, { "epoch": 3.441314697265625e-05, "model_forward_time": 0.024929523468017578, "step": 22553 }, { "epoch": 3.441314697265625e-05, "step": 22553, "training_step_time": 0.14258790016174316 }, { "epoch": 3.44146728515625e-05, "model_forward_time": 0.024688005447387695, "step": 22554 }, { "epoch": 3.44146728515625e-05, "step": 22554, "training_step_time": 0.13992524147033691 }, { "epoch": 3.441619873046875e-05, "model_forward_time": 0.024988412857055664, "step": 22555 }, { "epoch": 3.441619873046875e-05, "step": 22555, "training_step_time": 0.1925981044769287 }, { "epoch": 3.4417724609375e-05, "model_forward_time": 0.024658203125, "step": 22556 }, { "epoch": 3.4417724609375e-05, "step": 22556, "training_step_time": 0.14413022994995117 }, { "epoch": 3.441925048828125e-05, "model_forward_time": 0.024521350860595703, "step": 22557 }, { "epoch": 3.441925048828125e-05, "step": 22557, "training_step_time": 0.15850448608398438 }, { "epoch": 3.44207763671875e-05, "model_forward_time": 0.024738788604736328, "step": 22558 }, { "epoch": 3.44207763671875e-05, "step": 22558, "training_step_time": 0.10285162925720215 }, { "epoch": 3.442230224609375e-05, "model_forward_time": 0.02480292320251465, "step": 22559 }, { "epoch": 3.442230224609375e-05, "step": 22559, "training_step_time": 0.10341787338256836 }, { "epoch": 3.4423828125e-05, "grad_norm": 0.2642311155796051, "learning_rate": 1.5893364398662176e-05, "loss": 0.0182, "step": 22560 }, { "epoch": 3.4423828125e-05, "model_forward_time": 0.025300025939941406, "step": 22560 }, { "epoch": 3.4423828125e-05, "step": 22560, "training_step_time": 0.11081051826477051 }, { "epoch": 3.442535400390625e-05, "model_forward_time": 0.025388002395629883, "step": 22561 }, { "epoch": 3.442535400390625e-05, "step": 22561, "training_step_time": 0.10899734497070312 }, { "epoch": 3.44268798828125e-05, "model_forward_time": 0.025207042694091797, "step": 22562 }, { "epoch": 3.44268798828125e-05, "step": 22562, "training_step_time": 0.10673236846923828 }, { "epoch": 3.442840576171875e-05, "model_forward_time": 0.025087833404541016, "step": 22563 }, { "epoch": 3.442840576171875e-05, "step": 22563, "training_step_time": 0.10633063316345215 }, { "epoch": 3.4429931640625e-05, "model_forward_time": 0.024871110916137695, "step": 22564 }, { "epoch": 3.4429931640625e-05, "step": 22564, "training_step_time": 0.10430598258972168 }, { "epoch": 3.443145751953125e-05, "model_forward_time": 0.025195837020874023, "step": 22565 }, { "epoch": 3.443145751953125e-05, "step": 22565, "training_step_time": 0.10355305671691895 }, { "epoch": 3.44329833984375e-05, "model_forward_time": 0.02477860450744629, "step": 22566 }, { "epoch": 3.44329833984375e-05, "step": 22566, "training_step_time": 0.10409164428710938 }, { "epoch": 3.443450927734375e-05, "model_forward_time": 0.02439284324645996, "step": 22567 }, { "epoch": 3.443450927734375e-05, "step": 22567, "training_step_time": 0.13987946510314941 }, { "epoch": 3.443603515625e-05, "model_forward_time": 0.02516341209411621, "step": 22568 }, { "epoch": 3.443603515625e-05, "step": 22568, "training_step_time": 0.11322283744812012 }, { "epoch": 3.443756103515625e-05, "model_forward_time": 0.024591922760009766, "step": 22569 }, { "epoch": 3.443756103515625e-05, "step": 22569, "training_step_time": 0.14094328880310059 }, { "epoch": 3.44390869140625e-05, "grad_norm": 0.17577330768108368, "learning_rate": 1.5853082992274205e-05, "loss": 0.0049, "step": 22570 }, { "epoch": 3.44390869140625e-05, "model_forward_time": 0.02508997917175293, "step": 22570 }, { "epoch": 3.44390869140625e-05, "step": 22570, "training_step_time": 0.21321320533752441 }, { "epoch": 3.444061279296875e-05, "model_forward_time": 0.025599241256713867, "step": 22571 }, { "epoch": 3.444061279296875e-05, "step": 22571, "training_step_time": 0.11498737335205078 }, { "epoch": 3.4442138671875e-05, "model_forward_time": 0.024283170700073242, "step": 22572 }, { "epoch": 3.4442138671875e-05, "step": 22572, "training_step_time": 0.11481904983520508 }, { "epoch": 3.444366455078125e-05, "model_forward_time": 0.025135040283203125, "step": 22573 }, { "epoch": 3.444366455078125e-05, "step": 22573, "training_step_time": 0.14130783081054688 }, { "epoch": 3.44451904296875e-05, "model_forward_time": 0.024860143661499023, "step": 22574 }, { "epoch": 3.44451904296875e-05, "step": 22574, "training_step_time": 0.10347294807434082 }, { "epoch": 3.444671630859375e-05, "model_forward_time": 0.025289297103881836, "step": 22575 }, { "epoch": 3.444671630859375e-05, "step": 22575, "training_step_time": 0.1062467098236084 }, { "epoch": 3.44482421875e-05, "model_forward_time": 0.025598764419555664, "step": 22576 }, { "epoch": 3.44482421875e-05, "step": 22576, "training_step_time": 0.10679435729980469 }, { "epoch": 3.444976806640625e-05, "model_forward_time": 0.02544403076171875, "step": 22577 }, { "epoch": 3.444976806640625e-05, "step": 22577, "training_step_time": 0.10471463203430176 }, { "epoch": 3.44512939453125e-05, "model_forward_time": 0.024773597717285156, "step": 22578 }, { "epoch": 3.44512939453125e-05, "step": 22578, "training_step_time": 0.10529422760009766 }, { "epoch": 3.445281982421875e-05, "model_forward_time": 0.024950742721557617, "step": 22579 }, { "epoch": 3.445281982421875e-05, "step": 22579, "training_step_time": 0.13483572006225586 }, { "epoch": 3.4454345703125e-05, "grad_norm": 0.1434876024723053, "learning_rate": 1.581284307761024e-05, "loss": 0.0052, "step": 22580 }, { "epoch": 3.4454345703125e-05, "model_forward_time": 0.025471210479736328, "step": 22580 }, { "epoch": 3.4454345703125e-05, "step": 22580, "training_step_time": 0.18225598335266113 }, { "epoch": 3.445587158203125e-05, "model_forward_time": 0.02443528175354004, "step": 22581 }, { "epoch": 3.445587158203125e-05, "step": 22581, "training_step_time": 0.12352919578552246 }, { "epoch": 3.44573974609375e-05, "model_forward_time": 0.024270296096801758, "step": 22582 }, { "epoch": 3.44573974609375e-05, "step": 22582, "training_step_time": 0.11872267723083496 }, { "epoch": 3.445892333984375e-05, "model_forward_time": 0.024873018264770508, "step": 22583 }, { "epoch": 3.445892333984375e-05, "step": 22583, "training_step_time": 0.20750904083251953 }, { "epoch": 3.446044921875e-05, "model_forward_time": 0.023919105529785156, "step": 22584 }, { "epoch": 3.446044921875e-05, "step": 22584, "training_step_time": 0.1170196533203125 }, { "epoch": 3.446197509765625e-05, "model_forward_time": 0.024164199829101562, "step": 22585 }, { "epoch": 3.446197509765625e-05, "step": 22585, "training_step_time": 0.11087894439697266 }, { "epoch": 3.44635009765625e-05, "model_forward_time": 0.025058984756469727, "step": 22586 }, { "epoch": 3.44635009765625e-05, "step": 22586, "training_step_time": 0.10880374908447266 }, { "epoch": 3.446502685546875e-05, "model_forward_time": 0.02532172203063965, "step": 22587 }, { "epoch": 3.446502685546875e-05, "step": 22587, "training_step_time": 0.10690116882324219 }, { "epoch": 3.4466552734375e-05, "model_forward_time": 0.025583505630493164, "step": 22588 }, { "epoch": 3.4466552734375e-05, "step": 22588, "training_step_time": 0.1074683666229248 }, { "epoch": 3.446807861328125e-05, "model_forward_time": 0.025719642639160156, "step": 22589 }, { "epoch": 3.446807861328125e-05, "step": 22589, "training_step_time": 0.11057281494140625 }, { "epoch": 3.44696044921875e-05, "grad_norm": 0.167452871799469, "learning_rate": 1.5772644703565565e-05, "loss": 0.0041, "step": 22590 }, { "epoch": 3.44696044921875e-05, "model_forward_time": 0.02565145492553711, "step": 22590 }, { "epoch": 3.44696044921875e-05, "step": 22590, "training_step_time": 0.11219048500061035 }, { "epoch": 3.447113037109375e-05, "model_forward_time": 0.025362491607666016, "step": 22591 }, { "epoch": 3.447113037109375e-05, "step": 22591, "training_step_time": 0.11288070678710938 }, { "epoch": 3.447265625e-05, "model_forward_time": 0.025502681732177734, "step": 22592 }, { "epoch": 3.447265625e-05, "step": 22592, "training_step_time": 0.1077110767364502 }, { "epoch": 3.447418212890625e-05, "model_forward_time": 0.025272130966186523, "step": 22593 }, { "epoch": 3.447418212890625e-05, "step": 22593, "training_step_time": 0.1120448112487793 }, { "epoch": 3.44757080078125e-05, "model_forward_time": 0.0257110595703125, "step": 22594 }, { "epoch": 3.44757080078125e-05, "step": 22594, "training_step_time": 0.10993242263793945 }, { "epoch": 3.447723388671875e-05, "model_forward_time": 0.025301456451416016, "step": 22595 }, { "epoch": 3.447723388671875e-05, "step": 22595, "training_step_time": 0.10610342025756836 }, { "epoch": 3.4478759765625e-05, "model_forward_time": 0.025497913360595703, "step": 22596 }, { "epoch": 3.4478759765625e-05, "step": 22596, "training_step_time": 0.10670733451843262 }, { "epoch": 3.448028564453125e-05, "model_forward_time": 0.025922060012817383, "step": 22597 }, { "epoch": 3.448028564453125e-05, "step": 22597, "training_step_time": 0.10722804069519043 }, { "epoch": 3.44818115234375e-05, "model_forward_time": 0.024599552154541016, "step": 22598 }, { "epoch": 3.44818115234375e-05, "step": 22598, "training_step_time": 0.15507769584655762 }, { "epoch": 3.448333740234375e-05, "model_forward_time": 0.0252077579498291, "step": 22599 }, { "epoch": 3.448333740234375e-05, "step": 22599, "training_step_time": 0.15497732162475586 }, { "epoch": 3.448486328125e-05, "grad_norm": 0.16000565886497498, "learning_rate": 1.5732487918985018e-05, "loss": 0.0047, "step": 22600 }, { "epoch": 3.448486328125e-05, "model_forward_time": 0.02480936050415039, "step": 22600 }, { "epoch": 3.448486328125e-05, "step": 22600, "training_step_time": 0.1786642074584961 }, { "epoch": 3.448638916015625e-05, "model_forward_time": 0.025177478790283203, "step": 22601 }, { "epoch": 3.448638916015625e-05, "step": 22601, "training_step_time": 0.15608811378479004 }, { "epoch": 3.44879150390625e-05, "model_forward_time": 0.02507948875427246, "step": 22602 }, { "epoch": 3.44879150390625e-05, "step": 22602, "training_step_time": 0.14945292472839355 }, { "epoch": 3.448944091796875e-05, "model_forward_time": 0.02465987205505371, "step": 22603 }, { "epoch": 3.448944091796875e-05, "step": 22603, "training_step_time": 0.10299515724182129 }, { "epoch": 3.4490966796875e-05, "model_forward_time": 0.02558159828186035, "step": 22604 }, { "epoch": 3.4490966796875e-05, "step": 22604, "training_step_time": 0.10664725303649902 }, { "epoch": 3.449249267578125e-05, "model_forward_time": 0.025170564651489258, "step": 22605 }, { "epoch": 3.449249267578125e-05, "step": 22605, "training_step_time": 0.10531949996948242 }, { "epoch": 3.44940185546875e-05, "model_forward_time": 0.02552938461303711, "step": 22606 }, { "epoch": 3.44940185546875e-05, "step": 22606, "training_step_time": 0.10657715797424316 }, { "epoch": 3.449554443359375e-05, "model_forward_time": 0.02535390853881836, "step": 22607 }, { "epoch": 3.449554443359375e-05, "step": 22607, "training_step_time": 0.10749149322509766 }, { "epoch": 3.44970703125e-05, "model_forward_time": 0.025127410888671875, "step": 22608 }, { "epoch": 3.44970703125e-05, "step": 22608, "training_step_time": 0.18455839157104492 }, { "epoch": 3.449859619140625e-05, "model_forward_time": 0.023488759994506836, "step": 22609 }, { "epoch": 3.449859619140625e-05, "step": 22609, "training_step_time": 0.21075034141540527 }, { "epoch": 3.45001220703125e-05, "grad_norm": 0.27791628241539, "learning_rate": 1.569237277266286e-05, "loss": 0.0041, "step": 22610 }, { "epoch": 3.45001220703125e-05, "model_forward_time": 0.023622989654541016, "step": 22610 }, { "epoch": 3.45001220703125e-05, "step": 22610, "training_step_time": 0.19536995887756348 }, { "epoch": 3.450164794921875e-05, "model_forward_time": 0.023660659790039062, "step": 22611 }, { "epoch": 3.450164794921875e-05, "step": 22611, "training_step_time": 0.19393348693847656 }, { "epoch": 3.4503173828125e-05, "model_forward_time": 0.023743152618408203, "step": 22612 }, { "epoch": 3.4503173828125e-05, "step": 22612, "training_step_time": 0.18079209327697754 }, { "epoch": 3.450469970703125e-05, "model_forward_time": 0.024435758590698242, "step": 22613 }, { "epoch": 3.450469970703125e-05, "step": 22613, "training_step_time": 0.2048492431640625 }, { "epoch": 3.45062255859375e-05, "model_forward_time": 0.02657032012939453, "step": 22614 }, { "epoch": 3.45062255859375e-05, "step": 22614, "training_step_time": 0.1594257354736328 }, { "epoch": 3.450775146484375e-05, "model_forward_time": 0.024350404739379883, "step": 22615 }, { "epoch": 3.450775146484375e-05, "step": 22615, "training_step_time": 0.16452717781066895 }, { "epoch": 3.450927734375e-05, "model_forward_time": 0.024178743362426758, "step": 22616 }, { "epoch": 3.450927734375e-05, "step": 22616, "training_step_time": 0.10221052169799805 }, { "epoch": 3.451080322265625e-05, "model_forward_time": 0.02451634407043457, "step": 22617 }, { "epoch": 3.451080322265625e-05, "step": 22617, "training_step_time": 0.10526561737060547 }, { "epoch": 3.45123291015625e-05, "model_forward_time": 0.026382923126220703, "step": 22618 }, { "epoch": 3.45123291015625e-05, "step": 22618, "training_step_time": 0.10652875900268555 }, { "epoch": 3.451385498046875e-05, "model_forward_time": 0.025501012802124023, "step": 22619 }, { "epoch": 3.451385498046875e-05, "step": 22619, "training_step_time": 0.10457086563110352 }, { "epoch": 3.4515380859375e-05, "grad_norm": 0.12451514601707458, "learning_rate": 1.5652299313342773e-05, "loss": 0.0057, "step": 22620 }, { "epoch": 3.4515380859375e-05, "model_forward_time": 0.025288105010986328, "step": 22620 }, { "epoch": 3.4515380859375e-05, "step": 22620, "training_step_time": 0.10478425025939941 }, { "epoch": 3.451690673828125e-05, "model_forward_time": 0.02518630027770996, "step": 22621 }, { "epoch": 3.451690673828125e-05, "step": 22621, "training_step_time": 0.10658121109008789 }, { "epoch": 3.45184326171875e-05, "model_forward_time": 0.02536487579345703, "step": 22622 }, { "epoch": 3.45184326171875e-05, "step": 22622, "training_step_time": 0.10521841049194336 }, { "epoch": 3.451995849609375e-05, "model_forward_time": 0.025150775909423828, "step": 22623 }, { "epoch": 3.451995849609375e-05, "step": 22623, "training_step_time": 0.10679292678833008 }, { "epoch": 3.4521484375e-05, "model_forward_time": 0.025905132293701172, "step": 22624 }, { "epoch": 3.4521484375e-05, "step": 22624, "training_step_time": 0.2043473720550537 }, { "epoch": 3.452301025390625e-05, "model_forward_time": 0.024646997451782227, "step": 22625 }, { "epoch": 3.452301025390625e-05, "step": 22625, "training_step_time": 0.10651206970214844 }, { "epoch": 3.45245361328125e-05, "model_forward_time": 0.02467942237854004, "step": 22626 }, { "epoch": 3.45245361328125e-05, "step": 22626, "training_step_time": 0.17296290397644043 }, { "epoch": 3.452606201171875e-05, "model_forward_time": 0.024392366409301758, "step": 22627 }, { "epoch": 3.452606201171875e-05, "step": 22627, "training_step_time": 0.15138888359069824 }, { "epoch": 3.4527587890625e-05, "model_forward_time": 0.02470850944519043, "step": 22628 }, { "epoch": 3.4527587890625e-05, "step": 22628, "training_step_time": 0.10401511192321777 }, { "epoch": 3.452911376953125e-05, "model_forward_time": 0.024695873260498047, "step": 22629 }, { "epoch": 3.452911376953125e-05, "step": 22629, "training_step_time": 0.10762476921081543 }, { "epoch": 3.45306396484375e-05, "grad_norm": 0.15308569371700287, "learning_rate": 1.5612267589717805e-05, "loss": 0.0051, "step": 22630 }, { "epoch": 3.45306396484375e-05, "model_forward_time": 0.025216341018676758, "step": 22630 }, { "epoch": 3.45306396484375e-05, "step": 22630, "training_step_time": 0.10547304153442383 }, { "epoch": 3.453216552734375e-05, "model_forward_time": 0.025307893753051758, "step": 22631 }, { "epoch": 3.453216552734375e-05, "step": 22631, "training_step_time": 0.10705280303955078 }, { "epoch": 3.453369140625e-05, "model_forward_time": 0.024931907653808594, "step": 22632 }, { "epoch": 3.453369140625e-05, "step": 22632, "training_step_time": 0.10941815376281738 }, { "epoch": 3.453521728515625e-05, "model_forward_time": 0.024862289428710938, "step": 22633 }, { "epoch": 3.453521728515625e-05, "step": 22633, "training_step_time": 0.10802412033081055 }, { "epoch": 3.45367431640625e-05, "model_forward_time": 0.024967432022094727, "step": 22634 }, { "epoch": 3.45367431640625e-05, "step": 22634, "training_step_time": 0.10782623291015625 }, { "epoch": 3.453826904296875e-05, "model_forward_time": 0.025029659271240234, "step": 22635 }, { "epoch": 3.453826904296875e-05, "step": 22635, "training_step_time": 0.1077737808227539 }, { "epoch": 3.4539794921875e-05, "model_forward_time": 0.02444171905517578, "step": 22636 }, { "epoch": 3.4539794921875e-05, "step": 22636, "training_step_time": 0.10875582695007324 }, { "epoch": 3.454132080078125e-05, "model_forward_time": 0.02518606185913086, "step": 22637 }, { "epoch": 3.454132080078125e-05, "step": 22637, "training_step_time": 0.11926770210266113 }, { "epoch": 3.45428466796875e-05, "model_forward_time": 0.024074554443359375, "step": 22638 }, { "epoch": 3.45428466796875e-05, "step": 22638, "training_step_time": 0.12267231941223145 }, { "epoch": 3.454437255859375e-05, "model_forward_time": 0.023903369903564453, "step": 22639 }, { "epoch": 3.454437255859375e-05, "step": 22639, "training_step_time": 0.1225888729095459 }, { "epoch": 3.45458984375e-05, "grad_norm": 0.09998887032270432, "learning_rate": 1.557227765043027e-05, "loss": 0.013, "step": 22640 }, { "epoch": 3.45458984375e-05, "model_forward_time": 0.024883747100830078, "step": 22640 }, { "epoch": 3.45458984375e-05, "step": 22640, "training_step_time": 0.17571377754211426 }, { "epoch": 3.454742431640625e-05, "model_forward_time": 0.024603605270385742, "step": 22641 }, { "epoch": 3.454742431640625e-05, "step": 22641, "training_step_time": 0.16199898719787598 }, { "epoch": 3.45489501953125e-05, "model_forward_time": 0.024004220962524414, "step": 22642 }, { "epoch": 3.45489501953125e-05, "step": 22642, "training_step_time": 0.1923356056213379 }, { "epoch": 3.455047607421875e-05, "model_forward_time": 0.02419281005859375, "step": 22643 }, { "epoch": 3.455047607421875e-05, "step": 22643, "training_step_time": 0.16680335998535156 }, { "epoch": 3.4552001953125e-05, "model_forward_time": 0.023984909057617188, "step": 22644 }, { "epoch": 3.4552001953125e-05, "step": 22644, "training_step_time": 0.1653447151184082 }, { "epoch": 3.455352783203125e-05, "model_forward_time": 0.023545503616333008, "step": 22645 }, { "epoch": 3.455352783203125e-05, "step": 22645, "training_step_time": 0.10384798049926758 }, { "epoch": 3.45550537109375e-05, "model_forward_time": 0.024919509887695312, "step": 22646 }, { "epoch": 3.45550537109375e-05, "step": 22646, "training_step_time": 0.10674858093261719 }, { "epoch": 3.455657958984375e-05, "model_forward_time": 0.02539825439453125, "step": 22647 }, { "epoch": 3.455657958984375e-05, "step": 22647, "training_step_time": 0.10599875450134277 }, { "epoch": 3.455810546875e-05, "model_forward_time": 0.024866580963134766, "step": 22648 }, { "epoch": 3.455810546875e-05, "step": 22648, "training_step_time": 0.1106269359588623 }, { "epoch": 3.455963134765625e-05, "model_forward_time": 0.02497076988220215, "step": 22649 }, { "epoch": 3.455963134765625e-05, "step": 22649, "training_step_time": 0.11391568183898926 }, { "epoch": 3.45611572265625e-05, "grad_norm": 0.36277642846107483, "learning_rate": 1.553232954407171e-05, "loss": 0.0094, "step": 22650 }, { "epoch": 3.45611572265625e-05, "model_forward_time": 0.02516913414001465, "step": 22650 }, { "epoch": 3.45611572265625e-05, "step": 22650, "training_step_time": 0.10614895820617676 }, { "epoch": 3.456268310546875e-05, "model_forward_time": 0.024924278259277344, "step": 22651 }, { "epoch": 3.456268310546875e-05, "step": 22651, "training_step_time": 0.10732841491699219 }, { "epoch": 3.4564208984375e-05, "model_forward_time": 0.02500605583190918, "step": 22652 }, { "epoch": 3.4564208984375e-05, "step": 22652, "training_step_time": 0.10872411727905273 }, { "epoch": 3.456573486328125e-05, "model_forward_time": 0.024770259857177734, "step": 22653 }, { "epoch": 3.456573486328125e-05, "step": 22653, "training_step_time": 0.10824036598205566 }, { "epoch": 3.45672607421875e-05, "model_forward_time": 0.024863719940185547, "step": 22654 }, { "epoch": 3.45672607421875e-05, "step": 22654, "training_step_time": 0.10634136199951172 }, { "epoch": 3.456878662109375e-05, "model_forward_time": 0.025053739547729492, "step": 22655 }, { "epoch": 3.456878662109375e-05, "step": 22655, "training_step_time": 0.16725754737854004 }, { "epoch": 3.45703125e-05, "model_forward_time": 0.02429938316345215, "step": 22656 }, { "epoch": 3.45703125e-05, "step": 22656, "training_step_time": 0.11075115203857422 }, { "epoch": 3.457183837890625e-05, "model_forward_time": 0.024128198623657227, "step": 22657 }, { "epoch": 3.457183837890625e-05, "step": 22657, "training_step_time": 0.12139439582824707 }, { "epoch": 3.45733642578125e-05, "model_forward_time": 0.025157451629638672, "step": 22658 }, { "epoch": 3.45733642578125e-05, "step": 22658, "training_step_time": 0.13670086860656738 }, { "epoch": 3.457489013671875e-05, "model_forward_time": 0.024887800216674805, "step": 22659 }, { "epoch": 3.457489013671875e-05, "step": 22659, "training_step_time": 0.13590121269226074 }, { "epoch": 3.4576416015625e-05, "grad_norm": 0.27834823727607727, "learning_rate": 1.549242331918285e-05, "loss": 0.0069, "step": 22660 }, { "epoch": 3.4576416015625e-05, "model_forward_time": 0.024457216262817383, "step": 22660 }, { "epoch": 3.4576416015625e-05, "step": 22660, "training_step_time": 0.11397528648376465 }, { "epoch": 3.457794189453125e-05, "model_forward_time": 0.02589130401611328, "step": 22661 }, { "epoch": 3.457794189453125e-05, "step": 22661, "training_step_time": 0.12010002136230469 }, { "epoch": 3.45794677734375e-05, "model_forward_time": 0.024899721145629883, "step": 22662 }, { "epoch": 3.45794677734375e-05, "step": 22662, "training_step_time": 0.1097261905670166 }, { "epoch": 3.458099365234375e-05, "model_forward_time": 0.024814844131469727, "step": 22663 }, { "epoch": 3.458099365234375e-05, "step": 22663, "training_step_time": 0.10960960388183594 }, { "epoch": 3.458251953125e-05, "model_forward_time": 0.02419567108154297, "step": 22664 }, { "epoch": 3.458251953125e-05, "step": 22664, "training_step_time": 0.10557389259338379 }, { "epoch": 3.458404541015625e-05, "model_forward_time": 0.023843050003051758, "step": 22665 }, { "epoch": 3.458404541015625e-05, "step": 22665, "training_step_time": 0.10669684410095215 }, { "epoch": 3.45855712890625e-05, "model_forward_time": 0.02442169189453125, "step": 22666 }, { "epoch": 3.45855712890625e-05, "step": 22666, "training_step_time": 0.10704565048217773 }, { "epoch": 3.458709716796875e-05, "model_forward_time": 0.023878097534179688, "step": 22667 }, { "epoch": 3.458709716796875e-05, "step": 22667, "training_step_time": 0.1396334171295166 }, { "epoch": 3.4588623046875e-05, "model_forward_time": 0.024099349975585938, "step": 22668 }, { "epoch": 3.4588623046875e-05, "step": 22668, "training_step_time": 0.11092042922973633 }, { "epoch": 3.459014892578125e-05, "model_forward_time": 0.026486873626708984, "step": 22669 }, { "epoch": 3.459014892578125e-05, "step": 22669, "training_step_time": 0.21377038955688477 }, { "epoch": 3.45916748046875e-05, "grad_norm": 0.12981250882148743, "learning_rate": 1.5452559024253487e-05, "loss": 0.0045, "step": 22670 }, { "epoch": 3.45916748046875e-05, "model_forward_time": 0.023784875869750977, "step": 22670 }, { "epoch": 3.45916748046875e-05, "step": 22670, "training_step_time": 0.10393810272216797 }, { "epoch": 3.459320068359375e-05, "model_forward_time": 0.02320241928100586, "step": 22671 }, { "epoch": 3.459320068359375e-05, "step": 22671, "training_step_time": 0.11812043190002441 }, { "epoch": 3.45947265625e-05, "model_forward_time": 0.024200916290283203, "step": 22672 }, { "epoch": 3.45947265625e-05, "step": 22672, "training_step_time": 0.11285090446472168 }, { "epoch": 3.459625244140625e-05, "model_forward_time": 0.024064302444458008, "step": 22673 }, { "epoch": 3.459625244140625e-05, "step": 22673, "training_step_time": 0.11010909080505371 }, { "epoch": 3.45977783203125e-05, "model_forward_time": 0.024003982543945312, "step": 22674 }, { "epoch": 3.45977783203125e-05, "step": 22674, "training_step_time": 0.10930681228637695 }, { "epoch": 3.459930419921875e-05, "model_forward_time": 0.024041414260864258, "step": 22675 }, { "epoch": 3.459930419921875e-05, "step": 22675, "training_step_time": 0.10636377334594727 }, { "epoch": 3.4600830078125e-05, "model_forward_time": 0.02397012710571289, "step": 22676 }, { "epoch": 3.4600830078125e-05, "step": 22676, "training_step_time": 0.10981583595275879 }, { "epoch": 3.460235595703125e-05, "model_forward_time": 0.024153947830200195, "step": 22677 }, { "epoch": 3.460235595703125e-05, "step": 22677, "training_step_time": 0.10694766044616699 }, { "epoch": 3.46038818359375e-05, "model_forward_time": 0.023944616317749023, "step": 22678 }, { "epoch": 3.46038818359375e-05, "step": 22678, "training_step_time": 0.10927891731262207 }, { "epoch": 3.460540771484375e-05, "model_forward_time": 0.024237632751464844, "step": 22679 }, { "epoch": 3.460540771484375e-05, "step": 22679, "training_step_time": 0.10911321640014648 }, { "epoch": 3.460693359375e-05, "grad_norm": 0.16855190694332123, "learning_rate": 1.5412736707722537e-05, "loss": 0.0066, "step": 22680 }, { "epoch": 3.460693359375e-05, "model_forward_time": 0.024281024932861328, "step": 22680 }, { "epoch": 3.460693359375e-05, "step": 22680, "training_step_time": 0.10901141166687012 }, { "epoch": 3.460845947265625e-05, "model_forward_time": 0.023990869522094727, "step": 22681 }, { "epoch": 3.460845947265625e-05, "step": 22681, "training_step_time": 0.10925793647766113 }, { "epoch": 3.46099853515625e-05, "model_forward_time": 0.02411961555480957, "step": 22682 }, { "epoch": 3.46099853515625e-05, "step": 22682, "training_step_time": 0.10846257209777832 }, { "epoch": 3.461151123046875e-05, "model_forward_time": 0.024064302444458008, "step": 22683 }, { "epoch": 3.461151123046875e-05, "step": 22683, "training_step_time": 0.10765647888183594 }, { "epoch": 3.4613037109375e-05, "model_forward_time": 0.025329113006591797, "step": 22684 }, { "epoch": 3.4613037109375e-05, "step": 22684, "training_step_time": 0.11897540092468262 }, { "epoch": 3.461456298828125e-05, "model_forward_time": 0.023889780044555664, "step": 22685 }, { "epoch": 3.461456298828125e-05, "step": 22685, "training_step_time": 0.16896295547485352 }, { "epoch": 3.46160888671875e-05, "model_forward_time": 0.024741411209106445, "step": 22686 }, { "epoch": 3.46160888671875e-05, "step": 22686, "training_step_time": 0.11070466041564941 }, { "epoch": 3.461761474609375e-05, "model_forward_time": 0.023485898971557617, "step": 22687 }, { "epoch": 3.461761474609375e-05, "step": 22687, "training_step_time": 0.16466975212097168 }, { "epoch": 3.4619140625e-05, "model_forward_time": 0.02356719970703125, "step": 22688 }, { "epoch": 3.4619140625e-05, "step": 22688, "training_step_time": 0.1440896987915039 }, { "epoch": 3.462066650390625e-05, "model_forward_time": 0.023482799530029297, "step": 22689 }, { "epoch": 3.462066650390625e-05, "step": 22689, "training_step_time": 0.17079472541809082 }, { "epoch": 3.46221923828125e-05, "grad_norm": 0.13016711175441742, "learning_rate": 1.537295641797785e-05, "loss": 0.0047, "step": 22690 }, { "epoch": 3.46221923828125e-05, "model_forward_time": 0.02337336540222168, "step": 22690 }, { "epoch": 3.46221923828125e-05, "step": 22690, "training_step_time": 0.15986394882202148 }, { "epoch": 3.462371826171875e-05, "model_forward_time": 0.023081541061401367, "step": 22691 }, { "epoch": 3.462371826171875e-05, "step": 22691, "training_step_time": 0.11078834533691406 }, { "epoch": 3.4625244140625e-05, "model_forward_time": 0.023950576782226562, "step": 22692 }, { "epoch": 3.4625244140625e-05, "step": 22692, "training_step_time": 0.11227250099182129 }, { "epoch": 3.462677001953125e-05, "model_forward_time": 0.024293899536132812, "step": 22693 }, { "epoch": 3.462677001953125e-05, "step": 22693, "training_step_time": 0.10295867919921875 }, { "epoch": 3.46282958984375e-05, "model_forward_time": 0.02438640594482422, "step": 22694 }, { "epoch": 3.46282958984375e-05, "step": 22694, "training_step_time": 0.10865664482116699 }, { "epoch": 3.462982177734375e-05, "model_forward_time": 0.023991107940673828, "step": 22695 }, { "epoch": 3.462982177734375e-05, "step": 22695, "training_step_time": 0.10357046127319336 }, { "epoch": 3.463134765625e-05, "model_forward_time": 0.023816585540771484, "step": 22696 }, { "epoch": 3.463134765625e-05, "step": 22696, "training_step_time": 0.10399127006530762 }, { "epoch": 3.463287353515625e-05, "model_forward_time": 0.025452136993408203, "step": 22697 }, { "epoch": 3.463287353515625e-05, "step": 22697, "training_step_time": 0.10942459106445312 }, { "epoch": 3.46343994140625e-05, "model_forward_time": 0.025284290313720703, "step": 22698 }, { "epoch": 3.46343994140625e-05, "step": 22698, "training_step_time": 0.1551656723022461 }, { "epoch": 3.463592529296875e-05, "model_forward_time": 0.02498340606689453, "step": 22699 }, { "epoch": 3.463592529296875e-05, "step": 22699, "training_step_time": 0.1642286777496338 }, { "epoch": 3.4637451171875e-05, "grad_norm": 0.22802887856960297, "learning_rate": 1.5333218203356243e-05, "loss": 0.0053, "step": 22700 }, { "epoch": 3.4637451171875e-05, "model_forward_time": 0.02660226821899414, "step": 22700 }, { "epoch": 3.4637451171875e-05, "step": 22700, "training_step_time": 0.15730714797973633 }, { "epoch": 3.463897705078125e-05, "model_forward_time": 0.02332139015197754, "step": 22701 }, { "epoch": 3.463897705078125e-05, "step": 22701, "training_step_time": 0.17769217491149902 }, { "epoch": 3.46405029296875e-05, "model_forward_time": 0.02424478530883789, "step": 22702 }, { "epoch": 3.46405029296875e-05, "step": 22702, "training_step_time": 0.1382887363433838 }, { "epoch": 3.464202880859375e-05, "model_forward_time": 0.02326035499572754, "step": 22703 }, { "epoch": 3.464202880859375e-05, "step": 22703, "training_step_time": 0.1977241039276123 }, { "epoch": 3.46435546875e-05, "model_forward_time": 0.024146556854248047, "step": 22704 }, { "epoch": 3.46435546875e-05, "step": 22704, "training_step_time": 0.13750624656677246 }, { "epoch": 3.464508056640625e-05, "model_forward_time": 0.024793624877929688, "step": 22705 }, { "epoch": 3.464508056640625e-05, "step": 22705, "training_step_time": 0.1199800968170166 }, { "epoch": 3.46466064453125e-05, "model_forward_time": 0.024591445922851562, "step": 22706 }, { "epoch": 3.46466064453125e-05, "step": 22706, "training_step_time": 0.11993622779846191 }, { "epoch": 3.464813232421875e-05, "model_forward_time": 0.024833202362060547, "step": 22707 }, { "epoch": 3.464813232421875e-05, "step": 22707, "training_step_time": 0.11265420913696289 }, { "epoch": 3.4649658203125e-05, "model_forward_time": 0.02526068687438965, "step": 22708 }, { "epoch": 3.4649658203125e-05, "step": 22708, "training_step_time": 0.11073946952819824 }, { "epoch": 3.465118408203125e-05, "model_forward_time": 0.024870872497558594, "step": 22709 }, { "epoch": 3.465118408203125e-05, "step": 22709, "training_step_time": 0.1105196475982666 }, { "epoch": 3.46527099609375e-05, "grad_norm": 0.259838730096817, "learning_rate": 1.5293522112143373e-05, "loss": 0.0168, "step": 22710 }, { "epoch": 3.46527099609375e-05, "model_forward_time": 0.024914026260375977, "step": 22710 }, { "epoch": 3.46527099609375e-05, "step": 22710, "training_step_time": 0.10899138450622559 }, { "epoch": 3.465423583984375e-05, "model_forward_time": 0.0247189998626709, "step": 22711 }, { "epoch": 3.465423583984375e-05, "step": 22711, "training_step_time": 0.17776775360107422 }, { "epoch": 3.465576171875e-05, "model_forward_time": 0.02428889274597168, "step": 22712 }, { "epoch": 3.465576171875e-05, "step": 22712, "training_step_time": 0.11378073692321777 }, { "epoch": 3.465728759765625e-05, "model_forward_time": 0.024118423461914062, "step": 22713 }, { "epoch": 3.465728759765625e-05, "step": 22713, "training_step_time": 0.21210861206054688 }, { "epoch": 3.46588134765625e-05, "model_forward_time": 0.02452230453491211, "step": 22714 }, { "epoch": 3.46588134765625e-05, "step": 22714, "training_step_time": 0.10902857780456543 }, { "epoch": 3.466033935546875e-05, "model_forward_time": 0.024440288543701172, "step": 22715 }, { "epoch": 3.466033935546875e-05, "step": 22715, "training_step_time": 0.1206057071685791 }, { "epoch": 3.4661865234375e-05, "model_forward_time": 0.025306224822998047, "step": 22716 }, { "epoch": 3.4661865234375e-05, "step": 22716, "training_step_time": 0.20368123054504395 }, { "epoch": 3.466339111328125e-05, "model_forward_time": 0.024439096450805664, "step": 22717 }, { "epoch": 3.466339111328125e-05, "step": 22717, "training_step_time": 0.1036231517791748 }, { "epoch": 3.46649169921875e-05, "model_forward_time": 0.024217844009399414, "step": 22718 }, { "epoch": 3.46649169921875e-05, "step": 22718, "training_step_time": 0.10343003273010254 }, { "epoch": 3.466644287109375e-05, "model_forward_time": 0.02548384666442871, "step": 22719 }, { "epoch": 3.466644287109375e-05, "step": 22719, "training_step_time": 0.11218547821044922 }, { "epoch": 3.466796875e-05, "grad_norm": 0.17345885932445526, "learning_rate": 1.5253868192573729e-05, "loss": 0.0036, "step": 22720 }, { "epoch": 3.466796875e-05, "model_forward_time": 0.025122642517089844, "step": 22720 }, { "epoch": 3.466796875e-05, "step": 22720, "training_step_time": 0.11033177375793457 }, { "epoch": 3.466949462890625e-05, "model_forward_time": 0.025322914123535156, "step": 22721 }, { "epoch": 3.466949462890625e-05, "step": 22721, "training_step_time": 0.10634517669677734 }, { "epoch": 3.46710205078125e-05, "model_forward_time": 0.025482892990112305, "step": 22722 }, { "epoch": 3.46710205078125e-05, "step": 22722, "training_step_time": 0.10773372650146484 }, { "epoch": 3.467254638671875e-05, "model_forward_time": 0.025668859481811523, "step": 22723 }, { "epoch": 3.467254638671875e-05, "step": 22723, "training_step_time": 0.10601925849914551 }, { "epoch": 3.4674072265625e-05, "model_forward_time": 0.024875879287719727, "step": 22724 }, { "epoch": 3.4674072265625e-05, "step": 22724, "training_step_time": 0.10669136047363281 }, { "epoch": 3.467559814453125e-05, "model_forward_time": 0.024714946746826172, "step": 22725 }, { "epoch": 3.467559814453125e-05, "step": 22725, "training_step_time": 0.10521101951599121 }, { "epoch": 3.46771240234375e-05, "model_forward_time": 0.02600264549255371, "step": 22726 }, { "epoch": 3.46771240234375e-05, "step": 22726, "training_step_time": 0.1052401065826416 }, { "epoch": 3.467864990234375e-05, "model_forward_time": 0.025722026824951172, "step": 22727 }, { "epoch": 3.467864990234375e-05, "step": 22727, "training_step_time": 0.1058499813079834 }, { "epoch": 3.468017578125e-05, "model_forward_time": 0.025041580200195312, "step": 22728 }, { "epoch": 3.468017578125e-05, "step": 22728, "training_step_time": 0.16454577445983887 }, { "epoch": 3.468170166015625e-05, "model_forward_time": 0.024910688400268555, "step": 22729 }, { "epoch": 3.468170166015625e-05, "step": 22729, "training_step_time": 0.13653278350830078 }, { "epoch": 3.46832275390625e-05, "grad_norm": 0.2135002762079239, "learning_rate": 1.5214256492830598e-05, "loss": 0.0053, "step": 22730 }, { "epoch": 3.46832275390625e-05, "model_forward_time": 0.0253143310546875, "step": 22730 }, { "epoch": 3.46832275390625e-05, "step": 22730, "training_step_time": 0.10994267463684082 }, { "epoch": 3.468475341796875e-05, "model_forward_time": 0.025034427642822266, "step": 22731 }, { "epoch": 3.468475341796875e-05, "step": 22731, "training_step_time": 0.1519179344177246 }, { "epoch": 3.4686279296875e-05, "model_forward_time": 0.024996042251586914, "step": 22732 }, { "epoch": 3.4686279296875e-05, "step": 22732, "training_step_time": 0.15223169326782227 }, { "epoch": 3.468780517578125e-05, "model_forward_time": 0.02487492561340332, "step": 22733 }, { "epoch": 3.468780517578125e-05, "step": 22733, "training_step_time": 0.1062781810760498 }, { "epoch": 3.46893310546875e-05, "model_forward_time": 0.025048494338989258, "step": 22734 }, { "epoch": 3.46893310546875e-05, "step": 22734, "training_step_time": 0.12725567817687988 }, { "epoch": 3.469085693359375e-05, "model_forward_time": 0.025586366653442383, "step": 22735 }, { "epoch": 3.469085693359375e-05, "step": 22735, "training_step_time": 0.11344647407531738 }, { "epoch": 3.46923828125e-05, "model_forward_time": 0.02557849884033203, "step": 22736 }, { "epoch": 3.46923828125e-05, "step": 22736, "training_step_time": 0.12065982818603516 }, { "epoch": 3.469390869140625e-05, "model_forward_time": 0.025480031967163086, "step": 22737 }, { "epoch": 3.469390869140625e-05, "step": 22737, "training_step_time": 0.10644078254699707 }, { "epoch": 3.46954345703125e-05, "model_forward_time": 0.025440692901611328, "step": 22738 }, { "epoch": 3.46954345703125e-05, "step": 22738, "training_step_time": 0.11100530624389648 }, { "epoch": 3.469696044921875e-05, "model_forward_time": 0.02528977394104004, "step": 22739 }, { "epoch": 3.469696044921875e-05, "step": 22739, "training_step_time": 0.10768246650695801 }, { "epoch": 3.4698486328125e-05, "grad_norm": 0.15731625258922577, "learning_rate": 1.517468706104589e-05, "loss": 0.0068, "step": 22740 }, { "epoch": 3.4698486328125e-05, "model_forward_time": 0.02524256706237793, "step": 22740 }, { "epoch": 3.4698486328125e-05, "step": 22740, "training_step_time": 0.10785794258117676 }, { "epoch": 3.470001220703125e-05, "model_forward_time": 0.025379657745361328, "step": 22741 }, { "epoch": 3.470001220703125e-05, "step": 22741, "training_step_time": 0.10427021980285645 }, { "epoch": 3.47015380859375e-05, "model_forward_time": 0.025455474853515625, "step": 22742 }, { "epoch": 3.47015380859375e-05, "step": 22742, "training_step_time": 0.10503840446472168 }, { "epoch": 3.470306396484375e-05, "model_forward_time": 0.02528548240661621, "step": 22743 }, { "epoch": 3.470306396484375e-05, "step": 22743, "training_step_time": 0.11847162246704102 }, { "epoch": 3.470458984375e-05, "model_forward_time": 0.02529621124267578, "step": 22744 }, { "epoch": 3.470458984375e-05, "step": 22744, "training_step_time": 0.13333678245544434 }, { "epoch": 3.470611572265625e-05, "model_forward_time": 0.025431394577026367, "step": 22745 }, { "epoch": 3.470611572265625e-05, "step": 22745, "training_step_time": 0.1319727897644043 }, { "epoch": 3.47076416015625e-05, "model_forward_time": 0.025043487548828125, "step": 22746 }, { "epoch": 3.47076416015625e-05, "step": 22746, "training_step_time": 0.12191534042358398 }, { "epoch": 3.470916748046875e-05, "model_forward_time": 0.024849653244018555, "step": 22747 }, { "epoch": 3.470916748046875e-05, "step": 22747, "training_step_time": 0.12015867233276367 }, { "epoch": 3.4710693359375e-05, "model_forward_time": 0.025035619735717773, "step": 22748 }, { "epoch": 3.4710693359375e-05, "step": 22748, "training_step_time": 0.11684918403625488 }, { "epoch": 3.471221923828125e-05, "model_forward_time": 0.02492809295654297, "step": 22749 }, { "epoch": 3.471221923828125e-05, "step": 22749, "training_step_time": 0.21842622756958008 }, { "epoch": 3.47137451171875e-05, "grad_norm": 0.24600502848625183, "learning_rate": 1.5135159945300231e-05, "loss": 0.0115, "step": 22750 }, { "epoch": 3.47137451171875e-05, "model_forward_time": 0.024547338485717773, "step": 22750 }, { "epoch": 3.47137451171875e-05, "step": 22750, "training_step_time": 0.12700390815734863 }, { "epoch": 3.471527099609375e-05, "model_forward_time": 0.024763107299804688, "step": 22751 }, { "epoch": 3.471527099609375e-05, "step": 22751, "training_step_time": 0.11107373237609863 }, { "epoch": 3.4716796875e-05, "model_forward_time": 0.0253298282623291, "step": 22752 }, { "epoch": 3.4716796875e-05, "step": 22752, "training_step_time": 0.11928462982177734 }, { "epoch": 3.471832275390625e-05, "model_forward_time": 0.02521347999572754, "step": 22753 }, { "epoch": 3.471832275390625e-05, "step": 22753, "training_step_time": 0.11527156829833984 }, { "epoch": 3.47198486328125e-05, "model_forward_time": 0.025943279266357422, "step": 22754 }, { "epoch": 3.47198486328125e-05, "step": 22754, "training_step_time": 0.10757660865783691 }, { "epoch": 3.472137451171875e-05, "model_forward_time": 0.02525782585144043, "step": 22755 }, { "epoch": 3.472137451171875e-05, "step": 22755, "training_step_time": 0.10715699195861816 }, { "epoch": 3.4722900390625e-05, "model_forward_time": 0.025412559509277344, "step": 22756 }, { "epoch": 3.4722900390625e-05, "step": 22756, "training_step_time": 0.1069040298461914 }, { "epoch": 3.472442626953125e-05, "model_forward_time": 0.025170326232910156, "step": 22757 }, { "epoch": 3.472442626953125e-05, "step": 22757, "training_step_time": 0.1368885040283203 }, { "epoch": 3.47259521484375e-05, "model_forward_time": 0.02606654167175293, "step": 22758 }, { "epoch": 3.47259521484375e-05, "step": 22758, "training_step_time": 0.10951089859008789 }, { "epoch": 3.472747802734375e-05, "model_forward_time": 0.025597810745239258, "step": 22759 }, { "epoch": 3.472747802734375e-05, "step": 22759, "training_step_time": 0.20629644393920898 }, { "epoch": 3.472900390625e-05, "grad_norm": 0.3221125304698944, "learning_rate": 1.5095675193622777e-05, "loss": 0.0117, "step": 22760 }, { "epoch": 3.472900390625e-05, "model_forward_time": 0.024474143981933594, "step": 22760 }, { "epoch": 3.472900390625e-05, "step": 22760, "training_step_time": 0.10703825950622559 }, { "epoch": 3.473052978515625e-05, "model_forward_time": 0.024786710739135742, "step": 22761 }, { "epoch": 3.473052978515625e-05, "step": 22761, "training_step_time": 0.10791206359863281 }, { "epoch": 3.47320556640625e-05, "model_forward_time": 0.025203466415405273, "step": 22762 }, { "epoch": 3.47320556640625e-05, "step": 22762, "training_step_time": 0.10649824142456055 }, { "epoch": 3.473358154296875e-05, "model_forward_time": 0.0252535343170166, "step": 22763 }, { "epoch": 3.473358154296875e-05, "step": 22763, "training_step_time": 0.10558271408081055 }, { "epoch": 3.4735107421875e-05, "model_forward_time": 0.025072813034057617, "step": 22764 }, { "epoch": 3.4735107421875e-05, "step": 22764, "training_step_time": 0.10522699356079102 }, { "epoch": 3.473663330078125e-05, "model_forward_time": 0.025418758392333984, "step": 22765 }, { "epoch": 3.473663330078125e-05, "step": 22765, "training_step_time": 0.10479927062988281 }, { "epoch": 3.47381591796875e-05, "model_forward_time": 0.02541208267211914, "step": 22766 }, { "epoch": 3.47381591796875e-05, "step": 22766, "training_step_time": 0.10793066024780273 }, { "epoch": 3.473968505859375e-05, "model_forward_time": 0.02559661865234375, "step": 22767 }, { "epoch": 3.473968505859375e-05, "step": 22767, "training_step_time": 0.10595011711120605 }, { "epoch": 3.47412109375e-05, "model_forward_time": 0.02535843849182129, "step": 22768 }, { "epoch": 3.47412109375e-05, "step": 22768, "training_step_time": 0.10576105117797852 }, { "epoch": 3.474273681640625e-05, "model_forward_time": 0.025388717651367188, "step": 22769 }, { "epoch": 3.474273681640625e-05, "step": 22769, "training_step_time": 0.10571813583374023 }, { "epoch": 3.47442626953125e-05, "grad_norm": 0.14750151336193085, "learning_rate": 1.5056232853991209e-05, "loss": 0.0068, "step": 22770 }, { "epoch": 3.47442626953125e-05, "model_forward_time": 0.0250701904296875, "step": 22770 }, { "epoch": 3.47442626953125e-05, "step": 22770, "training_step_time": 0.10787010192871094 }, { "epoch": 3.474578857421875e-05, "model_forward_time": 0.025307655334472656, "step": 22771 }, { "epoch": 3.474578857421875e-05, "step": 22771, "training_step_time": 0.10696268081665039 }, { "epoch": 3.4747314453125e-05, "model_forward_time": 0.02534627914428711, "step": 22772 }, { "epoch": 3.4747314453125e-05, "step": 22772, "training_step_time": 0.11025595664978027 }, { "epoch": 3.474884033203125e-05, "model_forward_time": 0.025321483612060547, "step": 22773 }, { "epoch": 3.474884033203125e-05, "step": 22773, "training_step_time": 0.10450434684753418 }, { "epoch": 3.47503662109375e-05, "model_forward_time": 0.025637149810791016, "step": 22774 }, { "epoch": 3.47503662109375e-05, "step": 22774, "training_step_time": 0.10747432708740234 }, { "epoch": 3.475189208984375e-05, "model_forward_time": 0.025498628616333008, "step": 22775 }, { "epoch": 3.475189208984375e-05, "step": 22775, "training_step_time": 0.19253134727478027 }, { "epoch": 3.475341796875e-05, "model_forward_time": 0.024778127670288086, "step": 22776 }, { "epoch": 3.475341796875e-05, "step": 22776, "training_step_time": 0.14162492752075195 }, { "epoch": 3.475494384765625e-05, "model_forward_time": 0.025519132614135742, "step": 22777 }, { "epoch": 3.475494384765625e-05, "step": 22777, "training_step_time": 0.10752582550048828 }, { "epoch": 3.47564697265625e-05, "model_forward_time": 0.025004863739013672, "step": 22778 }, { "epoch": 3.47564697265625e-05, "step": 22778, "training_step_time": 0.18317270278930664 }, { "epoch": 3.475799560546875e-05, "model_forward_time": 0.025086641311645508, "step": 22779 }, { "epoch": 3.475799560546875e-05, "step": 22779, "training_step_time": 0.16836309432983398 }, { "epoch": 3.4759521484375e-05, "grad_norm": 0.13970600068569183, "learning_rate": 1.5016832974331724e-05, "loss": 0.0044, "step": 22780 }, { "epoch": 3.4759521484375e-05, "model_forward_time": 0.024425029754638672, "step": 22780 }, { "epoch": 3.4759521484375e-05, "step": 22780, "training_step_time": 0.16679859161376953 }, { "epoch": 3.476104736328125e-05, "model_forward_time": 0.02862858772277832, "step": 22781 }, { "epoch": 3.476104736328125e-05, "step": 22781, "training_step_time": 0.1746206283569336 }, { "epoch": 3.47625732421875e-05, "model_forward_time": 0.024951696395874023, "step": 22782 }, { "epoch": 3.47625732421875e-05, "step": 22782, "training_step_time": 0.12057971954345703 }, { "epoch": 3.476409912109375e-05, "model_forward_time": 0.024610280990600586, "step": 22783 }, { "epoch": 3.476409912109375e-05, "step": 22783, "training_step_time": 0.11373066902160645 }, { "epoch": 3.4765625e-05, "model_forward_time": 0.025423765182495117, "step": 22784 }, { "epoch": 3.4765625e-05, "step": 22784, "training_step_time": 0.11100149154663086 }, { "epoch": 3.476715087890625e-05, "model_forward_time": 0.025137662887573242, "step": 22785 }, { "epoch": 3.476715087890625e-05, "step": 22785, "training_step_time": 0.11122798919677734 }, { "epoch": 3.47686767578125e-05, "model_forward_time": 0.0242769718170166, "step": 22786 }, { "epoch": 3.47686767578125e-05, "step": 22786, "training_step_time": 0.11081910133361816 }, { "epoch": 3.477020263671875e-05, "model_forward_time": 0.025257110595703125, "step": 22787 }, { "epoch": 3.477020263671875e-05, "step": 22787, "training_step_time": 0.10951066017150879 }, { "epoch": 3.4771728515625e-05, "model_forward_time": 0.024289369583129883, "step": 22788 }, { "epoch": 3.4771728515625e-05, "step": 22788, "training_step_time": 0.10925769805908203 }, { "epoch": 3.477325439453125e-05, "model_forward_time": 0.024606943130493164, "step": 22789 }, { "epoch": 3.477325439453125e-05, "step": 22789, "training_step_time": 0.10770869255065918 }, { "epoch": 3.47747802734375e-05, "grad_norm": 0.20419451594352722, "learning_rate": 1.4977475602518876e-05, "loss": 0.0036, "step": 22790 }, { "epoch": 3.47747802734375e-05, "model_forward_time": 0.026028871536254883, "step": 22790 }, { "epoch": 3.47747802734375e-05, "step": 22790, "training_step_time": 0.10820865631103516 }, { "epoch": 3.477630615234375e-05, "model_forward_time": 0.025558948516845703, "step": 22791 }, { "epoch": 3.477630615234375e-05, "step": 22791, "training_step_time": 0.10811018943786621 }, { "epoch": 3.477783203125e-05, "model_forward_time": 0.025388240814208984, "step": 22792 }, { "epoch": 3.477783203125e-05, "step": 22792, "training_step_time": 0.10717916488647461 }, { "epoch": 3.477935791015625e-05, "model_forward_time": 0.02521038055419922, "step": 22793 }, { "epoch": 3.477935791015625e-05, "step": 22793, "training_step_time": 0.12821364402770996 }, { "epoch": 3.47808837890625e-05, "model_forward_time": 0.026283979415893555, "step": 22794 }, { "epoch": 3.47808837890625e-05, "step": 22794, "training_step_time": 0.1070408821105957 }, { "epoch": 3.478240966796875e-05, "model_forward_time": 0.025435209274291992, "step": 22795 }, { "epoch": 3.478240966796875e-05, "step": 22795, "training_step_time": 0.10952568054199219 }, { "epoch": 3.4783935546875e-05, "model_forward_time": 0.025142908096313477, "step": 22796 }, { "epoch": 3.4783935546875e-05, "step": 22796, "training_step_time": 0.11936044692993164 }, { "epoch": 3.478546142578125e-05, "model_forward_time": 0.027043581008911133, "step": 22797 }, { "epoch": 3.478546142578125e-05, "step": 22797, "training_step_time": 0.12593579292297363 }, { "epoch": 3.47869873046875e-05, "model_forward_time": 0.02536177635192871, "step": 22798 }, { "epoch": 3.47869873046875e-05, "step": 22798, "training_step_time": 0.12269258499145508 }, { "epoch": 3.478851318359375e-05, "model_forward_time": 0.025135517120361328, "step": 22799 }, { "epoch": 3.478851318359375e-05, "step": 22799, "training_step_time": 0.11824393272399902 }, { "epoch": 3.47900390625e-05, "grad_norm": 0.09725761413574219, "learning_rate": 1.4938160786375572e-05, "loss": 0.0035, "step": 22800 }, { "epoch": 3.47900390625e-05, "model_forward_time": 0.02498626708984375, "step": 22800 }, { "epoch": 3.47900390625e-05, "step": 22800, "training_step_time": 0.10482048988342285 }, { "epoch": 3.479156494140625e-05, "model_forward_time": 0.02560734748840332, "step": 22801 }, { "epoch": 3.479156494140625e-05, "step": 22801, "training_step_time": 0.10585522651672363 }, { "epoch": 3.47930908203125e-05, "model_forward_time": 0.025606870651245117, "step": 22802 }, { "epoch": 3.47930908203125e-05, "step": 22802, "training_step_time": 0.1055445671081543 }, { "epoch": 3.479461669921875e-05, "model_forward_time": 0.02541661262512207, "step": 22803 }, { "epoch": 3.479461669921875e-05, "step": 22803, "training_step_time": 0.1771228313446045 }, { "epoch": 3.4796142578125e-05, "model_forward_time": 0.025107622146606445, "step": 22804 }, { "epoch": 3.4796142578125e-05, "step": 22804, "training_step_time": 0.1158151626586914 }, { "epoch": 3.479766845703125e-05, "model_forward_time": 0.02494525909423828, "step": 22805 }, { "epoch": 3.479766845703125e-05, "step": 22805, "training_step_time": 0.20346641540527344 }, { "epoch": 3.47991943359375e-05, "model_forward_time": 0.024654626846313477, "step": 22806 }, { "epoch": 3.47991943359375e-05, "step": 22806, "training_step_time": 0.18082714080810547 }, { "epoch": 3.480072021484375e-05, "model_forward_time": 0.024167299270629883, "step": 22807 }, { "epoch": 3.480072021484375e-05, "step": 22807, "training_step_time": 0.1410210132598877 }, { "epoch": 3.480224609375e-05, "model_forward_time": 0.02449512481689453, "step": 22808 }, { "epoch": 3.480224609375e-05, "step": 22808, "training_step_time": 0.10700321197509766 }, { "epoch": 3.480377197265625e-05, "model_forward_time": 0.025849580764770508, "step": 22809 }, { "epoch": 3.480377197265625e-05, "step": 22809, "training_step_time": 0.10742712020874023 }, { "epoch": 3.48052978515625e-05, "grad_norm": 0.14344458281993866, "learning_rate": 1.4898888573673031e-05, "loss": 0.0068, "step": 22810 }, { "epoch": 3.48052978515625e-05, "model_forward_time": 0.02580571174621582, "step": 22810 }, { "epoch": 3.48052978515625e-05, "step": 22810, "training_step_time": 0.10942959785461426 }, { "epoch": 3.480682373046875e-05, "model_forward_time": 0.02559804916381836, "step": 22811 }, { "epoch": 3.480682373046875e-05, "step": 22811, "training_step_time": 0.10956358909606934 }, { "epoch": 3.4808349609375e-05, "model_forward_time": 0.025168895721435547, "step": 22812 }, { "epoch": 3.4808349609375e-05, "step": 22812, "training_step_time": 0.10886597633361816 }, { "epoch": 3.480987548828125e-05, "model_forward_time": 0.025579214096069336, "step": 22813 }, { "epoch": 3.480987548828125e-05, "step": 22813, "training_step_time": 0.10538458824157715 }, { "epoch": 3.48114013671875e-05, "model_forward_time": 0.025473594665527344, "step": 22814 }, { "epoch": 3.48114013671875e-05, "step": 22814, "training_step_time": 0.10645079612731934 }, { "epoch": 3.481292724609375e-05, "model_forward_time": 0.024853944778442383, "step": 22815 }, { "epoch": 3.481292724609375e-05, "step": 22815, "training_step_time": 0.10262393951416016 }, { "epoch": 3.4814453125e-05, "model_forward_time": 0.025220632553100586, "step": 22816 }, { "epoch": 3.4814453125e-05, "step": 22816, "training_step_time": 0.10415506362915039 }, { "epoch": 3.481597900390625e-05, "model_forward_time": 0.02528095245361328, "step": 22817 }, { "epoch": 3.481597900390625e-05, "step": 22817, "training_step_time": 0.10394644737243652 }, { "epoch": 3.48175048828125e-05, "model_forward_time": 0.0253903865814209, "step": 22818 }, { "epoch": 3.48175048828125e-05, "step": 22818, "training_step_time": 0.10526227951049805 }, { "epoch": 3.481903076171875e-05, "model_forward_time": 0.02604508399963379, "step": 22819 }, { "epoch": 3.481903076171875e-05, "step": 22819, "training_step_time": 0.10596489906311035 }, { "epoch": 3.4820556640625e-05, "grad_norm": 0.12907643616199493, "learning_rate": 1.4859659012130695e-05, "loss": 0.011, "step": 22820 }, { "epoch": 3.4820556640625e-05, "model_forward_time": 0.028371095657348633, "step": 22820 }, { "epoch": 3.4820556640625e-05, "step": 22820, "training_step_time": 0.10895442962646484 }, { "epoch": 3.482208251953125e-05, "model_forward_time": 0.025696516036987305, "step": 22821 }, { "epoch": 3.482208251953125e-05, "step": 22821, "training_step_time": 0.10643839836120605 }, { "epoch": 3.48236083984375e-05, "model_forward_time": 0.025859832763671875, "step": 22822 }, { "epoch": 3.48236083984375e-05, "step": 22822, "training_step_time": 0.11932826042175293 }, { "epoch": 3.482513427734375e-05, "model_forward_time": 0.02556324005126953, "step": 22823 }, { "epoch": 3.482513427734375e-05, "step": 22823, "training_step_time": 0.10962700843811035 }, { "epoch": 3.482666015625e-05, "model_forward_time": 0.02634143829345703, "step": 22824 }, { "epoch": 3.482666015625e-05, "step": 22824, "training_step_time": 0.1068723201751709 }, { "epoch": 3.482818603515625e-05, "model_forward_time": 0.025702476501464844, "step": 22825 }, { "epoch": 3.482818603515625e-05, "step": 22825, "training_step_time": 0.21186447143554688 }, { "epoch": 3.48297119140625e-05, "model_forward_time": 0.02496170997619629, "step": 22826 }, { "epoch": 3.48297119140625e-05, "step": 22826, "training_step_time": 0.21672582626342773 }, { "epoch": 3.483123779296875e-05, "model_forward_time": 0.024808645248413086, "step": 22827 }, { "epoch": 3.483123779296875e-05, "step": 22827, "training_step_time": 0.20836377143859863 }, { "epoch": 3.4832763671875e-05, "model_forward_time": 0.024509429931640625, "step": 22828 }, { "epoch": 3.4832763671875e-05, "step": 22828, "training_step_time": 0.2055835723876953 }, { "epoch": 3.483428955078125e-05, "model_forward_time": 0.025012493133544922, "step": 22829 }, { "epoch": 3.483428955078125e-05, "step": 22829, "training_step_time": 0.17716026306152344 }, { "epoch": 3.48358154296875e-05, "grad_norm": 0.144840270280838, "learning_rate": 1.4820472149416154e-05, "loss": 0.0057, "step": 22830 }, { "epoch": 3.48358154296875e-05, "model_forward_time": 0.02431654930114746, "step": 22830 }, { "epoch": 3.48358154296875e-05, "step": 22830, "training_step_time": 0.14636969566345215 }, { "epoch": 3.483734130859375e-05, "model_forward_time": 0.02350902557373047, "step": 22831 }, { "epoch": 3.483734130859375e-05, "step": 22831, "training_step_time": 0.13284611701965332 }, { "epoch": 3.48388671875e-05, "model_forward_time": 0.0274050235748291, "step": 22832 }, { "epoch": 3.48388671875e-05, "step": 22832, "training_step_time": 0.12695646286010742 }, { "epoch": 3.484039306640625e-05, "model_forward_time": 0.023859739303588867, "step": 22833 }, { "epoch": 3.484039306640625e-05, "step": 22833, "training_step_time": 0.12520742416381836 }, { "epoch": 3.48419189453125e-05, "model_forward_time": 0.02385878562927246, "step": 22834 }, { "epoch": 3.48419189453125e-05, "step": 22834, "training_step_time": 0.10430216789245605 }, { "epoch": 3.484344482421875e-05, "model_forward_time": 0.02612757682800293, "step": 22835 }, { "epoch": 3.484344482421875e-05, "step": 22835, "training_step_time": 0.1042177677154541 }, { "epoch": 3.4844970703125e-05, "model_forward_time": 0.02556300163269043, "step": 22836 }, { "epoch": 3.4844970703125e-05, "step": 22836, "training_step_time": 0.10341835021972656 }, { "epoch": 3.484649658203125e-05, "model_forward_time": 0.025195837020874023, "step": 22837 }, { "epoch": 3.484649658203125e-05, "step": 22837, "training_step_time": 0.17187213897705078 }, { "epoch": 3.48480224609375e-05, "model_forward_time": 0.024822473526000977, "step": 22838 }, { "epoch": 3.48480224609375e-05, "step": 22838, "training_step_time": 0.14420104026794434 }, { "epoch": 3.484954833984375e-05, "model_forward_time": 0.024406909942626953, "step": 22839 }, { "epoch": 3.484954833984375e-05, "step": 22839, "training_step_time": 0.1162106990814209 }, { "epoch": 3.485107421875e-05, "grad_norm": 0.2122463583946228, "learning_rate": 1.4781328033145187e-05, "loss": 0.0093, "step": 22840 }, { "epoch": 3.485107421875e-05, "model_forward_time": 0.02503657341003418, "step": 22840 }, { "epoch": 3.485107421875e-05, "step": 22840, "training_step_time": 0.1175847053527832 }, { "epoch": 3.485260009765625e-05, "model_forward_time": 0.0252535343170166, "step": 22841 }, { "epoch": 3.485260009765625e-05, "step": 22841, "training_step_time": 0.1319141387939453 }, { "epoch": 3.48541259765625e-05, "model_forward_time": 0.02534627914428711, "step": 22842 }, { "epoch": 3.48541259765625e-05, "step": 22842, "training_step_time": 0.11172199249267578 }, { "epoch": 3.485565185546875e-05, "model_forward_time": 0.024980545043945312, "step": 22843 }, { "epoch": 3.485565185546875e-05, "step": 22843, "training_step_time": 0.11527371406555176 }, { "epoch": 3.4857177734375e-05, "model_forward_time": 0.025072813034057617, "step": 22844 }, { "epoch": 3.4857177734375e-05, "step": 22844, "training_step_time": 0.1045684814453125 }, { "epoch": 3.485870361328125e-05, "model_forward_time": 0.025394201278686523, "step": 22845 }, { "epoch": 3.485870361328125e-05, "step": 22845, "training_step_time": 0.10884904861450195 }, { "epoch": 3.48602294921875e-05, "model_forward_time": 0.024939298629760742, "step": 22846 }, { "epoch": 3.48602294921875e-05, "step": 22846, "training_step_time": 0.1121675968170166 }, { "epoch": 3.486175537109375e-05, "model_forward_time": 0.02510809898376465, "step": 22847 }, { "epoch": 3.486175537109375e-05, "step": 22847, "training_step_time": 0.11913251876831055 }, { "epoch": 3.486328125e-05, "model_forward_time": 0.025386333465576172, "step": 22848 }, { "epoch": 3.486328125e-05, "step": 22848, "training_step_time": 0.13124442100524902 }, { "epoch": 3.486480712890625e-05, "model_forward_time": 0.024923086166381836, "step": 22849 }, { "epoch": 3.486480712890625e-05, "step": 22849, "training_step_time": 0.21547269821166992 }, { "epoch": 3.48663330078125e-05, "grad_norm": 0.35279932618141174, "learning_rate": 1.4742226710881558e-05, "loss": 0.0187, "step": 22850 }, { "epoch": 3.48663330078125e-05, "model_forward_time": 0.024524211883544922, "step": 22850 }, { "epoch": 3.48663330078125e-05, "step": 22850, "training_step_time": 0.1030418872833252 }, { "epoch": 3.486785888671875e-05, "model_forward_time": 0.02425551414489746, "step": 22851 }, { "epoch": 3.486785888671875e-05, "step": 22851, "training_step_time": 0.11866044998168945 }, { "epoch": 3.4869384765625e-05, "model_forward_time": 0.02402663230895996, "step": 22852 }, { "epoch": 3.4869384765625e-05, "step": 22852, "training_step_time": 0.11206412315368652 }, { "epoch": 3.487091064453125e-05, "model_forward_time": 0.02507328987121582, "step": 22853 }, { "epoch": 3.487091064453125e-05, "step": 22853, "training_step_time": 0.11282539367675781 }, { "epoch": 3.48724365234375e-05, "model_forward_time": 0.024970293045043945, "step": 22854 }, { "epoch": 3.48724365234375e-05, "step": 22854, "training_step_time": 0.11034035682678223 }, { "epoch": 3.487396240234375e-05, "model_forward_time": 0.02519679069519043, "step": 22855 }, { "epoch": 3.487396240234375e-05, "step": 22855, "training_step_time": 0.10639619827270508 }, { "epoch": 3.487548828125e-05, "model_forward_time": 0.024964570999145508, "step": 22856 }, { "epoch": 3.487548828125e-05, "step": 22856, "training_step_time": 0.10882806777954102 }, { "epoch": 3.487701416015625e-05, "model_forward_time": 0.02490091323852539, "step": 22857 }, { "epoch": 3.487701416015625e-05, "step": 22857, "training_step_time": 0.10769200325012207 }, { "epoch": 3.48785400390625e-05, "model_forward_time": 0.025066614151000977, "step": 22858 }, { "epoch": 3.48785400390625e-05, "step": 22858, "training_step_time": 0.1067354679107666 }, { "epoch": 3.488006591796875e-05, "model_forward_time": 0.024793386459350586, "step": 22859 }, { "epoch": 3.488006591796875e-05, "step": 22859, "training_step_time": 0.10522818565368652 }, { "epoch": 3.4881591796875e-05, "grad_norm": 0.10908864438533783, "learning_rate": 1.470316823013707e-05, "loss": 0.0047, "step": 22860 }, { "epoch": 3.4881591796875e-05, "model_forward_time": 0.024829626083374023, "step": 22860 }, { "epoch": 3.4881591796875e-05, "step": 22860, "training_step_time": 0.10541844367980957 }, { "epoch": 3.488311767578125e-05, "model_forward_time": 0.02483201026916504, "step": 22861 }, { "epoch": 3.488311767578125e-05, "step": 22861, "training_step_time": 0.10969853401184082 }, { "epoch": 3.48846435546875e-05, "model_forward_time": 0.024955272674560547, "step": 22862 }, { "epoch": 3.48846435546875e-05, "step": 22862, "training_step_time": 0.10819816589355469 }, { "epoch": 3.488616943359375e-05, "model_forward_time": 0.02535223960876465, "step": 22863 }, { "epoch": 3.488616943359375e-05, "step": 22863, "training_step_time": 0.11041426658630371 }, { "epoch": 3.48876953125e-05, "model_forward_time": 0.025101184844970703, "step": 22864 }, { "epoch": 3.48876953125e-05, "step": 22864, "training_step_time": 0.10503387451171875 }, { "epoch": 3.488922119140625e-05, "model_forward_time": 0.024740934371948242, "step": 22865 }, { "epoch": 3.488922119140625e-05, "step": 22865, "training_step_time": 0.11049509048461914 }, { "epoch": 3.48907470703125e-05, "model_forward_time": 0.024889469146728516, "step": 22866 }, { "epoch": 3.48907470703125e-05, "step": 22866, "training_step_time": 0.1351768970489502 }, { "epoch": 3.489227294921875e-05, "model_forward_time": 0.025843143463134766, "step": 22867 }, { "epoch": 3.489227294921875e-05, "step": 22867, "training_step_time": 0.11082577705383301 }, { "epoch": 3.4893798828125e-05, "model_forward_time": 0.02529597282409668, "step": 22868 }, { "epoch": 3.4893798828125e-05, "step": 22868, "training_step_time": 0.10524177551269531 }, { "epoch": 3.489532470703125e-05, "model_forward_time": 0.024091720581054688, "step": 22869 }, { "epoch": 3.489532470703125e-05, "step": 22869, "training_step_time": 0.15042614936828613 }, { "epoch": 3.48968505859375e-05, "grad_norm": 0.2644929885864258, "learning_rate": 1.4664152638371437e-05, "loss": 0.0059, "step": 22870 }, { "epoch": 3.48968505859375e-05, "model_forward_time": 0.02468395233154297, "step": 22870 }, { "epoch": 3.48968505859375e-05, "step": 22870, "training_step_time": 0.16089081764221191 }, { "epoch": 3.489837646484375e-05, "model_forward_time": 0.02477240562438965, "step": 22871 }, { "epoch": 3.489837646484375e-05, "step": 22871, "training_step_time": 0.10635781288146973 }, { "epoch": 3.489990234375e-05, "model_forward_time": 0.024487733840942383, "step": 22872 }, { "epoch": 3.489990234375e-05, "step": 22872, "training_step_time": 0.1285703182220459 }, { "epoch": 3.490142822265625e-05, "model_forward_time": 0.024829387664794922, "step": 22873 }, { "epoch": 3.490142822265625e-05, "step": 22873, "training_step_time": 0.19692635536193848 }, { "epoch": 3.49029541015625e-05, "model_forward_time": 0.024796247482299805, "step": 22874 }, { "epoch": 3.49029541015625e-05, "step": 22874, "training_step_time": 0.1048426628112793 }, { "epoch": 3.490447998046875e-05, "model_forward_time": 0.024367332458496094, "step": 22875 }, { "epoch": 3.490447998046875e-05, "step": 22875, "training_step_time": 0.10435175895690918 }, { "epoch": 3.4906005859375e-05, "model_forward_time": 0.025068283081054688, "step": 22876 }, { "epoch": 3.4906005859375e-05, "step": 22876, "training_step_time": 0.10718822479248047 }, { "epoch": 3.490753173828125e-05, "model_forward_time": 0.025122642517089844, "step": 22877 }, { "epoch": 3.490753173828125e-05, "step": 22877, "training_step_time": 0.10822153091430664 }, { "epoch": 3.49090576171875e-05, "model_forward_time": 0.025420427322387695, "step": 22878 }, { "epoch": 3.49090576171875e-05, "step": 22878, "training_step_time": 0.10799002647399902 }, { "epoch": 3.491058349609375e-05, "model_forward_time": 0.024608850479125977, "step": 22879 }, { "epoch": 3.491058349609375e-05, "step": 22879, "training_step_time": 0.10671520233154297 }, { "epoch": 3.4912109375e-05, "grad_norm": 0.20742835104465485, "learning_rate": 1.4625179982992321e-05, "loss": 0.0123, "step": 22880 }, { "epoch": 3.4912109375e-05, "model_forward_time": 0.024595022201538086, "step": 22880 }, { "epoch": 3.4912109375e-05, "step": 22880, "training_step_time": 0.10769152641296387 }, { "epoch": 3.491363525390625e-05, "model_forward_time": 0.025026798248291016, "step": 22881 }, { "epoch": 3.491363525390625e-05, "step": 22881, "training_step_time": 0.10483670234680176 }, { "epoch": 3.49151611328125e-05, "model_forward_time": 0.02530217170715332, "step": 22882 }, { "epoch": 3.49151611328125e-05, "step": 22882, "training_step_time": 0.10591793060302734 }, { "epoch": 3.491668701171875e-05, "model_forward_time": 0.024617433547973633, "step": 22883 }, { "epoch": 3.491668701171875e-05, "step": 22883, "training_step_time": 0.10512900352478027 }, { "epoch": 3.4918212890625e-05, "model_forward_time": 0.025096654891967773, "step": 22884 }, { "epoch": 3.4918212890625e-05, "step": 22884, "training_step_time": 0.1108396053314209 }, { "epoch": 3.491973876953125e-05, "model_forward_time": 0.024796485900878906, "step": 22885 }, { "epoch": 3.491973876953125e-05, "step": 22885, "training_step_time": 0.13176536560058594 }, { "epoch": 3.49212646484375e-05, "model_forward_time": 0.025580883026123047, "step": 22886 }, { "epoch": 3.49212646484375e-05, "step": 22886, "training_step_time": 0.10870170593261719 }, { "epoch": 3.492279052734375e-05, "model_forward_time": 0.025025606155395508, "step": 22887 }, { "epoch": 3.492279052734375e-05, "step": 22887, "training_step_time": 0.11175060272216797 }, { "epoch": 3.492431640625e-05, "model_forward_time": 0.025172710418701172, "step": 22888 }, { "epoch": 3.492431640625e-05, "step": 22888, "training_step_time": 0.12007713317871094 }, { "epoch": 3.492584228515625e-05, "model_forward_time": 0.025093793869018555, "step": 22889 }, { "epoch": 3.492584228515625e-05, "step": 22889, "training_step_time": 0.12108016014099121 }, { "epoch": 3.49273681640625e-05, "grad_norm": 0.09625491499900818, "learning_rate": 1.4586250311355132e-05, "loss": 0.0043, "step": 22890 }, { "epoch": 3.49273681640625e-05, "model_forward_time": 0.024801254272460938, "step": 22890 }, { "epoch": 3.49273681640625e-05, "step": 22890, "training_step_time": 0.11254644393920898 }, { "epoch": 3.492889404296875e-05, "model_forward_time": 0.025102853775024414, "step": 22891 }, { "epoch": 3.492889404296875e-05, "step": 22891, "training_step_time": 0.1310122013092041 }, { "epoch": 3.4930419921875e-05, "model_forward_time": 0.025182247161865234, "step": 22892 }, { "epoch": 3.4930419921875e-05, "step": 22892, "training_step_time": 0.14044928550720215 }, { "epoch": 3.493194580078125e-05, "model_forward_time": 0.024358510971069336, "step": 22893 }, { "epoch": 3.493194580078125e-05, "step": 22893, "training_step_time": 0.1328907012939453 }, { "epoch": 3.49334716796875e-05, "model_forward_time": 0.024767398834228516, "step": 22894 }, { "epoch": 3.49334716796875e-05, "step": 22894, "training_step_time": 0.12484884262084961 }, { "epoch": 3.493499755859375e-05, "model_forward_time": 0.024884462356567383, "step": 22895 }, { "epoch": 3.493499755859375e-05, "step": 22895, "training_step_time": 0.15195035934448242 }, { "epoch": 3.49365234375e-05, "model_forward_time": 0.024791479110717773, "step": 22896 }, { "epoch": 3.49365234375e-05, "step": 22896, "training_step_time": 0.1569521427154541 }, { "epoch": 3.493804931640625e-05, "model_forward_time": 0.024165868759155273, "step": 22897 }, { "epoch": 3.493804931640625e-05, "step": 22897, "training_step_time": 0.11577486991882324 }, { "epoch": 3.49395751953125e-05, "model_forward_time": 0.02445077896118164, "step": 22898 }, { "epoch": 3.49395751953125e-05, "step": 22898, "training_step_time": 0.1087496280670166 }, { "epoch": 3.494110107421875e-05, "model_forward_time": 0.02503037452697754, "step": 22899 }, { "epoch": 3.494110107421875e-05, "step": 22899, "training_step_time": 0.1879105567932129 }, { "epoch": 3.4942626953125e-05, "grad_norm": 0.14945359528064728, "learning_rate": 1.4547363670763137e-05, "loss": 0.0149, "step": 22900 }, { "epoch": 3.4942626953125e-05, "model_forward_time": 0.024821996688842773, "step": 22900 }, { "epoch": 3.4942626953125e-05, "step": 22900, "training_step_time": 0.10581135749816895 }, { "epoch": 3.494415283203125e-05, "model_forward_time": 0.024337053298950195, "step": 22901 }, { "epoch": 3.494415283203125e-05, "step": 22901, "training_step_time": 0.10126185417175293 }, { "epoch": 3.49456787109375e-05, "model_forward_time": 0.02523350715637207, "step": 22902 }, { "epoch": 3.49456787109375e-05, "step": 22902, "training_step_time": 0.10530734062194824 }, { "epoch": 3.494720458984375e-05, "model_forward_time": 0.025290489196777344, "step": 22903 }, { "epoch": 3.494720458984375e-05, "step": 22903, "training_step_time": 0.10489821434020996 }, { "epoch": 3.494873046875e-05, "model_forward_time": 0.025685548782348633, "step": 22904 }, { "epoch": 3.494873046875e-05, "step": 22904, "training_step_time": 0.10569548606872559 }, { "epoch": 3.495025634765625e-05, "model_forward_time": 0.025206327438354492, "step": 22905 }, { "epoch": 3.495025634765625e-05, "step": 22905, "training_step_time": 0.10495781898498535 }, { "epoch": 3.49517822265625e-05, "model_forward_time": 0.025297880172729492, "step": 22906 }, { "epoch": 3.49517822265625e-05, "step": 22906, "training_step_time": 0.10867953300476074 }, { "epoch": 3.495330810546875e-05, "model_forward_time": 0.025444984436035156, "step": 22907 }, { "epoch": 3.495330810546875e-05, "step": 22907, "training_step_time": 0.1203775405883789 }, { "epoch": 3.4954833984375e-05, "model_forward_time": 0.025078773498535156, "step": 22908 }, { "epoch": 3.4954833984375e-05, "step": 22908, "training_step_time": 0.10955357551574707 }, { "epoch": 3.495635986328125e-05, "model_forward_time": 0.025688648223876953, "step": 22909 }, { "epoch": 3.495635986328125e-05, "step": 22909, "training_step_time": 0.11077165603637695 }, { "epoch": 3.49578857421875e-05, "grad_norm": 0.08774767816066742, "learning_rate": 1.450852010846724e-05, "loss": 0.0081, "step": 22910 }, { "epoch": 3.49578857421875e-05, "model_forward_time": 0.025636672973632812, "step": 22910 }, { "epoch": 3.49578857421875e-05, "step": 22910, "training_step_time": 0.11340618133544922 }, { "epoch": 3.495941162109375e-05, "model_forward_time": 0.024908065795898438, "step": 22911 }, { "epoch": 3.495941162109375e-05, "step": 22911, "training_step_time": 0.12753081321716309 }, { "epoch": 3.49609375e-05, "model_forward_time": 0.024957656860351562, "step": 22912 }, { "epoch": 3.49609375e-05, "step": 22912, "training_step_time": 0.12898826599121094 }, { "epoch": 3.496246337890625e-05, "model_forward_time": 0.024945735931396484, "step": 22913 }, { "epoch": 3.496246337890625e-05, "step": 22913, "training_step_time": 0.10939145088195801 }, { "epoch": 3.49639892578125e-05, "model_forward_time": 0.025281906127929688, "step": 22914 }, { "epoch": 3.49639892578125e-05, "step": 22914, "training_step_time": 0.1091461181640625 }, { "epoch": 3.496551513671875e-05, "model_forward_time": 0.02581191062927246, "step": 22915 }, { "epoch": 3.496551513671875e-05, "step": 22915, "training_step_time": 0.10804224014282227 }, { "epoch": 3.4967041015625e-05, "model_forward_time": 0.025528430938720703, "step": 22916 }, { "epoch": 3.4967041015625e-05, "step": 22916, "training_step_time": 0.16184377670288086 }, { "epoch": 3.496856689453125e-05, "model_forward_time": 0.024809598922729492, "step": 22917 }, { "epoch": 3.496856689453125e-05, "step": 22917, "training_step_time": 0.10933709144592285 }, { "epoch": 3.49700927734375e-05, "model_forward_time": 0.02513909339904785, "step": 22918 }, { "epoch": 3.49700927734375e-05, "step": 22918, "training_step_time": 0.19405436515808105 }, { "epoch": 3.497161865234375e-05, "model_forward_time": 0.02425074577331543, "step": 22919 }, { "epoch": 3.497161865234375e-05, "step": 22919, "training_step_time": 0.13287615776062012 }, { "epoch": 3.497314453125e-05, "grad_norm": 0.39075523614883423, "learning_rate": 1.4469719671666043e-05, "loss": 0.01, "step": 22920 }, { "epoch": 3.497314453125e-05, "model_forward_time": 0.0246734619140625, "step": 22920 }, { "epoch": 3.497314453125e-05, "step": 22920, "training_step_time": 0.19736862182617188 }, { "epoch": 3.497467041015625e-05, "model_forward_time": 0.024672508239746094, "step": 22921 }, { "epoch": 3.497467041015625e-05, "step": 22921, "training_step_time": 0.10254263877868652 }, { "epoch": 3.49761962890625e-05, "model_forward_time": 0.024676084518432617, "step": 22922 }, { "epoch": 3.49761962890625e-05, "step": 22922, "training_step_time": 0.10589241981506348 }, { "epoch": 3.497772216796875e-05, "model_forward_time": 0.025455236434936523, "step": 22923 }, { "epoch": 3.497772216796875e-05, "step": 22923, "training_step_time": 0.10478353500366211 }, { "epoch": 3.4979248046875e-05, "model_forward_time": 0.025585651397705078, "step": 22924 }, { "epoch": 3.4979248046875e-05, "step": 22924, "training_step_time": 0.10547375679016113 }, { "epoch": 3.498077392578125e-05, "model_forward_time": 0.02533578872680664, "step": 22925 }, { "epoch": 3.498077392578125e-05, "step": 22925, "training_step_time": 0.11114168167114258 }, { "epoch": 3.49822998046875e-05, "model_forward_time": 0.025166034698486328, "step": 22926 }, { "epoch": 3.49822998046875e-05, "step": 22926, "training_step_time": 0.10577011108398438 }, { "epoch": 3.498382568359375e-05, "model_forward_time": 0.024971961975097656, "step": 22927 }, { "epoch": 3.498382568359375e-05, "step": 22927, "training_step_time": 0.10732793807983398 }, { "epoch": 3.49853515625e-05, "model_forward_time": 0.025249958038330078, "step": 22928 }, { "epoch": 3.49853515625e-05, "step": 22928, "training_step_time": 0.10660409927368164 }, { "epoch": 3.498687744140625e-05, "model_forward_time": 0.025095224380493164, "step": 22929 }, { "epoch": 3.498687744140625e-05, "step": 22929, "training_step_time": 0.10963749885559082 }, { "epoch": 3.49884033203125e-05, "grad_norm": 0.1738383024930954, "learning_rate": 1.443096240750571e-05, "loss": 0.0045, "step": 22930 }, { "epoch": 3.49884033203125e-05, "model_forward_time": 0.02469658851623535, "step": 22930 }, { "epoch": 3.49884033203125e-05, "step": 22930, "training_step_time": 0.10951495170593262 }, { "epoch": 3.498992919921875e-05, "model_forward_time": 0.025074243545532227, "step": 22931 }, { "epoch": 3.498992919921875e-05, "step": 22931, "training_step_time": 0.1098470687866211 }, { "epoch": 3.4991455078125e-05, "model_forward_time": 0.02485942840576172, "step": 22932 }, { "epoch": 3.4991455078125e-05, "step": 22932, "training_step_time": 0.1109466552734375 }, { "epoch": 3.499298095703125e-05, "model_forward_time": 0.024941444396972656, "step": 22933 }, { "epoch": 3.499298095703125e-05, "step": 22933, "training_step_time": 0.10934066772460938 }, { "epoch": 3.49945068359375e-05, "model_forward_time": 0.026477575302124023, "step": 22934 }, { "epoch": 3.49945068359375e-05, "step": 22934, "training_step_time": 0.1725609302520752 }, { "epoch": 3.499603271484375e-05, "model_forward_time": 0.024517536163330078, "step": 22935 }, { "epoch": 3.499603271484375e-05, "step": 22935, "training_step_time": 0.1756279468536377 }, { "epoch": 3.499755859375e-05, "model_forward_time": 0.0244600772857666, "step": 22936 }, { "epoch": 3.499755859375e-05, "step": 22936, "training_step_time": 0.13087034225463867 }, { "epoch": 3.499908447265625e-05, "model_forward_time": 0.024407148361206055, "step": 22937 }, { "epoch": 3.499908447265625e-05, "step": 22937, "training_step_time": 0.11875247955322266 }, { "epoch": 3.50006103515625e-05, "model_forward_time": 0.024937152862548828, "step": 22938 }, { "epoch": 3.50006103515625e-05, "step": 22938, "training_step_time": 0.13707780838012695 }, { "epoch": 3.500213623046875e-05, "model_forward_time": 0.02476358413696289, "step": 22939 }, { "epoch": 3.500213623046875e-05, "step": 22939, "training_step_time": 0.1017601490020752 }, { "epoch": 3.5003662109375e-05, "grad_norm": 0.18951627612113953, "learning_rate": 1.439224836308002e-05, "loss": 0.0035, "step": 22940 }, { "epoch": 3.5003662109375e-05, "model_forward_time": 0.0251309871673584, "step": 22940 }, { "epoch": 3.5003662109375e-05, "step": 22940, "training_step_time": 0.1058347225189209 }, { "epoch": 3.500518798828125e-05, "model_forward_time": 0.0251619815826416, "step": 22941 }, { "epoch": 3.500518798828125e-05, "step": 22941, "training_step_time": 0.11094164848327637 }, { "epoch": 3.50067138671875e-05, "model_forward_time": 0.025304317474365234, "step": 22942 }, { "epoch": 3.50067138671875e-05, "step": 22942, "training_step_time": 0.2035377025604248 }, { "epoch": 3.500823974609375e-05, "model_forward_time": 0.0241241455078125, "step": 22943 }, { "epoch": 3.500823974609375e-05, "step": 22943, "training_step_time": 0.13550353050231934 }, { "epoch": 3.5009765625e-05, "model_forward_time": 0.02397608757019043, "step": 22944 }, { "epoch": 3.5009765625e-05, "step": 22944, "training_step_time": 0.19083309173583984 }, { "epoch": 3.501129150390625e-05, "model_forward_time": 0.02388620376586914, "step": 22945 }, { "epoch": 3.501129150390625e-05, "step": 22945, "training_step_time": 0.11823248863220215 }, { "epoch": 3.50128173828125e-05, "model_forward_time": 0.024020910263061523, "step": 22946 }, { "epoch": 3.50128173828125e-05, "step": 22946, "training_step_time": 0.1153266429901123 }, { "epoch": 3.501434326171875e-05, "model_forward_time": 0.02592945098876953, "step": 22947 }, { "epoch": 3.501434326171875e-05, "step": 22947, "training_step_time": 0.11437845230102539 }, { "epoch": 3.5015869140625e-05, "model_forward_time": 0.024970054626464844, "step": 22948 }, { "epoch": 3.5015869140625e-05, "step": 22948, "training_step_time": 0.11403203010559082 }, { "epoch": 3.501739501953125e-05, "model_forward_time": 0.025075435638427734, "step": 22949 }, { "epoch": 3.501739501953125e-05, "step": 22949, "training_step_time": 0.1113893985748291 }, { "epoch": 3.50189208984375e-05, "grad_norm": 0.3043539822101593, "learning_rate": 1.435357758543015e-05, "loss": 0.0156, "step": 22950 }, { "epoch": 3.50189208984375e-05, "model_forward_time": 0.025061607360839844, "step": 22950 }, { "epoch": 3.50189208984375e-05, "step": 22950, "training_step_time": 0.10552120208740234 }, { "epoch": 3.502044677734375e-05, "model_forward_time": 0.025180339813232422, "step": 22951 }, { "epoch": 3.502044677734375e-05, "step": 22951, "training_step_time": 0.10758447647094727 }, { "epoch": 3.502197265625e-05, "model_forward_time": 0.025279760360717773, "step": 22952 }, { "epoch": 3.502197265625e-05, "step": 22952, "training_step_time": 0.10790395736694336 }, { "epoch": 3.502349853515625e-05, "model_forward_time": 0.02527451515197754, "step": 22953 }, { "epoch": 3.502349853515625e-05, "step": 22953, "training_step_time": 0.10855746269226074 }, { "epoch": 3.50250244140625e-05, "model_forward_time": 0.025063276290893555, "step": 22954 }, { "epoch": 3.50250244140625e-05, "step": 22954, "training_step_time": 0.10828876495361328 }, { "epoch": 3.502655029296875e-05, "model_forward_time": 0.024831771850585938, "step": 22955 }, { "epoch": 3.502655029296875e-05, "step": 22955, "training_step_time": 0.1081075668334961 }, { "epoch": 3.5028076171875e-05, "model_forward_time": 0.025151968002319336, "step": 22956 }, { "epoch": 3.5028076171875e-05, "step": 22956, "training_step_time": 0.1655285358428955 }, { "epoch": 3.502960205078125e-05, "model_forward_time": 0.02429962158203125, "step": 22957 }, { "epoch": 3.502960205078125e-05, "step": 22957, "training_step_time": 0.14154386520385742 }, { "epoch": 3.50311279296875e-05, "model_forward_time": 0.0243990421295166, "step": 22958 }, { "epoch": 3.50311279296875e-05, "step": 22958, "training_step_time": 0.1049356460571289 }, { "epoch": 3.503265380859375e-05, "model_forward_time": 0.025194644927978516, "step": 22959 }, { "epoch": 3.503265380859375e-05, "step": 22959, "training_step_time": 0.1134331226348877 }, { "epoch": 3.50341796875e-05, "grad_norm": 0.10754483938217163, "learning_rate": 1.4314950121544756e-05, "loss": 0.0034, "step": 22960 }, { "epoch": 3.50341796875e-05, "model_forward_time": 0.02427220344543457, "step": 22960 }, { "epoch": 3.50341796875e-05, "step": 22960, "training_step_time": 0.10371589660644531 }, { "epoch": 3.503570556640625e-05, "model_forward_time": 0.02450108528137207, "step": 22961 }, { "epoch": 3.503570556640625e-05, "step": 22961, "training_step_time": 0.17071270942687988 }, { "epoch": 3.50372314453125e-05, "model_forward_time": 0.02419447898864746, "step": 22962 }, { "epoch": 3.50372314453125e-05, "step": 22962, "training_step_time": 0.1515488624572754 }, { "epoch": 3.503875732421875e-05, "model_forward_time": 0.0246279239654541, "step": 22963 }, { "epoch": 3.503875732421875e-05, "step": 22963, "training_step_time": 0.11121082305908203 }, { "epoch": 3.5040283203125e-05, "model_forward_time": 0.025051116943359375, "step": 22964 }, { "epoch": 3.5040283203125e-05, "step": 22964, "training_step_time": 0.13140201568603516 }, { "epoch": 3.504180908203125e-05, "model_forward_time": 0.024945974349975586, "step": 22965 }, { "epoch": 3.504180908203125e-05, "step": 22965, "training_step_time": 0.21582770347595215 }, { "epoch": 3.50433349609375e-05, "model_forward_time": 0.024689674377441406, "step": 22966 }, { "epoch": 3.50433349609375e-05, "step": 22966, "training_step_time": 0.10262036323547363 }, { "epoch": 3.504486083984375e-05, "model_forward_time": 0.024833202362060547, "step": 22967 }, { "epoch": 3.504486083984375e-05, "step": 22967, "training_step_time": 0.10364842414855957 }, { "epoch": 3.504638671875e-05, "model_forward_time": 0.02597808837890625, "step": 22968 }, { "epoch": 3.504638671875e-05, "step": 22968, "training_step_time": 0.10544991493225098 }, { "epoch": 3.504791259765625e-05, "model_forward_time": 0.025086164474487305, "step": 22969 }, { "epoch": 3.504791259765625e-05, "step": 22969, "training_step_time": 0.1083533763885498 }, { "epoch": 3.50494384765625e-05, "grad_norm": 0.10444493591785431, "learning_rate": 1.4276366018359844e-05, "loss": 0.0038, "step": 22970 }, { "epoch": 3.50494384765625e-05, "model_forward_time": 0.024846792221069336, "step": 22970 }, { "epoch": 3.50494384765625e-05, "step": 22970, "training_step_time": 0.1086270809173584 }, { "epoch": 3.505096435546875e-05, "model_forward_time": 0.025808334350585938, "step": 22971 }, { "epoch": 3.505096435546875e-05, "step": 22971, "training_step_time": 0.1075587272644043 }, { "epoch": 3.5052490234375e-05, "model_forward_time": 0.02546072006225586, "step": 22972 }, { "epoch": 3.5052490234375e-05, "step": 22972, "training_step_time": 0.10783171653747559 }, { "epoch": 3.505401611328125e-05, "model_forward_time": 0.026033401489257812, "step": 22973 }, { "epoch": 3.505401611328125e-05, "step": 22973, "training_step_time": 0.11246109008789062 }, { "epoch": 3.50555419921875e-05, "model_forward_time": 0.025255203247070312, "step": 22974 }, { "epoch": 3.50555419921875e-05, "step": 22974, "training_step_time": 0.10934209823608398 }, { "epoch": 3.505706787109375e-05, "model_forward_time": 0.02425384521484375, "step": 22975 }, { "epoch": 3.505706787109375e-05, "step": 22975, "training_step_time": 0.10840249061584473 }, { "epoch": 3.505859375e-05, "model_forward_time": 0.025038480758666992, "step": 22976 }, { "epoch": 3.505859375e-05, "step": 22976, "training_step_time": 0.10570311546325684 }, { "epoch": 3.506011962890625e-05, "model_forward_time": 0.025494098663330078, "step": 22977 }, { "epoch": 3.506011962890625e-05, "step": 22977, "training_step_time": 0.16730833053588867 }, { "epoch": 3.50616455078125e-05, "model_forward_time": 0.02465510368347168, "step": 22978 }, { "epoch": 3.50616455078125e-05, "step": 22978, "training_step_time": 0.2264251708984375 }, { "epoch": 3.506317138671875e-05, "model_forward_time": 0.02477860450744629, "step": 22979 }, { "epoch": 3.506317138671875e-05, "step": 22979, "training_step_time": 0.11684513092041016 }, { "epoch": 3.5064697265625e-05, "grad_norm": 0.10280394554138184, "learning_rate": 1.4237825322758736e-05, "loss": 0.0067, "step": 22980 }, { "epoch": 3.5064697265625e-05, "model_forward_time": 0.02480792999267578, "step": 22980 }, { "epoch": 3.5064697265625e-05, "step": 22980, "training_step_time": 0.1286776065826416 }, { "epoch": 3.506622314453125e-05, "model_forward_time": 0.02496623992919922, "step": 22981 }, { "epoch": 3.506622314453125e-05, "step": 22981, "training_step_time": 0.11635470390319824 }, { "epoch": 3.50677490234375e-05, "model_forward_time": 0.02700185775756836, "step": 22982 }, { "epoch": 3.50677490234375e-05, "step": 22982, "training_step_time": 0.12810301780700684 }, { "epoch": 3.506927490234375e-05, "model_forward_time": 0.0252227783203125, "step": 22983 }, { "epoch": 3.506927490234375e-05, "step": 22983, "training_step_time": 0.1192173957824707 }, { "epoch": 3.507080078125e-05, "model_forward_time": 0.026024341583251953, "step": 22984 }, { "epoch": 3.507080078125e-05, "step": 22984, "training_step_time": 0.15346455574035645 }, { "epoch": 3.507232666015625e-05, "model_forward_time": 0.024796485900878906, "step": 22985 }, { "epoch": 3.507232666015625e-05, "step": 22985, "training_step_time": 0.10400652885437012 }, { "epoch": 3.50738525390625e-05, "model_forward_time": 0.024823665618896484, "step": 22986 }, { "epoch": 3.50738525390625e-05, "step": 22986, "training_step_time": 0.1661064624786377 }, { "epoch": 3.507537841796875e-05, "model_forward_time": 0.02460503578186035, "step": 22987 }, { "epoch": 3.507537841796875e-05, "step": 22987, "training_step_time": 0.1453406810760498 }, { "epoch": 3.5076904296875e-05, "model_forward_time": 0.02445054054260254, "step": 22988 }, { "epoch": 3.5076904296875e-05, "step": 22988, "training_step_time": 0.12819671630859375 }, { "epoch": 3.507843017578125e-05, "model_forward_time": 0.02554774284362793, "step": 22989 }, { "epoch": 3.507843017578125e-05, "step": 22989, "training_step_time": 0.18590402603149414 }, { "epoch": 3.50799560546875e-05, "grad_norm": 0.14856906235218048, "learning_rate": 1.4199328081572e-05, "loss": 0.0086, "step": 22990 }, { "epoch": 3.50799560546875e-05, "model_forward_time": 0.024710655212402344, "step": 22990 }, { "epoch": 3.50799560546875e-05, "step": 22990, "training_step_time": 0.10460996627807617 }, { "epoch": 3.508148193359375e-05, "model_forward_time": 0.024793624877929688, "step": 22991 }, { "epoch": 3.508148193359375e-05, "step": 22991, "training_step_time": 0.10935020446777344 }, { "epoch": 3.50830078125e-05, "model_forward_time": 0.025073528289794922, "step": 22992 }, { "epoch": 3.50830078125e-05, "step": 22992, "training_step_time": 0.11066222190856934 }, { "epoch": 3.508453369140625e-05, "model_forward_time": 0.024153947830200195, "step": 22993 }, { "epoch": 3.508453369140625e-05, "step": 22993, "training_step_time": 0.11229610443115234 }, { "epoch": 3.50860595703125e-05, "model_forward_time": 0.024278879165649414, "step": 22994 }, { "epoch": 3.50860595703125e-05, "step": 22994, "training_step_time": 0.10435652732849121 }, { "epoch": 3.508758544921875e-05, "model_forward_time": 0.024259567260742188, "step": 22995 }, { "epoch": 3.508758544921875e-05, "step": 22995, "training_step_time": 0.10274624824523926 }, { "epoch": 3.5089111328125e-05, "model_forward_time": 0.025200843811035156, "step": 22996 }, { "epoch": 3.5089111328125e-05, "step": 22996, "training_step_time": 0.11021280288696289 }, { "epoch": 3.509063720703125e-05, "model_forward_time": 0.025528669357299805, "step": 22997 }, { "epoch": 3.509063720703125e-05, "step": 22997, "training_step_time": 0.10526347160339355 }, { "epoch": 3.50921630859375e-05, "model_forward_time": 0.024500370025634766, "step": 22998 }, { "epoch": 3.50921630859375e-05, "step": 22998, "training_step_time": 0.10307455062866211 }, { "epoch": 3.509368896484375e-05, "model_forward_time": 0.027987241744995117, "step": 22999 }, { "epoch": 3.509368896484375e-05, "step": 22999, "training_step_time": 0.10934042930603027 }, { "epoch": 3.509521484375e-05, "grad_norm": 0.25646406412124634, "learning_rate": 1.4160874341577446e-05, "loss": 0.0046, "step": 23000 }, { "epoch": 3.509521484375e-05, "model_forward_time": 0.02539801597595215, "step": 23000 }, { "epoch": 3.509521484375e-05, "step": 23000, "training_step_time": 0.0993657112121582 }, { "epoch": 3.509674072265625e-05, "model_forward_time": 0.02325606346130371, "step": 23001 }, { "epoch": 3.509674072265625e-05, "step": 23001, "training_step_time": 0.09928417205810547 }, { "epoch": 3.50982666015625e-05, "model_forward_time": 0.02511000633239746, "step": 23002 }, { "epoch": 3.50982666015625e-05, "step": 23002, "training_step_time": 0.1422569751739502 }, { "epoch": 3.509979248046875e-05, "model_forward_time": 0.025448083877563477, "step": 23003 }, { "epoch": 3.509979248046875e-05, "step": 23003, "training_step_time": 0.10786032676696777 }, { "epoch": 3.5101318359375e-05, "model_forward_time": 0.02525806427001953, "step": 23004 }, { "epoch": 3.5101318359375e-05, "step": 23004, "training_step_time": 0.1040186882019043 }, { "epoch": 3.510284423828125e-05, "model_forward_time": 0.02498149871826172, "step": 23005 }, { "epoch": 3.510284423828125e-05, "step": 23005, "training_step_time": 0.1151738166809082 }, { "epoch": 3.51043701171875e-05, "model_forward_time": 0.025829315185546875, "step": 23006 }, { "epoch": 3.51043701171875e-05, "step": 23006, "training_step_time": 0.1173093318939209 }, { "epoch": 3.510589599609375e-05, "model_forward_time": 0.025655269622802734, "step": 23007 }, { "epoch": 3.510589599609375e-05, "step": 23007, "training_step_time": 0.18257522583007812 }, { "epoch": 3.5107421875e-05, "model_forward_time": 0.02471780776977539, "step": 23008 }, { "epoch": 3.5107421875e-05, "step": 23008, "training_step_time": 0.12736892700195312 }, { "epoch": 3.510894775390625e-05, "model_forward_time": 0.0252838134765625, "step": 23009 }, { "epoch": 3.510894775390625e-05, "step": 23009, "training_step_time": 0.16756510734558105 }, { "epoch": 3.51104736328125e-05, "grad_norm": 0.09030667692422867, "learning_rate": 1.412246414949997e-05, "loss": 0.0045, "step": 23010 }, { "epoch": 3.51104736328125e-05, "model_forward_time": 0.0251157283782959, "step": 23010 }, { "epoch": 3.51104736328125e-05, "step": 23010, "training_step_time": 0.10673999786376953 }, { "epoch": 3.511199951171875e-05, "model_forward_time": 0.024898529052734375, "step": 23011 }, { "epoch": 3.511199951171875e-05, "step": 23011, "training_step_time": 0.18826651573181152 }, { "epoch": 3.5113525390625e-05, "model_forward_time": 0.024615049362182617, "step": 23012 }, { "epoch": 3.5113525390625e-05, "step": 23012, "training_step_time": 0.10674023628234863 }, { "epoch": 3.511505126953125e-05, "model_forward_time": 0.024814844131469727, "step": 23013 }, { "epoch": 3.511505126953125e-05, "step": 23013, "training_step_time": 0.10528326034545898 }, { "epoch": 3.51165771484375e-05, "model_forward_time": 0.025429725646972656, "step": 23014 }, { "epoch": 3.51165771484375e-05, "step": 23014, "training_step_time": 0.1090695858001709 }, { "epoch": 3.511810302734375e-05, "model_forward_time": 0.025153160095214844, "step": 23015 }, { "epoch": 3.511810302734375e-05, "step": 23015, "training_step_time": 0.10849857330322266 }, { "epoch": 3.511962890625e-05, "model_forward_time": 0.02527451515197754, "step": 23016 }, { "epoch": 3.511962890625e-05, "step": 23016, "training_step_time": 0.10800957679748535 }, { "epoch": 3.512115478515625e-05, "model_forward_time": 0.02562713623046875, "step": 23017 }, { "epoch": 3.512115478515625e-05, "step": 23017, "training_step_time": 0.10434126853942871 }, { "epoch": 3.51226806640625e-05, "model_forward_time": 0.0255887508392334, "step": 23018 }, { "epoch": 3.51226806640625e-05, "step": 23018, "training_step_time": 0.10509490966796875 }, { "epoch": 3.512420654296875e-05, "model_forward_time": 0.025323867797851562, "step": 23019 }, { "epoch": 3.512420654296875e-05, "step": 23019, "training_step_time": 0.11030936241149902 }, { "epoch": 3.5125732421875e-05, "grad_norm": 0.21688659489154816, "learning_rate": 1.4084097552011571e-05, "loss": 0.0045, "step": 23020 }, { "epoch": 3.5125732421875e-05, "model_forward_time": 0.023921966552734375, "step": 23020 }, { "epoch": 3.5125732421875e-05, "step": 23020, "training_step_time": 0.10755419731140137 }, { "epoch": 3.512725830078125e-05, "model_forward_time": 0.024764537811279297, "step": 23021 }, { "epoch": 3.512725830078125e-05, "step": 23021, "training_step_time": 0.10832977294921875 }, { "epoch": 3.51287841796875e-05, "model_forward_time": 0.025289535522460938, "step": 23022 }, { "epoch": 3.51287841796875e-05, "step": 23022, "training_step_time": 0.10703611373901367 }, { "epoch": 3.513031005859375e-05, "model_forward_time": 0.025289058685302734, "step": 23023 }, { "epoch": 3.513031005859375e-05, "step": 23023, "training_step_time": 0.10833477973937988 }, { "epoch": 3.51318359375e-05, "model_forward_time": 0.025257349014282227, "step": 23024 }, { "epoch": 3.51318359375e-05, "step": 23024, "training_step_time": 0.1061863899230957 }, { "epoch": 3.513336181640625e-05, "model_forward_time": 0.024889469146728516, "step": 23025 }, { "epoch": 3.513336181640625e-05, "step": 23025, "training_step_time": 0.13547468185424805 }, { "epoch": 3.51348876953125e-05, "model_forward_time": 0.025437593460083008, "step": 23026 }, { "epoch": 3.51348876953125e-05, "step": 23026, "training_step_time": 0.10654854774475098 }, { "epoch": 3.513641357421875e-05, "model_forward_time": 0.025429964065551758, "step": 23027 }, { "epoch": 3.513641357421875e-05, "step": 23027, "training_step_time": 0.1110081672668457 }, { "epoch": 3.5137939453125e-05, "model_forward_time": 0.02485942840576172, "step": 23028 }, { "epoch": 3.5137939453125e-05, "step": 23028, "training_step_time": 0.131011962890625 }, { "epoch": 3.513946533203125e-05, "model_forward_time": 0.02566695213317871, "step": 23029 }, { "epoch": 3.513946533203125e-05, "step": 23029, "training_step_time": 0.11715555191040039 }, { "epoch": 3.51409912109375e-05, "grad_norm": 0.1771112084388733, "learning_rate": 1.4045774595731315e-05, "loss": 0.0084, "step": 23030 }, { "epoch": 3.51409912109375e-05, "model_forward_time": 0.02570199966430664, "step": 23030 }, { "epoch": 3.51409912109375e-05, "step": 23030, "training_step_time": 0.11069273948669434 }, { "epoch": 3.514251708984375e-05, "model_forward_time": 0.025145530700683594, "step": 23031 }, { "epoch": 3.514251708984375e-05, "step": 23031, "training_step_time": 0.1827259063720703 }, { "epoch": 3.514404296875e-05, "model_forward_time": 0.024221181869506836, "step": 23032 }, { "epoch": 3.514404296875e-05, "step": 23032, "training_step_time": 0.11976385116577148 }, { "epoch": 3.514556884765625e-05, "model_forward_time": 0.024445056915283203, "step": 23033 }, { "epoch": 3.514556884765625e-05, "step": 23033, "training_step_time": 0.20771121978759766 }, { "epoch": 3.51470947265625e-05, "model_forward_time": 0.024549007415771484, "step": 23034 }, { "epoch": 3.51470947265625e-05, "step": 23034, "training_step_time": 0.10373735427856445 }, { "epoch": 3.514862060546875e-05, "model_forward_time": 0.0247650146484375, "step": 23035 }, { "epoch": 3.514862060546875e-05, "step": 23035, "training_step_time": 0.11725425720214844 }, { "epoch": 3.5150146484375e-05, "model_forward_time": 0.0253908634185791, "step": 23036 }, { "epoch": 3.5150146484375e-05, "step": 23036, "training_step_time": 0.16239690780639648 }, { "epoch": 3.515167236328125e-05, "model_forward_time": 0.02458786964416504, "step": 23037 }, { "epoch": 3.515167236328125e-05, "step": 23037, "training_step_time": 0.1058499813079834 }, { "epoch": 3.51531982421875e-05, "model_forward_time": 0.02828216552734375, "step": 23038 }, { "epoch": 3.51531982421875e-05, "step": 23038, "training_step_time": 0.11051392555236816 }, { "epoch": 3.515472412109375e-05, "model_forward_time": 0.025793075561523438, "step": 23039 }, { "epoch": 3.515472412109375e-05, "step": 23039, "training_step_time": 0.10580945014953613 }, { "epoch": 3.515625e-05, "grad_norm": 0.3599991500377655, "learning_rate": 1.4007495327225162e-05, "loss": 0.0069, "step": 23040 }, { "epoch": 3.515625e-05, "model_forward_time": 0.025115013122558594, "step": 23040 }, { "epoch": 3.515625e-05, "step": 23040, "training_step_time": 0.10543584823608398 }, { "epoch": 3.515777587890625e-05, "model_forward_time": 0.024872779846191406, "step": 23041 }, { "epoch": 3.515777587890625e-05, "step": 23041, "training_step_time": 0.10440373420715332 }, { "epoch": 3.51593017578125e-05, "model_forward_time": 0.02507781982421875, "step": 23042 }, { "epoch": 3.51593017578125e-05, "step": 23042, "training_step_time": 0.10541987419128418 }, { "epoch": 3.516082763671875e-05, "model_forward_time": 0.025162696838378906, "step": 23043 }, { "epoch": 3.516082763671875e-05, "step": 23043, "training_step_time": 0.11371517181396484 }, { "epoch": 3.5162353515625e-05, "model_forward_time": 0.025356531143188477, "step": 23044 }, { "epoch": 3.5162353515625e-05, "step": 23044, "training_step_time": 0.10441851615905762 }, { "epoch": 3.516387939453125e-05, "model_forward_time": 0.025207042694091797, "step": 23045 }, { "epoch": 3.516387939453125e-05, "step": 23045, "training_step_time": 0.10316824913024902 }, { "epoch": 3.51654052734375e-05, "model_forward_time": 0.024935245513916016, "step": 23046 }, { "epoch": 3.51654052734375e-05, "step": 23046, "training_step_time": 0.10908913612365723 }, { "epoch": 3.516693115234375e-05, "model_forward_time": 0.024891376495361328, "step": 23047 }, { "epoch": 3.516693115234375e-05, "step": 23047, "training_step_time": 0.10562610626220703 }, { "epoch": 3.516845703125e-05, "model_forward_time": 0.024912118911743164, "step": 23048 }, { "epoch": 3.516845703125e-05, "step": 23048, "training_step_time": 0.12527012825012207 }, { "epoch": 3.516998291015625e-05, "model_forward_time": 0.024872303009033203, "step": 23049 }, { "epoch": 3.516998291015625e-05, "step": 23049, "training_step_time": 0.14543557167053223 }, { "epoch": 3.51715087890625e-05, "grad_norm": 0.32509398460388184, "learning_rate": 1.3969259793006079e-05, "loss": 0.0124, "step": 23050 }, { "epoch": 3.51715087890625e-05, "model_forward_time": 0.024800777435302734, "step": 23050 }, { "epoch": 3.51715087890625e-05, "step": 23050, "training_step_time": 0.10483479499816895 }, { "epoch": 3.517303466796875e-05, "model_forward_time": 0.024719953536987305, "step": 23051 }, { "epoch": 3.517303466796875e-05, "step": 23051, "training_step_time": 0.10918498039245605 }, { "epoch": 3.5174560546875e-05, "model_forward_time": 0.025378704071044922, "step": 23052 }, { "epoch": 3.5174560546875e-05, "step": 23052, "training_step_time": 0.1071021556854248 }, { "epoch": 3.517608642578125e-05, "model_forward_time": 0.02514934539794922, "step": 23053 }, { "epoch": 3.517608642578125e-05, "step": 23053, "training_step_time": 0.10806655883789062 }, { "epoch": 3.51776123046875e-05, "model_forward_time": 0.02511119842529297, "step": 23054 }, { "epoch": 3.51776123046875e-05, "step": 23054, "training_step_time": 0.15474462509155273 }, { "epoch": 3.517913818359375e-05, "model_forward_time": 0.028865814208984375, "step": 23055 }, { "epoch": 3.517913818359375e-05, "step": 23055, "training_step_time": 0.1471726894378662 }, { "epoch": 3.51806640625e-05, "model_forward_time": 0.024655818939208984, "step": 23056 }, { "epoch": 3.51806640625e-05, "step": 23056, "training_step_time": 0.11116456985473633 }, { "epoch": 3.518218994140625e-05, "model_forward_time": 0.027028322219848633, "step": 23057 }, { "epoch": 3.518218994140625e-05, "step": 23057, "training_step_time": 0.1247415542602539 }, { "epoch": 3.51837158203125e-05, "model_forward_time": 0.025287151336669922, "step": 23058 }, { "epoch": 3.51837158203125e-05, "step": 23058, "training_step_time": 0.17093443870544434 }, { "epoch": 3.518524169921875e-05, "model_forward_time": 0.024871110916137695, "step": 23059 }, { "epoch": 3.518524169921875e-05, "step": 23059, "training_step_time": 0.10534405708312988 }, { "epoch": 3.5186767578125e-05, "grad_norm": 0.27006369829177856, "learning_rate": 1.3931068039533823e-05, "loss": 0.0061, "step": 23060 }, { "epoch": 3.5186767578125e-05, "model_forward_time": 0.024516582489013672, "step": 23060 }, { "epoch": 3.5186767578125e-05, "step": 23060, "training_step_time": 0.10267424583435059 }, { "epoch": 3.518829345703125e-05, "model_forward_time": 0.0251312255859375, "step": 23061 }, { "epoch": 3.518829345703125e-05, "step": 23061, "training_step_time": 0.10782456398010254 }, { "epoch": 3.51898193359375e-05, "model_forward_time": 0.02488994598388672, "step": 23062 }, { "epoch": 3.51898193359375e-05, "step": 23062, "training_step_time": 0.10932207107543945 }, { "epoch": 3.519134521484375e-05, "model_forward_time": 0.024297475814819336, "step": 23063 }, { "epoch": 3.519134521484375e-05, "step": 23063, "training_step_time": 0.10717177391052246 }, { "epoch": 3.519287109375e-05, "model_forward_time": 0.025592803955078125, "step": 23064 }, { "epoch": 3.519287109375e-05, "step": 23064, "training_step_time": 0.10600733757019043 }, { "epoch": 3.519439697265625e-05, "model_forward_time": 0.024846315383911133, "step": 23065 }, { "epoch": 3.519439697265625e-05, "step": 23065, "training_step_time": 0.1053462028503418 }, { "epoch": 3.51959228515625e-05, "model_forward_time": 0.024908065795898438, "step": 23066 }, { "epoch": 3.51959228515625e-05, "step": 23066, "training_step_time": 0.10908961296081543 }, { "epoch": 3.519744873046875e-05, "model_forward_time": 0.027004003524780273, "step": 23067 }, { "epoch": 3.519744873046875e-05, "step": 23067, "training_step_time": 0.10745906829833984 }, { "epoch": 3.5198974609375e-05, "model_forward_time": 0.025081872940063477, "step": 23068 }, { "epoch": 3.5198974609375e-05, "step": 23068, "training_step_time": 0.10512280464172363 }, { "epoch": 3.520050048828125e-05, "model_forward_time": 0.025026798248291016, "step": 23069 }, { "epoch": 3.520050048828125e-05, "step": 23069, "training_step_time": 0.11208939552307129 }, { "epoch": 3.52020263671875e-05, "grad_norm": 0.3704567849636078, "learning_rate": 1.389292011321498e-05, "loss": 0.0043, "step": 23070 }, { "epoch": 3.52020263671875e-05, "model_forward_time": 0.024922609329223633, "step": 23070 }, { "epoch": 3.52020263671875e-05, "step": 23070, "training_step_time": 0.10541915893554688 }, { "epoch": 3.520355224609375e-05, "model_forward_time": 0.024843931198120117, "step": 23071 }, { "epoch": 3.520355224609375e-05, "step": 23071, "training_step_time": 0.10347819328308105 }, { "epoch": 3.5205078125e-05, "model_forward_time": 0.024923086166381836, "step": 23072 }, { "epoch": 3.5205078125e-05, "step": 23072, "training_step_time": 0.20636868476867676 }, { "epoch": 3.520660400390625e-05, "model_forward_time": 0.024051427841186523, "step": 23073 }, { "epoch": 3.520660400390625e-05, "step": 23073, "training_step_time": 0.10555410385131836 }, { "epoch": 3.52081298828125e-05, "model_forward_time": 0.02428746223449707, "step": 23074 }, { "epoch": 3.52081298828125e-05, "step": 23074, "training_step_time": 0.1076200008392334 }, { "epoch": 3.520965576171875e-05, "model_forward_time": 0.02544403076171875, "step": 23075 }, { "epoch": 3.520965576171875e-05, "step": 23075, "training_step_time": 0.13357901573181152 }, { "epoch": 3.5211181640625e-05, "model_forward_time": 0.024820327758789062, "step": 23076 }, { "epoch": 3.5211181640625e-05, "step": 23076, "training_step_time": 0.12658071517944336 }, { "epoch": 3.521270751953125e-05, "model_forward_time": 0.024715423583984375, "step": 23077 }, { "epoch": 3.521270751953125e-05, "step": 23077, "training_step_time": 0.10562872886657715 }, { "epoch": 3.52142333984375e-05, "model_forward_time": 0.024915218353271484, "step": 23078 }, { "epoch": 3.52142333984375e-05, "step": 23078, "training_step_time": 0.12357902526855469 }, { "epoch": 3.521575927734375e-05, "model_forward_time": 0.02485346794128418, "step": 23079 }, { "epoch": 3.521575927734375e-05, "step": 23079, "training_step_time": 0.10692644119262695 }, { "epoch": 3.521728515625e-05, "grad_norm": 0.35327544808387756, "learning_rate": 1.385481606040287e-05, "loss": 0.0061, "step": 23080 }, { "epoch": 3.521728515625e-05, "model_forward_time": 0.024805307388305664, "step": 23080 }, { "epoch": 3.521728515625e-05, "step": 23080, "training_step_time": 0.20168209075927734 }, { "epoch": 3.521881103515625e-05, "model_forward_time": 0.024309635162353516, "step": 23081 }, { "epoch": 3.521881103515625e-05, "step": 23081, "training_step_time": 0.10931873321533203 }, { "epoch": 3.52203369140625e-05, "model_forward_time": 0.0267789363861084, "step": 23082 }, { "epoch": 3.52203369140625e-05, "step": 23082, "training_step_time": 0.12189364433288574 }, { "epoch": 3.522186279296875e-05, "model_forward_time": 0.025970458984375, "step": 23083 }, { "epoch": 3.522186279296875e-05, "step": 23083, "training_step_time": 0.18715143203735352 }, { "epoch": 3.5223388671875e-05, "model_forward_time": 0.02407097816467285, "step": 23084 }, { "epoch": 3.5223388671875e-05, "step": 23084, "training_step_time": 0.10316228866577148 }, { "epoch": 3.522491455078125e-05, "model_forward_time": 0.025057315826416016, "step": 23085 }, { "epoch": 3.522491455078125e-05, "step": 23085, "training_step_time": 0.10377860069274902 }, { "epoch": 3.52264404296875e-05, "model_forward_time": 0.02531886100769043, "step": 23086 }, { "epoch": 3.52264404296875e-05, "step": 23086, "training_step_time": 0.10595059394836426 }, { "epoch": 3.522796630859375e-05, "model_forward_time": 0.02520012855529785, "step": 23087 }, { "epoch": 3.522796630859375e-05, "step": 23087, "training_step_time": 0.10639595985412598 }, { "epoch": 3.52294921875e-05, "model_forward_time": 0.025005817413330078, "step": 23088 }, { "epoch": 3.52294921875e-05, "step": 23088, "training_step_time": 0.10934972763061523 }, { "epoch": 3.523101806640625e-05, "model_forward_time": 0.025043010711669922, "step": 23089 }, { "epoch": 3.523101806640625e-05, "step": 23089, "training_step_time": 0.10528826713562012 }, { "epoch": 3.52325439453125e-05, "grad_norm": 0.1638215035200119, "learning_rate": 1.3816755927397502e-05, "loss": 0.0049, "step": 23090 }, { "epoch": 3.52325439453125e-05, "model_forward_time": 0.026592016220092773, "step": 23090 }, { "epoch": 3.52325439453125e-05, "step": 23090, "training_step_time": 0.1082158088684082 }, { "epoch": 3.523406982421875e-05, "model_forward_time": 0.025236129760742188, "step": 23091 }, { "epoch": 3.523406982421875e-05, "step": 23091, "training_step_time": 0.10436344146728516 }, { "epoch": 3.5235595703125e-05, "model_forward_time": 0.025329113006591797, "step": 23092 }, { "epoch": 3.5235595703125e-05, "step": 23092, "training_step_time": 0.13971710205078125 }, { "epoch": 3.523712158203125e-05, "model_forward_time": 0.025399446487426758, "step": 23093 }, { "epoch": 3.523712158203125e-05, "step": 23093, "training_step_time": 0.16233420372009277 }, { "epoch": 3.52386474609375e-05, "model_forward_time": 0.024213075637817383, "step": 23094 }, { "epoch": 3.52386474609375e-05, "step": 23094, "training_step_time": 0.15911197662353516 }, { "epoch": 3.524017333984375e-05, "model_forward_time": 0.024246692657470703, "step": 23095 }, { "epoch": 3.524017333984375e-05, "step": 23095, "training_step_time": 0.14734292030334473 }, { "epoch": 3.524169921875e-05, "model_forward_time": 0.02434086799621582, "step": 23096 }, { "epoch": 3.524169921875e-05, "step": 23096, "training_step_time": 0.19723749160766602 }, { "epoch": 3.524322509765625e-05, "model_forward_time": 0.024396181106567383, "step": 23097 }, { "epoch": 3.524322509765625e-05, "step": 23097, "training_step_time": 0.12333488464355469 }, { "epoch": 3.52447509765625e-05, "model_forward_time": 0.025194406509399414, "step": 23098 }, { "epoch": 3.52447509765625e-05, "step": 23098, "training_step_time": 0.1395728588104248 }, { "epoch": 3.524627685546875e-05, "model_forward_time": 0.02460932731628418, "step": 23099 }, { "epoch": 3.524627685546875e-05, "step": 23099, "training_step_time": 0.1540238857269287 }, { "epoch": 3.5247802734375e-05, "grad_norm": 0.16723492741584778, "learning_rate": 1.3778739760445552e-05, "loss": 0.0095, "step": 23100 }, { "epoch": 3.5247802734375e-05, "model_forward_time": 0.026525259017944336, "step": 23100 }, { "epoch": 3.5247802734375e-05, "step": 23100, "training_step_time": 0.1522533893585205 }, { "epoch": 3.524932861328125e-05, "model_forward_time": 0.0241241455078125, "step": 23101 }, { "epoch": 3.524932861328125e-05, "step": 23101, "training_step_time": 0.11867475509643555 }, { "epoch": 3.52508544921875e-05, "model_forward_time": 0.025692462921142578, "step": 23102 }, { "epoch": 3.52508544921875e-05, "step": 23102, "training_step_time": 0.11806654930114746 }, { "epoch": 3.525238037109375e-05, "model_forward_time": 0.027798891067504883, "step": 23103 }, { "epoch": 3.525238037109375e-05, "step": 23103, "training_step_time": 0.1245884895324707 }, { "epoch": 3.525390625e-05, "model_forward_time": 0.025104522705078125, "step": 23104 }, { "epoch": 3.525390625e-05, "step": 23104, "training_step_time": 0.10880351066589355 }, { "epoch": 3.525543212890625e-05, "model_forward_time": 0.02618575096130371, "step": 23105 }, { "epoch": 3.525543212890625e-05, "step": 23105, "training_step_time": 0.11228346824645996 }, { "epoch": 3.52569580078125e-05, "model_forward_time": 0.025060653686523438, "step": 23106 }, { "epoch": 3.52569580078125e-05, "step": 23106, "training_step_time": 0.10997462272644043 }, { "epoch": 3.525848388671875e-05, "model_forward_time": 0.024953842163085938, "step": 23107 }, { "epoch": 3.525848388671875e-05, "step": 23107, "training_step_time": 0.1102900505065918 }, { "epoch": 3.5260009765625e-05, "model_forward_time": 0.02556014060974121, "step": 23108 }, { "epoch": 3.5260009765625e-05, "step": 23108, "training_step_time": 0.10829019546508789 }, { "epoch": 3.526153564453125e-05, "model_forward_time": 0.024992704391479492, "step": 23109 }, { "epoch": 3.526153564453125e-05, "step": 23109, "training_step_time": 0.10482978820800781 }, { "epoch": 3.52630615234375e-05, "grad_norm": 0.12765149772167206, "learning_rate": 1.374076760574024e-05, "loss": 0.01, "step": 23110 }, { "epoch": 3.52630615234375e-05, "model_forward_time": 0.025246143341064453, "step": 23110 }, { "epoch": 3.52630615234375e-05, "step": 23110, "training_step_time": 0.10931563377380371 }, { "epoch": 3.526458740234375e-05, "model_forward_time": 0.025652408599853516, "step": 23111 }, { "epoch": 3.526458740234375e-05, "step": 23111, "training_step_time": 0.10915565490722656 }, { "epoch": 3.526611328125e-05, "model_forward_time": 0.025240182876586914, "step": 23112 }, { "epoch": 3.526611328125e-05, "step": 23112, "training_step_time": 0.10574889183044434 }, { "epoch": 3.526763916015625e-05, "model_forward_time": 0.024881601333618164, "step": 23113 }, { "epoch": 3.526763916015625e-05, "step": 23113, "training_step_time": 0.10488080978393555 }, { "epoch": 3.52691650390625e-05, "model_forward_time": 0.02550220489501953, "step": 23114 }, { "epoch": 3.52691650390625e-05, "step": 23114, "training_step_time": 0.10786986351013184 }, { "epoch": 3.527069091796875e-05, "model_forward_time": 0.025188684463500977, "step": 23115 }, { "epoch": 3.527069091796875e-05, "step": 23115, "training_step_time": 0.1053304672241211 }, { "epoch": 3.5272216796875e-05, "model_forward_time": 0.025440454483032227, "step": 23116 }, { "epoch": 3.5272216796875e-05, "step": 23116, "training_step_time": 0.10437893867492676 }, { "epoch": 3.527374267578125e-05, "model_forward_time": 0.025090932846069336, "step": 23117 }, { "epoch": 3.527374267578125e-05, "step": 23117, "training_step_time": 0.10689616203308105 }, { "epoch": 3.52752685546875e-05, "model_forward_time": 0.025070667266845703, "step": 23118 }, { "epoch": 3.52752685546875e-05, "step": 23118, "training_step_time": 0.1650702953338623 }, { "epoch": 3.527679443359375e-05, "model_forward_time": 0.024255752563476562, "step": 23119 }, { "epoch": 3.527679443359375e-05, "step": 23119, "training_step_time": 0.10512971878051758 }, { "epoch": 3.52783203125e-05, "grad_norm": 0.18373161554336548, "learning_rate": 1.3702839509421305e-05, "loss": 0.007, "step": 23120 }, { "epoch": 3.52783203125e-05, "model_forward_time": 0.02522873878479004, "step": 23120 }, { "epoch": 3.52783203125e-05, "step": 23120, "training_step_time": 0.10798978805541992 }, { "epoch": 3.527984619140625e-05, "model_forward_time": 0.025073528289794922, "step": 23121 }, { "epoch": 3.527984619140625e-05, "step": 23121, "training_step_time": 0.12377691268920898 }, { "epoch": 3.52813720703125e-05, "model_forward_time": 0.02470088005065918, "step": 23122 }, { "epoch": 3.52813720703125e-05, "step": 23122, "training_step_time": 0.13213658332824707 }, { "epoch": 3.528289794921875e-05, "model_forward_time": 0.02543163299560547, "step": 23123 }, { "epoch": 3.528289794921875e-05, "step": 23123, "training_step_time": 0.15021991729736328 }, { "epoch": 3.5284423828125e-05, "model_forward_time": 0.024410247802734375, "step": 23124 }, { "epoch": 3.5284423828125e-05, "step": 23124, "training_step_time": 0.17357683181762695 }, { "epoch": 3.528594970703125e-05, "model_forward_time": 0.024524688720703125, "step": 23125 }, { "epoch": 3.528594970703125e-05, "step": 23125, "training_step_time": 0.14800596237182617 }, { "epoch": 3.52874755859375e-05, "model_forward_time": 0.02429962158203125, "step": 23126 }, { "epoch": 3.52874755859375e-05, "step": 23126, "training_step_time": 0.14898681640625 }, { "epoch": 3.528900146484375e-05, "model_forward_time": 0.024982690811157227, "step": 23127 }, { "epoch": 3.528900146484375e-05, "step": 23127, "training_step_time": 0.15599393844604492 }, { "epoch": 3.529052734375e-05, "model_forward_time": 0.024388551712036133, "step": 23128 }, { "epoch": 3.529052734375e-05, "step": 23128, "training_step_time": 0.10816407203674316 }, { "epoch": 3.529205322265625e-05, "model_forward_time": 0.024954557418823242, "step": 23129 }, { "epoch": 3.529205322265625e-05, "step": 23129, "training_step_time": 0.10117244720458984 }, { "epoch": 3.52935791015625e-05, "grad_norm": 0.10926108062267303, "learning_rate": 1.3664955517574968e-05, "loss": 0.005, "step": 23130 }, { "epoch": 3.52935791015625e-05, "model_forward_time": 0.02512526512145996, "step": 23130 }, { "epoch": 3.52935791015625e-05, "step": 23130, "training_step_time": 0.10891032218933105 }, { "epoch": 3.529510498046875e-05, "model_forward_time": 0.024851083755493164, "step": 23131 }, { "epoch": 3.529510498046875e-05, "step": 23131, "training_step_time": 0.10560441017150879 }, { "epoch": 3.5296630859375e-05, "model_forward_time": 0.025130033493041992, "step": 23132 }, { "epoch": 3.5296630859375e-05, "step": 23132, "training_step_time": 0.11008644104003906 }, { "epoch": 3.529815673828125e-05, "model_forward_time": 0.024986982345581055, "step": 23133 }, { "epoch": 3.529815673828125e-05, "step": 23133, "training_step_time": 0.10787439346313477 }, { "epoch": 3.52996826171875e-05, "model_forward_time": 0.025223970413208008, "step": 23134 }, { "epoch": 3.52996826171875e-05, "step": 23134, "training_step_time": 0.10712289810180664 }, { "epoch": 3.530120849609375e-05, "model_forward_time": 0.025408506393432617, "step": 23135 }, { "epoch": 3.530120849609375e-05, "step": 23135, "training_step_time": 0.10667562484741211 }, { "epoch": 3.5302734375e-05, "model_forward_time": 0.02529120445251465, "step": 23136 }, { "epoch": 3.5302734375e-05, "step": 23136, "training_step_time": 0.10810375213623047 }, { "epoch": 3.530426025390625e-05, "model_forward_time": 0.025081872940063477, "step": 23137 }, { "epoch": 3.530426025390625e-05, "step": 23137, "training_step_time": 0.1043083667755127 }, { "epoch": 3.53057861328125e-05, "model_forward_time": 0.02501082420349121, "step": 23138 }, { "epoch": 3.53057861328125e-05, "step": 23138, "training_step_time": 0.1042017936706543 }, { "epoch": 3.530731201171875e-05, "model_forward_time": 0.02492237091064453, "step": 23139 }, { "epoch": 3.530731201171875e-05, "step": 23139, "training_step_time": 0.10530972480773926 }, { "epoch": 3.5308837890625e-05, "grad_norm": 0.107993483543396, "learning_rate": 1.3627115676233832e-05, "loss": 0.0033, "step": 23140 }, { "epoch": 3.5308837890625e-05, "model_forward_time": 0.0258634090423584, "step": 23140 }, { "epoch": 3.5308837890625e-05, "step": 23140, "training_step_time": 0.11224055290222168 }, { "epoch": 3.531036376953125e-05, "model_forward_time": 0.025231599807739258, "step": 23141 }, { "epoch": 3.531036376953125e-05, "step": 23141, "training_step_time": 0.1389141082763672 }, { "epoch": 3.53118896484375e-05, "model_forward_time": 0.025280475616455078, "step": 23142 }, { "epoch": 3.53118896484375e-05, "step": 23142, "training_step_time": 0.17558932304382324 }, { "epoch": 3.531341552734375e-05, "model_forward_time": 0.025073766708374023, "step": 23143 }, { "epoch": 3.531341552734375e-05, "step": 23143, "training_step_time": 0.1547374725341797 }, { "epoch": 3.531494140625e-05, "model_forward_time": 0.02518177032470703, "step": 23144 }, { "epoch": 3.531494140625e-05, "step": 23144, "training_step_time": 0.16141605377197266 }, { "epoch": 3.531646728515625e-05, "model_forward_time": 0.02423262596130371, "step": 23145 }, { "epoch": 3.531646728515625e-05, "step": 23145, "training_step_time": 0.17450547218322754 }, { "epoch": 3.53179931640625e-05, "model_forward_time": 0.024927854537963867, "step": 23146 }, { "epoch": 3.53179931640625e-05, "step": 23146, "training_step_time": 0.2000565528869629 }, { "epoch": 3.531951904296875e-05, "model_forward_time": 0.02409815788269043, "step": 23147 }, { "epoch": 3.531951904296875e-05, "step": 23147, "training_step_time": 0.14000868797302246 }, { "epoch": 3.5321044921875e-05, "model_forward_time": 0.02734518051147461, "step": 23148 }, { "epoch": 3.5321044921875e-05, "step": 23148, "training_step_time": 0.20338034629821777 }, { "epoch": 3.532257080078125e-05, "model_forward_time": 0.02471613883972168, "step": 23149 }, { "epoch": 3.532257080078125e-05, "step": 23149, "training_step_time": 0.11739897727966309 }, { "epoch": 3.53240966796875e-05, "grad_norm": 0.21589864790439606, "learning_rate": 1.3589320031376901e-05, "loss": 0.0193, "step": 23150 }, { "epoch": 3.53240966796875e-05, "model_forward_time": 0.023828506469726562, "step": 23150 }, { "epoch": 3.53240966796875e-05, "step": 23150, "training_step_time": 0.11547422409057617 }, { "epoch": 3.532562255859375e-05, "model_forward_time": 0.024300336837768555, "step": 23151 }, { "epoch": 3.532562255859375e-05, "step": 23151, "training_step_time": 0.1128087043762207 }, { "epoch": 3.53271484375e-05, "model_forward_time": 0.024165868759155273, "step": 23152 }, { "epoch": 3.53271484375e-05, "step": 23152, "training_step_time": 0.1094212532043457 }, { "epoch": 3.532867431640625e-05, "model_forward_time": 0.024923324584960938, "step": 23153 }, { "epoch": 3.532867431640625e-05, "step": 23153, "training_step_time": 0.10966134071350098 }, { "epoch": 3.53302001953125e-05, "model_forward_time": 0.024950504302978516, "step": 23154 }, { "epoch": 3.53302001953125e-05, "step": 23154, "training_step_time": 0.1122894287109375 }, { "epoch": 3.533172607421875e-05, "model_forward_time": 0.025178194046020508, "step": 23155 }, { "epoch": 3.533172607421875e-05, "step": 23155, "training_step_time": 0.1089625358581543 }, { "epoch": 3.5333251953125e-05, "model_forward_time": 0.025194406509399414, "step": 23156 }, { "epoch": 3.5333251953125e-05, "step": 23156, "training_step_time": 0.10573196411132812 }, { "epoch": 3.533477783203125e-05, "model_forward_time": 0.025018930435180664, "step": 23157 }, { "epoch": 3.533477783203125e-05, "step": 23157, "training_step_time": 0.10500121116638184 }, { "epoch": 3.53363037109375e-05, "model_forward_time": 0.025125503540039062, "step": 23158 }, { "epoch": 3.53363037109375e-05, "step": 23158, "training_step_time": 0.10570025444030762 }, { "epoch": 3.533782958984375e-05, "model_forward_time": 0.025163888931274414, "step": 23159 }, { "epoch": 3.533782958984375e-05, "step": 23159, "training_step_time": 0.10565829277038574 }, { "epoch": 3.533935546875e-05, "grad_norm": 0.11769426614046097, "learning_rate": 1.3551568628929434e-05, "loss": 0.0042, "step": 23160 }, { "epoch": 3.533935546875e-05, "model_forward_time": 0.025023460388183594, "step": 23160 }, { "epoch": 3.533935546875e-05, "step": 23160, "training_step_time": 0.10630416870117188 }, { "epoch": 3.534088134765625e-05, "model_forward_time": 0.025200843811035156, "step": 23161 }, { "epoch": 3.534088134765625e-05, "step": 23161, "training_step_time": 0.10595250129699707 }, { "epoch": 3.53424072265625e-05, "model_forward_time": 0.02394247055053711, "step": 23162 }, { "epoch": 3.53424072265625e-05, "step": 23162, "training_step_time": 0.10371804237365723 }, { "epoch": 3.534393310546875e-05, "model_forward_time": 0.0249176025390625, "step": 23163 }, { "epoch": 3.534393310546875e-05, "step": 23163, "training_step_time": 0.10493588447570801 }, { "epoch": 3.5345458984375e-05, "model_forward_time": 0.025281190872192383, "step": 23164 }, { "epoch": 3.5345458984375e-05, "step": 23164, "training_step_time": 0.12078738212585449 }, { "epoch": 3.534698486328125e-05, "model_forward_time": 0.02546215057373047, "step": 23165 }, { "epoch": 3.534698486328125e-05, "step": 23165, "training_step_time": 0.1128091812133789 }, { "epoch": 3.53485107421875e-05, "model_forward_time": 0.02509331703186035, "step": 23166 }, { "epoch": 3.53485107421875e-05, "step": 23166, "training_step_time": 0.1770646572113037 }, { "epoch": 3.535003662109375e-05, "model_forward_time": 0.024168014526367188, "step": 23167 }, { "epoch": 3.535003662109375e-05, "step": 23167, "training_step_time": 0.17686176300048828 }, { "epoch": 3.53515625e-05, "model_forward_time": 0.02414107322692871, "step": 23168 }, { "epoch": 3.53515625e-05, "step": 23168, "training_step_time": 0.19973278045654297 }, { "epoch": 3.535308837890625e-05, "model_forward_time": 0.024329185485839844, "step": 23169 }, { "epoch": 3.535308837890625e-05, "step": 23169, "training_step_time": 0.13905692100524902 }, { "epoch": 3.53546142578125e-05, "grad_norm": 0.08424199372529984, "learning_rate": 1.3513861514762933e-05, "loss": 0.0063, "step": 23170 }, { "epoch": 3.53546142578125e-05, "model_forward_time": 0.024341583251953125, "step": 23170 }, { "epoch": 3.53546142578125e-05, "step": 23170, "training_step_time": 0.10908222198486328 }, { "epoch": 3.535614013671875e-05, "model_forward_time": 0.02497553825378418, "step": 23171 }, { "epoch": 3.535614013671875e-05, "step": 23171, "training_step_time": 0.1777026653289795 }, { "epoch": 3.5357666015625e-05, "model_forward_time": 0.02461409568786621, "step": 23172 }, { "epoch": 3.5357666015625e-05, "step": 23172, "training_step_time": 0.12847208976745605 }, { "epoch": 3.535919189453125e-05, "model_forward_time": 0.02349257469177246, "step": 23173 }, { "epoch": 3.535919189453125e-05, "step": 23173, "training_step_time": 0.12204575538635254 }, { "epoch": 3.53607177734375e-05, "model_forward_time": 0.02443528175354004, "step": 23174 }, { "epoch": 3.53607177734375e-05, "step": 23174, "training_step_time": 0.10437226295471191 }, { "epoch": 3.536224365234375e-05, "model_forward_time": 0.02559804916381836, "step": 23175 }, { "epoch": 3.536224365234375e-05, "step": 23175, "training_step_time": 0.10947871208190918 }, { "epoch": 3.536376953125e-05, "model_forward_time": 0.02488088607788086, "step": 23176 }, { "epoch": 3.536376953125e-05, "step": 23176, "training_step_time": 0.10653328895568848 }, { "epoch": 3.536529541015625e-05, "model_forward_time": 0.024779558181762695, "step": 23177 }, { "epoch": 3.536529541015625e-05, "step": 23177, "training_step_time": 0.1111290454864502 }, { "epoch": 3.53668212890625e-05, "model_forward_time": 0.027599573135375977, "step": 23178 }, { "epoch": 3.53668212890625e-05, "step": 23178, "training_step_time": 0.10939764976501465 }, { "epoch": 3.536834716796875e-05, "model_forward_time": 0.025013208389282227, "step": 23179 }, { "epoch": 3.536834716796875e-05, "step": 23179, "training_step_time": 0.10781335830688477 }, { "epoch": 3.5369873046875e-05, "grad_norm": 0.08925186842679977, "learning_rate": 1.3476198734695122e-05, "loss": 0.0037, "step": 23180 }, { "epoch": 3.5369873046875e-05, "model_forward_time": 0.025090456008911133, "step": 23180 }, { "epoch": 3.5369873046875e-05, "step": 23180, "training_step_time": 0.1077268123626709 }, { "epoch": 3.537139892578125e-05, "model_forward_time": 0.02525925636291504, "step": 23181 }, { "epoch": 3.537139892578125e-05, "step": 23181, "training_step_time": 0.10716080665588379 }, { "epoch": 3.53729248046875e-05, "model_forward_time": 0.02513718605041504, "step": 23182 }, { "epoch": 3.53729248046875e-05, "step": 23182, "training_step_time": 0.10761070251464844 }, { "epoch": 3.537445068359375e-05, "model_forward_time": 0.025086402893066406, "step": 23183 }, { "epoch": 3.537445068359375e-05, "step": 23183, "training_step_time": 0.18614983558654785 }, { "epoch": 3.53759765625e-05, "model_forward_time": 0.024204730987548828, "step": 23184 }, { "epoch": 3.53759765625e-05, "step": 23184, "training_step_time": 0.1532917022705078 }, { "epoch": 3.537750244140625e-05, "model_forward_time": 0.024193763732910156, "step": 23185 }, { "epoch": 3.537750244140625e-05, "step": 23185, "training_step_time": 0.11033821105957031 }, { "epoch": 3.53790283203125e-05, "model_forward_time": 0.024779558181762695, "step": 23186 }, { "epoch": 3.53790283203125e-05, "step": 23186, "training_step_time": 0.10337281227111816 }, { "epoch": 3.538055419921875e-05, "model_forward_time": 0.02505779266357422, "step": 23187 }, { "epoch": 3.538055419921875e-05, "step": 23187, "training_step_time": 0.11513972282409668 }, { "epoch": 3.5382080078125e-05, "model_forward_time": 0.02463817596435547, "step": 23188 }, { "epoch": 3.5382080078125e-05, "step": 23188, "training_step_time": 0.1819157600402832 }, { "epoch": 3.538360595703125e-05, "model_forward_time": 0.026914119720458984, "step": 23189 }, { "epoch": 3.538360595703125e-05, "step": 23189, "training_step_time": 0.2105121612548828 }, { "epoch": 3.53851318359375e-05, "grad_norm": 0.11898551881313324, "learning_rate": 1.343858033448982e-05, "loss": 0.0066, "step": 23190 }, { "epoch": 3.53851318359375e-05, "model_forward_time": 0.024605274200439453, "step": 23190 }, { "epoch": 3.53851318359375e-05, "step": 23190, "training_step_time": 0.1592860221862793 }, { "epoch": 3.538665771484375e-05, "model_forward_time": 0.024466991424560547, "step": 23191 }, { "epoch": 3.538665771484375e-05, "step": 23191, "training_step_time": 0.11331367492675781 }, { "epoch": 3.538818359375e-05, "model_forward_time": 0.02729487419128418, "step": 23192 }, { "epoch": 3.538818359375e-05, "step": 23192, "training_step_time": 0.13415265083312988 }, { "epoch": 3.538970947265625e-05, "model_forward_time": 0.024981260299682617, "step": 23193 }, { "epoch": 3.538970947265625e-05, "step": 23193, "training_step_time": 0.19992852210998535 }, { "epoch": 3.53912353515625e-05, "model_forward_time": 0.024353504180908203, "step": 23194 }, { "epoch": 3.53912353515625e-05, "step": 23194, "training_step_time": 0.1254897117614746 }, { "epoch": 3.539276123046875e-05, "model_forward_time": 0.0244748592376709, "step": 23195 }, { "epoch": 3.539276123046875e-05, "step": 23195, "training_step_time": 0.12822842597961426 }, { "epoch": 3.5394287109375e-05, "model_forward_time": 0.02441883087158203, "step": 23196 }, { "epoch": 3.5394287109375e-05, "step": 23196, "training_step_time": 0.12202048301696777 }, { "epoch": 3.539581298828125e-05, "model_forward_time": 0.024977922439575195, "step": 23197 }, { "epoch": 3.539581298828125e-05, "step": 23197, "training_step_time": 0.11932706832885742 }, { "epoch": 3.53973388671875e-05, "model_forward_time": 0.0253908634185791, "step": 23198 }, { "epoch": 3.53973388671875e-05, "step": 23198, "training_step_time": 0.11548233032226562 }, { "epoch": 3.539886474609375e-05, "model_forward_time": 0.02502298355102539, "step": 23199 }, { "epoch": 3.539886474609375e-05, "step": 23199, "training_step_time": 0.11390852928161621 }, { "epoch": 3.5400390625e-05, "grad_norm": 0.1261717975139618, "learning_rate": 1.3401006359856915e-05, "loss": 0.0052, "step": 23200 }, { "epoch": 3.5400390625e-05, "model_forward_time": 0.02512359619140625, "step": 23200 }, { "epoch": 3.5400390625e-05, "step": 23200, "training_step_time": 0.11222577095031738 }, { "epoch": 3.540191650390625e-05, "model_forward_time": 0.025458097457885742, "step": 23201 }, { "epoch": 3.540191650390625e-05, "step": 23201, "training_step_time": 0.11441636085510254 }, { "epoch": 3.54034423828125e-05, "model_forward_time": 0.02531599998474121, "step": 23202 }, { "epoch": 3.54034423828125e-05, "step": 23202, "training_step_time": 0.1075751781463623 }, { "epoch": 3.540496826171875e-05, "model_forward_time": 0.02594780921936035, "step": 23203 }, { "epoch": 3.540496826171875e-05, "step": 23203, "training_step_time": 0.10672950744628906 }, { "epoch": 3.5406494140625e-05, "model_forward_time": 0.02500629425048828, "step": 23204 }, { "epoch": 3.5406494140625e-05, "step": 23204, "training_step_time": 0.10536932945251465 }, { "epoch": 3.540802001953125e-05, "model_forward_time": 0.024811506271362305, "step": 23205 }, { "epoch": 3.540802001953125e-05, "step": 23205, "training_step_time": 0.10711526870727539 }, { "epoch": 3.54095458984375e-05, "model_forward_time": 0.025145292282104492, "step": 23206 }, { "epoch": 3.54095458984375e-05, "step": 23206, "training_step_time": 0.1084451675415039 }, { "epoch": 3.541107177734375e-05, "model_forward_time": 0.025076627731323242, "step": 23207 }, { "epoch": 3.541107177734375e-05, "step": 23207, "training_step_time": 0.10977387428283691 }, { "epoch": 3.541259765625e-05, "model_forward_time": 0.024968862533569336, "step": 23208 }, { "epoch": 3.541259765625e-05, "step": 23208, "training_step_time": 0.16614603996276855 }, { "epoch": 3.541412353515625e-05, "model_forward_time": 0.024358749389648438, "step": 23209 }, { "epoch": 3.541412353515625e-05, "step": 23209, "training_step_time": 0.10749959945678711 }, { "epoch": 3.54156494140625e-05, "grad_norm": 0.08713316172361374, "learning_rate": 1.3363476856452356e-05, "loss": 0.0046, "step": 23210 }, { "epoch": 3.54156494140625e-05, "model_forward_time": 0.025378704071044922, "step": 23210 }, { "epoch": 3.54156494140625e-05, "step": 23210, "training_step_time": 0.10693883895874023 }, { "epoch": 3.541717529296875e-05, "model_forward_time": 0.025310277938842773, "step": 23211 }, { "epoch": 3.541717529296875e-05, "step": 23211, "training_step_time": 0.1280827522277832 }, { "epoch": 3.5418701171875e-05, "model_forward_time": 0.025011062622070312, "step": 23212 }, { "epoch": 3.5418701171875e-05, "step": 23212, "training_step_time": 0.11826372146606445 }, { "epoch": 3.542022705078125e-05, "model_forward_time": 0.02533102035522461, "step": 23213 }, { "epoch": 3.542022705078125e-05, "step": 23213, "training_step_time": 0.21614766120910645 }, { "epoch": 3.54217529296875e-05, "model_forward_time": 0.02450728416442871, "step": 23214 }, { "epoch": 3.54217529296875e-05, "step": 23214, "training_step_time": 0.11902379989624023 }, { "epoch": 3.542327880859375e-05, "model_forward_time": 0.02406144142150879, "step": 23215 }, { "epoch": 3.542327880859375e-05, "step": 23215, "training_step_time": 0.12376856803894043 }, { "epoch": 3.54248046875e-05, "model_forward_time": 0.025962114334106445, "step": 23216 }, { "epoch": 3.54248046875e-05, "step": 23216, "training_step_time": 0.15221571922302246 }, { "epoch": 3.542633056640625e-05, "model_forward_time": 0.024130582809448242, "step": 23217 }, { "epoch": 3.542633056640625e-05, "step": 23217, "training_step_time": 0.10278511047363281 }, { "epoch": 3.54278564453125e-05, "model_forward_time": 0.024770021438598633, "step": 23218 }, { "epoch": 3.54278564453125e-05, "step": 23218, "training_step_time": 0.11805987358093262 }, { "epoch": 3.542938232421875e-05, "model_forward_time": 0.025004863739013672, "step": 23219 }, { "epoch": 3.542938232421875e-05, "step": 23219, "training_step_time": 0.11264300346374512 }, { "epoch": 3.5430908203125e-05, "grad_norm": 0.07672315090894699, "learning_rate": 1.3325991869878013e-05, "loss": 0.0041, "step": 23220 }, { "epoch": 3.5430908203125e-05, "model_forward_time": 0.025330543518066406, "step": 23220 }, { "epoch": 3.5430908203125e-05, "step": 23220, "training_step_time": 0.10419535636901855 }, { "epoch": 3.543243408203125e-05, "model_forward_time": 0.025084495544433594, "step": 23221 }, { "epoch": 3.543243408203125e-05, "step": 23221, "training_step_time": 0.10591363906860352 }, { "epoch": 3.54339599609375e-05, "model_forward_time": 0.027942895889282227, "step": 23222 }, { "epoch": 3.54339599609375e-05, "step": 23222, "training_step_time": 0.10905265808105469 }, { "epoch": 3.543548583984375e-05, "model_forward_time": 0.025374412536621094, "step": 23223 }, { "epoch": 3.543548583984375e-05, "step": 23223, "training_step_time": 0.1045374870300293 }, { "epoch": 3.543701171875e-05, "model_forward_time": 0.0255429744720459, "step": 23224 }, { "epoch": 3.543701171875e-05, "step": 23224, "training_step_time": 0.10607695579528809 }, { "epoch": 3.543853759765625e-05, "model_forward_time": 0.025141477584838867, "step": 23225 }, { "epoch": 3.543853759765625e-05, "step": 23225, "training_step_time": 0.10401415824890137 }, { "epoch": 3.54400634765625e-05, "model_forward_time": 0.02557659149169922, "step": 23226 }, { "epoch": 3.54400634765625e-05, "step": 23226, "training_step_time": 0.10540771484375 }, { "epoch": 3.544158935546875e-05, "model_forward_time": 0.025210142135620117, "step": 23227 }, { "epoch": 3.544158935546875e-05, "step": 23227, "training_step_time": 0.10345458984375 }, { "epoch": 3.5443115234375e-05, "model_forward_time": 0.026408672332763672, "step": 23228 }, { "epoch": 3.5443115234375e-05, "step": 23228, "training_step_time": 0.1813514232635498 }, { "epoch": 3.544464111328125e-05, "model_forward_time": 0.024237394332885742, "step": 23229 }, { "epoch": 3.544464111328125e-05, "step": 23229, "training_step_time": 0.14154505729675293 }, { "epoch": 3.54461669921875e-05, "grad_norm": 0.16616730391979218, "learning_rate": 1.328855144568168e-05, "loss": 0.0068, "step": 23230 }, { "epoch": 3.54461669921875e-05, "model_forward_time": 0.024788379669189453, "step": 23230 }, { "epoch": 3.54461669921875e-05, "step": 23230, "training_step_time": 0.11187744140625 }, { "epoch": 3.544769287109375e-05, "model_forward_time": 0.025032758712768555, "step": 23231 }, { "epoch": 3.544769287109375e-05, "step": 23231, "training_step_time": 0.1047823429107666 }, { "epoch": 3.544921875e-05, "model_forward_time": 0.02505183219909668, "step": 23232 }, { "epoch": 3.544921875e-05, "step": 23232, "training_step_time": 0.10802388191223145 }, { "epoch": 3.545074462890625e-05, "model_forward_time": 0.024825096130371094, "step": 23233 }, { "epoch": 3.545074462890625e-05, "step": 23233, "training_step_time": 0.10576272010803223 }, { "epoch": 3.54522705078125e-05, "model_forward_time": 0.025194406509399414, "step": 23234 }, { "epoch": 3.54522705078125e-05, "step": 23234, "training_step_time": 0.12727808952331543 }, { "epoch": 3.545379638671875e-05, "model_forward_time": 0.02395176887512207, "step": 23235 }, { "epoch": 3.545379638671875e-05, "step": 23235, "training_step_time": 0.12461733818054199 }, { "epoch": 3.5455322265625e-05, "model_forward_time": 0.0250699520111084, "step": 23236 }, { "epoch": 3.5455322265625e-05, "step": 23236, "training_step_time": 0.16695499420166016 }, { "epoch": 3.545684814453125e-05, "model_forward_time": 0.025638341903686523, "step": 23237 }, { "epoch": 3.545684814453125e-05, "step": 23237, "training_step_time": 0.11171555519104004 }, { "epoch": 3.54583740234375e-05, "model_forward_time": 0.024732112884521484, "step": 23238 }, { "epoch": 3.54583740234375e-05, "step": 23238, "training_step_time": 0.14163899421691895 }, { "epoch": 3.545989990234375e-05, "model_forward_time": 0.025101900100708008, "step": 23239 }, { "epoch": 3.545989990234375e-05, "step": 23239, "training_step_time": 0.1862194538116455 }, { "epoch": 3.546142578125e-05, "grad_norm": 0.2733677923679352, "learning_rate": 1.325115562935701e-05, "loss": 0.0065, "step": 23240 }, { "epoch": 3.546142578125e-05, "model_forward_time": 0.02449774742126465, "step": 23240 }, { "epoch": 3.546142578125e-05, "step": 23240, "training_step_time": 0.12102222442626953 }, { "epoch": 3.546295166015625e-05, "model_forward_time": 0.0246884822845459, "step": 23241 }, { "epoch": 3.546295166015625e-05, "step": 23241, "training_step_time": 0.11891484260559082 }, { "epoch": 3.54644775390625e-05, "model_forward_time": 0.02537250518798828, "step": 23242 }, { "epoch": 3.54644775390625e-05, "step": 23242, "training_step_time": 0.11764168739318848 }, { "epoch": 3.546600341796875e-05, "model_forward_time": 0.02504110336303711, "step": 23243 }, { "epoch": 3.546600341796875e-05, "step": 23243, "training_step_time": 0.11519527435302734 }, { "epoch": 3.5467529296875e-05, "model_forward_time": 0.02525162696838379, "step": 23244 }, { "epoch": 3.5467529296875e-05, "step": 23244, "training_step_time": 0.10953164100646973 }, { "epoch": 3.546905517578125e-05, "model_forward_time": 0.024918079376220703, "step": 23245 }, { "epoch": 3.546905517578125e-05, "step": 23245, "training_step_time": 0.11133337020874023 }, { "epoch": 3.54705810546875e-05, "model_forward_time": 0.02353501319885254, "step": 23246 }, { "epoch": 3.54705810546875e-05, "step": 23246, "training_step_time": 0.10850024223327637 }, { "epoch": 3.547210693359375e-05, "model_forward_time": 0.024735212326049805, "step": 23247 }, { "epoch": 3.547210693359375e-05, "step": 23247, "training_step_time": 0.10585331916809082 }, { "epoch": 3.54736328125e-05, "model_forward_time": 0.024870872497558594, "step": 23248 }, { "epoch": 3.54736328125e-05, "step": 23248, "training_step_time": 0.10627007484436035 }, { "epoch": 3.547515869140625e-05, "model_forward_time": 0.02475428581237793, "step": 23249 }, { "epoch": 3.547515869140625e-05, "step": 23249, "training_step_time": 0.10610795021057129 }, { "epoch": 3.54766845703125e-05, "grad_norm": 0.10498591512441635, "learning_rate": 1.3213804466343421e-05, "loss": 0.0076, "step": 23250 }, { "epoch": 3.54766845703125e-05, "model_forward_time": 0.02497243881225586, "step": 23250 }, { "epoch": 3.54766845703125e-05, "step": 23250, "training_step_time": 0.10488319396972656 }, { "epoch": 3.547821044921875e-05, "model_forward_time": 0.02505207061767578, "step": 23251 }, { "epoch": 3.547821044921875e-05, "step": 23251, "training_step_time": 0.10893988609313965 }, { "epoch": 3.5479736328125e-05, "model_forward_time": 0.02505326271057129, "step": 23252 }, { "epoch": 3.5479736328125e-05, "step": 23252, "training_step_time": 0.10735774040222168 }, { "epoch": 3.548126220703125e-05, "model_forward_time": 0.025427579879760742, "step": 23253 }, { "epoch": 3.548126220703125e-05, "step": 23253, "training_step_time": 0.10477447509765625 }, { "epoch": 3.54827880859375e-05, "model_forward_time": 0.02494049072265625, "step": 23254 }, { "epoch": 3.54827880859375e-05, "step": 23254, "training_step_time": 0.1077413558959961 }, { "epoch": 3.548431396484375e-05, "model_forward_time": 0.02415156364440918, "step": 23255 }, { "epoch": 3.548431396484375e-05, "step": 23255, "training_step_time": 0.13237833976745605 }, { "epoch": 3.548583984375e-05, "model_forward_time": 0.025124788284301758, "step": 23256 }, { "epoch": 3.548583984375e-05, "step": 23256, "training_step_time": 0.11238884925842285 }, { "epoch": 3.548736572265625e-05, "model_forward_time": 0.02548837661743164, "step": 23257 }, { "epoch": 3.548736572265625e-05, "step": 23257, "training_step_time": 0.11195874214172363 }, { "epoch": 3.54888916015625e-05, "model_forward_time": 0.02487468719482422, "step": 23258 }, { "epoch": 3.54888916015625e-05, "step": 23258, "training_step_time": 0.1182868480682373 }, { "epoch": 3.549041748046875e-05, "model_forward_time": 0.024868488311767578, "step": 23259 }, { "epoch": 3.549041748046875e-05, "step": 23259, "training_step_time": 0.12801098823547363 }, { "epoch": 3.5491943359375e-05, "grad_norm": 0.09662266075611115, "learning_rate": 1.3176498002026128e-05, "loss": 0.0043, "step": 23260 }, { "epoch": 3.5491943359375e-05, "model_forward_time": 0.024862051010131836, "step": 23260 }, { "epoch": 3.5491943359375e-05, "step": 23260, "training_step_time": 0.15819597244262695 }, { "epoch": 3.549346923828125e-05, "model_forward_time": 0.023737668991088867, "step": 23261 }, { "epoch": 3.549346923828125e-05, "step": 23261, "training_step_time": 0.16686058044433594 }, { "epoch": 3.54949951171875e-05, "model_forward_time": 0.024262666702270508, "step": 23262 }, { "epoch": 3.54949951171875e-05, "step": 23262, "training_step_time": 0.16208553314208984 }, { "epoch": 3.549652099609375e-05, "model_forward_time": 0.02391648292541504, "step": 23263 }, { "epoch": 3.549652099609375e-05, "step": 23263, "training_step_time": 0.16832447052001953 }, { "epoch": 3.5498046875e-05, "model_forward_time": 0.024211406707763672, "step": 23264 }, { "epoch": 3.5498046875e-05, "step": 23264, "training_step_time": 0.12025809288024902 }, { "epoch": 3.549957275390625e-05, "model_forward_time": 0.024289846420288086, "step": 23265 }, { "epoch": 3.549957275390625e-05, "step": 23265, "training_step_time": 0.11322546005249023 }, { "epoch": 3.55010986328125e-05, "model_forward_time": 0.025094032287597656, "step": 23266 }, { "epoch": 3.55010986328125e-05, "step": 23266, "training_step_time": 0.10638666152954102 }, { "epoch": 3.550262451171875e-05, "model_forward_time": 0.02642512321472168, "step": 23267 }, { "epoch": 3.550262451171875e-05, "step": 23267, "training_step_time": 0.10757994651794434 }, { "epoch": 3.5504150390625e-05, "model_forward_time": 0.025058984756469727, "step": 23268 }, { "epoch": 3.5504150390625e-05, "step": 23268, "training_step_time": 0.10490179061889648 }, { "epoch": 3.550567626953125e-05, "model_forward_time": 0.02548503875732422, "step": 23269 }, { "epoch": 3.550567626953125e-05, "step": 23269, "training_step_time": 0.15086650848388672 }, { "epoch": 3.55072021484375e-05, "grad_norm": 0.48639655113220215, "learning_rate": 1.3139236281736e-05, "loss": 0.0063, "step": 23270 }, { "epoch": 3.55072021484375e-05, "model_forward_time": 0.024800777435302734, "step": 23270 }, { "epoch": 3.55072021484375e-05, "step": 23270, "training_step_time": 0.15541934967041016 }, { "epoch": 3.550872802734375e-05, "model_forward_time": 0.02434992790222168, "step": 23271 }, { "epoch": 3.550872802734375e-05, "step": 23271, "training_step_time": 0.1647641658782959 }, { "epoch": 3.551025390625e-05, "model_forward_time": 0.02456951141357422, "step": 23272 }, { "epoch": 3.551025390625e-05, "step": 23272, "training_step_time": 0.1574866771697998 }, { "epoch": 3.551177978515625e-05, "model_forward_time": 0.024190425872802734, "step": 23273 }, { "epoch": 3.551177978515625e-05, "step": 23273, "training_step_time": 0.1403357982635498 }, { "epoch": 3.55133056640625e-05, "model_forward_time": 0.02552008628845215, "step": 23274 }, { "epoch": 3.55133056640625e-05, "step": 23274, "training_step_time": 0.1301894187927246 }, { "epoch": 3.551483154296875e-05, "model_forward_time": 0.02405261993408203, "step": 23275 }, { "epoch": 3.551483154296875e-05, "step": 23275, "training_step_time": 0.18494534492492676 }, { "epoch": 3.5516357421875e-05, "model_forward_time": 0.024471044540405273, "step": 23276 }, { "epoch": 3.5516357421875e-05, "step": 23276, "training_step_time": 0.1264054775238037 }, { "epoch": 3.551788330078125e-05, "model_forward_time": 0.024277687072753906, "step": 23277 }, { "epoch": 3.551788330078125e-05, "step": 23277, "training_step_time": 0.18108487129211426 }, { "epoch": 3.55194091796875e-05, "model_forward_time": 0.024292707443237305, "step": 23278 }, { "epoch": 3.55194091796875e-05, "step": 23278, "training_step_time": 0.11571288108825684 }, { "epoch": 3.552093505859375e-05, "model_forward_time": 0.024270057678222656, "step": 23279 }, { "epoch": 3.552093505859375e-05, "step": 23279, "training_step_time": 0.12001895904541016 }, { "epoch": 3.55224609375e-05, "grad_norm": 0.11145589500665665, "learning_rate": 1.3102019350749528e-05, "loss": 0.0067, "step": 23280 }, { "epoch": 3.55224609375e-05, "model_forward_time": 0.025935649871826172, "step": 23280 }, { "epoch": 3.55224609375e-05, "step": 23280, "training_step_time": 0.16225361824035645 }, { "epoch": 3.552398681640625e-05, "model_forward_time": 0.02530980110168457, "step": 23281 }, { "epoch": 3.552398681640625e-05, "step": 23281, "training_step_time": 0.10560727119445801 }, { "epoch": 3.55255126953125e-05, "model_forward_time": 0.024799585342407227, "step": 23282 }, { "epoch": 3.55255126953125e-05, "step": 23282, "training_step_time": 0.13144898414611816 }, { "epoch": 3.552703857421875e-05, "model_forward_time": 0.02505803108215332, "step": 23283 }, { "epoch": 3.552703857421875e-05, "step": 23283, "training_step_time": 0.1971437931060791 }, { "epoch": 3.5528564453125e-05, "model_forward_time": 0.024053573608398438, "step": 23284 }, { "epoch": 3.5528564453125e-05, "step": 23284, "training_step_time": 0.10184669494628906 }, { "epoch": 3.553009033203125e-05, "model_forward_time": 0.02431941032409668, "step": 23285 }, { "epoch": 3.553009033203125e-05, "step": 23285, "training_step_time": 0.10358595848083496 }, { "epoch": 3.55316162109375e-05, "model_forward_time": 0.024988651275634766, "step": 23286 }, { "epoch": 3.55316162109375e-05, "step": 23286, "training_step_time": 0.1100015640258789 }, { "epoch": 3.553314208984375e-05, "model_forward_time": 0.024855375289916992, "step": 23287 }, { "epoch": 3.553314208984375e-05, "step": 23287, "training_step_time": 0.1062617301940918 }, { "epoch": 3.553466796875e-05, "model_forward_time": 0.0253298282623291, "step": 23288 }, { "epoch": 3.553466796875e-05, "step": 23288, "training_step_time": 0.10749983787536621 }, { "epoch": 3.553619384765625e-05, "model_forward_time": 0.024973392486572266, "step": 23289 }, { "epoch": 3.553619384765625e-05, "step": 23289, "training_step_time": 0.10478854179382324 }, { "epoch": 3.55377197265625e-05, "grad_norm": 0.11471951007843018, "learning_rate": 1.3064847254288797e-05, "loss": 0.0151, "step": 23290 }, { "epoch": 3.55377197265625e-05, "model_forward_time": 0.025330543518066406, "step": 23290 }, { "epoch": 3.55377197265625e-05, "step": 23290, "training_step_time": 0.10653042793273926 }, { "epoch": 3.553924560546875e-05, "model_forward_time": 0.024834871292114258, "step": 23291 }, { "epoch": 3.553924560546875e-05, "step": 23291, "training_step_time": 0.10373878479003906 }, { "epoch": 3.5540771484375e-05, "model_forward_time": 0.024792194366455078, "step": 23292 }, { "epoch": 3.5540771484375e-05, "step": 23292, "training_step_time": 0.10434103012084961 }, { "epoch": 3.554229736328125e-05, "model_forward_time": 0.025188207626342773, "step": 23293 }, { "epoch": 3.554229736328125e-05, "step": 23293, "training_step_time": 0.10489225387573242 }, { "epoch": 3.55438232421875e-05, "model_forward_time": 0.025982141494750977, "step": 23294 }, { "epoch": 3.55438232421875e-05, "step": 23294, "training_step_time": 0.10580873489379883 }, { "epoch": 3.554534912109375e-05, "model_forward_time": 0.024731874465942383, "step": 23295 }, { "epoch": 3.554534912109375e-05, "step": 23295, "training_step_time": 0.10597562789916992 }, { "epoch": 3.5546875e-05, "model_forward_time": 0.025089502334594727, "step": 23296 }, { "epoch": 3.5546875e-05, "step": 23296, "training_step_time": 0.10997128486633301 }, { "epoch": 3.554840087890625e-05, "model_forward_time": 0.025561094284057617, "step": 23297 }, { "epoch": 3.554840087890625e-05, "step": 23297, "training_step_time": 0.10680890083312988 }, { "epoch": 3.55499267578125e-05, "model_forward_time": 0.02472829818725586, "step": 23298 }, { "epoch": 3.55499267578125e-05, "step": 23298, "training_step_time": 0.11026453971862793 }, { "epoch": 3.555145263671875e-05, "model_forward_time": 0.025469541549682617, "step": 23299 }, { "epoch": 3.555145263671875e-05, "step": 23299, "training_step_time": 0.10811519622802734 }, { "epoch": 3.5552978515625e-05, "grad_norm": 0.13361810147762299, "learning_rate": 1.3027720037521397e-05, "loss": 0.0067, "step": 23300 }, { "epoch": 3.5552978515625e-05, "model_forward_time": 0.024899005889892578, "step": 23300 }, { "epoch": 3.5552978515625e-05, "step": 23300, "training_step_time": 0.1316995620727539 }, { "epoch": 3.555450439453125e-05, "model_forward_time": 0.02493143081665039, "step": 23301 }, { "epoch": 3.555450439453125e-05, "step": 23301, "training_step_time": 0.10849690437316895 }, { "epoch": 3.55560302734375e-05, "model_forward_time": 0.025310993194580078, "step": 23302 }, { "epoch": 3.55560302734375e-05, "step": 23302, "training_step_time": 0.1089019775390625 }, { "epoch": 3.555755615234375e-05, "model_forward_time": 0.024958372116088867, "step": 23303 }, { "epoch": 3.555755615234375e-05, "step": 23303, "training_step_time": 0.13319849967956543 }, { "epoch": 3.555908203125e-05, "model_forward_time": 0.025043249130249023, "step": 23304 }, { "epoch": 3.555908203125e-05, "step": 23304, "training_step_time": 0.12258553504943848 }, { "epoch": 3.556060791015625e-05, "model_forward_time": 0.025115251541137695, "step": 23305 }, { "epoch": 3.556060791015625e-05, "step": 23305, "training_step_time": 0.110107421875 }, { "epoch": 3.55621337890625e-05, "model_forward_time": 0.025054454803466797, "step": 23306 }, { "epoch": 3.55621337890625e-05, "step": 23306, "training_step_time": 0.14678144454956055 }, { "epoch": 3.556365966796875e-05, "model_forward_time": 0.024631500244140625, "step": 23307 }, { "epoch": 3.556365966796875e-05, "step": 23307, "training_step_time": 0.11731100082397461 }, { "epoch": 3.5565185546875e-05, "model_forward_time": 0.024802684783935547, "step": 23308 }, { "epoch": 3.5565185546875e-05, "step": 23308, "training_step_time": 0.11899328231811523 }, { "epoch": 3.556671142578125e-05, "model_forward_time": 0.025536775588989258, "step": 23309 }, { "epoch": 3.556671142578125e-05, "step": 23309, "training_step_time": 0.13795137405395508 }, { "epoch": 3.55682373046875e-05, "grad_norm": 0.29144373536109924, "learning_rate": 1.299063774556042e-05, "loss": 0.0124, "step": 23310 }, { "epoch": 3.55682373046875e-05, "model_forward_time": 0.02469491958618164, "step": 23310 }, { "epoch": 3.55682373046875e-05, "step": 23310, "training_step_time": 0.11194825172424316 }, { "epoch": 3.556976318359375e-05, "model_forward_time": 0.02476334571838379, "step": 23311 }, { "epoch": 3.556976318359375e-05, "step": 23311, "training_step_time": 0.13114643096923828 }, { "epoch": 3.55712890625e-05, "model_forward_time": 0.025343656539916992, "step": 23312 }, { "epoch": 3.55712890625e-05, "step": 23312, "training_step_time": 0.10868430137634277 }, { "epoch": 3.557281494140625e-05, "model_forward_time": 0.025021076202392578, "step": 23313 }, { "epoch": 3.557281494140625e-05, "step": 23313, "training_step_time": 0.10639834403991699 }, { "epoch": 3.55743408203125e-05, "model_forward_time": 0.025058507919311523, "step": 23314 }, { "epoch": 3.55743408203125e-05, "step": 23314, "training_step_time": 0.10808539390563965 }, { "epoch": 3.557586669921875e-05, "model_forward_time": 0.02484416961669922, "step": 23315 }, { "epoch": 3.557586669921875e-05, "step": 23315, "training_step_time": 0.10540509223937988 }, { "epoch": 3.5577392578125e-05, "model_forward_time": 0.025367021560668945, "step": 23316 }, { "epoch": 3.5577392578125e-05, "step": 23316, "training_step_time": 0.10761475563049316 }, { "epoch": 3.557891845703125e-05, "model_forward_time": 0.024781465530395508, "step": 23317 }, { "epoch": 3.557891845703125e-05, "step": 23317, "training_step_time": 0.1067814826965332 }, { "epoch": 3.55804443359375e-05, "model_forward_time": 0.025071144104003906, "step": 23318 }, { "epoch": 3.55804443359375e-05, "step": 23318, "training_step_time": 0.10760903358459473 }, { "epoch": 3.558197021484375e-05, "model_forward_time": 0.02533745765686035, "step": 23319 }, { "epoch": 3.558197021484375e-05, "step": 23319, "training_step_time": 0.11087560653686523 }, { "epoch": 3.558349609375e-05, "grad_norm": 0.4952482581138611, "learning_rate": 1.2953600423464324e-05, "loss": 0.0069, "step": 23320 }, { "epoch": 3.558349609375e-05, "model_forward_time": 0.024710416793823242, "step": 23320 }, { "epoch": 3.558349609375e-05, "step": 23320, "training_step_time": 0.1100778579711914 }, { "epoch": 3.558502197265625e-05, "model_forward_time": 0.025284767150878906, "step": 23321 }, { "epoch": 3.558502197265625e-05, "step": 23321, "training_step_time": 0.12181758880615234 }, { "epoch": 3.55865478515625e-05, "model_forward_time": 0.02543044090270996, "step": 23322 }, { "epoch": 3.55865478515625e-05, "step": 23322, "training_step_time": 0.10865664482116699 }, { "epoch": 3.558807373046875e-05, "model_forward_time": 0.025342702865600586, "step": 23323 }, { "epoch": 3.558807373046875e-05, "step": 23323, "training_step_time": 0.1110837459564209 }, { "epoch": 3.5589599609375e-05, "model_forward_time": 0.0251007080078125, "step": 23324 }, { "epoch": 3.5589599609375e-05, "step": 23324, "training_step_time": 0.10879254341125488 }, { "epoch": 3.559112548828125e-05, "model_forward_time": 0.025250673294067383, "step": 23325 }, { "epoch": 3.559112548828125e-05, "step": 23325, "training_step_time": 0.10831046104431152 }, { "epoch": 3.55926513671875e-05, "model_forward_time": 0.02517390251159668, "step": 23326 }, { "epoch": 3.55926513671875e-05, "step": 23326, "training_step_time": 0.1046907901763916 }, { "epoch": 3.559417724609375e-05, "model_forward_time": 0.024714946746826172, "step": 23327 }, { "epoch": 3.559417724609375e-05, "step": 23327, "training_step_time": 0.11985516548156738 }, { "epoch": 3.5595703125e-05, "model_forward_time": 0.025017261505126953, "step": 23328 }, { "epoch": 3.5595703125e-05, "step": 23328, "training_step_time": 0.12053775787353516 }, { "epoch": 3.559722900390625e-05, "model_forward_time": 0.024998188018798828, "step": 23329 }, { "epoch": 3.559722900390625e-05, "step": 23329, "training_step_time": 0.19411802291870117 }, { "epoch": 3.55987548828125e-05, "grad_norm": 0.1660095751285553, "learning_rate": 1.2916608116236977e-05, "loss": 0.0069, "step": 23330 }, { "epoch": 3.55987548828125e-05, "model_forward_time": 0.024116992950439453, "step": 23330 }, { "epoch": 3.55987548828125e-05, "step": 23330, "training_step_time": 0.1246027946472168 }, { "epoch": 3.560028076171875e-05, "model_forward_time": 0.023980140686035156, "step": 23331 }, { "epoch": 3.560028076171875e-05, "step": 23331, "training_step_time": 0.16336941719055176 }, { "epoch": 3.5601806640625e-05, "model_forward_time": 0.024234533309936523, "step": 23332 }, { "epoch": 3.5601806640625e-05, "step": 23332, "training_step_time": 0.1069941520690918 }, { "epoch": 3.560333251953125e-05, "model_forward_time": 0.024553298950195312, "step": 23333 }, { "epoch": 3.560333251953125e-05, "step": 23333, "training_step_time": 0.10144782066345215 }, { "epoch": 3.56048583984375e-05, "model_forward_time": 0.0250399112701416, "step": 23334 }, { "epoch": 3.56048583984375e-05, "step": 23334, "training_step_time": 0.10926222801208496 }, { "epoch": 3.560638427734375e-05, "model_forward_time": 0.027924537658691406, "step": 23335 }, { "epoch": 3.560638427734375e-05, "step": 23335, "training_step_time": 0.10864901542663574 }, { "epoch": 3.560791015625e-05, "model_forward_time": 0.02485489845275879, "step": 23336 }, { "epoch": 3.560791015625e-05, "step": 23336, "training_step_time": 0.10827326774597168 }, { "epoch": 3.560943603515625e-05, "model_forward_time": 0.024815797805786133, "step": 23337 }, { "epoch": 3.560943603515625e-05, "step": 23337, "training_step_time": 0.10715842247009277 }, { "epoch": 3.56109619140625e-05, "model_forward_time": 0.025029420852661133, "step": 23338 }, { "epoch": 3.56109619140625e-05, "step": 23338, "training_step_time": 0.11116528511047363 }, { "epoch": 3.561248779296875e-05, "model_forward_time": 0.025376319885253906, "step": 23339 }, { "epoch": 3.561248779296875e-05, "step": 23339, "training_step_time": 0.11059141159057617 }, { "epoch": 3.5614013671875e-05, "grad_norm": 0.310923308134079, "learning_rate": 1.2879660868827508e-05, "loss": 0.0084, "step": 23340 }, { "epoch": 3.5614013671875e-05, "model_forward_time": 0.024525880813598633, "step": 23340 }, { "epoch": 3.5614013671875e-05, "step": 23340, "training_step_time": 0.19060635566711426 }, { "epoch": 3.561553955078125e-05, "model_forward_time": 0.022893428802490234, "step": 23341 }, { "epoch": 3.561553955078125e-05, "step": 23341, "training_step_time": 0.2080857753753662 }, { "epoch": 3.56170654296875e-05, "model_forward_time": 0.024225473403930664, "step": 23342 }, { "epoch": 3.56170654296875e-05, "step": 23342, "training_step_time": 0.20780491828918457 }, { "epoch": 3.561859130859375e-05, "model_forward_time": 0.022484779357910156, "step": 23343 }, { "epoch": 3.561859130859375e-05, "step": 23343, "training_step_time": 0.19821953773498535 }, { "epoch": 3.56201171875e-05, "model_forward_time": 0.023019790649414062, "step": 23344 }, { "epoch": 3.56201171875e-05, "step": 23344, "training_step_time": 0.1931760311126709 }, { "epoch": 3.562164306640625e-05, "model_forward_time": 0.02282571792602539, "step": 23345 }, { "epoch": 3.562164306640625e-05, "step": 23345, "training_step_time": 0.13660502433776855 }, { "epoch": 3.56231689453125e-05, "model_forward_time": 0.02583003044128418, "step": 23346 }, { "epoch": 3.56231689453125e-05, "step": 23346, "training_step_time": 0.22557473182678223 }, { "epoch": 3.562469482421875e-05, "model_forward_time": 0.02505970001220703, "step": 23347 }, { "epoch": 3.562469482421875e-05, "step": 23347, "training_step_time": 0.11062383651733398 }, { "epoch": 3.5626220703125e-05, "model_forward_time": 0.025407075881958008, "step": 23348 }, { "epoch": 3.5626220703125e-05, "step": 23348, "training_step_time": 0.12543535232543945 }, { "epoch": 3.562774658203125e-05, "model_forward_time": 0.025110721588134766, "step": 23349 }, { "epoch": 3.562774658203125e-05, "step": 23349, "training_step_time": 0.18897771835327148 }, { "epoch": 3.56292724609375e-05, "grad_norm": 0.15721100568771362, "learning_rate": 1.2842758726130283e-05, "loss": 0.008, "step": 23350 }, { "epoch": 3.56292724609375e-05, "model_forward_time": 0.024692773818969727, "step": 23350 }, { "epoch": 3.56292724609375e-05, "step": 23350, "training_step_time": 0.18013334274291992 }, { "epoch": 3.563079833984375e-05, "model_forward_time": 0.02423381805419922, "step": 23351 }, { "epoch": 3.563079833984375e-05, "step": 23351, "training_step_time": 0.15563654899597168 }, { "epoch": 3.563232421875e-05, "model_forward_time": 0.024013757705688477, "step": 23352 }, { "epoch": 3.563232421875e-05, "step": 23352, "training_step_time": 0.16464519500732422 }, { "epoch": 3.563385009765625e-05, "model_forward_time": 0.026987075805664062, "step": 23353 }, { "epoch": 3.563385009765625e-05, "step": 23353, "training_step_time": 0.16717791557312012 }, { "epoch": 3.56353759765625e-05, "model_forward_time": 0.023528575897216797, "step": 23354 }, { "epoch": 3.56353759765625e-05, "step": 23354, "training_step_time": 0.17630600929260254 }, { "epoch": 3.563690185546875e-05, "model_forward_time": 0.02416706085205078, "step": 23355 }, { "epoch": 3.563690185546875e-05, "step": 23355, "training_step_time": 0.17811322212219238 }, { "epoch": 3.5638427734375e-05, "model_forward_time": 0.024021625518798828, "step": 23356 }, { "epoch": 3.5638427734375e-05, "step": 23356, "training_step_time": 0.16107773780822754 }, { "epoch": 3.563995361328125e-05, "model_forward_time": 0.023891210556030273, "step": 23357 }, { "epoch": 3.563995361328125e-05, "step": 23357, "training_step_time": 0.14253520965576172 }, { "epoch": 3.56414794921875e-05, "model_forward_time": 0.024144411087036133, "step": 23358 }, { "epoch": 3.56414794921875e-05, "step": 23358, "training_step_time": 0.13903450965881348 }, { "epoch": 3.564300537109375e-05, "model_forward_time": 0.027928829193115234, "step": 23359 }, { "epoch": 3.564300537109375e-05, "step": 23359, "training_step_time": 0.16277098655700684 }, { "epoch": 3.564453125e-05, "grad_norm": 0.18141719698905945, "learning_rate": 1.2805901732984921e-05, "loss": 0.0063, "step": 23360 }, { "epoch": 3.564453125e-05, "model_forward_time": 0.024163246154785156, "step": 23360 }, { "epoch": 3.564453125e-05, "step": 23360, "training_step_time": 0.1347823143005371 }, { "epoch": 3.564605712890625e-05, "model_forward_time": 0.02443385124206543, "step": 23361 }, { "epoch": 3.564605712890625e-05, "step": 23361, "training_step_time": 0.10343670845031738 }, { "epoch": 3.56475830078125e-05, "model_forward_time": 0.025066614151000977, "step": 23362 }, { "epoch": 3.56475830078125e-05, "step": 23362, "training_step_time": 0.10456037521362305 }, { "epoch": 3.564910888671875e-05, "model_forward_time": 0.02501678466796875, "step": 23363 }, { "epoch": 3.564910888671875e-05, "step": 23363, "training_step_time": 0.11089372634887695 }, { "epoch": 3.5650634765625e-05, "model_forward_time": 0.02549910545349121, "step": 23364 }, { "epoch": 3.5650634765625e-05, "step": 23364, "training_step_time": 0.10844969749450684 }, { "epoch": 3.565216064453125e-05, "model_forward_time": 0.025300264358520508, "step": 23365 }, { "epoch": 3.565216064453125e-05, "step": 23365, "training_step_time": 0.10320210456848145 }, { "epoch": 3.56536865234375e-05, "model_forward_time": 0.024327754974365234, "step": 23366 }, { "epoch": 3.56536865234375e-05, "step": 23366, "training_step_time": 0.14797687530517578 }, { "epoch": 3.565521240234375e-05, "model_forward_time": 0.024629831314086914, "step": 23367 }, { "epoch": 3.565521240234375e-05, "step": 23367, "training_step_time": 0.10716962814331055 }, { "epoch": 3.565673828125e-05, "model_forward_time": 0.025292634963989258, "step": 23368 }, { "epoch": 3.565673828125e-05, "step": 23368, "training_step_time": 0.18473291397094727 }, { "epoch": 3.565826416015625e-05, "model_forward_time": 0.024432897567749023, "step": 23369 }, { "epoch": 3.565826416015625e-05, "step": 23369, "training_step_time": 0.1386425495147705 }, { "epoch": 3.56597900390625e-05, "grad_norm": 0.11859441548585892, "learning_rate": 1.2769089934176126e-05, "loss": 0.0093, "step": 23370 }, { "epoch": 3.56597900390625e-05, "model_forward_time": 0.02429485321044922, "step": 23370 }, { "epoch": 3.56597900390625e-05, "step": 23370, "training_step_time": 0.19167351722717285 }, { "epoch": 3.566131591796875e-05, "model_forward_time": 0.024566173553466797, "step": 23371 }, { "epoch": 3.566131591796875e-05, "step": 23371, "training_step_time": 0.105224609375 }, { "epoch": 3.5662841796875e-05, "model_forward_time": 0.02485370635986328, "step": 23372 }, { "epoch": 3.5662841796875e-05, "step": 23372, "training_step_time": 0.1062474250793457 }, { "epoch": 3.566436767578125e-05, "model_forward_time": 0.025367021560668945, "step": 23373 }, { "epoch": 3.566436767578125e-05, "step": 23373, "training_step_time": 0.10510611534118652 }, { "epoch": 3.56658935546875e-05, "model_forward_time": 0.024597644805908203, "step": 23374 }, { "epoch": 3.56658935546875e-05, "step": 23374, "training_step_time": 0.11034798622131348 }, { "epoch": 3.566741943359375e-05, "model_forward_time": 0.0239870548248291, "step": 23375 }, { "epoch": 3.566741943359375e-05, "step": 23375, "training_step_time": 0.10626745223999023 }, { "epoch": 3.56689453125e-05, "model_forward_time": 0.02525162696838379, "step": 23376 }, { "epoch": 3.56689453125e-05, "step": 23376, "training_step_time": 0.11477017402648926 }, { "epoch": 3.567047119140625e-05, "model_forward_time": 0.023775577545166016, "step": 23377 }, { "epoch": 3.567047119140625e-05, "step": 23377, "training_step_time": 0.11799931526184082 }, { "epoch": 3.56719970703125e-05, "model_forward_time": 0.024049758911132812, "step": 23378 }, { "epoch": 3.56719970703125e-05, "step": 23378, "training_step_time": 0.11258220672607422 }, { "epoch": 3.567352294921875e-05, "model_forward_time": 0.02508234977722168, "step": 23379 }, { "epoch": 3.567352294921875e-05, "step": 23379, "training_step_time": 0.11038875579833984 }, { "epoch": 3.5675048828125e-05, "grad_norm": 0.30648550391197205, "learning_rate": 1.2732323374433707e-05, "loss": 0.0117, "step": 23380 }, { "epoch": 3.5675048828125e-05, "model_forward_time": 0.02510237693786621, "step": 23380 }, { "epoch": 3.5675048828125e-05, "step": 23380, "training_step_time": 0.11193013191223145 }, { "epoch": 3.567657470703125e-05, "model_forward_time": 0.025217771530151367, "step": 23381 }, { "epoch": 3.567657470703125e-05, "step": 23381, "training_step_time": 0.1100320816040039 }, { "epoch": 3.56781005859375e-05, "model_forward_time": 0.024732589721679688, "step": 23382 }, { "epoch": 3.56781005859375e-05, "step": 23382, "training_step_time": 0.10996675491333008 }, { "epoch": 3.567962646484375e-05, "model_forward_time": 0.024745464324951172, "step": 23383 }, { "epoch": 3.567962646484375e-05, "step": 23383, "training_step_time": 0.10846495628356934 }, { "epoch": 3.568115234375e-05, "model_forward_time": 0.024907827377319336, "step": 23384 }, { "epoch": 3.568115234375e-05, "step": 23384, "training_step_time": 0.10883402824401855 }, { "epoch": 3.568267822265625e-05, "model_forward_time": 0.025298118591308594, "step": 23385 }, { "epoch": 3.568267822265625e-05, "step": 23385, "training_step_time": 0.10821318626403809 }, { "epoch": 3.56842041015625e-05, "model_forward_time": 0.024891138076782227, "step": 23386 }, { "epoch": 3.56842041015625e-05, "step": 23386, "training_step_time": 0.10923361778259277 }, { "epoch": 3.568572998046875e-05, "model_forward_time": 0.024541854858398438, "step": 23387 }, { "epoch": 3.568572998046875e-05, "step": 23387, "training_step_time": 0.20641350746154785 }, { "epoch": 3.5687255859375e-05, "model_forward_time": 0.02439093589782715, "step": 23388 }, { "epoch": 3.5687255859375e-05, "step": 23388, "training_step_time": 0.10827755928039551 }, { "epoch": 3.568878173828125e-05, "model_forward_time": 0.024199485778808594, "step": 23389 }, { "epoch": 3.568878173828125e-05, "step": 23389, "training_step_time": 0.12380838394165039 }, { "epoch": 3.56903076171875e-05, "grad_norm": 0.1790643036365509, "learning_rate": 1.2695602098432502e-05, "loss": 0.0139, "step": 23390 }, { "epoch": 3.56903076171875e-05, "model_forward_time": 0.02472090721130371, "step": 23390 }, { "epoch": 3.56903076171875e-05, "step": 23390, "training_step_time": 0.12818002700805664 }, { "epoch": 3.569183349609375e-05, "model_forward_time": 0.024599313735961914, "step": 23391 }, { "epoch": 3.569183349609375e-05, "step": 23391, "training_step_time": 0.17270874977111816 }, { "epoch": 3.5693359375e-05, "model_forward_time": 0.02446126937866211, "step": 23392 }, { "epoch": 3.5693359375e-05, "step": 23392, "training_step_time": 0.19969511032104492 }, { "epoch": 3.569488525390625e-05, "model_forward_time": 0.024658679962158203, "step": 23393 }, { "epoch": 3.569488525390625e-05, "step": 23393, "training_step_time": 0.1544027328491211 }, { "epoch": 3.56964111328125e-05, "model_forward_time": 0.024145126342773438, "step": 23394 }, { "epoch": 3.56964111328125e-05, "step": 23394, "training_step_time": 0.16910266876220703 }, { "epoch": 3.569793701171875e-05, "model_forward_time": 0.023594379425048828, "step": 23395 }, { "epoch": 3.569793701171875e-05, "step": 23395, "training_step_time": 0.12185931205749512 }, { "epoch": 3.5699462890625e-05, "model_forward_time": 0.0244748592376709, "step": 23396 }, { "epoch": 3.5699462890625e-05, "step": 23396, "training_step_time": 0.1120297908782959 }, { "epoch": 3.570098876953125e-05, "model_forward_time": 0.025224924087524414, "step": 23397 }, { "epoch": 3.570098876953125e-05, "step": 23397, "training_step_time": 0.10351753234863281 }, { "epoch": 3.57025146484375e-05, "model_forward_time": 0.02507805824279785, "step": 23398 }, { "epoch": 3.57025146484375e-05, "step": 23398, "training_step_time": 0.10559582710266113 }, { "epoch": 3.570404052734375e-05, "model_forward_time": 0.02516937255859375, "step": 23399 }, { "epoch": 3.570404052734375e-05, "step": 23399, "training_step_time": 0.10587382316589355 }, { "epoch": 3.570556640625e-05, "grad_norm": 0.1076262816786766, "learning_rate": 1.2658926150792322e-05, "loss": 0.0125, "step": 23400 }, { "epoch": 3.570556640625e-05, "model_forward_time": 0.024970293045043945, "step": 23400 }, { "epoch": 3.570556640625e-05, "step": 23400, "training_step_time": 0.10558700561523438 }, { "epoch": 3.570709228515625e-05, "model_forward_time": 0.024866104125976562, "step": 23401 }, { "epoch": 3.570709228515625e-05, "step": 23401, "training_step_time": 0.10610699653625488 }, { "epoch": 3.57086181640625e-05, "model_forward_time": 0.025146484375, "step": 23402 }, { "epoch": 3.57086181640625e-05, "step": 23402, "training_step_time": 0.11017489433288574 }, { "epoch": 3.571014404296875e-05, "model_forward_time": 0.025333881378173828, "step": 23403 }, { "epoch": 3.571014404296875e-05, "step": 23403, "training_step_time": 0.10482192039489746 }, { "epoch": 3.5711669921875e-05, "model_forward_time": 0.02502751350402832, "step": 23404 }, { "epoch": 3.5711669921875e-05, "step": 23404, "training_step_time": 0.13179779052734375 }, { "epoch": 3.571319580078125e-05, "model_forward_time": 0.025427579879760742, "step": 23405 }, { "epoch": 3.571319580078125e-05, "step": 23405, "training_step_time": 0.12712836265563965 }, { "epoch": 3.57147216796875e-05, "model_forward_time": 0.02441120147705078, "step": 23406 }, { "epoch": 3.57147216796875e-05, "step": 23406, "training_step_time": 0.10913872718811035 }, { "epoch": 3.571624755859375e-05, "model_forward_time": 0.02491307258605957, "step": 23407 }, { "epoch": 3.571624755859375e-05, "step": 23407, "training_step_time": 0.10963010787963867 }, { "epoch": 3.57177734375e-05, "model_forward_time": 0.024850845336914062, "step": 23408 }, { "epoch": 3.57177734375e-05, "step": 23408, "training_step_time": 0.11035490036010742 }, { "epoch": 3.571929931640625e-05, "model_forward_time": 0.025103330612182617, "step": 23409 }, { "epoch": 3.571929931640625e-05, "step": 23409, "training_step_time": 0.1094350814819336 }, { "epoch": 3.57208251953125e-05, "grad_norm": 0.2716013491153717, "learning_rate": 1.2622295576077886e-05, "loss": 0.0111, "step": 23410 }, { "epoch": 3.57208251953125e-05, "model_forward_time": 0.024790048599243164, "step": 23410 }, { "epoch": 3.57208251953125e-05, "step": 23410, "training_step_time": 0.18961286544799805 }, { "epoch": 3.572235107421875e-05, "model_forward_time": 0.025233745574951172, "step": 23411 }, { "epoch": 3.572235107421875e-05, "step": 23411, "training_step_time": 0.10554385185241699 }, { "epoch": 3.5723876953125e-05, "model_forward_time": 0.024452686309814453, "step": 23412 }, { "epoch": 3.5723876953125e-05, "step": 23412, "training_step_time": 0.14497947692871094 }, { "epoch": 3.572540283203125e-05, "model_forward_time": 0.026020050048828125, "step": 23413 }, { "epoch": 3.572540283203125e-05, "step": 23413, "training_step_time": 0.15570807456970215 }, { "epoch": 3.57269287109375e-05, "model_forward_time": 0.024310588836669922, "step": 23414 }, { "epoch": 3.57269287109375e-05, "step": 23414, "training_step_time": 0.11222624778747559 }, { "epoch": 3.572845458984375e-05, "model_forward_time": 0.024678945541381836, "step": 23415 }, { "epoch": 3.572845458984375e-05, "step": 23415, "training_step_time": 0.13232088088989258 }, { "epoch": 3.572998046875e-05, "model_forward_time": 0.024749279022216797, "step": 23416 }, { "epoch": 3.572998046875e-05, "step": 23416, "training_step_time": 0.19741511344909668 }, { "epoch": 3.573150634765625e-05, "model_forward_time": 0.024762630462646484, "step": 23417 }, { "epoch": 3.573150634765625e-05, "step": 23417, "training_step_time": 0.10423970222473145 }, { "epoch": 3.57330322265625e-05, "model_forward_time": 0.02482008934020996, "step": 23418 }, { "epoch": 3.57330322265625e-05, "step": 23418, "training_step_time": 0.10256242752075195 }, { "epoch": 3.573455810546875e-05, "model_forward_time": 0.02541208267211914, "step": 23419 }, { "epoch": 3.573455810546875e-05, "step": 23419, "training_step_time": 0.10630273818969727 }, { "epoch": 3.5736083984375e-05, "grad_norm": 0.1038910299539566, "learning_rate": 1.2585710418798823e-05, "loss": 0.0033, "step": 23420 }, { "epoch": 3.5736083984375e-05, "model_forward_time": 0.024979829788208008, "step": 23420 }, { "epoch": 3.5736083984375e-05, "step": 23420, "training_step_time": 0.10648488998413086 }, { "epoch": 3.573760986328125e-05, "model_forward_time": 0.025211334228515625, "step": 23421 }, { "epoch": 3.573760986328125e-05, "step": 23421, "training_step_time": 0.10393404960632324 }, { "epoch": 3.57391357421875e-05, "model_forward_time": 0.025221586227416992, "step": 23422 }, { "epoch": 3.57391357421875e-05, "step": 23422, "training_step_time": 0.1058950424194336 }, { "epoch": 3.574066162109375e-05, "model_forward_time": 0.025351524353027344, "step": 23423 }, { "epoch": 3.574066162109375e-05, "step": 23423, "training_step_time": 0.10654306411743164 }, { "epoch": 3.57421875e-05, "model_forward_time": 0.025364160537719727, "step": 23424 }, { "epoch": 3.57421875e-05, "step": 23424, "training_step_time": 0.11027932167053223 }, { "epoch": 3.574371337890625e-05, "model_forward_time": 0.02570319175720215, "step": 23425 }, { "epoch": 3.574371337890625e-05, "step": 23425, "training_step_time": 0.11285138130187988 }, { "epoch": 3.57452392578125e-05, "model_forward_time": 0.028299808502197266, "step": 23426 }, { "epoch": 3.57452392578125e-05, "step": 23426, "training_step_time": 0.10859036445617676 }, { "epoch": 3.574676513671875e-05, "model_forward_time": 0.025182485580444336, "step": 23427 }, { "epoch": 3.574676513671875e-05, "step": 23427, "training_step_time": 0.13878870010375977 }, { "epoch": 3.5748291015625e-05, "model_forward_time": 0.02499222755432129, "step": 23428 }, { "epoch": 3.5748291015625e-05, "step": 23428, "training_step_time": 0.18028974533081055 }, { "epoch": 3.574981689453125e-05, "model_forward_time": 0.02451181411743164, "step": 23429 }, { "epoch": 3.574981689453125e-05, "step": 23429, "training_step_time": 0.18407702445983887 }, { "epoch": 3.57513427734375e-05, "grad_norm": 0.07759720087051392, "learning_rate": 1.2549170723409549e-05, "loss": 0.0064, "step": 23430 }, { "epoch": 3.57513427734375e-05, "model_forward_time": 0.02447819709777832, "step": 23430 }, { "epoch": 3.57513427734375e-05, "step": 23430, "training_step_time": 0.1833333969116211 }, { "epoch": 3.575286865234375e-05, "model_forward_time": 0.023698806762695312, "step": 23431 }, { "epoch": 3.575286865234375e-05, "step": 23431, "training_step_time": 0.1671161651611328 }, { "epoch": 3.575439453125e-05, "model_forward_time": 0.024468421936035156, "step": 23432 }, { "epoch": 3.575439453125e-05, "step": 23432, "training_step_time": 0.17778611183166504 }, { "epoch": 3.575592041015625e-05, "model_forward_time": 0.024341583251953125, "step": 23433 }, { "epoch": 3.575592041015625e-05, "step": 23433, "training_step_time": 0.14948439598083496 }, { "epoch": 3.57574462890625e-05, "model_forward_time": 0.024144649505615234, "step": 23434 }, { "epoch": 3.57574462890625e-05, "step": 23434, "training_step_time": 0.22899413108825684 }, { "epoch": 3.575897216796875e-05, "model_forward_time": 0.024518966674804688, "step": 23435 }, { "epoch": 3.575897216796875e-05, "step": 23435, "training_step_time": 0.1322798728942871 }, { "epoch": 3.5760498046875e-05, "model_forward_time": 0.024150609970092773, "step": 23436 }, { "epoch": 3.5760498046875e-05, "step": 23436, "training_step_time": 0.12027430534362793 }, { "epoch": 3.576202392578125e-05, "model_forward_time": 0.024500608444213867, "step": 23437 }, { "epoch": 3.576202392578125e-05, "step": 23437, "training_step_time": 0.15895462036132812 }, { "epoch": 3.57635498046875e-05, "model_forward_time": 0.02450418472290039, "step": 23438 }, { "epoch": 3.57635498046875e-05, "step": 23438, "training_step_time": 0.21920013427734375 }, { "epoch": 3.576507568359375e-05, "model_forward_time": 0.02441573143005371, "step": 23439 }, { "epoch": 3.576507568359375e-05, "step": 23439, "training_step_time": 0.10787749290466309 }, { "epoch": 3.57666015625e-05, "grad_norm": 0.21360817551612854, "learning_rate": 1.251267653430922e-05, "loss": 0.0064, "step": 23440 }, { "epoch": 3.57666015625e-05, "model_forward_time": 0.024637937545776367, "step": 23440 }, { "epoch": 3.57666015625e-05, "step": 23440, "training_step_time": 0.10346484184265137 }, { "epoch": 3.576812744140625e-05, "model_forward_time": 0.025167465209960938, "step": 23441 }, { "epoch": 3.576812744140625e-05, "step": 23441, "training_step_time": 0.10791754722595215 }, { "epoch": 3.57696533203125e-05, "model_forward_time": 0.02518463134765625, "step": 23442 }, { "epoch": 3.57696533203125e-05, "step": 23442, "training_step_time": 0.10646677017211914 }, { "epoch": 3.577117919921875e-05, "model_forward_time": 0.024528026580810547, "step": 23443 }, { "epoch": 3.577117919921875e-05, "step": 23443, "training_step_time": 0.1049647331237793 }, { "epoch": 3.5772705078125e-05, "model_forward_time": 0.025364398956298828, "step": 23444 }, { "epoch": 3.5772705078125e-05, "step": 23444, "training_step_time": 0.10627293586730957 }, { "epoch": 3.577423095703125e-05, "model_forward_time": 0.02756977081298828, "step": 23445 }, { "epoch": 3.577423095703125e-05, "step": 23445, "training_step_time": 0.10701203346252441 }, { "epoch": 3.57757568359375e-05, "model_forward_time": 0.025101423263549805, "step": 23446 }, { "epoch": 3.57757568359375e-05, "step": 23446, "training_step_time": 0.10464072227478027 }, { "epoch": 3.577728271484375e-05, "model_forward_time": 0.025330781936645508, "step": 23447 }, { "epoch": 3.577728271484375e-05, "step": 23447, "training_step_time": 0.1168966293334961 }, { "epoch": 3.577880859375e-05, "model_forward_time": 0.024997949600219727, "step": 23448 }, { "epoch": 3.577880859375e-05, "step": 23448, "training_step_time": 0.1403191089630127 }, { "epoch": 3.578033447265625e-05, "model_forward_time": 0.02475595474243164, "step": 23449 }, { "epoch": 3.578033447265625e-05, "step": 23449, "training_step_time": 0.10434269905090332 }, { "epoch": 3.57818603515625e-05, "grad_norm": 0.2325982004404068, "learning_rate": 1.2476227895841713e-05, "loss": 0.0078, "step": 23450 }, { "epoch": 3.57818603515625e-05, "model_forward_time": 0.02525162696838379, "step": 23450 }, { "epoch": 3.57818603515625e-05, "step": 23450, "training_step_time": 0.1096029281616211 }, { "epoch": 3.578338623046875e-05, "model_forward_time": 0.02516961097717285, "step": 23451 }, { "epoch": 3.578338623046875e-05, "step": 23451, "training_step_time": 0.10805034637451172 }, { "epoch": 3.5784912109375e-05, "model_forward_time": 0.025000333786010742, "step": 23452 }, { "epoch": 3.5784912109375e-05, "step": 23452, "training_step_time": 0.11142277717590332 }, { "epoch": 3.578643798828125e-05, "model_forward_time": 0.025150060653686523, "step": 23453 }, { "epoch": 3.578643798828125e-05, "step": 23453, "training_step_time": 0.1939072608947754 }, { "epoch": 3.57879638671875e-05, "model_forward_time": 0.024639368057250977, "step": 23454 }, { "epoch": 3.57879638671875e-05, "step": 23454, "training_step_time": 0.10454535484313965 }, { "epoch": 3.578948974609375e-05, "model_forward_time": 0.024239540100097656, "step": 23455 }, { "epoch": 3.578948974609375e-05, "step": 23455, "training_step_time": 0.14604640007019043 }, { "epoch": 3.5791015625e-05, "model_forward_time": 0.02385115623474121, "step": 23456 }, { "epoch": 3.5791015625e-05, "step": 23456, "training_step_time": 0.1557013988494873 }, { "epoch": 3.579254150390625e-05, "model_forward_time": 0.024628162384033203, "step": 23457 }, { "epoch": 3.579254150390625e-05, "step": 23457, "training_step_time": 0.11300039291381836 }, { "epoch": 3.57940673828125e-05, "model_forward_time": 0.024349689483642578, "step": 23458 }, { "epoch": 3.57940673828125e-05, "step": 23458, "training_step_time": 0.13253068923950195 }, { "epoch": 3.579559326171875e-05, "model_forward_time": 0.025249719619750977, "step": 23459 }, { "epoch": 3.579559326171875e-05, "step": 23459, "training_step_time": 0.1992473602294922 }, { "epoch": 3.5797119140625e-05, "grad_norm": 0.08661402016878128, "learning_rate": 1.243982485229559e-05, "loss": 0.0049, "step": 23460 }, { "epoch": 3.5797119140625e-05, "model_forward_time": 0.024042844772338867, "step": 23460 }, { "epoch": 3.5797119140625e-05, "step": 23460, "training_step_time": 0.10460567474365234 }, { "epoch": 3.579864501953125e-05, "model_forward_time": 0.02474498748779297, "step": 23461 }, { "epoch": 3.579864501953125e-05, "step": 23461, "training_step_time": 0.10154438018798828 }, { "epoch": 3.58001708984375e-05, "model_forward_time": 0.025541067123413086, "step": 23462 }, { "epoch": 3.58001708984375e-05, "step": 23462, "training_step_time": 0.1037135124206543 }, { "epoch": 3.580169677734375e-05, "model_forward_time": 0.02479100227355957, "step": 23463 }, { "epoch": 3.580169677734375e-05, "step": 23463, "training_step_time": 0.10387372970581055 }, { "epoch": 3.580322265625e-05, "model_forward_time": 0.024901866912841797, "step": 23464 }, { "epoch": 3.580322265625e-05, "step": 23464, "training_step_time": 0.10335445404052734 }, { "epoch": 3.580474853515625e-05, "model_forward_time": 0.025379180908203125, "step": 23465 }, { "epoch": 3.580474853515625e-05, "step": 23465, "training_step_time": 0.10377645492553711 }, { "epoch": 3.58062744140625e-05, "model_forward_time": 0.02531743049621582, "step": 23466 }, { "epoch": 3.58062744140625e-05, "step": 23466, "training_step_time": 0.1050114631652832 }, { "epoch": 3.580780029296875e-05, "model_forward_time": 0.025125503540039062, "step": 23467 }, { "epoch": 3.580780029296875e-05, "step": 23467, "training_step_time": 0.10748982429504395 }, { "epoch": 3.5809326171875e-05, "model_forward_time": 0.024906396865844727, "step": 23468 }, { "epoch": 3.5809326171875e-05, "step": 23468, "training_step_time": 0.10595154762268066 }, { "epoch": 3.581085205078125e-05, "model_forward_time": 0.024980545043945312, "step": 23469 }, { "epoch": 3.581085205078125e-05, "step": 23469, "training_step_time": 0.10785484313964844 }, { "epoch": 3.58123779296875e-05, "grad_norm": 0.09178736060857773, "learning_rate": 1.2403467447903943e-05, "loss": 0.0063, "step": 23470 }, { "epoch": 3.58123779296875e-05, "model_forward_time": 0.02494215965270996, "step": 23470 }, { "epoch": 3.58123779296875e-05, "step": 23470, "training_step_time": 0.10521316528320312 }, { "epoch": 3.581390380859375e-05, "model_forward_time": 0.02504110336303711, "step": 23471 }, { "epoch": 3.581390380859375e-05, "step": 23471, "training_step_time": 0.10410594940185547 }, { "epoch": 3.58154296875e-05, "model_forward_time": 0.02494192123413086, "step": 23472 }, { "epoch": 3.58154296875e-05, "step": 23472, "training_step_time": 0.10920095443725586 }, { "epoch": 3.581695556640625e-05, "model_forward_time": 0.02512049674987793, "step": 23473 }, { "epoch": 3.581695556640625e-05, "step": 23473, "training_step_time": 0.10818171501159668 }, { "epoch": 3.58184814453125e-05, "model_forward_time": 0.025060653686523438, "step": 23474 }, { "epoch": 3.58184814453125e-05, "step": 23474, "training_step_time": 0.10834622383117676 }, { "epoch": 3.582000732421875e-05, "model_forward_time": 0.024729013442993164, "step": 23475 }, { "epoch": 3.582000732421875e-05, "step": 23475, "training_step_time": 0.11076927185058594 }, { "epoch": 3.5821533203125e-05, "model_forward_time": 0.025833606719970703, "step": 23476 }, { "epoch": 3.5821533203125e-05, "step": 23476, "training_step_time": 0.10984015464782715 }, { "epoch": 3.582305908203125e-05, "model_forward_time": 0.02550506591796875, "step": 23477 }, { "epoch": 3.582305908203125e-05, "step": 23477, "training_step_time": 0.14144611358642578 }, { "epoch": 3.58245849609375e-05, "model_forward_time": 0.025098800659179688, "step": 23478 }, { "epoch": 3.58245849609375e-05, "step": 23478, "training_step_time": 0.10920071601867676 }, { "epoch": 3.582611083984375e-05, "model_forward_time": 0.025118112564086914, "step": 23479 }, { "epoch": 3.582611083984375e-05, "step": 23479, "training_step_time": 0.10694289207458496 }, { "epoch": 3.582763671875e-05, "grad_norm": 0.08807004988193512, "learning_rate": 1.2367155726844492e-05, "loss": 0.0036, "step": 23480 }, { "epoch": 3.582763671875e-05, "model_forward_time": 0.02416706085205078, "step": 23480 }, { "epoch": 3.582763671875e-05, "step": 23480, "training_step_time": 0.21194005012512207 }, { "epoch": 3.582916259765625e-05, "model_forward_time": 0.024582862854003906, "step": 23481 }, { "epoch": 3.582916259765625e-05, "step": 23481, "training_step_time": 0.15209341049194336 }, { "epoch": 3.58306884765625e-05, "model_forward_time": 0.024405479431152344, "step": 23482 }, { "epoch": 3.58306884765625e-05, "step": 23482, "training_step_time": 0.14760470390319824 }, { "epoch": 3.583221435546875e-05, "model_forward_time": 0.024424314498901367, "step": 23483 }, { "epoch": 3.583221435546875e-05, "step": 23483, "training_step_time": 0.11039423942565918 }, { "epoch": 3.5833740234375e-05, "model_forward_time": 0.024700641632080078, "step": 23484 }, { "epoch": 3.5833740234375e-05, "step": 23484, "training_step_time": 0.2165203094482422 }, { "epoch": 3.583526611328125e-05, "model_forward_time": 0.024034500122070312, "step": 23485 }, { "epoch": 3.583526611328125e-05, "step": 23485, "training_step_time": 0.11187601089477539 }, { "epoch": 3.58367919921875e-05, "model_forward_time": 0.024262428283691406, "step": 23486 }, { "epoch": 3.58367919921875e-05, "step": 23486, "training_step_time": 0.10226893424987793 }, { "epoch": 3.583831787109375e-05, "model_forward_time": 0.02522110939025879, "step": 23487 }, { "epoch": 3.583831787109375e-05, "step": 23487, "training_step_time": 0.10602307319641113 }, { "epoch": 3.583984375e-05, "model_forward_time": 0.025101184844970703, "step": 23488 }, { "epoch": 3.583984375e-05, "step": 23488, "training_step_time": 0.10498356819152832 }, { "epoch": 3.584136962890625e-05, "model_forward_time": 0.025235652923583984, "step": 23489 }, { "epoch": 3.584136962890625e-05, "step": 23489, "training_step_time": 0.10766887664794922 }, { "epoch": 3.58428955078125e-05, "grad_norm": 0.17897766828536987, "learning_rate": 1.233088973323937e-05, "loss": 0.0079, "step": 23490 }, { "epoch": 3.58428955078125e-05, "model_forward_time": 0.025496959686279297, "step": 23490 }, { "epoch": 3.58428955078125e-05, "step": 23490, "training_step_time": 0.10622096061706543 }, { "epoch": 3.584442138671875e-05, "model_forward_time": 0.02518320083618164, "step": 23491 }, { "epoch": 3.584442138671875e-05, "step": 23491, "training_step_time": 0.10700631141662598 }, { "epoch": 3.5845947265625e-05, "model_forward_time": 0.025218486785888672, "step": 23492 }, { "epoch": 3.5845947265625e-05, "step": 23492, "training_step_time": 0.10589241981506348 }, { "epoch": 3.584747314453125e-05, "model_forward_time": 0.0251157283782959, "step": 23493 }, { "epoch": 3.584747314453125e-05, "step": 23493, "training_step_time": 0.12342405319213867 }, { "epoch": 3.58489990234375e-05, "model_forward_time": 0.025136947631835938, "step": 23494 }, { "epoch": 3.58489990234375e-05, "step": 23494, "training_step_time": 0.13508248329162598 }, { "epoch": 3.585052490234375e-05, "model_forward_time": 0.024733781814575195, "step": 23495 }, { "epoch": 3.585052490234375e-05, "step": 23495, "training_step_time": 0.10801100730895996 }, { "epoch": 3.585205078125e-05, "model_forward_time": 0.02513575553894043, "step": 23496 }, { "epoch": 3.585205078125e-05, "step": 23496, "training_step_time": 0.10839962959289551 }, { "epoch": 3.585357666015625e-05, "model_forward_time": 0.02506279945373535, "step": 23497 }, { "epoch": 3.585357666015625e-05, "step": 23497, "training_step_time": 0.11402606964111328 }, { "epoch": 3.58551025390625e-05, "model_forward_time": 0.02499246597290039, "step": 23498 }, { "epoch": 3.58551025390625e-05, "step": 23498, "training_step_time": 0.10582470893859863 }, { "epoch": 3.585662841796875e-05, "model_forward_time": 0.024924278259277344, "step": 23499 }, { "epoch": 3.585662841796875e-05, "step": 23499, "training_step_time": 0.2000875473022461 }, { "epoch": 3.5858154296875e-05, "grad_norm": 0.07113680988550186, "learning_rate": 1.2294669511155193e-05, "loss": 0.0159, "step": 23500 }, { "epoch": 3.5858154296875e-05, "model_forward_time": 0.024460792541503906, "step": 23500 }, { "epoch": 3.5858154296875e-05, "step": 23500, "training_step_time": 0.1026468276977539 }, { "epoch": 3.585968017578125e-05, "model_forward_time": 0.024387836456298828, "step": 23501 }, { "epoch": 3.585968017578125e-05, "step": 23501, "training_step_time": 0.12257838249206543 }, { "epoch": 3.58612060546875e-05, "model_forward_time": 0.024692058563232422, "step": 23502 }, { "epoch": 3.58612060546875e-05, "step": 23502, "training_step_time": 0.10624456405639648 }, { "epoch": 3.586273193359375e-05, "model_forward_time": 0.0254971981048584, "step": 23503 }, { "epoch": 3.586273193359375e-05, "step": 23503, "training_step_time": 0.12292885780334473 }, { "epoch": 3.58642578125e-05, "model_forward_time": 0.025409221649169922, "step": 23504 }, { "epoch": 3.58642578125e-05, "step": 23504, "training_step_time": 0.10971617698669434 }, { "epoch": 3.586578369140625e-05, "model_forward_time": 0.025597095489501953, "step": 23505 }, { "epoch": 3.586578369140625e-05, "step": 23505, "training_step_time": 0.11999964714050293 }, { "epoch": 3.58673095703125e-05, "model_forward_time": 0.025358915328979492, "step": 23506 }, { "epoch": 3.58673095703125e-05, "step": 23506, "training_step_time": 0.10528063774108887 }, { "epoch": 3.586883544921875e-05, "model_forward_time": 0.02526569366455078, "step": 23507 }, { "epoch": 3.586883544921875e-05, "step": 23507, "training_step_time": 0.1100308895111084 }, { "epoch": 3.5870361328125e-05, "model_forward_time": 0.02522730827331543, "step": 23508 }, { "epoch": 3.5870361328125e-05, "step": 23508, "training_step_time": 0.10607218742370605 }, { "epoch": 3.587188720703125e-05, "model_forward_time": 0.025279760360717773, "step": 23509 }, { "epoch": 3.587188720703125e-05, "step": 23509, "training_step_time": 0.1060476303100586 }, { "epoch": 3.58734130859375e-05, "grad_norm": 0.08744464069604874, "learning_rate": 1.2258495104602924e-05, "loss": 0.0061, "step": 23510 }, { "epoch": 3.58734130859375e-05, "model_forward_time": 0.025003433227539062, "step": 23510 }, { "epoch": 3.58734130859375e-05, "step": 23510, "training_step_time": 0.10390901565551758 }, { "epoch": 3.587493896484375e-05, "model_forward_time": 0.025115251541137695, "step": 23511 }, { "epoch": 3.587493896484375e-05, "step": 23511, "training_step_time": 0.10389494895935059 }, { "epoch": 3.587646484375e-05, "model_forward_time": 0.024961471557617188, "step": 23512 }, { "epoch": 3.587646484375e-05, "step": 23512, "training_step_time": 0.10506272315979004 }, { "epoch": 3.587799072265625e-05, "model_forward_time": 0.02483534812927246, "step": 23513 }, { "epoch": 3.587799072265625e-05, "step": 23513, "training_step_time": 0.11701703071594238 }, { "epoch": 3.58795166015625e-05, "model_forward_time": 0.025045394897460938, "step": 23514 }, { "epoch": 3.58795166015625e-05, "step": 23514, "training_step_time": 0.12827110290527344 }, { "epoch": 3.588104248046875e-05, "model_forward_time": 0.02506256103515625, "step": 23515 }, { "epoch": 3.588104248046875e-05, "step": 23515, "training_step_time": 0.13115167617797852 }, { "epoch": 3.5882568359375e-05, "model_forward_time": 0.02457118034362793, "step": 23516 }, { "epoch": 3.5882568359375e-05, "step": 23516, "training_step_time": 0.12658286094665527 }, { "epoch": 3.588409423828125e-05, "model_forward_time": 0.024909019470214844, "step": 23517 }, { "epoch": 3.588409423828125e-05, "step": 23517, "training_step_time": 0.12047672271728516 }, { "epoch": 3.58856201171875e-05, "model_forward_time": 0.02512335777282715, "step": 23518 }, { "epoch": 3.58856201171875e-05, "step": 23518, "training_step_time": 0.1179051399230957 }, { "epoch": 3.588714599609375e-05, "model_forward_time": 0.0250241756439209, "step": 23519 }, { "epoch": 3.588714599609375e-05, "step": 23519, "training_step_time": 0.11281919479370117 }, { "epoch": 3.5888671875e-05, "grad_norm": 0.5377272367477417, "learning_rate": 1.2222366557537911e-05, "loss": 0.0197, "step": 23520 }, { "epoch": 3.5888671875e-05, "model_forward_time": 0.02539968490600586, "step": 23520 }, { "epoch": 3.5888671875e-05, "step": 23520, "training_step_time": 0.11529254913330078 }, { "epoch": 3.589019775390625e-05, "model_forward_time": 0.026179075241088867, "step": 23521 }, { "epoch": 3.589019775390625e-05, "step": 23521, "training_step_time": 0.11334371566772461 }, { "epoch": 3.58917236328125e-05, "model_forward_time": 0.024983644485473633, "step": 23522 }, { "epoch": 3.58917236328125e-05, "step": 23522, "training_step_time": 0.10957646369934082 }, { "epoch": 3.589324951171875e-05, "model_forward_time": 0.025823354721069336, "step": 23523 }, { "epoch": 3.589324951171875e-05, "step": 23523, "training_step_time": 0.17384934425354004 }, { "epoch": 3.5894775390625e-05, "model_forward_time": 0.024407148361206055, "step": 23524 }, { "epoch": 3.5894775390625e-05, "step": 23524, "training_step_time": 0.10748839378356934 }, { "epoch": 3.589630126953125e-05, "model_forward_time": 0.02473306655883789, "step": 23525 }, { "epoch": 3.589630126953125e-05, "step": 23525, "training_step_time": 0.16157984733581543 }, { "epoch": 3.58978271484375e-05, "model_forward_time": 0.024798154830932617, "step": 23526 }, { "epoch": 3.58978271484375e-05, "step": 23526, "training_step_time": 0.13297271728515625 }, { "epoch": 3.589935302734375e-05, "model_forward_time": 0.024226665496826172, "step": 23527 }, { "epoch": 3.589935302734375e-05, "step": 23527, "training_step_time": 0.12042379379272461 }, { "epoch": 3.590087890625e-05, "model_forward_time": 0.028097867965698242, "step": 23528 }, { "epoch": 3.590087890625e-05, "step": 23528, "training_step_time": 0.13189435005187988 }, { "epoch": 3.590240478515625e-05, "model_forward_time": 0.02470993995666504, "step": 23529 }, { "epoch": 3.590240478515625e-05, "step": 23529, "training_step_time": 0.14176034927368164 }, { "epoch": 3.59039306640625e-05, "grad_norm": 0.14628897607326508, "learning_rate": 1.2186283913859726e-05, "loss": 0.0037, "step": 23530 }, { "epoch": 3.59039306640625e-05, "model_forward_time": 0.02386927604675293, "step": 23530 }, { "epoch": 3.59039306640625e-05, "step": 23530, "training_step_time": 0.20692229270935059 }, { "epoch": 3.590545654296875e-05, "model_forward_time": 0.024326801300048828, "step": 23531 }, { "epoch": 3.590545654296875e-05, "step": 23531, "training_step_time": 0.1141660213470459 }, { "epoch": 3.5906982421875e-05, "model_forward_time": 0.024133682250976562, "step": 23532 }, { "epoch": 3.5906982421875e-05, "step": 23532, "training_step_time": 0.10202169418334961 }, { "epoch": 3.590850830078125e-05, "model_forward_time": 0.02486562728881836, "step": 23533 }, { "epoch": 3.590850830078125e-05, "step": 23533, "training_step_time": 0.10515332221984863 }, { "epoch": 3.59100341796875e-05, "model_forward_time": 0.025341510772705078, "step": 23534 }, { "epoch": 3.59100341796875e-05, "step": 23534, "training_step_time": 0.10917806625366211 }, { "epoch": 3.591156005859375e-05, "model_forward_time": 0.025040388107299805, "step": 23535 }, { "epoch": 3.591156005859375e-05, "step": 23535, "training_step_time": 0.10798788070678711 }, { "epoch": 3.59130859375e-05, "model_forward_time": 0.025020599365234375, "step": 23536 }, { "epoch": 3.59130859375e-05, "step": 23536, "training_step_time": 0.10379958152770996 }, { "epoch": 3.591461181640625e-05, "model_forward_time": 0.02504873275756836, "step": 23537 }, { "epoch": 3.591461181640625e-05, "step": 23537, "training_step_time": 0.10690450668334961 }, { "epoch": 3.59161376953125e-05, "model_forward_time": 0.026689529418945312, "step": 23538 }, { "epoch": 3.59161376953125e-05, "step": 23538, "training_step_time": 0.11020612716674805 }, { "epoch": 3.591766357421875e-05, "model_forward_time": 0.0250241756439209, "step": 23539 }, { "epoch": 3.591766357421875e-05, "step": 23539, "training_step_time": 0.11072087287902832 }, { "epoch": 3.5919189453125e-05, "grad_norm": 0.14447247982025146, "learning_rate": 1.2150247217412186e-05, "loss": 0.0087, "step": 23540 }, { "epoch": 3.5919189453125e-05, "model_forward_time": 0.024659156799316406, "step": 23540 }, { "epoch": 3.5919189453125e-05, "step": 23540, "training_step_time": 0.1290600299835205 }, { "epoch": 3.592071533203125e-05, "model_forward_time": 0.024895429611206055, "step": 23541 }, { "epoch": 3.592071533203125e-05, "step": 23541, "training_step_time": 0.10946512222290039 }, { "epoch": 3.59222412109375e-05, "model_forward_time": 0.025244951248168945, "step": 23542 }, { "epoch": 3.59222412109375e-05, "step": 23542, "training_step_time": 0.1135261058807373 }, { "epoch": 3.592376708984375e-05, "model_forward_time": 0.025615930557250977, "step": 23543 }, { "epoch": 3.592376708984375e-05, "step": 23543, "training_step_time": 0.11053943634033203 }, { "epoch": 3.592529296875e-05, "model_forward_time": 0.02485346794128418, "step": 23544 }, { "epoch": 3.592529296875e-05, "step": 23544, "training_step_time": 0.1028134822845459 }, { "epoch": 3.592681884765625e-05, "model_forward_time": 0.025249958038330078, "step": 23545 }, { "epoch": 3.592681884765625e-05, "step": 23545, "training_step_time": 0.19267773628234863 }, { "epoch": 3.59283447265625e-05, "model_forward_time": 0.024204254150390625, "step": 23546 }, { "epoch": 3.59283447265625e-05, "step": 23546, "training_step_time": 0.10150718688964844 }, { "epoch": 3.592987060546875e-05, "model_forward_time": 0.024008750915527344, "step": 23547 }, { "epoch": 3.592987060546875e-05, "step": 23547, "training_step_time": 0.10187053680419922 }, { "epoch": 3.5931396484375e-05, "model_forward_time": 0.024293184280395508, "step": 23548 }, { "epoch": 3.5931396484375e-05, "step": 23548, "training_step_time": 0.15435409545898438 }, { "epoch": 3.593292236328125e-05, "model_forward_time": 0.025104999542236328, "step": 23549 }, { "epoch": 3.593292236328125e-05, "step": 23549, "training_step_time": 0.14942026138305664 }, { "epoch": 3.59344482421875e-05, "grad_norm": 0.1571156084537506, "learning_rate": 1.2114256511983274e-05, "loss": 0.0042, "step": 23550 }, { "epoch": 3.59344482421875e-05, "model_forward_time": 0.024634361267089844, "step": 23550 }, { "epoch": 3.59344482421875e-05, "step": 23550, "training_step_time": 0.14546465873718262 }, { "epoch": 3.593597412109375e-05, "model_forward_time": 0.023958206176757812, "step": 23551 }, { "epoch": 3.593597412109375e-05, "step": 23551, "training_step_time": 0.13865375518798828 }, { "epoch": 3.59375e-05, "model_forward_time": 0.024143695831298828, "step": 23552 }, { "epoch": 3.59375e-05, "step": 23552, "training_step_time": 0.19643950462341309 }, { "epoch": 3.593902587890625e-05, "model_forward_time": 0.024049997329711914, "step": 23553 }, { "epoch": 3.593902587890625e-05, "step": 23553, "training_step_time": 0.10597777366638184 }, { "epoch": 3.59405517578125e-05, "model_forward_time": 0.023816585540771484, "step": 23554 }, { "epoch": 3.59405517578125e-05, "step": 23554, "training_step_time": 0.10827875137329102 }, { "epoch": 3.594207763671875e-05, "model_forward_time": 0.024288177490234375, "step": 23555 }, { "epoch": 3.594207763671875e-05, "step": 23555, "training_step_time": 0.10856914520263672 }, { "epoch": 3.5943603515625e-05, "model_forward_time": 0.023801565170288086, "step": 23556 }, { "epoch": 3.5943603515625e-05, "step": 23556, "training_step_time": 0.10976123809814453 }, { "epoch": 3.594512939453125e-05, "model_forward_time": 0.028114795684814453, "step": 23557 }, { "epoch": 3.594512939453125e-05, "step": 23557, "training_step_time": 0.10836434364318848 }, { "epoch": 3.59466552734375e-05, "model_forward_time": 0.026053428649902344, "step": 23558 }, { "epoch": 3.59466552734375e-05, "step": 23558, "training_step_time": 0.11066627502441406 }, { "epoch": 3.594818115234375e-05, "model_forward_time": 0.024656057357788086, "step": 23559 }, { "epoch": 3.594818115234375e-05, "step": 23559, "training_step_time": 0.10505127906799316 }, { "epoch": 3.594970703125e-05, "grad_norm": 0.1672380417585373, "learning_rate": 1.2078311841305084e-05, "loss": 0.0049, "step": 23560 }, { "epoch": 3.594970703125e-05, "model_forward_time": 0.0247652530670166, "step": 23560 }, { "epoch": 3.594970703125e-05, "step": 23560, "training_step_time": 0.10628533363342285 }, { "epoch": 3.595123291015625e-05, "model_forward_time": 0.024989843368530273, "step": 23561 }, { "epoch": 3.595123291015625e-05, "step": 23561, "training_step_time": 0.10596609115600586 }, { "epoch": 3.59527587890625e-05, "model_forward_time": 0.02521538734436035, "step": 23562 }, { "epoch": 3.59527587890625e-05, "step": 23562, "training_step_time": 0.10956668853759766 }, { "epoch": 3.595428466796875e-05, "model_forward_time": 0.025239229202270508, "step": 23563 }, { "epoch": 3.595428466796875e-05, "step": 23563, "training_step_time": 0.10869622230529785 }, { "epoch": 3.5955810546875e-05, "model_forward_time": 0.024997234344482422, "step": 23564 }, { "epoch": 3.5955810546875e-05, "step": 23564, "training_step_time": 0.11023736000061035 }, { "epoch": 3.595733642578125e-05, "model_forward_time": 0.024806737899780273, "step": 23565 }, { "epoch": 3.595733642578125e-05, "step": 23565, "training_step_time": 0.10726737976074219 }, { "epoch": 3.59588623046875e-05, "model_forward_time": 0.024991989135742188, "step": 23566 }, { "epoch": 3.59588623046875e-05, "step": 23566, "training_step_time": 0.10702204704284668 }, { "epoch": 3.596038818359375e-05, "model_forward_time": 0.025324106216430664, "step": 23567 }, { "epoch": 3.596038818359375e-05, "step": 23567, "training_step_time": 0.10797786712646484 }, { "epoch": 3.59619140625e-05, "model_forward_time": 0.0250089168548584, "step": 23568 }, { "epoch": 3.59619140625e-05, "step": 23568, "training_step_time": 0.10377168655395508 }, { "epoch": 3.596343994140625e-05, "model_forward_time": 0.024726152420043945, "step": 23569 }, { "epoch": 3.596343994140625e-05, "step": 23569, "training_step_time": 0.10789346694946289 }, { "epoch": 3.59649658203125e-05, "grad_norm": 0.09695081412792206, "learning_rate": 1.2042413249053796e-05, "loss": 0.0041, "step": 23570 }, { "epoch": 3.59649658203125e-05, "model_forward_time": 0.024789094924926758, "step": 23570 }, { "epoch": 3.59649658203125e-05, "step": 23570, "training_step_time": 0.18400168418884277 }, { "epoch": 3.596649169921875e-05, "model_forward_time": 0.02424025535583496, "step": 23571 }, { "epoch": 3.596649169921875e-05, "step": 23571, "training_step_time": 0.23163127899169922 }, { "epoch": 3.5968017578125e-05, "model_forward_time": 0.024302244186401367, "step": 23572 }, { "epoch": 3.5968017578125e-05, "step": 23572, "training_step_time": 0.15258359909057617 }, { "epoch": 3.596954345703125e-05, "model_forward_time": 0.023998022079467773, "step": 23573 }, { "epoch": 3.596954345703125e-05, "step": 23573, "training_step_time": 0.20294570922851562 }, { "epoch": 3.59710693359375e-05, "model_forward_time": 0.024451494216918945, "step": 23574 }, { "epoch": 3.59710693359375e-05, "step": 23574, "training_step_time": 0.17335939407348633 }, { "epoch": 3.597259521484375e-05, "model_forward_time": 0.024142742156982422, "step": 23575 }, { "epoch": 3.597259521484375e-05, "step": 23575, "training_step_time": 0.12632513046264648 }, { "epoch": 3.597412109375e-05, "model_forward_time": 0.023948192596435547, "step": 23576 }, { "epoch": 3.597412109375e-05, "step": 23576, "training_step_time": 0.10339212417602539 }, { "epoch": 3.597564697265625e-05, "model_forward_time": 0.025509119033813477, "step": 23577 }, { "epoch": 3.597564697265625e-05, "step": 23577, "training_step_time": 0.10282540321350098 }, { "epoch": 3.59771728515625e-05, "model_forward_time": 0.024596691131591797, "step": 23578 }, { "epoch": 3.59771728515625e-05, "step": 23578, "training_step_time": 0.10655403137207031 }, { "epoch": 3.597869873046875e-05, "model_forward_time": 0.024942398071289062, "step": 23579 }, { "epoch": 3.597869873046875e-05, "step": 23579, "training_step_time": 0.10764145851135254 }, { "epoch": 3.5980224609375e-05, "grad_norm": 0.20490725338459015, "learning_rate": 1.2006560778849578e-05, "loss": 0.0067, "step": 23580 }, { "epoch": 3.5980224609375e-05, "model_forward_time": 0.027492046356201172, "step": 23580 }, { "epoch": 3.5980224609375e-05, "step": 23580, "training_step_time": 0.10685038566589355 }, { "epoch": 3.598175048828125e-05, "model_forward_time": 0.024816036224365234, "step": 23581 }, { "epoch": 3.598175048828125e-05, "step": 23581, "training_step_time": 0.12947964668273926 }, { "epoch": 3.59832763671875e-05, "model_forward_time": 0.024666309356689453, "step": 23582 }, { "epoch": 3.59832763671875e-05, "step": 23582, "training_step_time": 0.1424112319946289 }, { "epoch": 3.598480224609375e-05, "model_forward_time": 0.024393796920776367, "step": 23583 }, { "epoch": 3.598480224609375e-05, "step": 23583, "training_step_time": 0.1782221794128418 }, { "epoch": 3.5986328125e-05, "model_forward_time": 0.025555133819580078, "step": 23584 }, { "epoch": 3.5986328125e-05, "step": 23584, "training_step_time": 0.146806001663208 }, { "epoch": 3.598785400390625e-05, "model_forward_time": 0.024281024932861328, "step": 23585 }, { "epoch": 3.598785400390625e-05, "step": 23585, "training_step_time": 0.12274360656738281 }, { "epoch": 3.59893798828125e-05, "model_forward_time": 0.02421736717224121, "step": 23586 }, { "epoch": 3.59893798828125e-05, "step": 23586, "training_step_time": 0.19606637954711914 }, { "epoch": 3.599090576171875e-05, "model_forward_time": 0.024244308471679688, "step": 23587 }, { "epoch": 3.599090576171875e-05, "step": 23587, "training_step_time": 0.11588263511657715 }, { "epoch": 3.5992431640625e-05, "model_forward_time": 0.024840593338012695, "step": 23588 }, { "epoch": 3.5992431640625e-05, "step": 23588, "training_step_time": 0.19193744659423828 }, { "epoch": 3.599395751953125e-05, "model_forward_time": 0.025223493576049805, "step": 23589 }, { "epoch": 3.599395751953125e-05, "step": 23589, "training_step_time": 0.10455155372619629 }, { "epoch": 3.59954833984375e-05, "grad_norm": 0.09416133165359497, "learning_rate": 1.1970754474256563e-05, "loss": 0.0151, "step": 23590 }, { "epoch": 3.59954833984375e-05, "model_forward_time": 0.025414466857910156, "step": 23590 }, { "epoch": 3.59954833984375e-05, "step": 23590, "training_step_time": 0.10431718826293945 }, { "epoch": 3.599700927734375e-05, "model_forward_time": 0.024834156036376953, "step": 23591 }, { "epoch": 3.599700927734375e-05, "step": 23591, "training_step_time": 0.14815783500671387 }, { "epoch": 3.599853515625e-05, "model_forward_time": 0.025567054748535156, "step": 23592 }, { "epoch": 3.599853515625e-05, "step": 23592, "training_step_time": 0.11618757247924805 }, { "epoch": 3.600006103515625e-05, "model_forward_time": 0.02521657943725586, "step": 23593 }, { "epoch": 3.600006103515625e-05, "step": 23593, "training_step_time": 0.11089920997619629 }, { "epoch": 3.60015869140625e-05, "model_forward_time": 0.02496957778930664, "step": 23594 }, { "epoch": 3.60015869140625e-05, "step": 23594, "training_step_time": 0.11283302307128906 }, { "epoch": 3.600311279296875e-05, "model_forward_time": 0.02503204345703125, "step": 23595 }, { "epoch": 3.600311279296875e-05, "step": 23595, "training_step_time": 0.11581754684448242 }, { "epoch": 3.6004638671875e-05, "model_forward_time": 0.025567293167114258, "step": 23596 }, { "epoch": 3.6004638671875e-05, "step": 23596, "training_step_time": 0.1293470859527588 }, { "epoch": 3.600616455078125e-05, "model_forward_time": 0.025957107543945312, "step": 23597 }, { "epoch": 3.600616455078125e-05, "step": 23597, "training_step_time": 0.11259126663208008 }, { "epoch": 3.60076904296875e-05, "model_forward_time": 0.02510237693786621, "step": 23598 }, { "epoch": 3.60076904296875e-05, "step": 23598, "training_step_time": 0.1048727035522461 }, { "epoch": 3.600921630859375e-05, "model_forward_time": 0.02497577667236328, "step": 23599 }, { "epoch": 3.600921630859375e-05, "step": 23599, "training_step_time": 0.1073920726776123 }, { "epoch": 3.60107421875e-05, "grad_norm": 0.11431095004081726, "learning_rate": 1.1934994378782772e-05, "loss": 0.0076, "step": 23600 }, { "epoch": 3.60107421875e-05, "model_forward_time": 0.02571415901184082, "step": 23600 }, { "epoch": 3.60107421875e-05, "step": 23600, "training_step_time": 0.10950541496276855 }, { "epoch": 3.601226806640625e-05, "model_forward_time": 0.025052309036254883, "step": 23601 }, { "epoch": 3.601226806640625e-05, "step": 23601, "training_step_time": 0.10416293144226074 }, { "epoch": 3.60137939453125e-05, "model_forward_time": 0.025397539138793945, "step": 23602 }, { "epoch": 3.60137939453125e-05, "step": 23602, "training_step_time": 0.10831522941589355 }, { "epoch": 3.601531982421875e-05, "model_forward_time": 0.024766921997070312, "step": 23603 }, { "epoch": 3.601531982421875e-05, "step": 23603, "training_step_time": 0.10491561889648438 }, { "epoch": 3.6016845703125e-05, "model_forward_time": 0.025013446807861328, "step": 23604 }, { "epoch": 3.6016845703125e-05, "step": 23604, "training_step_time": 0.1061406135559082 }, { "epoch": 3.601837158203125e-05, "model_forward_time": 0.024492740631103516, "step": 23605 }, { "epoch": 3.601837158203125e-05, "step": 23605, "training_step_time": 0.10788440704345703 }, { "epoch": 3.60198974609375e-05, "model_forward_time": 0.02505970001220703, "step": 23606 }, { "epoch": 3.60198974609375e-05, "step": 23606, "training_step_time": 0.10908031463623047 }, { "epoch": 3.602142333984375e-05, "model_forward_time": 0.024733543395996094, "step": 23607 }, { "epoch": 3.602142333984375e-05, "step": 23607, "training_step_time": 0.10606050491333008 }, { "epoch": 3.602294921875e-05, "model_forward_time": 0.025064468383789062, "step": 23608 }, { "epoch": 3.602294921875e-05, "step": 23608, "training_step_time": 0.10462021827697754 }, { "epoch": 3.602447509765625e-05, "model_forward_time": 0.025264263153076172, "step": 23609 }, { "epoch": 3.602447509765625e-05, "step": 23609, "training_step_time": 0.10648608207702637 }, { "epoch": 3.60260009765625e-05, "grad_norm": 0.10195007920265198, "learning_rate": 1.1899280535880119e-05, "loss": 0.0085, "step": 23610 }, { "epoch": 3.60260009765625e-05, "model_forward_time": 0.024643898010253906, "step": 23610 }, { "epoch": 3.60260009765625e-05, "step": 23610, "training_step_time": 0.10398316383361816 }, { "epoch": 3.602752685546875e-05, "model_forward_time": 0.0250244140625, "step": 23611 }, { "epoch": 3.602752685546875e-05, "step": 23611, "training_step_time": 0.10622048377990723 }, { "epoch": 3.6029052734375e-05, "model_forward_time": 0.024807453155517578, "step": 23612 }, { "epoch": 3.6029052734375e-05, "step": 23612, "training_step_time": 0.10375165939331055 }, { "epoch": 3.603057861328125e-05, "model_forward_time": 0.02509927749633789, "step": 23613 }, { "epoch": 3.603057861328125e-05, "step": 23613, "training_step_time": 0.10480713844299316 }, { "epoch": 3.60321044921875e-05, "model_forward_time": 0.02498340606689453, "step": 23614 }, { "epoch": 3.60321044921875e-05, "step": 23614, "training_step_time": 0.10484027862548828 }, { "epoch": 3.603363037109375e-05, "model_forward_time": 0.025099754333496094, "step": 23615 }, { "epoch": 3.603363037109375e-05, "step": 23615, "training_step_time": 0.18629765510559082 }, { "epoch": 3.603515625e-05, "model_forward_time": 0.02417445182800293, "step": 23616 }, { "epoch": 3.603515625e-05, "step": 23616, "training_step_time": 0.20795345306396484 }, { "epoch": 3.603668212890625e-05, "model_forward_time": 0.024274349212646484, "step": 23617 }, { "epoch": 3.603668212890625e-05, "step": 23617, "training_step_time": 0.2086319923400879 }, { "epoch": 3.60382080078125e-05, "model_forward_time": 0.024445056915283203, "step": 23618 }, { "epoch": 3.60382080078125e-05, "step": 23618, "training_step_time": 0.18890881538391113 }, { "epoch": 3.603973388671875e-05, "model_forward_time": 0.024297237396240234, "step": 23619 }, { "epoch": 3.603973388671875e-05, "step": 23619, "training_step_time": 0.19274091720581055 }, { "epoch": 3.6041259765625e-05, "grad_norm": 0.14618656039237976, "learning_rate": 1.1863612988944267e-05, "loss": 0.0042, "step": 23620 }, { "epoch": 3.6041259765625e-05, "model_forward_time": 0.02428579330444336, "step": 23620 }, { "epoch": 3.6041259765625e-05, "step": 23620, "training_step_time": 0.10942697525024414 }, { "epoch": 3.604278564453125e-05, "model_forward_time": 0.024430513381958008, "step": 23621 }, { "epoch": 3.604278564453125e-05, "step": 23621, "training_step_time": 0.10673141479492188 }, { "epoch": 3.60443115234375e-05, "model_forward_time": 0.024926185607910156, "step": 23622 }, { "epoch": 3.60443115234375e-05, "step": 23622, "training_step_time": 0.1098170280456543 }, { "epoch": 3.604583740234375e-05, "model_forward_time": 0.02496647834777832, "step": 23623 }, { "epoch": 3.604583740234375e-05, "step": 23623, "training_step_time": 0.1047065258026123 }, { "epoch": 3.604736328125e-05, "model_forward_time": 0.02501058578491211, "step": 23624 }, { "epoch": 3.604736328125e-05, "step": 23624, "training_step_time": 0.11269998550415039 }, { "epoch": 3.604888916015625e-05, "model_forward_time": 0.025124788284301758, "step": 23625 }, { "epoch": 3.604888916015625e-05, "step": 23625, "training_step_time": 0.10673141479492188 }, { "epoch": 3.60504150390625e-05, "model_forward_time": 0.02520155906677246, "step": 23626 }, { "epoch": 3.60504150390625e-05, "step": 23626, "training_step_time": 0.10758709907531738 }, { "epoch": 3.605194091796875e-05, "model_forward_time": 0.02508234977722168, "step": 23627 }, { "epoch": 3.605194091796875e-05, "step": 23627, "training_step_time": 0.19153094291687012 }, { "epoch": 3.6053466796875e-05, "model_forward_time": 0.024097204208374023, "step": 23628 }, { "epoch": 3.6053466796875e-05, "step": 23628, "training_step_time": 0.1426694393157959 }, { "epoch": 3.605499267578125e-05, "model_forward_time": 0.024979114532470703, "step": 23629 }, { "epoch": 3.605499267578125e-05, "step": 23629, "training_step_time": 0.10771560668945312 }, { "epoch": 3.60565185546875e-05, "grad_norm": 0.05361221730709076, "learning_rate": 1.1827991781314667e-05, "loss": 0.0036, "step": 23630 }, { "epoch": 3.60565185546875e-05, "model_forward_time": 0.026069164276123047, "step": 23630 }, { "epoch": 3.60565185546875e-05, "step": 23630, "training_step_time": 0.1053464412689209 }, { "epoch": 3.605804443359375e-05, "model_forward_time": 0.0251009464263916, "step": 23631 }, { "epoch": 3.605804443359375e-05, "step": 23631, "training_step_time": 0.1287093162536621 }, { "epoch": 3.60595703125e-05, "model_forward_time": 0.0249478816986084, "step": 23632 }, { "epoch": 3.60595703125e-05, "step": 23632, "training_step_time": 0.10561251640319824 }, { "epoch": 3.606109619140625e-05, "model_forward_time": 0.024882078170776367, "step": 23633 }, { "epoch": 3.606109619140625e-05, "step": 23633, "training_step_time": 0.12982773780822754 }, { "epoch": 3.60626220703125e-05, "model_forward_time": 0.02492380142211914, "step": 23634 }, { "epoch": 3.60626220703125e-05, "step": 23634, "training_step_time": 0.11460661888122559 }, { "epoch": 3.606414794921875e-05, "model_forward_time": 0.02471637725830078, "step": 23635 }, { "epoch": 3.606414794921875e-05, "step": 23635, "training_step_time": 0.1020212173461914 }, { "epoch": 3.6065673828125e-05, "model_forward_time": 0.025141239166259766, "step": 23636 }, { "epoch": 3.6065673828125e-05, "step": 23636, "training_step_time": 0.10455560684204102 }, { "epoch": 3.606719970703125e-05, "model_forward_time": 0.02605724334716797, "step": 23637 }, { "epoch": 3.606719970703125e-05, "step": 23637, "training_step_time": 0.1095585823059082 }, { "epoch": 3.60687255859375e-05, "model_forward_time": 0.025331974029541016, "step": 23638 }, { "epoch": 3.60687255859375e-05, "step": 23638, "training_step_time": 0.11555147171020508 }, { "epoch": 3.607025146484375e-05, "model_forward_time": 0.024937152862548828, "step": 23639 }, { "epoch": 3.607025146484375e-05, "step": 23639, "training_step_time": 0.11928367614746094 }, { "epoch": 3.607177734375e-05, "grad_norm": 0.09311248362064362, "learning_rate": 1.1792416956274444e-05, "loss": 0.0103, "step": 23640 }, { "epoch": 3.607177734375e-05, "model_forward_time": 0.025140047073364258, "step": 23640 }, { "epoch": 3.607177734375e-05, "step": 23640, "training_step_time": 0.21143865585327148 }, { "epoch": 3.607330322265625e-05, "model_forward_time": 0.024616003036499023, "step": 23641 }, { "epoch": 3.607330322265625e-05, "step": 23641, "training_step_time": 0.12924838066101074 }, { "epoch": 3.60748291015625e-05, "model_forward_time": 0.024165868759155273, "step": 23642 }, { "epoch": 3.60748291015625e-05, "step": 23642, "training_step_time": 0.10586285591125488 }, { "epoch": 3.607635498046875e-05, "model_forward_time": 0.025098323822021484, "step": 23643 }, { "epoch": 3.607635498046875e-05, "step": 23643, "training_step_time": 0.1102759838104248 }, { "epoch": 3.6077880859375e-05, "model_forward_time": 0.025369644165039062, "step": 23644 }, { "epoch": 3.6077880859375e-05, "step": 23644, "training_step_time": 0.10628628730773926 }, { "epoch": 3.607940673828125e-05, "model_forward_time": 0.02664661407470703, "step": 23645 }, { "epoch": 3.607940673828125e-05, "step": 23645, "training_step_time": 0.18776988983154297 }, { "epoch": 3.60809326171875e-05, "model_forward_time": 0.024267911911010742, "step": 23646 }, { "epoch": 3.60809326171875e-05, "step": 23646, "training_step_time": 0.20703840255737305 }, { "epoch": 3.608245849609375e-05, "model_forward_time": 0.02430582046508789, "step": 23647 }, { "epoch": 3.608245849609375e-05, "step": 23647, "training_step_time": 0.19942545890808105 }, { "epoch": 3.6083984375e-05, "model_forward_time": 0.02398061752319336, "step": 23648 }, { "epoch": 3.6083984375e-05, "step": 23648, "training_step_time": 0.1999495029449463 }, { "epoch": 3.608551025390625e-05, "model_forward_time": 0.02431631088256836, "step": 23649 }, { "epoch": 3.608551025390625e-05, "step": 23649, "training_step_time": 0.1857318878173828 }, { "epoch": 3.60870361328125e-05, "grad_norm": 0.18105536699295044, "learning_rate": 1.1756888557050355e-05, "loss": 0.0058, "step": 23650 }, { "epoch": 3.60870361328125e-05, "model_forward_time": 0.024442672729492188, "step": 23650 }, { "epoch": 3.60870361328125e-05, "step": 23650, "training_step_time": 0.1774148941040039 }, { "epoch": 3.608856201171875e-05, "model_forward_time": 0.024475812911987305, "step": 23651 }, { "epoch": 3.608856201171875e-05, "step": 23651, "training_step_time": 0.16650772094726562 }, { "epoch": 3.6090087890625e-05, "model_forward_time": 0.024212360382080078, "step": 23652 }, { "epoch": 3.6090087890625e-05, "step": 23652, "training_step_time": 0.10882425308227539 }, { "epoch": 3.609161376953125e-05, "model_forward_time": 0.02420783042907715, "step": 23653 }, { "epoch": 3.609161376953125e-05, "step": 23653, "training_step_time": 0.10209846496582031 }, { "epoch": 3.60931396484375e-05, "model_forward_time": 0.024633169174194336, "step": 23654 }, { "epoch": 3.60931396484375e-05, "step": 23654, "training_step_time": 0.10369515419006348 }, { "epoch": 3.609466552734375e-05, "model_forward_time": 0.024765968322753906, "step": 23655 }, { "epoch": 3.609466552734375e-05, "step": 23655, "training_step_time": 0.10445833206176758 }, { "epoch": 3.609619140625e-05, "model_forward_time": 0.02559375762939453, "step": 23656 }, { "epoch": 3.609619140625e-05, "step": 23656, "training_step_time": 0.10603451728820801 }, { "epoch": 3.609771728515625e-05, "model_forward_time": 0.026569604873657227, "step": 23657 }, { "epoch": 3.609771728515625e-05, "step": 23657, "training_step_time": 0.19005155563354492 }, { "epoch": 3.60992431640625e-05, "model_forward_time": 0.02618098258972168, "step": 23658 }, { "epoch": 3.60992431640625e-05, "step": 23658, "training_step_time": 0.1537942886352539 }, { "epoch": 3.610076904296875e-05, "model_forward_time": 0.024334430694580078, "step": 23659 }, { "epoch": 3.610076904296875e-05, "step": 23659, "training_step_time": 0.13201189041137695 }, { "epoch": 3.6102294921875e-05, "grad_norm": 0.12258859723806381, "learning_rate": 1.1721406626812764e-05, "loss": 0.0049, "step": 23660 }, { "epoch": 3.6102294921875e-05, "model_forward_time": 0.024624347686767578, "step": 23660 }, { "epoch": 3.6102294921875e-05, "step": 23660, "training_step_time": 0.12688970565795898 }, { "epoch": 3.610382080078125e-05, "model_forward_time": 0.024461984634399414, "step": 23661 }, { "epoch": 3.610382080078125e-05, "step": 23661, "training_step_time": 0.21181607246398926 }, { "epoch": 3.61053466796875e-05, "model_forward_time": 0.024272680282592773, "step": 23662 }, { "epoch": 3.61053466796875e-05, "step": 23662, "training_step_time": 0.11909985542297363 }, { "epoch": 3.610687255859375e-05, "model_forward_time": 0.024024009704589844, "step": 23663 }, { "epoch": 3.610687255859375e-05, "step": 23663, "training_step_time": 0.10667204856872559 }, { "epoch": 3.61083984375e-05, "model_forward_time": 0.025383949279785156, "step": 23664 }, { "epoch": 3.61083984375e-05, "step": 23664, "training_step_time": 0.10444784164428711 }, { "epoch": 3.610992431640625e-05, "model_forward_time": 0.024931669235229492, "step": 23665 }, { "epoch": 3.610992431640625e-05, "step": 23665, "training_step_time": 0.1108388900756836 }, { "epoch": 3.61114501953125e-05, "model_forward_time": 0.024664640426635742, "step": 23666 }, { "epoch": 3.61114501953125e-05, "step": 23666, "training_step_time": 0.10907602310180664 }, { "epoch": 3.611297607421875e-05, "model_forward_time": 0.026160717010498047, "step": 23667 }, { "epoch": 3.611297607421875e-05, "step": 23667, "training_step_time": 0.10678267478942871 }, { "epoch": 3.6114501953125e-05, "model_forward_time": 0.023955821990966797, "step": 23668 }, { "epoch": 3.6114501953125e-05, "step": 23668, "training_step_time": 0.1043848991394043 }, { "epoch": 3.611602783203125e-05, "model_forward_time": 0.02502608299255371, "step": 23669 }, { "epoch": 3.611602783203125e-05, "step": 23669, "training_step_time": 0.10542917251586914 }, { "epoch": 3.61175537109375e-05, "grad_norm": 0.11462750285863876, "learning_rate": 1.1685971208675539e-05, "loss": 0.0065, "step": 23670 }, { "epoch": 3.61175537109375e-05, "model_forward_time": 0.0245058536529541, "step": 23670 }, { "epoch": 3.61175537109375e-05, "step": 23670, "training_step_time": 0.18134307861328125 }, { "epoch": 3.611907958984375e-05, "model_forward_time": 0.02442145347595215, "step": 23671 }, { "epoch": 3.611907958984375e-05, "step": 23671, "training_step_time": 0.1262824535369873 }, { "epoch": 3.612060546875e-05, "model_forward_time": 0.024320125579833984, "step": 23672 }, { "epoch": 3.612060546875e-05, "step": 23672, "training_step_time": 0.10498189926147461 }, { "epoch": 3.612213134765625e-05, "model_forward_time": 0.024841785430908203, "step": 23673 }, { "epoch": 3.612213134765625e-05, "step": 23673, "training_step_time": 0.12695598602294922 }, { "epoch": 3.61236572265625e-05, "model_forward_time": 0.024845600128173828, "step": 23674 }, { "epoch": 3.61236572265625e-05, "step": 23674, "training_step_time": 0.12056541442871094 }, { "epoch": 3.612518310546875e-05, "model_forward_time": 0.024847745895385742, "step": 23675 }, { "epoch": 3.612518310546875e-05, "step": 23675, "training_step_time": 0.1084890365600586 }, { "epoch": 3.6126708984375e-05, "model_forward_time": 0.025242328643798828, "step": 23676 }, { "epoch": 3.6126708984375e-05, "step": 23676, "training_step_time": 0.11405134201049805 }, { "epoch": 3.612823486328125e-05, "model_forward_time": 0.02510547637939453, "step": 23677 }, { "epoch": 3.612823486328125e-05, "step": 23677, "training_step_time": 0.11397027969360352 }, { "epoch": 3.61297607421875e-05, "model_forward_time": 0.025266170501708984, "step": 23678 }, { "epoch": 3.61297607421875e-05, "step": 23678, "training_step_time": 0.10998272895812988 }, { "epoch": 3.613128662109375e-05, "model_forward_time": 0.025412797927856445, "step": 23679 }, { "epoch": 3.613128662109375e-05, "step": 23679, "training_step_time": 0.11212825775146484 }, { "epoch": 3.61328125e-05, "grad_norm": 0.06080739200115204, "learning_rate": 1.1650582345696088e-05, "loss": 0.0114, "step": 23680 }, { "epoch": 3.61328125e-05, "model_forward_time": 0.025837421417236328, "step": 23680 }, { "epoch": 3.61328125e-05, "step": 23680, "training_step_time": 0.10612130165100098 }, { "epoch": 3.613433837890625e-05, "model_forward_time": 0.025225400924682617, "step": 23681 }, { "epoch": 3.613433837890625e-05, "step": 23681, "training_step_time": 0.10998797416687012 }, { "epoch": 3.61358642578125e-05, "model_forward_time": 0.02528214454650879, "step": 23682 }, { "epoch": 3.61358642578125e-05, "step": 23682, "training_step_time": 0.11130142211914062 }, { "epoch": 3.613739013671875e-05, "model_forward_time": 0.024903059005737305, "step": 23683 }, { "epoch": 3.613739013671875e-05, "step": 23683, "training_step_time": 0.11158251762390137 }, { "epoch": 3.6138916015625e-05, "model_forward_time": 0.025099515914916992, "step": 23684 }, { "epoch": 3.6138916015625e-05, "step": 23684, "training_step_time": 0.11320805549621582 }, { "epoch": 3.614044189453125e-05, "model_forward_time": 0.025210857391357422, "step": 23685 }, { "epoch": 3.614044189453125e-05, "step": 23685, "training_step_time": 0.10645222663879395 }, { "epoch": 3.61419677734375e-05, "model_forward_time": 0.02637791633605957, "step": 23686 }, { "epoch": 3.61419677734375e-05, "step": 23686, "training_step_time": 0.1257326602935791 }, { "epoch": 3.614349365234375e-05, "model_forward_time": 0.025270700454711914, "step": 23687 }, { "epoch": 3.614349365234375e-05, "step": 23687, "training_step_time": 0.11284756660461426 }, { "epoch": 3.614501953125e-05, "model_forward_time": 0.025583267211914062, "step": 23688 }, { "epoch": 3.614501953125e-05, "step": 23688, "training_step_time": 0.10629606246948242 }, { "epoch": 3.614654541015625e-05, "model_forward_time": 0.025105953216552734, "step": 23689 }, { "epoch": 3.614654541015625e-05, "step": 23689, "training_step_time": 0.10973095893859863 }, { "epoch": 3.61480712890625e-05, "grad_norm": 0.1435283124446869, "learning_rate": 1.16152400808752e-05, "loss": 0.004, "step": 23690 }, { "epoch": 3.61480712890625e-05, "model_forward_time": 0.025171995162963867, "step": 23690 }, { "epoch": 3.61480712890625e-05, "step": 23690, "training_step_time": 0.1071023941040039 }, { "epoch": 3.614959716796875e-05, "model_forward_time": 0.02489447593688965, "step": 23691 }, { "epoch": 3.614959716796875e-05, "step": 23691, "training_step_time": 0.10735821723937988 }, { "epoch": 3.6151123046875e-05, "model_forward_time": 0.024934768676757812, "step": 23692 }, { "epoch": 3.6151123046875e-05, "step": 23692, "training_step_time": 0.10444879531860352 }, { "epoch": 3.615264892578125e-05, "model_forward_time": 0.02548670768737793, "step": 23693 }, { "epoch": 3.615264892578125e-05, "step": 23693, "training_step_time": 0.10754895210266113 }, { "epoch": 3.61541748046875e-05, "model_forward_time": 0.02497076988220215, "step": 23694 }, { "epoch": 3.61541748046875e-05, "step": 23694, "training_step_time": 0.1065518856048584 }, { "epoch": 3.615570068359375e-05, "model_forward_time": 0.02497124671936035, "step": 23695 }, { "epoch": 3.615570068359375e-05, "step": 23695, "training_step_time": 0.1099853515625 }, { "epoch": 3.61572265625e-05, "model_forward_time": 0.025304317474365234, "step": 23696 }, { "epoch": 3.61572265625e-05, "step": 23696, "training_step_time": 0.1062924861907959 }, { "epoch": 3.615875244140625e-05, "model_forward_time": 0.024989843368530273, "step": 23697 }, { "epoch": 3.615875244140625e-05, "step": 23697, "training_step_time": 0.10691165924072266 }, { "epoch": 3.61602783203125e-05, "model_forward_time": 0.025228261947631836, "step": 23698 }, { "epoch": 3.61602783203125e-05, "step": 23698, "training_step_time": 0.10608458518981934 }, { "epoch": 3.616180419921875e-05, "model_forward_time": 0.025272607803344727, "step": 23699 }, { "epoch": 3.616180419921875e-05, "step": 23699, "training_step_time": 0.10791301727294922 }, { "epoch": 3.6163330078125e-05, "grad_norm": 0.2573447525501251, "learning_rate": 1.157994445715706e-05, "loss": 0.0072, "step": 23700 }, { "epoch": 3.6163330078125e-05, "model_forward_time": 0.02500152587890625, "step": 23700 }, { "epoch": 3.6163330078125e-05, "step": 23700, "training_step_time": 0.10466694831848145 }, { "epoch": 3.616485595703125e-05, "model_forward_time": 0.02492690086364746, "step": 23701 }, { "epoch": 3.616485595703125e-05, "step": 23701, "training_step_time": 0.10827350616455078 }, { "epoch": 3.61663818359375e-05, "model_forward_time": 0.02497076988220215, "step": 23702 }, { "epoch": 3.61663818359375e-05, "step": 23702, "training_step_time": 0.10385775566101074 }, { "epoch": 3.616790771484375e-05, "model_forward_time": 0.025234699249267578, "step": 23703 }, { "epoch": 3.616790771484375e-05, "step": 23703, "training_step_time": 0.10654640197753906 }, { "epoch": 3.616943359375e-05, "model_forward_time": 0.025424480438232422, "step": 23704 }, { "epoch": 3.616943359375e-05, "step": 23704, "training_step_time": 0.10701417922973633 }, { "epoch": 3.617095947265625e-05, "model_forward_time": 0.025267839431762695, "step": 23705 }, { "epoch": 3.617095947265625e-05, "step": 23705, "training_step_time": 0.10860729217529297 }, { "epoch": 3.61724853515625e-05, "model_forward_time": 0.025504589080810547, "step": 23706 }, { "epoch": 3.61724853515625e-05, "step": 23706, "training_step_time": 0.1421358585357666 }, { "epoch": 3.617401123046875e-05, "model_forward_time": 0.027516841888427734, "step": 23707 }, { "epoch": 3.617401123046875e-05, "step": 23707, "training_step_time": 0.11456799507141113 }, { "epoch": 3.6175537109375e-05, "model_forward_time": 0.0250546932220459, "step": 23708 }, { "epoch": 3.6175537109375e-05, "step": 23708, "training_step_time": 0.12677907943725586 }, { "epoch": 3.617706298828125e-05, "model_forward_time": 0.024838924407958984, "step": 23709 }, { "epoch": 3.617706298828125e-05, "step": 23709, "training_step_time": 0.15665555000305176 }, { "epoch": 3.61785888671875e-05, "grad_norm": 0.13485924899578094, "learning_rate": 1.1544695517429178e-05, "loss": 0.0042, "step": 23710 }, { "epoch": 3.61785888671875e-05, "model_forward_time": 0.024256467819213867, "step": 23710 }, { "epoch": 3.61785888671875e-05, "step": 23710, "training_step_time": 0.22320318222045898 }, { "epoch": 3.618011474609375e-05, "model_forward_time": 0.023978710174560547, "step": 23711 }, { "epoch": 3.618011474609375e-05, "step": 23711, "training_step_time": 0.11704254150390625 }, { "epoch": 3.6181640625e-05, "model_forward_time": 0.024837255477905273, "step": 23712 }, { "epoch": 3.6181640625e-05, "step": 23712, "training_step_time": 0.10869765281677246 }, { "epoch": 3.618316650390625e-05, "model_forward_time": 0.024811744689941406, "step": 23713 }, { "epoch": 3.618316650390625e-05, "step": 23713, "training_step_time": 0.10901141166687012 }, { "epoch": 3.61846923828125e-05, "model_forward_time": 0.024979829788208008, "step": 23714 }, { "epoch": 3.61846923828125e-05, "step": 23714, "training_step_time": 0.11147952079772949 }, { "epoch": 3.618621826171875e-05, "model_forward_time": 0.02472519874572754, "step": 23715 }, { "epoch": 3.618621826171875e-05, "step": 23715, "training_step_time": 0.11027288436889648 }, { "epoch": 3.6187744140625e-05, "model_forward_time": 0.0242159366607666, "step": 23716 }, { "epoch": 3.6187744140625e-05, "step": 23716, "training_step_time": 0.11115813255310059 }, { "epoch": 3.618927001953125e-05, "model_forward_time": 0.024096965789794922, "step": 23717 }, { "epoch": 3.618927001953125e-05, "step": 23717, "training_step_time": 0.10878133773803711 }, { "epoch": 3.61907958984375e-05, "model_forward_time": 0.026072263717651367, "step": 23718 }, { "epoch": 3.61907958984375e-05, "step": 23718, "training_step_time": 0.10933971405029297 }, { "epoch": 3.619232177734375e-05, "model_forward_time": 0.025133132934570312, "step": 23719 }, { "epoch": 3.619232177734375e-05, "step": 23719, "training_step_time": 0.19052672386169434 }, { "epoch": 3.619384765625e-05, "grad_norm": 0.11386612057685852, "learning_rate": 1.1509493304522329e-05, "loss": 0.0046, "step": 23720 }, { "epoch": 3.619384765625e-05, "model_forward_time": 0.024660110473632812, "step": 23720 }, { "epoch": 3.619384765625e-05, "step": 23720, "training_step_time": 0.11484551429748535 }, { "epoch": 3.619537353515625e-05, "model_forward_time": 0.02398228645324707, "step": 23721 }, { "epoch": 3.619537353515625e-05, "step": 23721, "training_step_time": 0.10903787612915039 }, { "epoch": 3.61968994140625e-05, "model_forward_time": 0.024988412857055664, "step": 23722 }, { "epoch": 3.61968994140625e-05, "step": 23722, "training_step_time": 0.10947918891906738 }, { "epoch": 3.619842529296875e-05, "model_forward_time": 0.025120019912719727, "step": 23723 }, { "epoch": 3.619842529296875e-05, "step": 23723, "training_step_time": 0.12113475799560547 }, { "epoch": 3.6199951171875e-05, "model_forward_time": 0.025365114212036133, "step": 23724 }, { "epoch": 3.6199951171875e-05, "step": 23724, "training_step_time": 0.10991501808166504 }, { "epoch": 3.620147705078125e-05, "model_forward_time": 0.025114059448242188, "step": 23725 }, { "epoch": 3.620147705078125e-05, "step": 23725, "training_step_time": 0.15848755836486816 }, { "epoch": 3.62030029296875e-05, "model_forward_time": 0.024485349655151367, "step": 23726 }, { "epoch": 3.62030029296875e-05, "step": 23726, "training_step_time": 0.10464096069335938 }, { "epoch": 3.620452880859375e-05, "model_forward_time": 0.026644468307495117, "step": 23727 }, { "epoch": 3.620452880859375e-05, "step": 23727, "training_step_time": 0.10659384727478027 }, { "epoch": 3.62060546875e-05, "model_forward_time": 0.025087833404541016, "step": 23728 }, { "epoch": 3.62060546875e-05, "step": 23728, "training_step_time": 0.1043539047241211 }, { "epoch": 3.620758056640625e-05, "model_forward_time": 0.024904727935791016, "step": 23729 }, { "epoch": 3.620758056640625e-05, "step": 23729, "training_step_time": 0.1120903491973877 }, { "epoch": 3.62091064453125e-05, "grad_norm": 0.34860873222351074, "learning_rate": 1.1474337861210543e-05, "loss": 0.0126, "step": 23730 }, { "epoch": 3.62091064453125e-05, "model_forward_time": 0.02493906021118164, "step": 23730 }, { "epoch": 3.62091064453125e-05, "step": 23730, "training_step_time": 0.10784339904785156 }, { "epoch": 3.621063232421875e-05, "model_forward_time": 0.025957345962524414, "step": 23731 }, { "epoch": 3.621063232421875e-05, "step": 23731, "training_step_time": 0.10765457153320312 }, { "epoch": 3.6212158203125e-05, "model_forward_time": 0.025282621383666992, "step": 23732 }, { "epoch": 3.6212158203125e-05, "step": 23732, "training_step_time": 0.1086280345916748 }, { "epoch": 3.621368408203125e-05, "model_forward_time": 0.025543212890625, "step": 23733 }, { "epoch": 3.621368408203125e-05, "step": 23733, "training_step_time": 0.11067533493041992 }, { "epoch": 3.62152099609375e-05, "model_forward_time": 0.025578022003173828, "step": 23734 }, { "epoch": 3.62152099609375e-05, "step": 23734, "training_step_time": 0.1094663143157959 }, { "epoch": 3.621673583984375e-05, "model_forward_time": 0.02633523941040039, "step": 23735 }, { "epoch": 3.621673583984375e-05, "step": 23735, "training_step_time": 0.11171650886535645 }, { "epoch": 3.621826171875e-05, "model_forward_time": 0.02539205551147461, "step": 23736 }, { "epoch": 3.621826171875e-05, "step": 23736, "training_step_time": 0.10599112510681152 }, { "epoch": 3.621978759765625e-05, "model_forward_time": 0.02544379234313965, "step": 23737 }, { "epoch": 3.621978759765625e-05, "step": 23737, "training_step_time": 0.10599732398986816 }, { "epoch": 3.62213134765625e-05, "model_forward_time": 0.02514195442199707, "step": 23738 }, { "epoch": 3.62213134765625e-05, "step": 23738, "training_step_time": 0.10503172874450684 }, { "epoch": 3.622283935546875e-05, "model_forward_time": 0.025303125381469727, "step": 23739 }, { "epoch": 3.622283935546875e-05, "step": 23739, "training_step_time": 0.10688209533691406 }, { "epoch": 3.6224365234375e-05, "grad_norm": 0.10724588483572006, "learning_rate": 1.143922923021099e-05, "loss": 0.0042, "step": 23740 }, { "epoch": 3.6224365234375e-05, "model_forward_time": 0.02524280548095703, "step": 23740 }, { "epoch": 3.6224365234375e-05, "step": 23740, "training_step_time": 0.10619878768920898 }, { "epoch": 3.622589111328125e-05, "model_forward_time": 0.024982213973999023, "step": 23741 }, { "epoch": 3.622589111328125e-05, "step": 23741, "training_step_time": 0.10676074028015137 }, { "epoch": 3.62274169921875e-05, "model_forward_time": 0.024823427200317383, "step": 23742 }, { "epoch": 3.62274169921875e-05, "step": 23742, "training_step_time": 0.10700488090515137 }, { "epoch": 3.622894287109375e-05, "model_forward_time": 0.02497386932373047, "step": 23743 }, { "epoch": 3.622894287109375e-05, "step": 23743, "training_step_time": 0.10875320434570312 }, { "epoch": 3.623046875e-05, "model_forward_time": 0.025289297103881836, "step": 23744 }, { "epoch": 3.623046875e-05, "step": 23744, "training_step_time": 0.1081380844116211 }, { "epoch": 3.623199462890625e-05, "model_forward_time": 0.024966716766357422, "step": 23745 }, { "epoch": 3.623199462890625e-05, "step": 23745, "training_step_time": 0.17196917533874512 }, { "epoch": 3.62335205078125e-05, "model_forward_time": 0.02452874183654785, "step": 23746 }, { "epoch": 3.62335205078125e-05, "step": 23746, "training_step_time": 0.2034451961517334 }, { "epoch": 3.623504638671875e-05, "model_forward_time": 0.02291083335876465, "step": 23747 }, { "epoch": 3.623504638671875e-05, "step": 23747, "training_step_time": 0.1963338851928711 }, { "epoch": 3.6236572265625e-05, "model_forward_time": 0.02420520782470703, "step": 23748 }, { "epoch": 3.6236572265625e-05, "step": 23748, "training_step_time": 0.18754792213439941 }, { "epoch": 3.623809814453125e-05, "model_forward_time": 0.023858070373535156, "step": 23749 }, { "epoch": 3.623809814453125e-05, "step": 23749, "training_step_time": 0.16849780082702637 }, { "epoch": 3.62396240234375e-05, "grad_norm": 0.0854354128241539, "learning_rate": 1.1404167454183957e-05, "loss": 0.005, "step": 23750 }, { "epoch": 3.62396240234375e-05, "model_forward_time": 0.02428889274597168, "step": 23750 }, { "epoch": 3.62396240234375e-05, "step": 23750, "training_step_time": 0.21044492721557617 }, { "epoch": 3.624114990234375e-05, "model_forward_time": 0.02402639389038086, "step": 23751 }, { "epoch": 3.624114990234375e-05, "step": 23751, "training_step_time": 0.16014957427978516 }, { "epoch": 3.624267578125e-05, "model_forward_time": 0.024404048919677734, "step": 23752 }, { "epoch": 3.624267578125e-05, "step": 23752, "training_step_time": 0.1611499786376953 }, { "epoch": 3.624420166015625e-05, "model_forward_time": 0.02448582649230957, "step": 23753 }, { "epoch": 3.624420166015625e-05, "step": 23753, "training_step_time": 0.16031861305236816 }, { "epoch": 3.62457275390625e-05, "model_forward_time": 0.024632930755615234, "step": 23754 }, { "epoch": 3.62457275390625e-05, "step": 23754, "training_step_time": 0.16802358627319336 }, { "epoch": 3.624725341796875e-05, "model_forward_time": 0.024485349655151367, "step": 23755 }, { "epoch": 3.624725341796875e-05, "step": 23755, "training_step_time": 0.15432405471801758 }, { "epoch": 3.6248779296875e-05, "model_forward_time": 0.023845195770263672, "step": 23756 }, { "epoch": 3.6248779296875e-05, "step": 23756, "training_step_time": 0.10907173156738281 }, { "epoch": 3.625030517578125e-05, "model_forward_time": 0.024504423141479492, "step": 23757 }, { "epoch": 3.625030517578125e-05, "step": 23757, "training_step_time": 0.10340380668640137 }, { "epoch": 3.62518310546875e-05, "model_forward_time": 0.0244903564453125, "step": 23758 }, { "epoch": 3.62518310546875e-05, "step": 23758, "training_step_time": 0.10374259948730469 }, { "epoch": 3.625335693359375e-05, "model_forward_time": 0.025090456008911133, "step": 23759 }, { "epoch": 3.625335693359375e-05, "step": 23759, "training_step_time": 0.10402321815490723 }, { "epoch": 3.62548828125e-05, "grad_norm": 0.13724148273468018, "learning_rate": 1.1369152575732822e-05, "loss": 0.0066, "step": 23760 }, { "epoch": 3.62548828125e-05, "model_forward_time": 0.02530074119567871, "step": 23760 }, { "epoch": 3.62548828125e-05, "step": 23760, "training_step_time": 0.10542869567871094 }, { "epoch": 3.625640869140625e-05, "model_forward_time": 0.024912595748901367, "step": 23761 }, { "epoch": 3.625640869140625e-05, "step": 23761, "training_step_time": 0.10939908027648926 }, { "epoch": 3.62579345703125e-05, "model_forward_time": 0.02526402473449707, "step": 23762 }, { "epoch": 3.62579345703125e-05, "step": 23762, "training_step_time": 0.10358381271362305 }, { "epoch": 3.625946044921875e-05, "model_forward_time": 0.026082754135131836, "step": 23763 }, { "epoch": 3.625946044921875e-05, "step": 23763, "training_step_time": 0.1731853485107422 }, { "epoch": 3.6260986328125e-05, "model_forward_time": 0.024230003356933594, "step": 23764 }, { "epoch": 3.6260986328125e-05, "step": 23764, "training_step_time": 0.1359238624572754 }, { "epoch": 3.626251220703125e-05, "model_forward_time": 0.02398991584777832, "step": 23765 }, { "epoch": 3.626251220703125e-05, "step": 23765, "training_step_time": 0.10796141624450684 }, { "epoch": 3.62640380859375e-05, "model_forward_time": 0.025463342666625977, "step": 23766 }, { "epoch": 3.62640380859375e-05, "step": 23766, "training_step_time": 0.13053488731384277 }, { "epoch": 3.626556396484375e-05, "model_forward_time": 0.024906396865844727, "step": 23767 }, { "epoch": 3.626556396484375e-05, "step": 23767, "training_step_time": 0.11472010612487793 }, { "epoch": 3.626708984375e-05, "model_forward_time": 0.0249788761138916, "step": 23768 }, { "epoch": 3.626708984375e-05, "step": 23768, "training_step_time": 0.10928010940551758 }, { "epoch": 3.626861572265625e-05, "model_forward_time": 0.02431035041809082, "step": 23769 }, { "epoch": 3.626861572265625e-05, "step": 23769, "training_step_time": 0.10584259033203125 }, { "epoch": 3.62701416015625e-05, "grad_norm": 0.1321377158164978, "learning_rate": 1.133418463740395e-05, "loss": 0.0038, "step": 23770 }, { "epoch": 3.62701416015625e-05, "model_forward_time": 0.024942398071289062, "step": 23770 }, { "epoch": 3.62701416015625e-05, "step": 23770, "training_step_time": 0.10840415954589844 }, { "epoch": 3.627166748046875e-05, "model_forward_time": 0.02516007423400879, "step": 23771 }, { "epoch": 3.627166748046875e-05, "step": 23771, "training_step_time": 0.10595846176147461 }, { "epoch": 3.6273193359375e-05, "model_forward_time": 0.024936437606811523, "step": 23772 }, { "epoch": 3.6273193359375e-05, "step": 23772, "training_step_time": 0.10771822929382324 }, { "epoch": 3.627471923828125e-05, "model_forward_time": 0.0252683162689209, "step": 23773 }, { "epoch": 3.627471923828125e-05, "step": 23773, "training_step_time": 0.10668611526489258 }, { "epoch": 3.62762451171875e-05, "model_forward_time": 0.025457143783569336, "step": 23774 }, { "epoch": 3.62762451171875e-05, "step": 23774, "training_step_time": 0.12391829490661621 }, { "epoch": 3.627777099609375e-05, "model_forward_time": 0.02526545524597168, "step": 23775 }, { "epoch": 3.627777099609375e-05, "step": 23775, "training_step_time": 0.11473202705383301 }, { "epoch": 3.6279296875e-05, "model_forward_time": 0.025341033935546875, "step": 23776 }, { "epoch": 3.6279296875e-05, "step": 23776, "training_step_time": 0.1972365379333496 }, { "epoch": 3.628082275390625e-05, "model_forward_time": 0.02441263198852539, "step": 23777 }, { "epoch": 3.628082275390625e-05, "step": 23777, "training_step_time": 0.11819148063659668 }, { "epoch": 3.62823486328125e-05, "model_forward_time": 0.024697065353393555, "step": 23778 }, { "epoch": 3.62823486328125e-05, "step": 23778, "training_step_time": 0.12651538848876953 }, { "epoch": 3.628387451171875e-05, "model_forward_time": 0.024565458297729492, "step": 23779 }, { "epoch": 3.628387451171875e-05, "step": 23779, "training_step_time": 0.11623597145080566 }, { "epoch": 3.6285400390625e-05, "grad_norm": 0.2529659867286682, "learning_rate": 1.1299263681686706e-05, "loss": 0.0058, "step": 23780 }, { "epoch": 3.6285400390625e-05, "model_forward_time": 0.02516651153564453, "step": 23780 }, { "epoch": 3.6285400390625e-05, "step": 23780, "training_step_time": 0.10579752922058105 }, { "epoch": 3.628692626953125e-05, "model_forward_time": 0.025121212005615234, "step": 23781 }, { "epoch": 3.628692626953125e-05, "step": 23781, "training_step_time": 0.10657811164855957 }, { "epoch": 3.62884521484375e-05, "model_forward_time": 0.025098323822021484, "step": 23782 }, { "epoch": 3.62884521484375e-05, "step": 23782, "training_step_time": 0.10640263557434082 }, { "epoch": 3.628997802734375e-05, "model_forward_time": 0.025368452072143555, "step": 23783 }, { "epoch": 3.628997802734375e-05, "step": 23783, "training_step_time": 0.10695052146911621 }, { "epoch": 3.629150390625e-05, "model_forward_time": 0.025005578994750977, "step": 23784 }, { "epoch": 3.629150390625e-05, "step": 23784, "training_step_time": 0.10613131523132324 }, { "epoch": 3.629302978515625e-05, "model_forward_time": 0.025133609771728516, "step": 23785 }, { "epoch": 3.629302978515625e-05, "step": 23785, "training_step_time": 0.1042792797088623 }, { "epoch": 3.62945556640625e-05, "model_forward_time": 0.027497053146362305, "step": 23786 }, { "epoch": 3.62945556640625e-05, "step": 23786, "training_step_time": 0.10802006721496582 }, { "epoch": 3.629608154296875e-05, "model_forward_time": 0.024399280548095703, "step": 23787 }, { "epoch": 3.629608154296875e-05, "step": 23787, "training_step_time": 0.1041872501373291 }, { "epoch": 3.6297607421875e-05, "model_forward_time": 0.025021791458129883, "step": 23788 }, { "epoch": 3.6297607421875e-05, "step": 23788, "training_step_time": 0.1070566177368164 }, { "epoch": 3.629913330078125e-05, "model_forward_time": 0.025044918060302734, "step": 23789 }, { "epoch": 3.629913330078125e-05, "step": 23789, "training_step_time": 0.10471248626708984 }, { "epoch": 3.63006591796875e-05, "grad_norm": 0.15855908393859863, "learning_rate": 1.1264389751013326e-05, "loss": 0.0045, "step": 23790 }, { "epoch": 3.63006591796875e-05, "model_forward_time": 0.025020599365234375, "step": 23790 }, { "epoch": 3.63006591796875e-05, "step": 23790, "training_step_time": 0.10537505149841309 }, { "epoch": 3.630218505859375e-05, "model_forward_time": 0.025368213653564453, "step": 23791 }, { "epoch": 3.630218505859375e-05, "step": 23791, "training_step_time": 0.10362935066223145 }, { "epoch": 3.63037109375e-05, "model_forward_time": 0.025111913681030273, "step": 23792 }, { "epoch": 3.63037109375e-05, "step": 23792, "training_step_time": 0.10421872138977051 }, { "epoch": 3.630523681640625e-05, "model_forward_time": 0.024875879287719727, "step": 23793 }, { "epoch": 3.630523681640625e-05, "step": 23793, "training_step_time": 0.10898947715759277 }, { "epoch": 3.63067626953125e-05, "model_forward_time": 0.024104833602905273, "step": 23794 }, { "epoch": 3.63067626953125e-05, "step": 23794, "training_step_time": 0.10918593406677246 }, { "epoch": 3.630828857421875e-05, "model_forward_time": 0.02530384063720703, "step": 23795 }, { "epoch": 3.630828857421875e-05, "step": 23795, "training_step_time": 0.10532879829406738 }, { "epoch": 3.6309814453125e-05, "model_forward_time": 0.024891376495361328, "step": 23796 }, { "epoch": 3.6309814453125e-05, "step": 23796, "training_step_time": 0.1112673282623291 }, { "epoch": 3.631134033203125e-05, "model_forward_time": 0.024657487869262695, "step": 23797 }, { "epoch": 3.631134033203125e-05, "step": 23797, "training_step_time": 0.1267855167388916 }, { "epoch": 3.63128662109375e-05, "model_forward_time": 0.025045394897460938, "step": 23798 }, { "epoch": 3.63128662109375e-05, "step": 23798, "training_step_time": 0.1914212703704834 }, { "epoch": 3.631439208984375e-05, "model_forward_time": 0.024173974990844727, "step": 23799 }, { "epoch": 3.631439208984375e-05, "step": 23799, "training_step_time": 0.15396642684936523 }, { "epoch": 3.631591796875e-05, "grad_norm": 0.07535918056964874, "learning_rate": 1.1229562887758926e-05, "loss": 0.0061, "step": 23800 }, { "epoch": 3.631591796875e-05, "model_forward_time": 0.023712158203125, "step": 23800 }, { "epoch": 3.631591796875e-05, "step": 23800, "training_step_time": 0.10831570625305176 }, { "epoch": 3.631744384765625e-05, "model_forward_time": 0.024420738220214844, "step": 23801 }, { "epoch": 3.631744384765625e-05, "step": 23801, "training_step_time": 0.1759941577911377 }, { "epoch": 3.63189697265625e-05, "model_forward_time": 0.024541139602661133, "step": 23802 }, { "epoch": 3.63189697265625e-05, "step": 23802, "training_step_time": 0.18945622444152832 }, { "epoch": 3.632049560546875e-05, "model_forward_time": 0.024092674255371094, "step": 23803 }, { "epoch": 3.632049560546875e-05, "step": 23803, "training_step_time": 0.11419963836669922 }, { "epoch": 3.6322021484375e-05, "model_forward_time": 0.024261951446533203, "step": 23804 }, { "epoch": 3.6322021484375e-05, "step": 23804, "training_step_time": 0.14516043663024902 }, { "epoch": 3.632354736328125e-05, "model_forward_time": 0.024548768997192383, "step": 23805 }, { "epoch": 3.632354736328125e-05, "step": 23805, "training_step_time": 0.10465645790100098 }, { "epoch": 3.63250732421875e-05, "model_forward_time": 0.025165319442749023, "step": 23806 }, { "epoch": 3.63250732421875e-05, "step": 23806, "training_step_time": 0.10359454154968262 }, { "epoch": 3.632659912109375e-05, "model_forward_time": 0.025231361389160156, "step": 23807 }, { "epoch": 3.632659912109375e-05, "step": 23807, "training_step_time": 0.10949087142944336 }, { "epoch": 3.6328125e-05, "model_forward_time": 0.025070905685424805, "step": 23808 }, { "epoch": 3.6328125e-05, "step": 23808, "training_step_time": 0.10441017150878906 }, { "epoch": 3.632965087890625e-05, "model_forward_time": 0.02825617790222168, "step": 23809 }, { "epoch": 3.632965087890625e-05, "step": 23809, "training_step_time": 0.10683703422546387 }, { "epoch": 3.63311767578125e-05, "grad_norm": 0.09677168726921082, "learning_rate": 1.1194783134241437e-05, "loss": 0.005, "step": 23810 }, { "epoch": 3.63311767578125e-05, "model_forward_time": 0.024693727493286133, "step": 23810 }, { "epoch": 3.63311767578125e-05, "step": 23810, "training_step_time": 0.19266581535339355 }, { "epoch": 3.633270263671875e-05, "model_forward_time": 0.0240023136138916, "step": 23811 }, { "epoch": 3.633270263671875e-05, "step": 23811, "training_step_time": 0.14217042922973633 }, { "epoch": 3.6334228515625e-05, "model_forward_time": 0.02397942543029785, "step": 23812 }, { "epoch": 3.6334228515625e-05, "step": 23812, "training_step_time": 0.10151433944702148 }, { "epoch": 3.633575439453125e-05, "model_forward_time": 0.024928569793701172, "step": 23813 }, { "epoch": 3.633575439453125e-05, "step": 23813, "training_step_time": 0.12736773490905762 }, { "epoch": 3.63372802734375e-05, "model_forward_time": 0.02482891082763672, "step": 23814 }, { "epoch": 3.63372802734375e-05, "step": 23814, "training_step_time": 0.13560080528259277 }, { "epoch": 3.633880615234375e-05, "model_forward_time": 0.024355173110961914, "step": 23815 }, { "epoch": 3.633880615234375e-05, "step": 23815, "training_step_time": 0.1795063018798828 }, { "epoch": 3.634033203125e-05, "model_forward_time": 0.024530887603759766, "step": 23816 }, { "epoch": 3.634033203125e-05, "step": 23816, "training_step_time": 0.12770938873291016 }, { "epoch": 3.634185791015625e-05, "model_forward_time": 0.023942947387695312, "step": 23817 }, { "epoch": 3.634185791015625e-05, "step": 23817, "training_step_time": 0.11959004402160645 }, { "epoch": 3.63433837890625e-05, "model_forward_time": 0.024176359176635742, "step": 23818 }, { "epoch": 3.63433837890625e-05, "step": 23818, "training_step_time": 0.11889529228210449 }, { "epoch": 3.634490966796875e-05, "model_forward_time": 0.025031089782714844, "step": 23819 }, { "epoch": 3.634490966796875e-05, "step": 23819, "training_step_time": 0.11275529861450195 }, { "epoch": 3.6346435546875e-05, "grad_norm": 0.2195141464471817, "learning_rate": 1.1160050532721528e-05, "loss": 0.0041, "step": 23820 }, { "epoch": 3.6346435546875e-05, "model_forward_time": 0.02460026741027832, "step": 23820 }, { "epoch": 3.6346435546875e-05, "step": 23820, "training_step_time": 0.11313271522521973 }, { "epoch": 3.634796142578125e-05, "model_forward_time": 0.02518296241760254, "step": 23821 }, { "epoch": 3.634796142578125e-05, "step": 23821, "training_step_time": 0.11475038528442383 }, { "epoch": 3.63494873046875e-05, "model_forward_time": 0.02507805824279785, "step": 23822 }, { "epoch": 3.63494873046875e-05, "step": 23822, "training_step_time": 0.11010575294494629 }, { "epoch": 3.635101318359375e-05, "model_forward_time": 0.026886940002441406, "step": 23823 }, { "epoch": 3.635101318359375e-05, "step": 23823, "training_step_time": 0.1113576889038086 }, { "epoch": 3.63525390625e-05, "model_forward_time": 0.025590181350708008, "step": 23824 }, { "epoch": 3.63525390625e-05, "step": 23824, "training_step_time": 0.12001800537109375 }, { "epoch": 3.635406494140625e-05, "model_forward_time": 0.02516341209411621, "step": 23825 }, { "epoch": 3.635406494140625e-05, "step": 23825, "training_step_time": 0.10926485061645508 }, { "epoch": 3.63555908203125e-05, "model_forward_time": 0.025412797927856445, "step": 23826 }, { "epoch": 3.63555908203125e-05, "step": 23826, "training_step_time": 0.1138448715209961 }, { "epoch": 3.635711669921875e-05, "model_forward_time": 0.024778127670288086, "step": 23827 }, { "epoch": 3.635711669921875e-05, "step": 23827, "training_step_time": 0.10615015029907227 }, { "epoch": 3.6358642578125e-05, "model_forward_time": 0.025089263916015625, "step": 23828 }, { "epoch": 3.6358642578125e-05, "step": 23828, "training_step_time": 0.10982227325439453 }, { "epoch": 3.636016845703125e-05, "model_forward_time": 0.024789094924926758, "step": 23829 }, { "epoch": 3.636016845703125e-05, "step": 23829, "training_step_time": 0.10493850708007812 }, { "epoch": 3.63616943359375e-05, "grad_norm": 0.08239518105983734, "learning_rate": 1.1125365125402582e-05, "loss": 0.0041, "step": 23830 }, { "epoch": 3.63616943359375e-05, "model_forward_time": 0.025389909744262695, "step": 23830 }, { "epoch": 3.63616943359375e-05, "step": 23830, "training_step_time": 0.10818719863891602 }, { "epoch": 3.636322021484375e-05, "model_forward_time": 0.025417804718017578, "step": 23831 }, { "epoch": 3.636322021484375e-05, "step": 23831, "training_step_time": 0.10602855682373047 }, { "epoch": 3.636474609375e-05, "model_forward_time": 0.026076078414916992, "step": 23832 }, { "epoch": 3.636474609375e-05, "step": 23832, "training_step_time": 0.10586094856262207 }, { "epoch": 3.636627197265625e-05, "model_forward_time": 0.02544426918029785, "step": 23833 }, { "epoch": 3.636627197265625e-05, "step": 23833, "training_step_time": 0.1044168472290039 }, { "epoch": 3.63677978515625e-05, "model_forward_time": 0.025240421295166016, "step": 23834 }, { "epoch": 3.63677978515625e-05, "step": 23834, "training_step_time": 0.10423684120178223 }, { "epoch": 3.636932373046875e-05, "model_forward_time": 0.02508091926574707, "step": 23835 }, { "epoch": 3.636932373046875e-05, "step": 23835, "training_step_time": 0.10547232627868652 }, { "epoch": 3.6370849609375e-05, "model_forward_time": 0.025339126586914062, "step": 23836 }, { "epoch": 3.6370849609375e-05, "step": 23836, "training_step_time": 0.10489869117736816 }, { "epoch": 3.637237548828125e-05, "model_forward_time": 0.024822473526000977, "step": 23837 }, { "epoch": 3.637237548828125e-05, "step": 23837, "training_step_time": 0.10431265830993652 }, { "epoch": 3.63739013671875e-05, "model_forward_time": 0.025453805923461914, "step": 23838 }, { "epoch": 3.63739013671875e-05, "step": 23838, "training_step_time": 0.10352468490600586 }, { "epoch": 3.637542724609375e-05, "model_forward_time": 0.02507305145263672, "step": 23839 }, { "epoch": 3.637542724609375e-05, "step": 23839, "training_step_time": 0.1081230640411377 }, { "epoch": 3.6376953125e-05, "grad_norm": 0.09703323245048523, "learning_rate": 1.1090726954430658e-05, "loss": 0.0064, "step": 23840 }, { "epoch": 3.6376953125e-05, "model_forward_time": 0.0249481201171875, "step": 23840 }, { "epoch": 3.6376953125e-05, "step": 23840, "training_step_time": 0.10661554336547852 }, { "epoch": 3.637847900390625e-05, "model_forward_time": 0.024932384490966797, "step": 23841 }, { "epoch": 3.637847900390625e-05, "step": 23841, "training_step_time": 0.10624051094055176 }, { "epoch": 3.63800048828125e-05, "model_forward_time": 0.024860858917236328, "step": 23842 }, { "epoch": 3.63800048828125e-05, "step": 23842, "training_step_time": 0.10806059837341309 }, { "epoch": 3.638153076171875e-05, "model_forward_time": 0.02544403076171875, "step": 23843 }, { "epoch": 3.638153076171875e-05, "step": 23843, "training_step_time": 0.1051173210144043 }, { "epoch": 3.6383056640625e-05, "model_forward_time": 0.025183916091918945, "step": 23844 }, { "epoch": 3.6383056640625e-05, "step": 23844, "training_step_time": 0.17462468147277832 }, { "epoch": 3.638458251953125e-05, "model_forward_time": 0.02468109130859375, "step": 23845 }, { "epoch": 3.638458251953125e-05, "step": 23845, "training_step_time": 0.12604117393493652 }, { "epoch": 3.63861083984375e-05, "model_forward_time": 0.024676799774169922, "step": 23846 }, { "epoch": 3.63861083984375e-05, "step": 23846, "training_step_time": 0.16299724578857422 }, { "epoch": 3.638763427734375e-05, "model_forward_time": 0.02454233169555664, "step": 23847 }, { "epoch": 3.638763427734375e-05, "step": 23847, "training_step_time": 0.22947454452514648 }, { "epoch": 3.638916015625e-05, "model_forward_time": 0.02444744110107422, "step": 23848 }, { "epoch": 3.638916015625e-05, "step": 23848, "training_step_time": 0.15347719192504883 }, { "epoch": 3.639068603515625e-05, "model_forward_time": 0.024225711822509766, "step": 23849 }, { "epoch": 3.639068603515625e-05, "step": 23849, "training_step_time": 0.17047715187072754 }, { "epoch": 3.63922119140625e-05, "grad_norm": 0.06358692049980164, "learning_rate": 1.1056136061894384e-05, "loss": 0.0038, "step": 23850 }, { "epoch": 3.63922119140625e-05, "model_forward_time": 0.02421283721923828, "step": 23850 }, { "epoch": 3.63922119140625e-05, "step": 23850, "training_step_time": 0.16432428359985352 }, { "epoch": 3.639373779296875e-05, "model_forward_time": 0.025168657302856445, "step": 23851 }, { "epoch": 3.639373779296875e-05, "step": 23851, "training_step_time": 0.12441420555114746 }, { "epoch": 3.6395263671875e-05, "model_forward_time": 0.02466297149658203, "step": 23852 }, { "epoch": 3.6395263671875e-05, "step": 23852, "training_step_time": 0.10793924331665039 }, { "epoch": 3.639678955078125e-05, "model_forward_time": 0.025212764739990234, "step": 23853 }, { "epoch": 3.639678955078125e-05, "step": 23853, "training_step_time": 0.11177611351013184 }, { "epoch": 3.63983154296875e-05, "model_forward_time": 0.025574445724487305, "step": 23854 }, { "epoch": 3.63983154296875e-05, "step": 23854, "training_step_time": 0.10936093330383301 }, { "epoch": 3.639984130859375e-05, "model_forward_time": 0.024920940399169922, "step": 23855 }, { "epoch": 3.639984130859375e-05, "step": 23855, "training_step_time": 0.10675215721130371 }, { "epoch": 3.64013671875e-05, "model_forward_time": 0.02535247802734375, "step": 23856 }, { "epoch": 3.64013671875e-05, "step": 23856, "training_step_time": 0.15739750862121582 }, { "epoch": 3.640289306640625e-05, "model_forward_time": 0.024805784225463867, "step": 23857 }, { "epoch": 3.640289306640625e-05, "step": 23857, "training_step_time": 0.12184476852416992 }, { "epoch": 3.64044189453125e-05, "model_forward_time": 0.02467823028564453, "step": 23858 }, { "epoch": 3.64044189453125e-05, "step": 23858, "training_step_time": 0.1768941879272461 }, { "epoch": 3.640594482421875e-05, "model_forward_time": 0.024393081665039062, "step": 23859 }, { "epoch": 3.640594482421875e-05, "step": 23859, "training_step_time": 0.10309553146362305 }, { "epoch": 3.6407470703125e-05, "grad_norm": 0.107351154088974, "learning_rate": 1.1021592489824967e-05, "loss": 0.0038, "step": 23860 }, { "epoch": 3.6407470703125e-05, "model_forward_time": 0.02430891990661621, "step": 23860 }, { "epoch": 3.6407470703125e-05, "step": 23860, "training_step_time": 0.1591331958770752 }, { "epoch": 3.640899658203125e-05, "model_forward_time": 0.02558588981628418, "step": 23861 }, { "epoch": 3.640899658203125e-05, "step": 23861, "training_step_time": 0.10560297966003418 }, { "epoch": 3.64105224609375e-05, "model_forward_time": 0.024373769760131836, "step": 23862 }, { "epoch": 3.64105224609375e-05, "step": 23862, "training_step_time": 0.10432147979736328 }, { "epoch": 3.641204833984375e-05, "model_forward_time": 0.024957895278930664, "step": 23863 }, { "epoch": 3.641204833984375e-05, "step": 23863, "training_step_time": 0.10355734825134277 }, { "epoch": 3.641357421875e-05, "model_forward_time": 0.025213003158569336, "step": 23864 }, { "epoch": 3.641357421875e-05, "step": 23864, "training_step_time": 0.10899066925048828 }, { "epoch": 3.641510009765625e-05, "model_forward_time": 0.02597975730895996, "step": 23865 }, { "epoch": 3.641510009765625e-05, "step": 23865, "training_step_time": 0.10654568672180176 }, { "epoch": 3.64166259765625e-05, "model_forward_time": 0.0258181095123291, "step": 23866 }, { "epoch": 3.64166259765625e-05, "step": 23866, "training_step_time": 0.10535192489624023 }, { "epoch": 3.641815185546875e-05, "model_forward_time": 0.025333404541015625, "step": 23867 }, { "epoch": 3.641815185546875e-05, "step": 23867, "training_step_time": 0.15082883834838867 }, { "epoch": 3.6419677734375e-05, "model_forward_time": 0.0247955322265625, "step": 23868 }, { "epoch": 3.6419677734375e-05, "step": 23868, "training_step_time": 0.17311906814575195 }, { "epoch": 3.642120361328125e-05, "model_forward_time": 0.024601221084594727, "step": 23869 }, { "epoch": 3.642120361328125e-05, "step": 23869, "training_step_time": 0.13028287887573242 }, { "epoch": 3.64227294921875e-05, "grad_norm": 0.17584475874900818, "learning_rate": 1.0987096280196086e-05, "loss": 0.0083, "step": 23870 }, { "epoch": 3.64227294921875e-05, "model_forward_time": 0.02426910400390625, "step": 23870 }, { "epoch": 3.64227294921875e-05, "step": 23870, "training_step_time": 0.1327214241027832 }, { "epoch": 3.642425537109375e-05, "model_forward_time": 0.026093006134033203, "step": 23871 }, { "epoch": 3.642425537109375e-05, "step": 23871, "training_step_time": 0.1962425708770752 }, { "epoch": 3.642578125e-05, "model_forward_time": 0.02451348304748535, "step": 23872 }, { "epoch": 3.642578125e-05, "step": 23872, "training_step_time": 0.10332131385803223 }, { "epoch": 3.642730712890625e-05, "model_forward_time": 0.02447223663330078, "step": 23873 }, { "epoch": 3.642730712890625e-05, "step": 23873, "training_step_time": 0.1018836498260498 }, { "epoch": 3.64288330078125e-05, "model_forward_time": 0.025185346603393555, "step": 23874 }, { "epoch": 3.64288330078125e-05, "step": 23874, "training_step_time": 0.10932707786560059 }, { "epoch": 3.643035888671875e-05, "model_forward_time": 0.025146007537841797, "step": 23875 }, { "epoch": 3.643035888671875e-05, "step": 23875, "training_step_time": 0.1065664291381836 }, { "epoch": 3.6431884765625e-05, "model_forward_time": 0.02554178237915039, "step": 23876 }, { "epoch": 3.6431884765625e-05, "step": 23876, "training_step_time": 0.10508227348327637 }, { "epoch": 3.643341064453125e-05, "model_forward_time": 0.02531123161315918, "step": 23877 }, { "epoch": 3.643341064453125e-05, "step": 23877, "training_step_time": 0.11355972290039062 }, { "epoch": 3.64349365234375e-05, "model_forward_time": 0.024814128875732422, "step": 23878 }, { "epoch": 3.64349365234375e-05, "step": 23878, "training_step_time": 0.10688900947570801 }, { "epoch": 3.643646240234375e-05, "model_forward_time": 0.024965763092041016, "step": 23879 }, { "epoch": 3.643646240234375e-05, "step": 23879, "training_step_time": 0.10531783103942871 }, { "epoch": 3.643798828125e-05, "grad_norm": 0.10480281710624695, "learning_rate": 1.095264747492391e-05, "loss": 0.0032, "step": 23880 }, { "epoch": 3.643798828125e-05, "model_forward_time": 0.024976253509521484, "step": 23880 }, { "epoch": 3.643798828125e-05, "step": 23880, "training_step_time": 0.10486531257629395 }, { "epoch": 3.643951416015625e-05, "model_forward_time": 0.025159120559692383, "step": 23881 }, { "epoch": 3.643951416015625e-05, "step": 23881, "training_step_time": 0.10704445838928223 }, { "epoch": 3.64410400390625e-05, "model_forward_time": 0.0252077579498291, "step": 23882 }, { "epoch": 3.64410400390625e-05, "step": 23882, "training_step_time": 0.10481810569763184 }, { "epoch": 3.644256591796875e-05, "model_forward_time": 0.024898767471313477, "step": 23883 }, { "epoch": 3.644256591796875e-05, "step": 23883, "training_step_time": 0.10427570343017578 }, { "epoch": 3.6444091796875e-05, "model_forward_time": 0.0252382755279541, "step": 23884 }, { "epoch": 3.6444091796875e-05, "step": 23884, "training_step_time": 0.1077737808227539 }, { "epoch": 3.644561767578125e-05, "model_forward_time": 0.025249242782592773, "step": 23885 }, { "epoch": 3.644561767578125e-05, "step": 23885, "training_step_time": 0.1056058406829834 }, { "epoch": 3.64471435546875e-05, "model_forward_time": 0.025231599807739258, "step": 23886 }, { "epoch": 3.64471435546875e-05, "step": 23886, "training_step_time": 0.1046438217163086 }, { "epoch": 3.644866943359375e-05, "model_forward_time": 0.02515578269958496, "step": 23887 }, { "epoch": 3.644866943359375e-05, "step": 23887, "training_step_time": 0.10477852821350098 }, { "epoch": 3.64501953125e-05, "model_forward_time": 0.02551889419555664, "step": 23888 }, { "epoch": 3.64501953125e-05, "step": 23888, "training_step_time": 0.10579419136047363 }, { "epoch": 3.645172119140625e-05, "model_forward_time": 0.02513885498046875, "step": 23889 }, { "epoch": 3.645172119140625e-05, "step": 23889, "training_step_time": 0.1233360767364502 }, { "epoch": 3.64532470703125e-05, "grad_norm": 0.09248506277799606, "learning_rate": 1.0918246115866964e-05, "loss": 0.0085, "step": 23890 }, { "epoch": 3.64532470703125e-05, "model_forward_time": 0.02534198760986328, "step": 23890 }, { "epoch": 3.64532470703125e-05, "step": 23890, "training_step_time": 0.11260700225830078 }, { "epoch": 3.645477294921875e-05, "model_forward_time": 0.02507758140563965, "step": 23891 }, { "epoch": 3.645477294921875e-05, "step": 23891, "training_step_time": 0.1282210350036621 }, { "epoch": 3.6456298828125e-05, "model_forward_time": 0.025000810623168945, "step": 23892 }, { "epoch": 3.6456298828125e-05, "step": 23892, "training_step_time": 0.1393587589263916 }, { "epoch": 3.645782470703125e-05, "model_forward_time": 0.02394723892211914, "step": 23893 }, { "epoch": 3.645782470703125e-05, "step": 23893, "training_step_time": 0.11671829223632812 }, { "epoch": 3.64593505859375e-05, "model_forward_time": 0.025250911712646484, "step": 23894 }, { "epoch": 3.64593505859375e-05, "step": 23894, "training_step_time": 0.12872838973999023 }, { "epoch": 3.646087646484375e-05, "model_forward_time": 0.025410890579223633, "step": 23895 }, { "epoch": 3.646087646484375e-05, "step": 23895, "training_step_time": 0.11078715324401855 }, { "epoch": 3.646240234375e-05, "model_forward_time": 0.025033950805664062, "step": 23896 }, { "epoch": 3.646240234375e-05, "step": 23896, "training_step_time": 0.13236141204833984 }, { "epoch": 3.646392822265625e-05, "model_forward_time": 0.025265932083129883, "step": 23897 }, { "epoch": 3.646392822265625e-05, "step": 23897, "training_step_time": 0.11030721664428711 }, { "epoch": 3.64654541015625e-05, "model_forward_time": 0.02521538734436035, "step": 23898 }, { "epoch": 3.64654541015625e-05, "step": 23898, "training_step_time": 0.11383914947509766 }, { "epoch": 3.646697998046875e-05, "model_forward_time": 0.025220155715942383, "step": 23899 }, { "epoch": 3.646697998046875e-05, "step": 23899, "training_step_time": 0.10855746269226074 }, { "epoch": 3.6468505859375e-05, "grad_norm": 0.07143854349851608, "learning_rate": 1.0883892244826172e-05, "loss": 0.0035, "step": 23900 }, { "epoch": 3.6468505859375e-05, "model_forward_time": 0.02645730972290039, "step": 23900 }, { "epoch": 3.6468505859375e-05, "step": 23900, "training_step_time": 0.18007969856262207 }, { "epoch": 3.647003173828125e-05, "model_forward_time": 0.024250030517578125, "step": 23901 }, { "epoch": 3.647003173828125e-05, "step": 23901, "training_step_time": 0.21277213096618652 }, { "epoch": 3.64715576171875e-05, "model_forward_time": 0.024412155151367188, "step": 23902 }, { "epoch": 3.64715576171875e-05, "step": 23902, "training_step_time": 0.20573067665100098 }, { "epoch": 3.647308349609375e-05, "model_forward_time": 0.024324893951416016, "step": 23903 }, { "epoch": 3.647308349609375e-05, "step": 23903, "training_step_time": 0.1971428394317627 }, { "epoch": 3.6474609375e-05, "model_forward_time": 0.02409052848815918, "step": 23904 }, { "epoch": 3.6474609375e-05, "step": 23904, "training_step_time": 0.21251535415649414 }, { "epoch": 3.647613525390625e-05, "model_forward_time": 0.024709463119506836, "step": 23905 }, { "epoch": 3.647613525390625e-05, "step": 23905, "training_step_time": 0.19163894653320312 }, { "epoch": 3.64776611328125e-05, "model_forward_time": 0.02375054359436035, "step": 23906 }, { "epoch": 3.64776611328125e-05, "step": 23906, "training_step_time": 0.177598237991333 }, { "epoch": 3.647918701171875e-05, "model_forward_time": 0.024397611618041992, "step": 23907 }, { "epoch": 3.647918701171875e-05, "step": 23907, "training_step_time": 0.09996867179870605 }, { "epoch": 3.6480712890625e-05, "model_forward_time": 0.02400803565979004, "step": 23908 }, { "epoch": 3.6480712890625e-05, "step": 23908, "training_step_time": 0.10216712951660156 }, { "epoch": 3.648223876953125e-05, "model_forward_time": 0.025124311447143555, "step": 23909 }, { "epoch": 3.648223876953125e-05, "step": 23909, "training_step_time": 0.10426497459411621 }, { "epoch": 3.64837646484375e-05, "grad_norm": 0.2721317410469055, "learning_rate": 1.0849585903544706e-05, "loss": 0.005, "step": 23910 }, { "epoch": 3.64837646484375e-05, "model_forward_time": 0.025544166564941406, "step": 23910 }, { "epoch": 3.64837646484375e-05, "step": 23910, "training_step_time": 0.10668063163757324 }, { "epoch": 3.648529052734375e-05, "model_forward_time": 0.024471521377563477, "step": 23911 }, { "epoch": 3.648529052734375e-05, "step": 23911, "training_step_time": 0.14273571968078613 }, { "epoch": 3.648681640625e-05, "model_forward_time": 0.024634122848510742, "step": 23912 }, { "epoch": 3.648681640625e-05, "step": 23912, "training_step_time": 0.16340875625610352 }, { "epoch": 3.648834228515625e-05, "model_forward_time": 0.024509191513061523, "step": 23913 }, { "epoch": 3.648834228515625e-05, "step": 23913, "training_step_time": 0.11221194267272949 }, { "epoch": 3.64898681640625e-05, "model_forward_time": 0.024311065673828125, "step": 23914 }, { "epoch": 3.64898681640625e-05, "step": 23914, "training_step_time": 0.13302969932556152 }, { "epoch": 3.649139404296875e-05, "model_forward_time": 0.025389432907104492, "step": 23915 }, { "epoch": 3.649139404296875e-05, "step": 23915, "training_step_time": 0.20921683311462402 }, { "epoch": 3.6492919921875e-05, "model_forward_time": 0.02478766441345215, "step": 23916 }, { "epoch": 3.6492919921875e-05, "step": 23916, "training_step_time": 0.10566043853759766 }, { "epoch": 3.649444580078125e-05, "model_forward_time": 0.02468585968017578, "step": 23917 }, { "epoch": 3.649444580078125e-05, "step": 23917, "training_step_time": 0.10468053817749023 }, { "epoch": 3.64959716796875e-05, "model_forward_time": 0.025393009185791016, "step": 23918 }, { "epoch": 3.64959716796875e-05, "step": 23918, "training_step_time": 0.10818934440612793 }, { "epoch": 3.649749755859375e-05, "model_forward_time": 0.025679588317871094, "step": 23919 }, { "epoch": 3.649749755859375e-05, "step": 23919, "training_step_time": 0.10683751106262207 }, { "epoch": 3.64990234375e-05, "grad_norm": 0.17563557624816895, "learning_rate": 1.0815327133708015e-05, "loss": 0.0104, "step": 23920 }, { "epoch": 3.64990234375e-05, "model_forward_time": 0.024729251861572266, "step": 23920 }, { "epoch": 3.64990234375e-05, "step": 23920, "training_step_time": 0.10470199584960938 }, { "epoch": 3.650054931640625e-05, "model_forward_time": 0.025269031524658203, "step": 23921 }, { "epoch": 3.650054931640625e-05, "step": 23921, "training_step_time": 0.10552239418029785 }, { "epoch": 3.65020751953125e-05, "model_forward_time": 0.025113582611083984, "step": 23922 }, { "epoch": 3.65020751953125e-05, "step": 23922, "training_step_time": 0.10371565818786621 }, { "epoch": 3.650360107421875e-05, "model_forward_time": 0.024776220321655273, "step": 23923 }, { "epoch": 3.650360107421875e-05, "step": 23923, "training_step_time": 0.10338854789733887 }, { "epoch": 3.6505126953125e-05, "model_forward_time": 0.024939298629760742, "step": 23924 }, { "epoch": 3.6505126953125e-05, "step": 23924, "training_step_time": 0.10313034057617188 }, { "epoch": 3.650665283203125e-05, "model_forward_time": 0.025725841522216797, "step": 23925 }, { "epoch": 3.650665283203125e-05, "step": 23925, "training_step_time": 0.10590362548828125 }, { "epoch": 3.65081787109375e-05, "model_forward_time": 0.02538156509399414, "step": 23926 }, { "epoch": 3.65081787109375e-05, "step": 23926, "training_step_time": 0.10534048080444336 }, { "epoch": 3.650970458984375e-05, "model_forward_time": 0.028499126434326172, "step": 23927 }, { "epoch": 3.650970458984375e-05, "step": 23927, "training_step_time": 0.10802626609802246 }, { "epoch": 3.651123046875e-05, "model_forward_time": 0.025043964385986328, "step": 23928 }, { "epoch": 3.651123046875e-05, "step": 23928, "training_step_time": 0.1066586971282959 }, { "epoch": 3.651275634765625e-05, "model_forward_time": 0.025124549865722656, "step": 23929 }, { "epoch": 3.651275634765625e-05, "step": 23929, "training_step_time": 0.10372424125671387 }, { "epoch": 3.65142822265625e-05, "grad_norm": 0.08833561837673187, "learning_rate": 1.0781115976943717e-05, "loss": 0.0036, "step": 23930 }, { "epoch": 3.65142822265625e-05, "model_forward_time": 0.025107383728027344, "step": 23930 }, { "epoch": 3.65142822265625e-05, "step": 23930, "training_step_time": 0.10421562194824219 }, { "epoch": 3.651580810546875e-05, "model_forward_time": 0.024898529052734375, "step": 23931 }, { "epoch": 3.651580810546875e-05, "step": 23931, "training_step_time": 0.10519123077392578 }, { "epoch": 3.6517333984375e-05, "model_forward_time": 0.02464604377746582, "step": 23932 }, { "epoch": 3.6517333984375e-05, "step": 23932, "training_step_time": 0.19600939750671387 }, { "epoch": 3.651885986328125e-05, "model_forward_time": 0.024336576461791992, "step": 23933 }, { "epoch": 3.651885986328125e-05, "step": 23933, "training_step_time": 0.11583089828491211 }, { "epoch": 3.65203857421875e-05, "model_forward_time": 0.024325132369995117, "step": 23934 }, { "epoch": 3.65203857421875e-05, "step": 23934, "training_step_time": 0.1271059513092041 }, { "epoch": 3.652191162109375e-05, "model_forward_time": 0.025506019592285156, "step": 23935 }, { "epoch": 3.652191162109375e-05, "step": 23935, "training_step_time": 0.1427474021911621 }, { "epoch": 3.65234375e-05, "model_forward_time": 0.0247039794921875, "step": 23936 }, { "epoch": 3.65234375e-05, "step": 23936, "training_step_time": 0.20601701736450195 }, { "epoch": 3.652496337890625e-05, "model_forward_time": 0.02445197105407715, "step": 23937 }, { "epoch": 3.652496337890625e-05, "step": 23937, "training_step_time": 0.14899563789367676 }, { "epoch": 3.65264892578125e-05, "model_forward_time": 0.024459123611450195, "step": 23938 }, { "epoch": 3.65264892578125e-05, "step": 23938, "training_step_time": 0.20193839073181152 }, { "epoch": 3.652801513671875e-05, "model_forward_time": 0.024033069610595703, "step": 23939 }, { "epoch": 3.652801513671875e-05, "step": 23939, "training_step_time": 0.1343083381652832 }, { "epoch": 3.6529541015625e-05, "grad_norm": 0.11135763674974442, "learning_rate": 1.0746952474821614e-05, "loss": 0.0029, "step": 23940 }, { "epoch": 3.6529541015625e-05, "model_forward_time": 0.024798870086669922, "step": 23940 }, { "epoch": 3.6529541015625e-05, "step": 23940, "training_step_time": 0.10696172714233398 }, { "epoch": 3.653106689453125e-05, "model_forward_time": 0.025301694869995117, "step": 23941 }, { "epoch": 3.653106689453125e-05, "step": 23941, "training_step_time": 0.11362552642822266 }, { "epoch": 3.65325927734375e-05, "model_forward_time": 0.025057077407836914, "step": 23942 }, { "epoch": 3.65325927734375e-05, "step": 23942, "training_step_time": 0.13768839836120605 }, { "epoch": 3.653411865234375e-05, "model_forward_time": 0.024944543838500977, "step": 23943 }, { "epoch": 3.653411865234375e-05, "step": 23943, "training_step_time": 0.1921687126159668 }, { "epoch": 3.653564453125e-05, "model_forward_time": 0.024840831756591797, "step": 23944 }, { "epoch": 3.653564453125e-05, "step": 23944, "training_step_time": 0.10192513465881348 }, { "epoch": 3.653717041015625e-05, "model_forward_time": 0.024465084075927734, "step": 23945 }, { "epoch": 3.653717041015625e-05, "step": 23945, "training_step_time": 0.2050459384918213 }, { "epoch": 3.65386962890625e-05, "model_forward_time": 0.025182723999023438, "step": 23946 }, { "epoch": 3.65386962890625e-05, "step": 23946, "training_step_time": 0.13811922073364258 }, { "epoch": 3.654022216796875e-05, "model_forward_time": 0.024602890014648438, "step": 23947 }, { "epoch": 3.654022216796875e-05, "step": 23947, "training_step_time": 0.10910487174987793 }, { "epoch": 3.6541748046875e-05, "model_forward_time": 0.025218725204467773, "step": 23948 }, { "epoch": 3.6541748046875e-05, "step": 23948, "training_step_time": 0.10687851905822754 }, { "epoch": 3.654327392578125e-05, "model_forward_time": 0.02527618408203125, "step": 23949 }, { "epoch": 3.654327392578125e-05, "step": 23949, "training_step_time": 0.10986137390136719 }, { "epoch": 3.65447998046875e-05, "grad_norm": 0.10144961625337601, "learning_rate": 1.0712836668853582e-05, "loss": 0.0029, "step": 23950 }, { "epoch": 3.65447998046875e-05, "model_forward_time": 0.025519609451293945, "step": 23950 }, { "epoch": 3.65447998046875e-05, "step": 23950, "training_step_time": 0.10483956336975098 }, { "epoch": 3.654632568359375e-05, "model_forward_time": 0.025439977645874023, "step": 23951 }, { "epoch": 3.654632568359375e-05, "step": 23951, "training_step_time": 0.10793709754943848 }, { "epoch": 3.65478515625e-05, "model_forward_time": 0.025325775146484375, "step": 23952 }, { "epoch": 3.65478515625e-05, "step": 23952, "training_step_time": 0.10506129264831543 }, { "epoch": 3.654937744140625e-05, "model_forward_time": 0.025190114974975586, "step": 23953 }, { "epoch": 3.654937744140625e-05, "step": 23953, "training_step_time": 0.10608434677124023 }, { "epoch": 3.65509033203125e-05, "model_forward_time": 0.024977684020996094, "step": 23954 }, { "epoch": 3.65509033203125e-05, "step": 23954, "training_step_time": 0.10709381103515625 }, { "epoch": 3.655242919921875e-05, "model_forward_time": 0.025832176208496094, "step": 23955 }, { "epoch": 3.655242919921875e-05, "step": 23955, "training_step_time": 0.10762643814086914 }, { "epoch": 3.6553955078125e-05, "model_forward_time": 0.025383472442626953, "step": 23956 }, { "epoch": 3.6553955078125e-05, "step": 23956, "training_step_time": 0.14487195014953613 }, { "epoch": 3.655548095703125e-05, "model_forward_time": 0.024576663970947266, "step": 23957 }, { "epoch": 3.655548095703125e-05, "step": 23957, "training_step_time": 0.15160918235778809 }, { "epoch": 3.65570068359375e-05, "model_forward_time": 0.025400638580322266, "step": 23958 }, { "epoch": 3.65570068359375e-05, "step": 23958, "training_step_time": 0.11628866195678711 }, { "epoch": 3.655853271484375e-05, "model_forward_time": 0.02435898780822754, "step": 23959 }, { "epoch": 3.655853271484375e-05, "step": 23959, "training_step_time": 0.1323254108428955 }, { "epoch": 3.656005859375e-05, "grad_norm": 0.3526037931442261, "learning_rate": 1.0678768600493528e-05, "loss": 0.0139, "step": 23960 }, { "epoch": 3.656005859375e-05, "model_forward_time": 0.025356292724609375, "step": 23960 }, { "epoch": 3.656005859375e-05, "step": 23960, "training_step_time": 0.10399365425109863 }, { "epoch": 3.656158447265625e-05, "model_forward_time": 0.02509784698486328, "step": 23961 }, { "epoch": 3.656158447265625e-05, "step": 23961, "training_step_time": 0.12304854393005371 }, { "epoch": 3.65631103515625e-05, "model_forward_time": 0.02531599998474121, "step": 23962 }, { "epoch": 3.65631103515625e-05, "step": 23962, "training_step_time": 0.10485243797302246 }, { "epoch": 3.656463623046875e-05, "model_forward_time": 0.025495529174804688, "step": 23963 }, { "epoch": 3.656463623046875e-05, "step": 23963, "training_step_time": 0.10354065895080566 }, { "epoch": 3.6566162109375e-05, "model_forward_time": 0.024891376495361328, "step": 23964 }, { "epoch": 3.6566162109375e-05, "step": 23964, "training_step_time": 0.1065068244934082 }, { "epoch": 3.656768798828125e-05, "model_forward_time": 0.02472209930419922, "step": 23965 }, { "epoch": 3.656768798828125e-05, "step": 23965, "training_step_time": 0.10564446449279785 }, { "epoch": 3.65692138671875e-05, "model_forward_time": 0.025160551071166992, "step": 23966 }, { "epoch": 3.65692138671875e-05, "step": 23966, "training_step_time": 0.10565853118896484 }, { "epoch": 3.657073974609375e-05, "model_forward_time": 0.02499699592590332, "step": 23967 }, { "epoch": 3.657073974609375e-05, "step": 23967, "training_step_time": 0.10349559783935547 }, { "epoch": 3.6572265625e-05, "model_forward_time": 0.02480483055114746, "step": 23968 }, { "epoch": 3.6572265625e-05, "step": 23968, "training_step_time": 0.105194091796875 }, { "epoch": 3.657379150390625e-05, "model_forward_time": 0.024136066436767578, "step": 23969 }, { "epoch": 3.657379150390625e-05, "step": 23969, "training_step_time": 0.10750436782836914 }, { "epoch": 3.65753173828125e-05, "grad_norm": 0.11314871907234192, "learning_rate": 1.0644748311137376e-05, "loss": 0.008, "step": 23970 }, { "epoch": 3.65753173828125e-05, "model_forward_time": 0.025174617767333984, "step": 23970 }, { "epoch": 3.65753173828125e-05, "step": 23970, "training_step_time": 0.10592532157897949 }, { "epoch": 3.657684326171875e-05, "model_forward_time": 0.0250093936920166, "step": 23971 }, { "epoch": 3.657684326171875e-05, "step": 23971, "training_step_time": 0.10729050636291504 }, { "epoch": 3.6578369140625e-05, "model_forward_time": 0.02558279037475586, "step": 23972 }, { "epoch": 3.6578369140625e-05, "step": 23972, "training_step_time": 0.10511183738708496 }, { "epoch": 3.657989501953125e-05, "model_forward_time": 0.025240421295166016, "step": 23973 }, { "epoch": 3.657989501953125e-05, "step": 23973, "training_step_time": 0.1531233787536621 }, { "epoch": 3.65814208984375e-05, "model_forward_time": 0.02479243278503418, "step": 23974 }, { "epoch": 3.65814208984375e-05, "step": 23974, "training_step_time": 0.18065738677978516 }, { "epoch": 3.658294677734375e-05, "model_forward_time": 0.024245500564575195, "step": 23975 }, { "epoch": 3.658294677734375e-05, "step": 23975, "training_step_time": 0.18618035316467285 }, { "epoch": 3.658447265625e-05, "model_forward_time": 0.024186134338378906, "step": 23976 }, { "epoch": 3.658447265625e-05, "step": 23976, "training_step_time": 0.17686915397644043 }, { "epoch": 3.658599853515625e-05, "model_forward_time": 0.02416706085205078, "step": 23977 }, { "epoch": 3.658599853515625e-05, "step": 23977, "training_step_time": 0.2102205753326416 }, { "epoch": 3.65875244140625e-05, "model_forward_time": 0.02425098419189453, "step": 23978 }, { "epoch": 3.65875244140625e-05, "step": 23978, "training_step_time": 0.13461613655090332 }, { "epoch": 3.658905029296875e-05, "model_forward_time": 0.02405571937561035, "step": 23979 }, { "epoch": 3.658905029296875e-05, "step": 23979, "training_step_time": 0.1157839298248291 }, { "epoch": 3.6590576171875e-05, "grad_norm": 0.07854122668504715, "learning_rate": 1.0610775842122972e-05, "loss": 0.0039, "step": 23980 }, { "epoch": 3.6590576171875e-05, "model_forward_time": 0.02569127082824707, "step": 23980 }, { "epoch": 3.6590576171875e-05, "step": 23980, "training_step_time": 0.19122552871704102 }, { "epoch": 3.659210205078125e-05, "model_forward_time": 0.024831533432006836, "step": 23981 }, { "epoch": 3.659210205078125e-05, "step": 23981, "training_step_time": 0.11115050315856934 }, { "epoch": 3.65936279296875e-05, "model_forward_time": 0.023618221282958984, "step": 23982 }, { "epoch": 3.65936279296875e-05, "step": 23982, "training_step_time": 0.11294746398925781 }, { "epoch": 3.659515380859375e-05, "model_forward_time": 0.02515387535095215, "step": 23983 }, { "epoch": 3.659515380859375e-05, "step": 23983, "training_step_time": 0.13150382041931152 }, { "epoch": 3.65966796875e-05, "model_forward_time": 0.024895429611206055, "step": 23984 }, { "epoch": 3.65966796875e-05, "step": 23984, "training_step_time": 0.11939787864685059 }, { "epoch": 3.659820556640625e-05, "model_forward_time": 0.025341272354125977, "step": 23985 }, { "epoch": 3.659820556640625e-05, "step": 23985, "training_step_time": 0.11521649360656738 }, { "epoch": 3.65997314453125e-05, "model_forward_time": 0.025594472885131836, "step": 23986 }, { "epoch": 3.65997314453125e-05, "step": 23986, "training_step_time": 0.11291790008544922 }, { "epoch": 3.660125732421875e-05, "model_forward_time": 0.024996280670166016, "step": 23987 }, { "epoch": 3.660125732421875e-05, "step": 23987, "training_step_time": 0.1248483657836914 }, { "epoch": 3.6602783203125e-05, "model_forward_time": 0.025066375732421875, "step": 23988 }, { "epoch": 3.6602783203125e-05, "step": 23988, "training_step_time": 0.10714173316955566 }, { "epoch": 3.660430908203125e-05, "model_forward_time": 0.025182247161865234, "step": 23989 }, { "epoch": 3.660430908203125e-05, "step": 23989, "training_step_time": 0.13114523887634277 }, { "epoch": 3.66058349609375e-05, "grad_norm": 0.0672249123454094, "learning_rate": 1.0576851234730095e-05, "loss": 0.0075, "step": 23990 }, { "epoch": 3.66058349609375e-05, "model_forward_time": 0.025022268295288086, "step": 23990 }, { "epoch": 3.66058349609375e-05, "step": 23990, "training_step_time": 0.16492366790771484 }, { "epoch": 3.660736083984375e-05, "model_forward_time": 0.024395465850830078, "step": 23991 }, { "epoch": 3.660736083984375e-05, "step": 23991, "training_step_time": 0.12519478797912598 }, { "epoch": 3.660888671875e-05, "model_forward_time": 0.024024009704589844, "step": 23992 }, { "epoch": 3.660888671875e-05, "step": 23992, "training_step_time": 0.10299420356750488 }, { "epoch": 3.661041259765625e-05, "model_forward_time": 0.02498936653137207, "step": 23993 }, { "epoch": 3.661041259765625e-05, "step": 23993, "training_step_time": 0.10996794700622559 }, { "epoch": 3.66119384765625e-05, "model_forward_time": 0.024919509887695312, "step": 23994 }, { "epoch": 3.66119384765625e-05, "step": 23994, "training_step_time": 0.18923044204711914 }, { "epoch": 3.661346435546875e-05, "model_forward_time": 0.024245500564575195, "step": 23995 }, { "epoch": 3.661346435546875e-05, "step": 23995, "training_step_time": 0.10231709480285645 }, { "epoch": 3.6614990234375e-05, "model_forward_time": 0.024425029754638672, "step": 23996 }, { "epoch": 3.6614990234375e-05, "step": 23996, "training_step_time": 0.10216951370239258 }, { "epoch": 3.661651611328125e-05, "model_forward_time": 0.024704694747924805, "step": 23997 }, { "epoch": 3.661651611328125e-05, "step": 23997, "training_step_time": 0.10421514511108398 }, { "epoch": 3.66180419921875e-05, "model_forward_time": 0.025384187698364258, "step": 23998 }, { "epoch": 3.66180419921875e-05, "step": 23998, "training_step_time": 0.10660743713378906 }, { "epoch": 3.661956787109375e-05, "model_forward_time": 0.024947643280029297, "step": 23999 }, { "epoch": 3.661956787109375e-05, "step": 23999, "training_step_time": 0.10779118537902832 }, { "epoch": 3.662109375e-05, "grad_norm": 0.1890254020690918, "learning_rate": 1.0542974530180327e-05, "loss": 0.0041, "step": 24000 }, { "epoch": 3.662109375e-05, "model_forward_time": 0.026225566864013672, "step": 24000 }, { "epoch": 3.662109375e-05, "step": 24000, "training_step_time": 0.09939074516296387 }, { "epoch": 3.662261962890625e-05, "model_forward_time": 0.023174762725830078, "step": 24001 }, { "epoch": 3.662261962890625e-05, "step": 24001, "training_step_time": 0.10173487663269043 }, { "epoch": 3.66241455078125e-05, "model_forward_time": 0.02447819709777832, "step": 24002 }, { "epoch": 3.66241455078125e-05, "step": 24002, "training_step_time": 0.10319709777832031 }, { "epoch": 3.662567138671875e-05, "model_forward_time": 0.026036977767944336, "step": 24003 }, { "epoch": 3.662567138671875e-05, "step": 24003, "training_step_time": 0.10711503028869629 }, { "epoch": 3.6627197265625e-05, "model_forward_time": 0.02511429786682129, "step": 24004 }, { "epoch": 3.6627197265625e-05, "step": 24004, "training_step_time": 0.14751219749450684 }, { "epoch": 3.662872314453125e-05, "model_forward_time": 0.02524852752685547, "step": 24005 }, { "epoch": 3.662872314453125e-05, "step": 24005, "training_step_time": 0.10389828681945801 }, { "epoch": 3.66302490234375e-05, "model_forward_time": 0.02554488182067871, "step": 24006 }, { "epoch": 3.66302490234375e-05, "step": 24006, "training_step_time": 0.1060783863067627 }, { "epoch": 3.663177490234375e-05, "model_forward_time": 0.02532052993774414, "step": 24007 }, { "epoch": 3.663177490234375e-05, "step": 24007, "training_step_time": 0.11531424522399902 }, { "epoch": 3.663330078125e-05, "model_forward_time": 0.02546381950378418, "step": 24008 }, { "epoch": 3.663330078125e-05, "step": 24008, "training_step_time": 0.1126258373260498 }, { "epoch": 3.663482666015625e-05, "model_forward_time": 0.025568008422851562, "step": 24009 }, { "epoch": 3.663482666015625e-05, "step": 24009, "training_step_time": 0.10782456398010254 }, { "epoch": 3.66363525390625e-05, "grad_norm": 0.06362523883581161, "learning_rate": 1.0509145769637057e-05, "loss": 0.0027, "step": 24010 }, { "epoch": 3.66363525390625e-05, "model_forward_time": 0.025328397750854492, "step": 24010 }, { "epoch": 3.66363525390625e-05, "step": 24010, "training_step_time": 0.10945868492126465 }, { "epoch": 3.663787841796875e-05, "model_forward_time": 0.025268077850341797, "step": 24011 }, { "epoch": 3.663787841796875e-05, "step": 24011, "training_step_time": 0.10518336296081543 }, { "epoch": 3.6639404296875e-05, "model_forward_time": 0.025100231170654297, "step": 24012 }, { "epoch": 3.6639404296875e-05, "step": 24012, "training_step_time": 0.10564041137695312 }, { "epoch": 3.664093017578125e-05, "model_forward_time": 0.025072336196899414, "step": 24013 }, { "epoch": 3.664093017578125e-05, "step": 24013, "training_step_time": 0.1044607162475586 }, { "epoch": 3.66424560546875e-05, "model_forward_time": 0.025456905364990234, "step": 24014 }, { "epoch": 3.66424560546875e-05, "step": 24014, "training_step_time": 0.10441899299621582 }, { "epoch": 3.664398193359375e-05, "model_forward_time": 0.024611234664916992, "step": 24015 }, { "epoch": 3.664398193359375e-05, "step": 24015, "training_step_time": 0.10735774040222168 }, { "epoch": 3.66455078125e-05, "model_forward_time": 0.02546977996826172, "step": 24016 }, { "epoch": 3.66455078125e-05, "step": 24016, "training_step_time": 0.1321725845336914 }, { "epoch": 3.664703369140625e-05, "model_forward_time": 0.024175643920898438, "step": 24017 }, { "epoch": 3.664703369140625e-05, "step": 24017, "training_step_time": 0.15592145919799805 }, { "epoch": 3.66485595703125e-05, "model_forward_time": 0.02366948127746582, "step": 24018 }, { "epoch": 3.66485595703125e-05, "step": 24018, "training_step_time": 0.1436929702758789 }, { "epoch": 3.665008544921875e-05, "model_forward_time": 0.024038076400756836, "step": 24019 }, { "epoch": 3.665008544921875e-05, "step": 24019, "training_step_time": 0.15226960182189941 }, { "epoch": 3.6651611328125e-05, "grad_norm": 0.07818132638931274, "learning_rate": 1.0475364994205411e-05, "loss": 0.005, "step": 24020 }, { "epoch": 3.6651611328125e-05, "model_forward_time": 0.023895978927612305, "step": 24020 }, { "epoch": 3.6651611328125e-05, "step": 24020, "training_step_time": 0.13908672332763672 }, { "epoch": 3.665313720703125e-05, "model_forward_time": 0.0238955020904541, "step": 24021 }, { "epoch": 3.665313720703125e-05, "step": 24021, "training_step_time": 0.12632393836975098 }, { "epoch": 3.66546630859375e-05, "model_forward_time": 0.02394866943359375, "step": 24022 }, { "epoch": 3.66546630859375e-05, "step": 24022, "training_step_time": 0.12409543991088867 }, { "epoch": 3.665618896484375e-05, "model_forward_time": 0.0240480899810791, "step": 24023 }, { "epoch": 3.665618896484375e-05, "step": 24023, "training_step_time": 0.17068696022033691 }, { "epoch": 3.665771484375e-05, "model_forward_time": 0.02702617645263672, "step": 24024 }, { "epoch": 3.665771484375e-05, "step": 24024, "training_step_time": 0.11970996856689453 }, { "epoch": 3.665924072265625e-05, "model_forward_time": 0.024559736251831055, "step": 24025 }, { "epoch": 3.665924072265625e-05, "step": 24025, "training_step_time": 0.11860418319702148 }, { "epoch": 3.66607666015625e-05, "model_forward_time": 0.02488398551940918, "step": 24026 }, { "epoch": 3.66607666015625e-05, "step": 24026, "training_step_time": 0.13718271255493164 }, { "epoch": 3.666229248046875e-05, "model_forward_time": 0.02512383460998535, "step": 24027 }, { "epoch": 3.666229248046875e-05, "step": 24027, "training_step_time": 0.1224820613861084 }, { "epoch": 3.6663818359375e-05, "model_forward_time": 0.025180339813232422, "step": 24028 }, { "epoch": 3.6663818359375e-05, "step": 24028, "training_step_time": 0.11996817588806152 }, { "epoch": 3.666534423828125e-05, "model_forward_time": 0.025171279907226562, "step": 24029 }, { "epoch": 3.666534423828125e-05, "step": 24029, "training_step_time": 0.11020040512084961 }, { "epoch": 3.66668701171875e-05, "grad_norm": 0.09496715664863586, "learning_rate": 1.0441632244932237e-05, "loss": 0.0044, "step": 24030 }, { "epoch": 3.66668701171875e-05, "model_forward_time": 0.02509140968322754, "step": 24030 }, { "epoch": 3.66668701171875e-05, "step": 24030, "training_step_time": 0.11746025085449219 }, { "epoch": 3.666839599609375e-05, "model_forward_time": 0.02589702606201172, "step": 24031 }, { "epoch": 3.666839599609375e-05, "step": 24031, "training_step_time": 0.10252928733825684 }, { "epoch": 3.6669921875e-05, "model_forward_time": 0.025146007537841797, "step": 24032 }, { "epoch": 3.6669921875e-05, "step": 24032, "training_step_time": 0.10325884819030762 }, { "epoch": 3.667144775390625e-05, "model_forward_time": 0.025122880935668945, "step": 24033 }, { "epoch": 3.667144775390625e-05, "step": 24033, "training_step_time": 0.12383675575256348 }, { "epoch": 3.66729736328125e-05, "model_forward_time": 0.024909019470214844, "step": 24034 }, { "epoch": 3.66729736328125e-05, "step": 24034, "training_step_time": 0.1274123191833496 }, { "epoch": 3.667449951171875e-05, "model_forward_time": 0.02518606185913086, "step": 24035 }, { "epoch": 3.667449951171875e-05, "step": 24035, "training_step_time": 0.11529898643493652 }, { "epoch": 3.6676025390625e-05, "model_forward_time": 0.02518153190612793, "step": 24036 }, { "epoch": 3.6676025390625e-05, "step": 24036, "training_step_time": 0.10953950881958008 }, { "epoch": 3.667755126953125e-05, "model_forward_time": 0.025110960006713867, "step": 24037 }, { "epoch": 3.667755126953125e-05, "step": 24037, "training_step_time": 0.11234617233276367 }, { "epoch": 3.66790771484375e-05, "model_forward_time": 0.02500295639038086, "step": 24038 }, { "epoch": 3.66790771484375e-05, "step": 24038, "training_step_time": 0.14154791831970215 }, { "epoch": 3.668060302734375e-05, "model_forward_time": 0.025320053100585938, "step": 24039 }, { "epoch": 3.668060302734375e-05, "step": 24039, "training_step_time": 0.10712647438049316 }, { "epoch": 3.668212890625e-05, "grad_norm": 0.29370254278182983, "learning_rate": 1.0407947562805986e-05, "loss": 0.0063, "step": 24040 }, { "epoch": 3.668212890625e-05, "model_forward_time": 0.0256500244140625, "step": 24040 }, { "epoch": 3.668212890625e-05, "step": 24040, "training_step_time": 0.10539865493774414 }, { "epoch": 3.668365478515625e-05, "model_forward_time": 0.024700403213500977, "step": 24041 }, { "epoch": 3.668365478515625e-05, "step": 24041, "training_step_time": 0.11933088302612305 }, { "epoch": 3.66851806640625e-05, "model_forward_time": 0.02482438087463379, "step": 24042 }, { "epoch": 3.66851806640625e-05, "step": 24042, "training_step_time": 0.10567212104797363 }, { "epoch": 3.668670654296875e-05, "model_forward_time": 0.025246858596801758, "step": 24043 }, { "epoch": 3.668670654296875e-05, "step": 24043, "training_step_time": 0.203230619430542 }, { "epoch": 3.6688232421875e-05, "model_forward_time": 0.023929119110107422, "step": 24044 }, { "epoch": 3.6688232421875e-05, "step": 24044, "training_step_time": 0.13109374046325684 }, { "epoch": 3.668975830078125e-05, "model_forward_time": 0.02285599708557129, "step": 24045 }, { "epoch": 3.668975830078125e-05, "step": 24045, "training_step_time": 0.13663125038146973 }, { "epoch": 3.66912841796875e-05, "model_forward_time": 0.025371074676513672, "step": 24046 }, { "epoch": 3.66912841796875e-05, "step": 24046, "training_step_time": 0.12169337272644043 }, { "epoch": 3.669281005859375e-05, "model_forward_time": 0.024966955184936523, "step": 24047 }, { "epoch": 3.669281005859375e-05, "step": 24047, "training_step_time": 0.12405586242675781 }, { "epoch": 3.66943359375e-05, "model_forward_time": 0.025287628173828125, "step": 24048 }, { "epoch": 3.66943359375e-05, "step": 24048, "training_step_time": 0.11823272705078125 }, { "epoch": 3.669586181640625e-05, "model_forward_time": 0.024600744247436523, "step": 24049 }, { "epoch": 3.669586181640625e-05, "step": 24049, "training_step_time": 0.10256004333496094 }, { "epoch": 3.66973876953125e-05, "grad_norm": 0.08640720695257187, "learning_rate": 1.0374310988756747e-05, "loss": 0.0049, "step": 24050 }, { "epoch": 3.66973876953125e-05, "model_forward_time": 0.024354219436645508, "step": 24050 }, { "epoch": 3.66973876953125e-05, "step": 24050, "training_step_time": 0.14849400520324707 }, { "epoch": 3.669891357421875e-05, "model_forward_time": 0.024599313735961914, "step": 24051 }, { "epoch": 3.669891357421875e-05, "step": 24051, "training_step_time": 0.17339777946472168 }, { "epoch": 3.6700439453125e-05, "model_forward_time": 0.024622201919555664, "step": 24052 }, { "epoch": 3.6700439453125e-05, "step": 24052, "training_step_time": 0.12133049964904785 }, { "epoch": 3.670196533203125e-05, "model_forward_time": 0.027564048767089844, "step": 24053 }, { "epoch": 3.670196533203125e-05, "step": 24053, "training_step_time": 0.13035273551940918 }, { "epoch": 3.67034912109375e-05, "model_forward_time": 0.025209426879882812, "step": 24054 }, { "epoch": 3.67034912109375e-05, "step": 24054, "training_step_time": 0.10977888107299805 }, { "epoch": 3.670501708984375e-05, "model_forward_time": 0.025180339813232422, "step": 24055 }, { "epoch": 3.670501708984375e-05, "step": 24055, "training_step_time": 0.10635042190551758 }, { "epoch": 3.670654296875e-05, "model_forward_time": 0.02494502067565918, "step": 24056 }, { "epoch": 3.670654296875e-05, "step": 24056, "training_step_time": 0.1087186336517334 }, { "epoch": 3.670806884765625e-05, "model_forward_time": 0.024883270263671875, "step": 24057 }, { "epoch": 3.670806884765625e-05, "step": 24057, "training_step_time": 0.10693931579589844 }, { "epoch": 3.67095947265625e-05, "model_forward_time": 0.02525186538696289, "step": 24058 }, { "epoch": 3.67095947265625e-05, "step": 24058, "training_step_time": 0.10685396194458008 }, { "epoch": 3.671112060546875e-05, "model_forward_time": 0.0251619815826416, "step": 24059 }, { "epoch": 3.671112060546875e-05, "step": 24059, "training_step_time": 0.10617518424987793 }, { "epoch": 3.6712646484375e-05, "grad_norm": 0.07657795399427414, "learning_rate": 1.0340722563656107e-05, "loss": 0.0083, "step": 24060 }, { "epoch": 3.6712646484375e-05, "model_forward_time": 0.024883270263671875, "step": 24060 }, { "epoch": 3.6712646484375e-05, "step": 24060, "training_step_time": 0.11042499542236328 }, { "epoch": 3.671417236328125e-05, "model_forward_time": 0.0251467227935791, "step": 24061 }, { "epoch": 3.671417236328125e-05, "step": 24061, "training_step_time": 0.11004877090454102 }, { "epoch": 3.67156982421875e-05, "model_forward_time": 0.02520895004272461, "step": 24062 }, { "epoch": 3.67156982421875e-05, "step": 24062, "training_step_time": 0.1058967113494873 }, { "epoch": 3.671722412109375e-05, "model_forward_time": 0.02517247200012207, "step": 24063 }, { "epoch": 3.671722412109375e-05, "step": 24063, "training_step_time": 0.10699629783630371 }, { "epoch": 3.671875e-05, "model_forward_time": 0.024727821350097656, "step": 24064 }, { "epoch": 3.671875e-05, "step": 24064, "training_step_time": 0.11183881759643555 }, { "epoch": 3.672027587890625e-05, "model_forward_time": 0.024860143661499023, "step": 24065 }, { "epoch": 3.672027587890625e-05, "step": 24065, "training_step_time": 0.10896825790405273 }, { "epoch": 3.67218017578125e-05, "model_forward_time": 0.025084495544433594, "step": 24066 }, { "epoch": 3.67218017578125e-05, "step": 24066, "training_step_time": 0.10719466209411621 }, { "epoch": 3.672332763671875e-05, "model_forward_time": 0.025223493576049805, "step": 24067 }, { "epoch": 3.672332763671875e-05, "step": 24067, "training_step_time": 0.10651993751525879 }, { "epoch": 3.6724853515625e-05, "model_forward_time": 0.025406837463378906, "step": 24068 }, { "epoch": 3.6724853515625e-05, "step": 24068, "training_step_time": 0.11262702941894531 }, { "epoch": 3.672637939453125e-05, "model_forward_time": 0.02527165412902832, "step": 24069 }, { "epoch": 3.672637939453125e-05, "step": 24069, "training_step_time": 0.15921354293823242 }, { "epoch": 3.67279052734375e-05, "grad_norm": 0.10217463225126266, "learning_rate": 1.0307182328317188e-05, "loss": 0.0035, "step": 24070 }, { "epoch": 3.67279052734375e-05, "model_forward_time": 0.024503707885742188, "step": 24070 }, { "epoch": 3.67279052734375e-05, "step": 24070, "training_step_time": 0.10456991195678711 }, { "epoch": 3.672943115234375e-05, "model_forward_time": 0.024464130401611328, "step": 24071 }, { "epoch": 3.672943115234375e-05, "step": 24071, "training_step_time": 0.13351702690124512 }, { "epoch": 3.673095703125e-05, "model_forward_time": 0.025415897369384766, "step": 24072 }, { "epoch": 3.673095703125e-05, "step": 24072, "training_step_time": 0.1625065803527832 }, { "epoch": 3.673248291015625e-05, "model_forward_time": 0.024666309356689453, "step": 24073 }, { "epoch": 3.673248291015625e-05, "step": 24073, "training_step_time": 0.2170882225036621 }, { "epoch": 3.67340087890625e-05, "model_forward_time": 0.024489164352416992, "step": 24074 }, { "epoch": 3.67340087890625e-05, "step": 24074, "training_step_time": 0.11351466178894043 }, { "epoch": 3.673553466796875e-05, "model_forward_time": 0.024282217025756836, "step": 24075 }, { "epoch": 3.673553466796875e-05, "step": 24075, "training_step_time": 0.20580601692199707 }, { "epoch": 3.6737060546875e-05, "model_forward_time": 0.02455735206604004, "step": 24076 }, { "epoch": 3.6737060546875e-05, "step": 24076, "training_step_time": 0.10820841789245605 }, { "epoch": 3.673858642578125e-05, "model_forward_time": 0.024129867553710938, "step": 24077 }, { "epoch": 3.673858642578125e-05, "step": 24077, "training_step_time": 0.14322400093078613 }, { "epoch": 3.67401123046875e-05, "model_forward_time": 0.024748802185058594, "step": 24078 }, { "epoch": 3.67401123046875e-05, "step": 24078, "training_step_time": 0.11382627487182617 }, { "epoch": 3.674163818359375e-05, "model_forward_time": 0.0245363712310791, "step": 24079 }, { "epoch": 3.674163818359375e-05, "step": 24079, "training_step_time": 0.1187901496887207 }, { "epoch": 3.67431640625e-05, "grad_norm": 0.24320381879806519, "learning_rate": 1.0273690323494523e-05, "loss": 0.0081, "step": 24080 }, { "epoch": 3.67431640625e-05, "model_forward_time": 0.025028467178344727, "step": 24080 }, { "epoch": 3.67431640625e-05, "step": 24080, "training_step_time": 0.12297463417053223 }, { "epoch": 3.674468994140625e-05, "model_forward_time": 0.02501964569091797, "step": 24081 }, { "epoch": 3.674468994140625e-05, "step": 24081, "training_step_time": 0.17127132415771484 }, { "epoch": 3.67462158203125e-05, "model_forward_time": 0.024918556213378906, "step": 24082 }, { "epoch": 3.67462158203125e-05, "step": 24082, "training_step_time": 0.14585256576538086 }, { "epoch": 3.674774169921875e-05, "model_forward_time": 0.02424168586730957, "step": 24083 }, { "epoch": 3.674774169921875e-05, "step": 24083, "training_step_time": 0.10650968551635742 }, { "epoch": 3.6749267578125e-05, "model_forward_time": 0.024976253509521484, "step": 24084 }, { "epoch": 3.6749267578125e-05, "step": 24084, "training_step_time": 0.10765480995178223 }, { "epoch": 3.675079345703125e-05, "model_forward_time": 0.025284290313720703, "step": 24085 }, { "epoch": 3.675079345703125e-05, "step": 24085, "training_step_time": 0.11167001724243164 }, { "epoch": 3.67523193359375e-05, "model_forward_time": 0.025534868240356445, "step": 24086 }, { "epoch": 3.67523193359375e-05, "step": 24086, "training_step_time": 0.10933351516723633 }, { "epoch": 3.675384521484375e-05, "model_forward_time": 0.025104284286499023, "step": 24087 }, { "epoch": 3.675384521484375e-05, "step": 24087, "training_step_time": 0.19156122207641602 }, { "epoch": 3.675537109375e-05, "model_forward_time": 0.026508331298828125, "step": 24088 }, { "epoch": 3.675537109375e-05, "step": 24088, "training_step_time": 0.10448050498962402 }, { "epoch": 3.675689697265625e-05, "model_forward_time": 0.0250244140625, "step": 24089 }, { "epoch": 3.675689697265625e-05, "step": 24089, "training_step_time": 0.10562634468078613 }, { "epoch": 3.67584228515625e-05, "grad_norm": 0.1064799502491951, "learning_rate": 1.0240246589884044e-05, "loss": 0.0033, "step": 24090 }, { "epoch": 3.67584228515625e-05, "model_forward_time": 0.026424884796142578, "step": 24090 }, { "epoch": 3.67584228515625e-05, "step": 24090, "training_step_time": 0.11082077026367188 }, { "epoch": 3.675994873046875e-05, "model_forward_time": 0.0261383056640625, "step": 24091 }, { "epoch": 3.675994873046875e-05, "step": 24091, "training_step_time": 0.10821914672851562 }, { "epoch": 3.6761474609375e-05, "model_forward_time": 0.025788307189941406, "step": 24092 }, { "epoch": 3.6761474609375e-05, "step": 24092, "training_step_time": 0.10869264602661133 }, { "epoch": 3.676300048828125e-05, "model_forward_time": 0.0257418155670166, "step": 24093 }, { "epoch": 3.676300048828125e-05, "step": 24093, "training_step_time": 0.10656070709228516 }, { "epoch": 3.67645263671875e-05, "model_forward_time": 0.02525186538696289, "step": 24094 }, { "epoch": 3.67645263671875e-05, "step": 24094, "training_step_time": 0.10399937629699707 }, { "epoch": 3.676605224609375e-05, "model_forward_time": 0.024868488311767578, "step": 24095 }, { "epoch": 3.676605224609375e-05, "step": 24095, "training_step_time": 0.13977551460266113 }, { "epoch": 3.6767578125e-05, "model_forward_time": 0.025123119354248047, "step": 24096 }, { "epoch": 3.6767578125e-05, "step": 24096, "training_step_time": 0.16968226432800293 }, { "epoch": 3.676910400390625e-05, "model_forward_time": 0.024972200393676758, "step": 24097 }, { "epoch": 3.676910400390625e-05, "step": 24097, "training_step_time": 0.10448741912841797 }, { "epoch": 3.67706298828125e-05, "model_forward_time": 0.02499556541442871, "step": 24098 }, { "epoch": 3.67706298828125e-05, "step": 24098, "training_step_time": 0.13866209983825684 }, { "epoch": 3.677215576171875e-05, "model_forward_time": 0.02551865577697754, "step": 24099 }, { "epoch": 3.677215576171875e-05, "step": 24099, "training_step_time": 0.1945338249206543 }, { "epoch": 3.6773681640625e-05, "grad_norm": 0.13784803450107574, "learning_rate": 1.0206851168123077e-05, "loss": 0.0073, "step": 24100 }, { "epoch": 3.6773681640625e-05, "model_forward_time": 0.024990320205688477, "step": 24100 }, { "epoch": 3.6773681640625e-05, "step": 24100, "training_step_time": 0.10333991050720215 }, { "epoch": 3.677520751953125e-05, "model_forward_time": 0.025194168090820312, "step": 24101 }, { "epoch": 3.677520751953125e-05, "step": 24101, "training_step_time": 0.10208559036254883 }, { "epoch": 3.67767333984375e-05, "model_forward_time": 0.02661895751953125, "step": 24102 }, { "epoch": 3.67767333984375e-05, "step": 24102, "training_step_time": 0.10495328903198242 }, { "epoch": 3.677825927734375e-05, "model_forward_time": 0.026109695434570312, "step": 24103 }, { "epoch": 3.677825927734375e-05, "step": 24103, "training_step_time": 0.10863399505615234 }, { "epoch": 3.677978515625e-05, "model_forward_time": 0.026014328002929688, "step": 24104 }, { "epoch": 3.677978515625e-05, "step": 24104, "training_step_time": 0.11199164390563965 }, { "epoch": 3.678131103515625e-05, "model_forward_time": 0.025629520416259766, "step": 24105 }, { "epoch": 3.678131103515625e-05, "step": 24105, "training_step_time": 0.10424065589904785 }, { "epoch": 3.67828369140625e-05, "model_forward_time": 0.02532219886779785, "step": 24106 }, { "epoch": 3.67828369140625e-05, "step": 24106, "training_step_time": 0.10615921020507812 }, { "epoch": 3.678436279296875e-05, "model_forward_time": 0.025867462158203125, "step": 24107 }, { "epoch": 3.678436279296875e-05, "step": 24107, "training_step_time": 0.10522961616516113 }, { "epoch": 3.6785888671875e-05, "model_forward_time": 0.025787830352783203, "step": 24108 }, { "epoch": 3.6785888671875e-05, "step": 24108, "training_step_time": 0.10486817359924316 }, { "epoch": 3.678741455078125e-05, "model_forward_time": 0.02577495574951172, "step": 24109 }, { "epoch": 3.678741455078125e-05, "step": 24109, "training_step_time": 0.10868716239929199 }, { "epoch": 3.67889404296875e-05, "grad_norm": 0.11030536144971848, "learning_rate": 1.0173504098790187e-05, "loss": 0.0044, "step": 24110 }, { "epoch": 3.67889404296875e-05, "model_forward_time": 0.02561783790588379, "step": 24110 }, { "epoch": 3.67889404296875e-05, "step": 24110, "training_step_time": 0.1055595874786377 }, { "epoch": 3.679046630859375e-05, "model_forward_time": 0.025632858276367188, "step": 24111 }, { "epoch": 3.679046630859375e-05, "step": 24111, "training_step_time": 0.10642528533935547 }, { "epoch": 3.67919921875e-05, "model_forward_time": 0.026107311248779297, "step": 24112 }, { "epoch": 3.67919921875e-05, "step": 24112, "training_step_time": 0.10688638687133789 }, { "epoch": 3.679351806640625e-05, "model_forward_time": 0.025941133499145508, "step": 24113 }, { "epoch": 3.679351806640625e-05, "step": 24113, "training_step_time": 0.21828055381774902 }, { "epoch": 3.67950439453125e-05, "model_forward_time": 0.025509357452392578, "step": 24114 }, { "epoch": 3.67950439453125e-05, "step": 24114, "training_step_time": 0.12597942352294922 }, { "epoch": 3.679656982421875e-05, "model_forward_time": 0.02488994598388672, "step": 24115 }, { "epoch": 3.679656982421875e-05, "step": 24115, "training_step_time": 0.1425619125366211 }, { "epoch": 3.6798095703125e-05, "model_forward_time": 0.025248050689697266, "step": 24116 }, { "epoch": 3.6798095703125e-05, "step": 24116, "training_step_time": 0.14768481254577637 }, { "epoch": 3.679962158203125e-05, "model_forward_time": 0.02538466453552246, "step": 24117 }, { "epoch": 3.679962158203125e-05, "step": 24117, "training_step_time": 0.22845911979675293 }, { "epoch": 3.68011474609375e-05, "model_forward_time": 0.025609493255615234, "step": 24118 }, { "epoch": 3.68011474609375e-05, "step": 24118, "training_step_time": 0.12885069847106934 }, { "epoch": 3.680267333984375e-05, "model_forward_time": 0.02861762046813965, "step": 24119 }, { "epoch": 3.680267333984375e-05, "step": 24119, "training_step_time": 0.1259927749633789 }, { "epoch": 3.680419921875e-05, "grad_norm": 0.0730748251080513, "learning_rate": 1.0140205422405214e-05, "loss": 0.0031, "step": 24120 }, { "epoch": 3.680419921875e-05, "model_forward_time": 0.025124549865722656, "step": 24120 }, { "epoch": 3.680419921875e-05, "step": 24120, "training_step_time": 0.1855602264404297 }, { "epoch": 3.680572509765625e-05, "model_forward_time": 0.02525949478149414, "step": 24121 }, { "epoch": 3.680572509765625e-05, "step": 24121, "training_step_time": 0.2160933017730713 }, { "epoch": 3.68072509765625e-05, "model_forward_time": 0.025052547454833984, "step": 24122 }, { "epoch": 3.68072509765625e-05, "step": 24122, "training_step_time": 0.1080467700958252 }, { "epoch": 3.680877685546875e-05, "model_forward_time": 0.024290084838867188, "step": 24123 }, { "epoch": 3.680877685546875e-05, "step": 24123, "training_step_time": 0.12437939643859863 }, { "epoch": 3.6810302734375e-05, "model_forward_time": 0.025772571563720703, "step": 24124 }, { "epoch": 3.6810302734375e-05, "step": 24124, "training_step_time": 0.12514424324035645 }, { "epoch": 3.681182861328125e-05, "model_forward_time": 0.025779008865356445, "step": 24125 }, { "epoch": 3.681182861328125e-05, "step": 24125, "training_step_time": 0.21355390548706055 }, { "epoch": 3.68133544921875e-05, "model_forward_time": 0.025203466415405273, "step": 24126 }, { "epoch": 3.68133544921875e-05, "step": 24126, "training_step_time": 0.11249518394470215 }, { "epoch": 3.681488037109375e-05, "model_forward_time": 0.026461124420166016, "step": 24127 }, { "epoch": 3.681488037109375e-05, "step": 24127, "training_step_time": 0.10911107063293457 }, { "epoch": 3.681640625e-05, "model_forward_time": 0.02622675895690918, "step": 24128 }, { "epoch": 3.681640625e-05, "step": 24128, "training_step_time": 0.11655497550964355 }, { "epoch": 3.681793212890625e-05, "model_forward_time": 0.025938749313354492, "step": 24129 }, { "epoch": 3.681793212890625e-05, "step": 24129, "training_step_time": 0.10959720611572266 }, { "epoch": 3.68194580078125e-05, "grad_norm": 0.1205686703324318, "learning_rate": 1.0106955179429183e-05, "loss": 0.0138, "step": 24130 }, { "epoch": 3.68194580078125e-05, "model_forward_time": 0.02564716339111328, "step": 24130 }, { "epoch": 3.68194580078125e-05, "step": 24130, "training_step_time": 0.18543100357055664 }, { "epoch": 3.682098388671875e-05, "model_forward_time": 0.025803565979003906, "step": 24131 }, { "epoch": 3.682098388671875e-05, "step": 24131, "training_step_time": 0.11078238487243652 }, { "epoch": 3.6822509765625e-05, "model_forward_time": 0.025426864624023438, "step": 24132 }, { "epoch": 3.6822509765625e-05, "step": 24132, "training_step_time": 0.10596632957458496 }, { "epoch": 3.682403564453125e-05, "model_forward_time": 0.0252993106842041, "step": 24133 }, { "epoch": 3.682403564453125e-05, "step": 24133, "training_step_time": 0.10480976104736328 }, { "epoch": 3.68255615234375e-05, "model_forward_time": 0.025949954986572266, "step": 24134 }, { "epoch": 3.68255615234375e-05, "step": 24134, "training_step_time": 0.10796904563903809 }, { "epoch": 3.682708740234375e-05, "model_forward_time": 0.02588677406311035, "step": 24135 }, { "epoch": 3.682708740234375e-05, "step": 24135, "training_step_time": 0.10493755340576172 }, { "epoch": 3.682861328125e-05, "model_forward_time": 0.026416778564453125, "step": 24136 }, { "epoch": 3.682861328125e-05, "step": 24136, "training_step_time": 0.11046147346496582 }, { "epoch": 3.683013916015625e-05, "model_forward_time": 0.025254487991333008, "step": 24137 }, { "epoch": 3.683013916015625e-05, "step": 24137, "training_step_time": 0.1023256778717041 }, { "epoch": 3.68316650390625e-05, "model_forward_time": 0.025026321411132812, "step": 24138 }, { "epoch": 3.68316650390625e-05, "step": 24138, "training_step_time": 0.15315842628479004 }, { "epoch": 3.683319091796875e-05, "model_forward_time": 0.025055408477783203, "step": 24139 }, { "epoch": 3.683319091796875e-05, "step": 24139, "training_step_time": 0.16449975967407227 }, { "epoch": 3.6834716796875e-05, "grad_norm": 0.11838909238576889, "learning_rate": 1.0073753410264263e-05, "loss": 0.0096, "step": 24140 }, { "epoch": 3.6834716796875e-05, "model_forward_time": 0.02541828155517578, "step": 24140 }, { "epoch": 3.6834716796875e-05, "step": 24140, "training_step_time": 0.10796213150024414 }, { "epoch": 3.683624267578125e-05, "model_forward_time": 0.02563643455505371, "step": 24141 }, { "epoch": 3.683624267578125e-05, "step": 24141, "training_step_time": 0.12060427665710449 }, { "epoch": 3.68377685546875e-05, "model_forward_time": 0.025612354278564453, "step": 24142 }, { "epoch": 3.68377685546875e-05, "step": 24142, "training_step_time": 0.19051265716552734 }, { "epoch": 3.683929443359375e-05, "model_forward_time": 0.025469064712524414, "step": 24143 }, { "epoch": 3.683929443359375e-05, "step": 24143, "training_step_time": 0.10236263275146484 }, { "epoch": 3.68408203125e-05, "model_forward_time": 0.025121212005615234, "step": 24144 }, { "epoch": 3.68408203125e-05, "step": 24144, "training_step_time": 0.10287714004516602 }, { "epoch": 3.684234619140625e-05, "model_forward_time": 0.026030302047729492, "step": 24145 }, { "epoch": 3.684234619140625e-05, "step": 24145, "training_step_time": 0.10700154304504395 }, { "epoch": 3.68438720703125e-05, "model_forward_time": 0.025827646255493164, "step": 24146 }, { "epoch": 3.68438720703125e-05, "step": 24146, "training_step_time": 0.10430169105529785 }, { "epoch": 3.684539794921875e-05, "model_forward_time": 0.029242515563964844, "step": 24147 }, { "epoch": 3.684539794921875e-05, "step": 24147, "training_step_time": 0.1156926155090332 }, { "epoch": 3.6846923828125e-05, "model_forward_time": 0.025898218154907227, "step": 24148 }, { "epoch": 3.6846923828125e-05, "step": 24148, "training_step_time": 0.1133279800415039 }, { "epoch": 3.684844970703125e-05, "model_forward_time": 0.025713682174682617, "step": 24149 }, { "epoch": 3.684844970703125e-05, "step": 24149, "training_step_time": 0.11545777320861816 }, { "epoch": 3.68499755859375e-05, "grad_norm": 0.26032018661499023, "learning_rate": 1.0040600155253765e-05, "loss": 0.005, "step": 24150 }, { "epoch": 3.68499755859375e-05, "model_forward_time": 0.025847673416137695, "step": 24150 }, { "epoch": 3.68499755859375e-05, "step": 24150, "training_step_time": 0.11114215850830078 }, { "epoch": 3.685150146484375e-05, "model_forward_time": 0.02635669708251953, "step": 24151 }, { "epoch": 3.685150146484375e-05, "step": 24151, "training_step_time": 0.11066985130310059 }, { "epoch": 3.685302734375e-05, "model_forward_time": 0.026253700256347656, "step": 24152 }, { "epoch": 3.685302734375e-05, "step": 24152, "training_step_time": 0.10852861404418945 }, { "epoch": 3.685455322265625e-05, "model_forward_time": 0.02539205551147461, "step": 24153 }, { "epoch": 3.685455322265625e-05, "step": 24153, "training_step_time": 0.10694146156311035 }, { "epoch": 3.68560791015625e-05, "model_forward_time": 0.025690317153930664, "step": 24154 }, { "epoch": 3.68560791015625e-05, "step": 24154, "training_step_time": 0.11032772064208984 }, { "epoch": 3.685760498046875e-05, "model_forward_time": 0.026186466217041016, "step": 24155 }, { "epoch": 3.685760498046875e-05, "step": 24155, "training_step_time": 0.10882782936096191 }, { "epoch": 3.6859130859375e-05, "model_forward_time": 0.025942325592041016, "step": 24156 }, { "epoch": 3.6859130859375e-05, "step": 24156, "training_step_time": 0.19700336456298828 }, { "epoch": 3.686065673828125e-05, "model_forward_time": 0.025162458419799805, "step": 24157 }, { "epoch": 3.686065673828125e-05, "step": 24157, "training_step_time": 0.10867452621459961 }, { "epoch": 3.68621826171875e-05, "model_forward_time": 0.02543163299560547, "step": 24158 }, { "epoch": 3.68621826171875e-05, "step": 24158, "training_step_time": 0.12447953224182129 }, { "epoch": 3.686370849609375e-05, "model_forward_time": 0.025775432586669922, "step": 24159 }, { "epoch": 3.686370849609375e-05, "step": 24159, "training_step_time": 0.1308298110961914 }, { "epoch": 3.6865234375e-05, "grad_norm": 0.12172437459230423, "learning_rate": 1.0007495454682004e-05, "loss": 0.0066, "step": 24160 }, { "epoch": 3.6865234375e-05, "model_forward_time": 0.0257568359375, "step": 24160 }, { "epoch": 3.6865234375e-05, "step": 24160, "training_step_time": 0.12293839454650879 }, { "epoch": 3.686676025390625e-05, "model_forward_time": 0.024862051010131836, "step": 24161 }, { "epoch": 3.686676025390625e-05, "step": 24161, "training_step_time": 0.11708998680114746 }, { "epoch": 3.68682861328125e-05, "model_forward_time": 0.025666475296020508, "step": 24162 }, { "epoch": 3.68682861328125e-05, "step": 24162, "training_step_time": 0.10637140274047852 }, { "epoch": 3.686981201171875e-05, "model_forward_time": 0.026049137115478516, "step": 24163 }, { "epoch": 3.686981201171875e-05, "step": 24163, "training_step_time": 0.10631394386291504 }, { "epoch": 3.6871337890625e-05, "model_forward_time": 0.02570509910583496, "step": 24164 }, { "epoch": 3.6871337890625e-05, "step": 24164, "training_step_time": 0.11016988754272461 }, { "epoch": 3.687286376953125e-05, "model_forward_time": 0.02590036392211914, "step": 24165 }, { "epoch": 3.687286376953125e-05, "step": 24165, "training_step_time": 0.10574626922607422 }, { "epoch": 3.68743896484375e-05, "model_forward_time": 0.025386333465576172, "step": 24166 }, { "epoch": 3.68743896484375e-05, "step": 24166, "training_step_time": 0.12269306182861328 }, { "epoch": 3.687591552734375e-05, "model_forward_time": 0.025882720947265625, "step": 24167 }, { "epoch": 3.687591552734375e-05, "step": 24167, "training_step_time": 0.10982513427734375 }, { "epoch": 3.687744140625e-05, "model_forward_time": 0.02607560157775879, "step": 24168 }, { "epoch": 3.687744140625e-05, "step": 24168, "training_step_time": 0.11633181571960449 }, { "epoch": 3.687896728515625e-05, "model_forward_time": 0.025753498077392578, "step": 24169 }, { "epoch": 3.687896728515625e-05, "step": 24169, "training_step_time": 0.14510607719421387 }, { "epoch": 3.68804931640625e-05, "grad_norm": 0.123519167304039, "learning_rate": 9.974439348774295e-06, "loss": 0.0034, "step": 24170 }, { "epoch": 3.68804931640625e-05, "model_forward_time": 0.025382041931152344, "step": 24170 }, { "epoch": 3.68804931640625e-05, "step": 24170, "training_step_time": 0.12420201301574707 }, { "epoch": 3.688201904296875e-05, "model_forward_time": 0.02507162094116211, "step": 24171 }, { "epoch": 3.688201904296875e-05, "step": 24171, "training_step_time": 0.18939566612243652 }, { "epoch": 3.6883544921875e-05, "model_forward_time": 0.027229785919189453, "step": 24172 }, { "epoch": 3.6883544921875e-05, "step": 24172, "training_step_time": 0.14023590087890625 }, { "epoch": 3.688507080078125e-05, "model_forward_time": 0.025155305862426758, "step": 24173 }, { "epoch": 3.688507080078125e-05, "step": 24173, "training_step_time": 0.11326003074645996 }, { "epoch": 3.68865966796875e-05, "model_forward_time": 0.025065898895263672, "step": 24174 }, { "epoch": 3.68865966796875e-05, "step": 24174, "training_step_time": 0.11387276649475098 }, { "epoch": 3.688812255859375e-05, "model_forward_time": 0.02569413185119629, "step": 24175 }, { "epoch": 3.688812255859375e-05, "step": 24175, "training_step_time": 0.1069328784942627 }, { "epoch": 3.68896484375e-05, "model_forward_time": 0.025589466094970703, "step": 24176 }, { "epoch": 3.68896484375e-05, "step": 24176, "training_step_time": 0.15832018852233887 }, { "epoch": 3.689117431640625e-05, "model_forward_time": 0.025401830673217773, "step": 24177 }, { "epoch": 3.689117431640625e-05, "step": 24177, "training_step_time": 0.14029550552368164 }, { "epoch": 3.68927001953125e-05, "model_forward_time": 0.025074243545532227, "step": 24178 }, { "epoch": 3.68927001953125e-05, "step": 24178, "training_step_time": 0.1045222282409668 }, { "epoch": 3.689422607421875e-05, "model_forward_time": 0.02561354637145996, "step": 24179 }, { "epoch": 3.689422607421875e-05, "step": 24179, "training_step_time": 0.1047205924987793 }, { "epoch": 3.6895751953125e-05, "grad_norm": 0.11646206676959991, "learning_rate": 9.941431877696955e-06, "loss": 0.0039, "step": 24180 }, { "epoch": 3.6895751953125e-05, "model_forward_time": 0.025722265243530273, "step": 24180 }, { "epoch": 3.6895751953125e-05, "step": 24180, "training_step_time": 0.11010861396789551 }, { "epoch": 3.689727783203125e-05, "model_forward_time": 0.025756120681762695, "step": 24181 }, { "epoch": 3.689727783203125e-05, "step": 24181, "training_step_time": 0.10695147514343262 }, { "epoch": 3.68988037109375e-05, "model_forward_time": 0.025637388229370117, "step": 24182 }, { "epoch": 3.68988037109375e-05, "step": 24182, "training_step_time": 0.10614395141601562 }, { "epoch": 3.690032958984375e-05, "model_forward_time": 0.02583003044128418, "step": 24183 }, { "epoch": 3.690032958984375e-05, "step": 24183, "training_step_time": 0.10625147819519043 }, { "epoch": 3.690185546875e-05, "model_forward_time": 0.02498459815979004, "step": 24184 }, { "epoch": 3.690185546875e-05, "step": 24184, "training_step_time": 0.14630532264709473 }, { "epoch": 3.690338134765625e-05, "model_forward_time": 0.026002168655395508, "step": 24185 }, { "epoch": 3.690338134765625e-05, "step": 24185, "training_step_time": 0.16147828102111816 }, { "epoch": 3.69049072265625e-05, "model_forward_time": 0.025487661361694336, "step": 24186 }, { "epoch": 3.69049072265625e-05, "step": 24186, "training_step_time": 0.10625791549682617 }, { "epoch": 3.690643310546875e-05, "model_forward_time": 0.02519059181213379, "step": 24187 }, { "epoch": 3.690643310546875e-05, "step": 24187, "training_step_time": 0.13268351554870605 }, { "epoch": 3.6907958984375e-05, "model_forward_time": 0.02602100372314453, "step": 24188 }, { "epoch": 3.6907958984375e-05, "step": 24188, "training_step_time": 0.16809439659118652 }, { "epoch": 3.690948486328125e-05, "model_forward_time": 0.025990724563598633, "step": 24189 }, { "epoch": 3.690948486328125e-05, "step": 24189, "training_step_time": 0.10544872283935547 }, { "epoch": 3.69110107421875e-05, "grad_norm": 0.07278118282556534, "learning_rate": 9.908473081557151e-06, "loss": 0.0046, "step": 24190 }, { "epoch": 3.69110107421875e-05, "model_forward_time": 0.02511310577392578, "step": 24190 }, { "epoch": 3.69110107421875e-05, "step": 24190, "training_step_time": 0.10655426979064941 }, { "epoch": 3.691253662109375e-05, "model_forward_time": 0.025409698486328125, "step": 24191 }, { "epoch": 3.691253662109375e-05, "step": 24191, "training_step_time": 0.10576176643371582 }, { "epoch": 3.69140625e-05, "model_forward_time": 0.028889179229736328, "step": 24192 }, { "epoch": 3.69140625e-05, "step": 24192, "training_step_time": 0.10943078994750977 }, { "epoch": 3.691558837890625e-05, "model_forward_time": 0.025516271591186523, "step": 24193 }, { "epoch": 3.691558837890625e-05, "step": 24193, "training_step_time": 0.10556554794311523 }, { "epoch": 3.69171142578125e-05, "model_forward_time": 0.025563478469848633, "step": 24194 }, { "epoch": 3.69171142578125e-05, "step": 24194, "training_step_time": 0.10485434532165527 }, { "epoch": 3.691864013671875e-05, "model_forward_time": 0.025976896286010742, "step": 24195 }, { "epoch": 3.691864013671875e-05, "step": 24195, "training_step_time": 0.1061553955078125 }, { "epoch": 3.6920166015625e-05, "model_forward_time": 0.02554798126220703, "step": 24196 }, { "epoch": 3.6920166015625e-05, "step": 24196, "training_step_time": 0.1088249683380127 }, { "epoch": 3.692169189453125e-05, "model_forward_time": 0.025852680206298828, "step": 24197 }, { "epoch": 3.692169189453125e-05, "step": 24197, "training_step_time": 0.10515618324279785 }, { "epoch": 3.69232177734375e-05, "model_forward_time": 0.025780677795410156, "step": 24198 }, { "epoch": 3.69232177734375e-05, "step": 24198, "training_step_time": 0.1053006649017334 }, { "epoch": 3.692474365234375e-05, "model_forward_time": 0.0259549617767334, "step": 24199 }, { "epoch": 3.692474365234375e-05, "step": 24199, "training_step_time": 0.10728788375854492 }, { "epoch": 3.692626953125e-05, "grad_norm": 0.07233019173145294, "learning_rate": 9.875563000402948e-06, "loss": 0.0031, "step": 24200 }, { "epoch": 3.692626953125e-05, "model_forward_time": 0.025726795196533203, "step": 24200 }, { "epoch": 3.692626953125e-05, "step": 24200, "training_step_time": 0.10914993286132812 }, { "epoch": 3.692779541015625e-05, "model_forward_time": 0.02552509307861328, "step": 24201 }, { "epoch": 3.692779541015625e-05, "step": 24201, "training_step_time": 0.10451960563659668 }, { "epoch": 3.69293212890625e-05, "model_forward_time": 0.026088237762451172, "step": 24202 }, { "epoch": 3.69293212890625e-05, "step": 24202, "training_step_time": 0.13700604438781738 }, { "epoch": 3.693084716796875e-05, "model_forward_time": 0.025475740432739258, "step": 24203 }, { "epoch": 3.693084716796875e-05, "step": 24203, "training_step_time": 0.11335897445678711 }, { "epoch": 3.6932373046875e-05, "model_forward_time": 0.02568197250366211, "step": 24204 }, { "epoch": 3.6932373046875e-05, "step": 24204, "training_step_time": 0.10724139213562012 }, { "epoch": 3.693389892578125e-05, "model_forward_time": 0.02583765983581543, "step": 24205 }, { "epoch": 3.693389892578125e-05, "step": 24205, "training_step_time": 0.1203920841217041 }, { "epoch": 3.69354248046875e-05, "model_forward_time": 0.025754690170288086, "step": 24206 }, { "epoch": 3.69354248046875e-05, "step": 24206, "training_step_time": 0.10697770118713379 }, { "epoch": 3.693695068359375e-05, "model_forward_time": 0.026006460189819336, "step": 24207 }, { "epoch": 3.693695068359375e-05, "step": 24207, "training_step_time": 0.12242317199707031 }, { "epoch": 3.69384765625e-05, "model_forward_time": 0.0244905948638916, "step": 24208 }, { "epoch": 3.69384765625e-05, "step": 24208, "training_step_time": 0.1125631332397461 }, { "epoch": 3.694000244140625e-05, "model_forward_time": 0.026227951049804688, "step": 24209 }, { "epoch": 3.694000244140625e-05, "step": 24209, "training_step_time": 0.10888433456420898 }, { "epoch": 3.69415283203125e-05, "grad_norm": 0.2593819200992584, "learning_rate": 9.842701674223187e-06, "loss": 0.0045, "step": 24210 }, { "epoch": 3.69415283203125e-05, "model_forward_time": 0.025493860244750977, "step": 24210 }, { "epoch": 3.69415283203125e-05, "step": 24210, "training_step_time": 0.13140583038330078 }, { "epoch": 3.694305419921875e-05, "model_forward_time": 0.025914669036865234, "step": 24211 }, { "epoch": 3.694305419921875e-05, "step": 24211, "training_step_time": 0.15052556991577148 }, { "epoch": 3.6944580078125e-05, "model_forward_time": 0.025086641311645508, "step": 24212 }, { "epoch": 3.6944580078125e-05, "step": 24212, "training_step_time": 0.15983295440673828 }, { "epoch": 3.694610595703125e-05, "model_forward_time": 0.02484440803527832, "step": 24213 }, { "epoch": 3.694610595703125e-05, "step": 24213, "training_step_time": 0.19882583618164062 }, { "epoch": 3.69476318359375e-05, "model_forward_time": 0.02533435821533203, "step": 24214 }, { "epoch": 3.69476318359375e-05, "step": 24214, "training_step_time": 0.14635109901428223 }, { "epoch": 3.694915771484375e-05, "model_forward_time": 0.027299880981445312, "step": 24215 }, { "epoch": 3.694915771484375e-05, "step": 24215, "training_step_time": 0.20888018608093262 }, { "epoch": 3.695068359375e-05, "model_forward_time": 0.025694847106933594, "step": 24216 }, { "epoch": 3.695068359375e-05, "step": 24216, "training_step_time": 0.13883352279663086 }, { "epoch": 3.695220947265625e-05, "model_forward_time": 0.024866580963134766, "step": 24217 }, { "epoch": 3.695220947265625e-05, "step": 24217, "training_step_time": 0.1363205909729004 }, { "epoch": 3.69537353515625e-05, "model_forward_time": 0.02494668960571289, "step": 24218 }, { "epoch": 3.69537353515625e-05, "step": 24218, "training_step_time": 0.1144707202911377 }, { "epoch": 3.695526123046875e-05, "model_forward_time": 0.025547504425048828, "step": 24219 }, { "epoch": 3.695526123046875e-05, "step": 24219, "training_step_time": 0.11354517936706543 }, { "epoch": 3.6956787109375e-05, "grad_norm": 0.08765245974063873, "learning_rate": 9.809889142947476e-06, "loss": 0.0079, "step": 24220 }, { "epoch": 3.6956787109375e-05, "model_forward_time": 0.02567148208618164, "step": 24220 }, { "epoch": 3.6956787109375e-05, "step": 24220, "training_step_time": 0.10988211631774902 }, { "epoch": 3.695831298828125e-05, "model_forward_time": 0.02645587921142578, "step": 24221 }, { "epoch": 3.695831298828125e-05, "step": 24221, "training_step_time": 0.10975050926208496 }, { "epoch": 3.69598388671875e-05, "model_forward_time": 0.028138399124145508, "step": 24222 }, { "epoch": 3.69598388671875e-05, "step": 24222, "training_step_time": 0.12267899513244629 }, { "epoch": 3.696136474609375e-05, "model_forward_time": 0.025582075119018555, "step": 24223 }, { "epoch": 3.696136474609375e-05, "step": 24223, "training_step_time": 0.12055397033691406 }, { "epoch": 3.6962890625e-05, "model_forward_time": 0.0255279541015625, "step": 24224 }, { "epoch": 3.6962890625e-05, "step": 24224, "training_step_time": 0.10575413703918457 }, { "epoch": 3.696441650390625e-05, "model_forward_time": 0.025485754013061523, "step": 24225 }, { "epoch": 3.696441650390625e-05, "step": 24225, "training_step_time": 0.10703039169311523 }, { "epoch": 3.69659423828125e-05, "model_forward_time": 0.025424718856811523, "step": 24226 }, { "epoch": 3.69659423828125e-05, "step": 24226, "training_step_time": 0.1045675277709961 }, { "epoch": 3.696746826171875e-05, "model_forward_time": 0.02596759796142578, "step": 24227 }, { "epoch": 3.696746826171875e-05, "step": 24227, "training_step_time": 0.10712075233459473 }, { "epoch": 3.6968994140625e-05, "model_forward_time": 0.025853872299194336, "step": 24228 }, { "epoch": 3.6968994140625e-05, "step": 24228, "training_step_time": 0.10766386985778809 }, { "epoch": 3.697052001953125e-05, "model_forward_time": 0.026806354522705078, "step": 24229 }, { "epoch": 3.697052001953125e-05, "step": 24229, "training_step_time": 0.10610604286193848 }, { "epoch": 3.69720458984375e-05, "grad_norm": 0.09023567289113998, "learning_rate": 9.777125446446133e-06, "loss": 0.0079, "step": 24230 }, { "epoch": 3.69720458984375e-05, "model_forward_time": 0.02937483787536621, "step": 24230 }, { "epoch": 3.69720458984375e-05, "step": 24230, "training_step_time": 0.13452911376953125 }, { "epoch": 3.697357177734375e-05, "model_forward_time": 0.025794029235839844, "step": 24231 }, { "epoch": 3.697357177734375e-05, "step": 24231, "training_step_time": 0.16328811645507812 }, { "epoch": 3.697509765625e-05, "model_forward_time": 0.025360107421875, "step": 24232 }, { "epoch": 3.697509765625e-05, "step": 24232, "training_step_time": 0.11490869522094727 }, { "epoch": 3.697662353515625e-05, "model_forward_time": 0.02565741539001465, "step": 24233 }, { "epoch": 3.697662353515625e-05, "step": 24233, "training_step_time": 0.1259596347808838 }, { "epoch": 3.69781494140625e-05, "model_forward_time": 0.025437593460083008, "step": 24234 }, { "epoch": 3.69781494140625e-05, "step": 24234, "training_step_time": 0.19434595108032227 }, { "epoch": 3.697967529296875e-05, "model_forward_time": 0.02480769157409668, "step": 24235 }, { "epoch": 3.697967529296875e-05, "step": 24235, "training_step_time": 0.10785484313964844 }, { "epoch": 3.6981201171875e-05, "model_forward_time": 0.025592803955078125, "step": 24236 }, { "epoch": 3.6981201171875e-05, "step": 24236, "training_step_time": 0.10455536842346191 }, { "epoch": 3.698272705078125e-05, "model_forward_time": 0.027256011962890625, "step": 24237 }, { "epoch": 3.698272705078125e-05, "step": 24237, "training_step_time": 0.10758209228515625 }, { "epoch": 3.69842529296875e-05, "model_forward_time": 0.026010513305664062, "step": 24238 }, { "epoch": 3.69842529296875e-05, "step": 24238, "training_step_time": 0.10954928398132324 }, { "epoch": 3.698577880859375e-05, "model_forward_time": 0.026130199432373047, "step": 24239 }, { "epoch": 3.698577880859375e-05, "step": 24239, "training_step_time": 0.10959863662719727 }, { "epoch": 3.69873046875e-05, "grad_norm": 0.06264301389455795, "learning_rate": 9.744410624530148e-06, "loss": 0.0039, "step": 24240 }, { "epoch": 3.69873046875e-05, "model_forward_time": 0.0256500244140625, "step": 24240 }, { "epoch": 3.69873046875e-05, "step": 24240, "training_step_time": 0.10589003562927246 }, { "epoch": 3.698883056640625e-05, "model_forward_time": 0.025907516479492188, "step": 24241 }, { "epoch": 3.698883056640625e-05, "step": 24241, "training_step_time": 0.10596799850463867 }, { "epoch": 3.69903564453125e-05, "model_forward_time": 0.025638818740844727, "step": 24242 }, { "epoch": 3.69903564453125e-05, "step": 24242, "training_step_time": 0.10683965682983398 }, { "epoch": 3.699188232421875e-05, "model_forward_time": 0.02575397491455078, "step": 24243 }, { "epoch": 3.699188232421875e-05, "step": 24243, "training_step_time": 0.10543656349182129 }, { "epoch": 3.6993408203125e-05, "model_forward_time": 0.02575373649597168, "step": 24244 }, { "epoch": 3.6993408203125e-05, "step": 24244, "training_step_time": 0.10977721214294434 }, { "epoch": 3.699493408203125e-05, "model_forward_time": 0.02539825439453125, "step": 24245 }, { "epoch": 3.699493408203125e-05, "step": 24245, "training_step_time": 0.1150813102722168 }, { "epoch": 3.69964599609375e-05, "model_forward_time": 0.02538752555847168, "step": 24246 }, { "epoch": 3.69964599609375e-05, "step": 24246, "training_step_time": 0.10520410537719727 }, { "epoch": 3.699798583984375e-05, "model_forward_time": 0.02498030662536621, "step": 24247 }, { "epoch": 3.699798583984375e-05, "step": 24247, "training_step_time": 0.10857295989990234 }, { "epoch": 3.699951171875e-05, "model_forward_time": 0.025450468063354492, "step": 24248 }, { "epoch": 3.699951171875e-05, "step": 24248, "training_step_time": 0.14297866821289062 }, { "epoch": 3.700103759765625e-05, "model_forward_time": 0.0248563289642334, "step": 24249 }, { "epoch": 3.700103759765625e-05, "step": 24249, "training_step_time": 0.12423872947692871 }, { "epoch": 3.70025634765625e-05, "grad_norm": 0.07255098223686218, "learning_rate": 9.711744716951093e-06, "loss": 0.0054, "step": 24250 }, { "epoch": 3.70025634765625e-05, "model_forward_time": 0.025104284286499023, "step": 24250 }, { "epoch": 3.70025634765625e-05, "step": 24250, "training_step_time": 0.1201024055480957 }, { "epoch": 3.700408935546875e-05, "model_forward_time": 0.025100231170654297, "step": 24251 }, { "epoch": 3.700408935546875e-05, "step": 24251, "training_step_time": 0.11166095733642578 }, { "epoch": 3.7005615234375e-05, "model_forward_time": 0.024966716766357422, "step": 24252 }, { "epoch": 3.7005615234375e-05, "step": 24252, "training_step_time": 0.1812901496887207 }, { "epoch": 3.700714111328125e-05, "model_forward_time": 0.02512955665588379, "step": 24253 }, { "epoch": 3.700714111328125e-05, "step": 24253, "training_step_time": 0.13298702239990234 }, { "epoch": 3.70086669921875e-05, "model_forward_time": 0.024826526641845703, "step": 24254 }, { "epoch": 3.70086669921875e-05, "step": 24254, "training_step_time": 0.11890721321105957 }, { "epoch": 3.701019287109375e-05, "model_forward_time": 0.02501845359802246, "step": 24255 }, { "epoch": 3.701019287109375e-05, "step": 24255, "training_step_time": 0.10590553283691406 }, { "epoch": 3.701171875e-05, "model_forward_time": 0.024799108505249023, "step": 24256 }, { "epoch": 3.701171875e-05, "step": 24256, "training_step_time": 0.10848045349121094 }, { "epoch": 3.701324462890625e-05, "model_forward_time": 0.02550530433654785, "step": 24257 }, { "epoch": 3.701324462890625e-05, "step": 24257, "training_step_time": 0.10931611061096191 }, { "epoch": 3.70147705078125e-05, "model_forward_time": 0.025151729583740234, "step": 24258 }, { "epoch": 3.70147705078125e-05, "step": 24258, "training_step_time": 0.10706925392150879 }, { "epoch": 3.701629638671875e-05, "model_forward_time": 0.02559828758239746, "step": 24259 }, { "epoch": 3.701629638671875e-05, "step": 24259, "training_step_time": 0.11228418350219727 }, { "epoch": 3.7017822265625e-05, "grad_norm": 0.13656899333000183, "learning_rate": 9.679127763401152e-06, "loss": 0.0037, "step": 24260 }, { "epoch": 3.7017822265625e-05, "model_forward_time": 0.02507615089416504, "step": 24260 }, { "epoch": 3.7017822265625e-05, "step": 24260, "training_step_time": 0.1109464168548584 }, { "epoch": 3.701934814453125e-05, "model_forward_time": 0.02564692497253418, "step": 24261 }, { "epoch": 3.701934814453125e-05, "step": 24261, "training_step_time": 0.17238974571228027 }, { "epoch": 3.70208740234375e-05, "model_forward_time": 0.0242769718170166, "step": 24262 }, { "epoch": 3.70208740234375e-05, "step": 24262, "training_step_time": 0.18323302268981934 }, { "epoch": 3.702239990234375e-05, "model_forward_time": 0.025003671646118164, "step": 24263 }, { "epoch": 3.702239990234375e-05, "step": 24263, "training_step_time": 0.14184880256652832 }, { "epoch": 3.702392578125e-05, "model_forward_time": 0.024840593338012695, "step": 24264 }, { "epoch": 3.702392578125e-05, "step": 24264, "training_step_time": 0.18009424209594727 }, { "epoch": 3.702545166015625e-05, "model_forward_time": 0.024814367294311523, "step": 24265 }, { "epoch": 3.702545166015625e-05, "step": 24265, "training_step_time": 0.19361662864685059 }, { "epoch": 3.70269775390625e-05, "model_forward_time": 0.026308298110961914, "step": 24266 }, { "epoch": 3.70269775390625e-05, "step": 24266, "training_step_time": 0.12687468528747559 }, { "epoch": 3.702850341796875e-05, "model_forward_time": 0.024618864059448242, "step": 24267 }, { "epoch": 3.702850341796875e-05, "step": 24267, "training_step_time": 0.10491251945495605 }, { "epoch": 3.7030029296875e-05, "model_forward_time": 0.025823354721069336, "step": 24268 }, { "epoch": 3.7030029296875e-05, "step": 24268, "training_step_time": 0.11254501342773438 }, { "epoch": 3.703155517578125e-05, "model_forward_time": 0.025709152221679688, "step": 24269 }, { "epoch": 3.703155517578125e-05, "step": 24269, "training_step_time": 0.11238861083984375 }, { "epoch": 3.70330810546875e-05, "grad_norm": 0.39457446336746216, "learning_rate": 9.646559803512994e-06, "loss": 0.0055, "step": 24270 }, { "epoch": 3.70330810546875e-05, "model_forward_time": 0.025118350982666016, "step": 24270 }, { "epoch": 3.70330810546875e-05, "step": 24270, "training_step_time": 0.10356450080871582 }, { "epoch": 3.703460693359375e-05, "model_forward_time": 0.025590181350708008, "step": 24271 }, { "epoch": 3.703460693359375e-05, "step": 24271, "training_step_time": 0.10285258293151855 }, { "epoch": 3.70361328125e-05, "model_forward_time": 0.02526068687438965, "step": 24272 }, { "epoch": 3.70361328125e-05, "step": 24272, "training_step_time": 0.10517120361328125 }, { "epoch": 3.703765869140625e-05, "model_forward_time": 0.025374174118041992, "step": 24273 }, { "epoch": 3.703765869140625e-05, "step": 24273, "training_step_time": 0.10662317276000977 }, { "epoch": 3.70391845703125e-05, "model_forward_time": 0.025262117385864258, "step": 24274 }, { "epoch": 3.70391845703125e-05, "step": 24274, "training_step_time": 0.10935187339782715 }, { "epoch": 3.704071044921875e-05, "model_forward_time": 0.025469064712524414, "step": 24275 }, { "epoch": 3.704071044921875e-05, "step": 24275, "training_step_time": 0.14448213577270508 }, { "epoch": 3.7042236328125e-05, "model_forward_time": 0.025028705596923828, "step": 24276 }, { "epoch": 3.7042236328125e-05, "step": 24276, "training_step_time": 0.1568596363067627 }, { "epoch": 3.704376220703125e-05, "model_forward_time": 0.024761676788330078, "step": 24277 }, { "epoch": 3.704376220703125e-05, "step": 24277, "training_step_time": 0.10796070098876953 }, { "epoch": 3.70452880859375e-05, "model_forward_time": 0.02533578872680664, "step": 24278 }, { "epoch": 3.70452880859375e-05, "step": 24278, "training_step_time": 0.13182473182678223 }, { "epoch": 3.704681396484375e-05, "model_forward_time": 0.025144577026367188, "step": 24279 }, { "epoch": 3.704681396484375e-05, "step": 24279, "training_step_time": 0.11942291259765625 }, { "epoch": 3.704833984375e-05, "grad_norm": 0.25070905685424805, "learning_rate": 9.614040876859748e-06, "loss": 0.0054, "step": 24280 }, { "epoch": 3.704833984375e-05, "model_forward_time": 0.02584052085876465, "step": 24280 }, { "epoch": 3.704833984375e-05, "step": 24280, "training_step_time": 0.11781001091003418 }, { "epoch": 3.704986572265625e-05, "model_forward_time": 0.025209426879882812, "step": 24281 }, { "epoch": 3.704986572265625e-05, "step": 24281, "training_step_time": 0.10901546478271484 }, { "epoch": 3.70513916015625e-05, "model_forward_time": 0.025323867797851562, "step": 24282 }, { "epoch": 3.70513916015625e-05, "step": 24282, "training_step_time": 0.10624408721923828 }, { "epoch": 3.705291748046875e-05, "model_forward_time": 0.025385141372680664, "step": 24283 }, { "epoch": 3.705291748046875e-05, "step": 24283, "training_step_time": 0.10848069190979004 }, { "epoch": 3.7054443359375e-05, "model_forward_time": 0.024928569793701172, "step": 24284 }, { "epoch": 3.7054443359375e-05, "step": 24284, "training_step_time": 0.10614657402038574 }, { "epoch": 3.705596923828125e-05, "model_forward_time": 0.025206327438354492, "step": 24285 }, { "epoch": 3.705596923828125e-05, "step": 24285, "training_step_time": 0.11389946937561035 }, { "epoch": 3.70574951171875e-05, "model_forward_time": 0.0251007080078125, "step": 24286 }, { "epoch": 3.70574951171875e-05, "step": 24286, "training_step_time": 0.10445451736450195 }, { "epoch": 3.705902099609375e-05, "model_forward_time": 0.025239944458007812, "step": 24287 }, { "epoch": 3.705902099609375e-05, "step": 24287, "training_step_time": 0.10410881042480469 }, { "epoch": 3.7060546875e-05, "model_forward_time": 0.02525496482849121, "step": 24288 }, { "epoch": 3.7060546875e-05, "step": 24288, "training_step_time": 0.10597419738769531 }, { "epoch": 3.706207275390625e-05, "model_forward_time": 0.02528691291809082, "step": 24289 }, { "epoch": 3.706207275390625e-05, "step": 24289, "training_step_time": 0.10604691505432129 }, { "epoch": 3.70635986328125e-05, "grad_norm": 0.14922821521759033, "learning_rate": 9.581571022954988e-06, "loss": 0.0094, "step": 24290 }, { "epoch": 3.70635986328125e-05, "model_forward_time": 0.02492213249206543, "step": 24290 }, { "epoch": 3.70635986328125e-05, "step": 24290, "training_step_time": 0.10851073265075684 }, { "epoch": 3.706512451171875e-05, "model_forward_time": 0.025308609008789062, "step": 24291 }, { "epoch": 3.706512451171875e-05, "step": 24291, "training_step_time": 0.10402274131774902 }, { "epoch": 3.7066650390625e-05, "model_forward_time": 0.0250089168548584, "step": 24292 }, { "epoch": 3.7066650390625e-05, "step": 24292, "training_step_time": 0.10638809204101562 }, { "epoch": 3.706817626953125e-05, "model_forward_time": 0.02515721321105957, "step": 24293 }, { "epoch": 3.706817626953125e-05, "step": 24293, "training_step_time": 0.10640192031860352 }, { "epoch": 3.70697021484375e-05, "model_forward_time": 0.02560257911682129, "step": 24294 }, { "epoch": 3.70697021484375e-05, "step": 24294, "training_step_time": 0.17103195190429688 }, { "epoch": 3.707122802734375e-05, "model_forward_time": 0.02433156967163086, "step": 24295 }, { "epoch": 3.707122802734375e-05, "step": 24295, "training_step_time": 0.11850714683532715 }, { "epoch": 3.707275390625e-05, "model_forward_time": 0.0244901180267334, "step": 24296 }, { "epoch": 3.707275390625e-05, "step": 24296, "training_step_time": 0.1273956298828125 }, { "epoch": 3.707427978515625e-05, "model_forward_time": 0.025173664093017578, "step": 24297 }, { "epoch": 3.707427978515625e-05, "step": 24297, "training_step_time": 0.11911797523498535 }, { "epoch": 3.70758056640625e-05, "model_forward_time": 0.027826547622680664, "step": 24298 }, { "epoch": 3.70758056640625e-05, "step": 24298, "training_step_time": 0.10586071014404297 }, { "epoch": 3.707733154296875e-05, "model_forward_time": 0.025238990783691406, "step": 24299 }, { "epoch": 3.707733154296875e-05, "step": 24299, "training_step_time": 0.2242581844329834 }, { "epoch": 3.7078857421875e-05, "grad_norm": 0.1144566461443901, "learning_rate": 9.549150281252633e-06, "loss": 0.011, "step": 24300 }, { "epoch": 3.7078857421875e-05, "model_forward_time": 0.024498939514160156, "step": 24300 }, { "epoch": 3.7078857421875e-05, "step": 24300, "training_step_time": 0.10791873931884766 }, { "epoch": 3.708038330078125e-05, "model_forward_time": 0.023784399032592773, "step": 24301 }, { "epoch": 3.708038330078125e-05, "step": 24301, "training_step_time": 0.10400652885437012 }, { "epoch": 3.70819091796875e-05, "model_forward_time": 0.02714848518371582, "step": 24302 }, { "epoch": 3.70819091796875e-05, "step": 24302, "training_step_time": 0.10984468460083008 }, { "epoch": 3.708343505859375e-05, "model_forward_time": 0.02475762367248535, "step": 24303 }, { "epoch": 3.708343505859375e-05, "step": 24303, "training_step_time": 0.10720181465148926 }, { "epoch": 3.70849609375e-05, "model_forward_time": 0.025285005569458008, "step": 24304 }, { "epoch": 3.70849609375e-05, "step": 24304, "training_step_time": 0.19831514358520508 }, { "epoch": 3.708648681640625e-05, "model_forward_time": 0.02477741241455078, "step": 24305 }, { "epoch": 3.708648681640625e-05, "step": 24305, "training_step_time": 0.10388517379760742 }, { "epoch": 3.70880126953125e-05, "model_forward_time": 0.024883747100830078, "step": 24306 }, { "epoch": 3.70880126953125e-05, "step": 24306, "training_step_time": 0.10828399658203125 }, { "epoch": 3.708953857421875e-05, "model_forward_time": 0.025094270706176758, "step": 24307 }, { "epoch": 3.708953857421875e-05, "step": 24307, "training_step_time": 0.13716506958007812 }, { "epoch": 3.7091064453125e-05, "model_forward_time": 0.024924278259277344, "step": 24308 }, { "epoch": 3.7091064453125e-05, "step": 24308, "training_step_time": 0.12142634391784668 }, { "epoch": 3.709259033203125e-05, "model_forward_time": 0.024669408798217773, "step": 24309 }, { "epoch": 3.709259033203125e-05, "step": 24309, "training_step_time": 0.11022329330444336 }, { "epoch": 3.70941162109375e-05, "grad_norm": 0.36761602759361267, "learning_rate": 9.51677869114696e-06, "loss": 0.0076, "step": 24310 }, { "epoch": 3.70941162109375e-05, "model_forward_time": 0.024998903274536133, "step": 24310 }, { "epoch": 3.70941162109375e-05, "step": 24310, "training_step_time": 0.11482763290405273 }, { "epoch": 3.709564208984375e-05, "model_forward_time": 0.024855852127075195, "step": 24311 }, { "epoch": 3.709564208984375e-05, "step": 24311, "training_step_time": 0.1150212287902832 }, { "epoch": 3.709716796875e-05, "model_forward_time": 0.025383472442626953, "step": 24312 }, { "epoch": 3.709716796875e-05, "step": 24312, "training_step_time": 0.10722684860229492 }, { "epoch": 3.709869384765625e-05, "model_forward_time": 0.026248693466186523, "step": 24313 }, { "epoch": 3.709869384765625e-05, "step": 24313, "training_step_time": 0.11372828483581543 }, { "epoch": 3.71002197265625e-05, "model_forward_time": 0.025073766708374023, "step": 24314 }, { "epoch": 3.71002197265625e-05, "step": 24314, "training_step_time": 0.11293148994445801 }, { "epoch": 3.710174560546875e-05, "model_forward_time": 0.025626182556152344, "step": 24315 }, { "epoch": 3.710174560546875e-05, "step": 24315, "training_step_time": 0.10316872596740723 }, { "epoch": 3.7103271484375e-05, "model_forward_time": 0.02488255500793457, "step": 24316 }, { "epoch": 3.7103271484375e-05, "step": 24316, "training_step_time": 0.19737744331359863 }, { "epoch": 3.710479736328125e-05, "model_forward_time": 0.02450847625732422, "step": 24317 }, { "epoch": 3.710479736328125e-05, "step": 24317, "training_step_time": 0.1007838249206543 }, { "epoch": 3.71063232421875e-05, "model_forward_time": 0.024889469146728516, "step": 24318 }, { "epoch": 3.71063232421875e-05, "step": 24318, "training_step_time": 0.10272645950317383 }, { "epoch": 3.710784912109375e-05, "model_forward_time": 0.024867773056030273, "step": 24319 }, { "epoch": 3.710784912109375e-05, "step": 24319, "training_step_time": 0.1074526309967041 }, { "epoch": 3.7109375e-05, "grad_norm": 0.15049193799495697, "learning_rate": 9.484456291972487e-06, "loss": 0.0053, "step": 24320 }, { "epoch": 3.7109375e-05, "model_forward_time": 0.025033235549926758, "step": 24320 }, { "epoch": 3.7109375e-05, "step": 24320, "training_step_time": 0.13138723373413086 }, { "epoch": 3.711090087890625e-05, "model_forward_time": 0.02538895606994629, "step": 24321 }, { "epoch": 3.711090087890625e-05, "step": 24321, "training_step_time": 0.13435578346252441 }, { "epoch": 3.71124267578125e-05, "model_forward_time": 0.024294614791870117, "step": 24322 }, { "epoch": 3.71124267578125e-05, "step": 24322, "training_step_time": 0.17002367973327637 }, { "epoch": 3.711395263671875e-05, "model_forward_time": 0.02425360679626465, "step": 24323 }, { "epoch": 3.711395263671875e-05, "step": 24323, "training_step_time": 0.17560982704162598 }, { "epoch": 3.7115478515625e-05, "model_forward_time": 0.024230480194091797, "step": 24324 }, { "epoch": 3.7115478515625e-05, "step": 24324, "training_step_time": 0.16555118560791016 }, { "epoch": 3.711700439453125e-05, "model_forward_time": 0.02448725700378418, "step": 24325 }, { "epoch": 3.711700439453125e-05, "step": 24325, "training_step_time": 0.17579197883605957 }, { "epoch": 3.71185302734375e-05, "model_forward_time": 0.024272441864013672, "step": 24326 }, { "epoch": 3.71185302734375e-05, "step": 24326, "training_step_time": 0.14416766166687012 }, { "epoch": 3.712005615234375e-05, "model_forward_time": 0.02452373504638672, "step": 24327 }, { "epoch": 3.712005615234375e-05, "step": 24327, "training_step_time": 0.1298818588256836 }, { "epoch": 3.712158203125e-05, "model_forward_time": 0.024457216262817383, "step": 24328 }, { "epoch": 3.712158203125e-05, "step": 24328, "training_step_time": 0.12549948692321777 }, { "epoch": 3.712310791015625e-05, "model_forward_time": 0.02436232566833496, "step": 24329 }, { "epoch": 3.712310791015625e-05, "step": 24329, "training_step_time": 0.11989140510559082 }, { "epoch": 3.71246337890625e-05, "grad_norm": 0.11885454505681992, "learning_rate": 9.452183123004e-06, "loss": 0.0027, "step": 24330 }, { "epoch": 3.71246337890625e-05, "model_forward_time": 0.024841785430908203, "step": 24330 }, { "epoch": 3.71246337890625e-05, "step": 24330, "training_step_time": 0.11301326751708984 }, { "epoch": 3.712615966796875e-05, "model_forward_time": 0.02505350112915039, "step": 24331 }, { "epoch": 3.712615966796875e-05, "step": 24331, "training_step_time": 0.11555814743041992 }, { "epoch": 3.7127685546875e-05, "model_forward_time": 0.024890899658203125, "step": 24332 }, { "epoch": 3.7127685546875e-05, "step": 24332, "training_step_time": 0.11681270599365234 }, { "epoch": 3.712921142578125e-05, "model_forward_time": 0.025279760360717773, "step": 24333 }, { "epoch": 3.712921142578125e-05, "step": 24333, "training_step_time": 0.10809063911437988 }, { "epoch": 3.71307373046875e-05, "model_forward_time": 0.025611400604248047, "step": 24334 }, { "epoch": 3.71307373046875e-05, "step": 24334, "training_step_time": 0.10654711723327637 }, { "epoch": 3.713226318359375e-05, "model_forward_time": 0.025287866592407227, "step": 24335 }, { "epoch": 3.713226318359375e-05, "step": 24335, "training_step_time": 0.11050224304199219 }, { "epoch": 3.71337890625e-05, "model_forward_time": 0.02549290657043457, "step": 24336 }, { "epoch": 3.71337890625e-05, "step": 24336, "training_step_time": 0.10606861114501953 }, { "epoch": 3.713531494140625e-05, "model_forward_time": 0.02516484260559082, "step": 24337 }, { "epoch": 3.713531494140625e-05, "step": 24337, "training_step_time": 0.10801172256469727 }, { "epoch": 3.71368408203125e-05, "model_forward_time": 0.024900197982788086, "step": 24338 }, { "epoch": 3.71368408203125e-05, "step": 24338, "training_step_time": 0.10728049278259277 }, { "epoch": 3.713836669921875e-05, "model_forward_time": 0.024777889251708984, "step": 24339 }, { "epoch": 3.713836669921875e-05, "step": 24339, "training_step_time": 0.17107176780700684 }, { "epoch": 3.7139892578125e-05, "grad_norm": 0.059934068471193314, "learning_rate": 9.41995922345642e-06, "loss": 0.0063, "step": 24340 }, { "epoch": 3.7139892578125e-05, "model_forward_time": 0.024144649505615234, "step": 24340 }, { "epoch": 3.7139892578125e-05, "step": 24340, "training_step_time": 0.1242973804473877 }, { "epoch": 3.714141845703125e-05, "model_forward_time": 0.024165630340576172, "step": 24341 }, { "epoch": 3.714141845703125e-05, "step": 24341, "training_step_time": 0.12415528297424316 }, { "epoch": 3.71429443359375e-05, "model_forward_time": 0.02523207664489746, "step": 24342 }, { "epoch": 3.71429443359375e-05, "step": 24342, "training_step_time": 0.10575342178344727 }, { "epoch": 3.714447021484375e-05, "model_forward_time": 0.025203704833984375, "step": 24343 }, { "epoch": 3.714447021484375e-05, "step": 24343, "training_step_time": 0.15754270553588867 }, { "epoch": 3.714599609375e-05, "model_forward_time": 0.024272680282592773, "step": 24344 }, { "epoch": 3.714599609375e-05, "step": 24344, "training_step_time": 0.1399390697479248 }, { "epoch": 3.714752197265625e-05, "model_forward_time": 0.024673938751220703, "step": 24345 }, { "epoch": 3.714752197265625e-05, "step": 24345, "training_step_time": 0.10219860076904297 }, { "epoch": 3.71490478515625e-05, "model_forward_time": 0.025735855102539062, "step": 24346 }, { "epoch": 3.71490478515625e-05, "step": 24346, "training_step_time": 0.10573911666870117 }, { "epoch": 3.715057373046875e-05, "model_forward_time": 0.02526378631591797, "step": 24347 }, { "epoch": 3.715057373046875e-05, "step": 24347, "training_step_time": 0.10457921028137207 }, { "epoch": 3.7152099609375e-05, "model_forward_time": 0.025121212005615234, "step": 24348 }, { "epoch": 3.7152099609375e-05, "step": 24348, "training_step_time": 0.10540246963500977 }, { "epoch": 3.715362548828125e-05, "model_forward_time": 0.02505970001220703, "step": 24349 }, { "epoch": 3.715362548828125e-05, "step": 24349, "training_step_time": 0.10681986808776855 }, { "epoch": 3.71551513671875e-05, "grad_norm": 0.08646565675735474, "learning_rate": 9.387784632484826e-06, "loss": 0.0082, "step": 24350 }, { "epoch": 3.71551513671875e-05, "model_forward_time": 0.025452852249145508, "step": 24350 }, { "epoch": 3.71551513671875e-05, "step": 24350, "training_step_time": 0.1106417179107666 }, { "epoch": 3.715667724609375e-05, "model_forward_time": 0.025509119033813477, "step": 24351 }, { "epoch": 3.715667724609375e-05, "step": 24351, "training_step_time": 0.10989260673522949 }, { "epoch": 3.7158203125e-05, "model_forward_time": 0.025065898895263672, "step": 24352 }, { "epoch": 3.7158203125e-05, "step": 24352, "training_step_time": 0.10683155059814453 }, { "epoch": 3.715972900390625e-05, "model_forward_time": 0.024544239044189453, "step": 24353 }, { "epoch": 3.715972900390625e-05, "step": 24353, "training_step_time": 0.12331247329711914 }, { "epoch": 3.71612548828125e-05, "model_forward_time": 0.025012731552124023, "step": 24354 }, { "epoch": 3.71612548828125e-05, "step": 24354, "training_step_time": 0.1308460235595703 }, { "epoch": 3.716278076171875e-05, "model_forward_time": 0.025029420852661133, "step": 24355 }, { "epoch": 3.716278076171875e-05, "step": 24355, "training_step_time": 0.1532447338104248 }, { "epoch": 3.7164306640625e-05, "model_forward_time": 0.024352312088012695, "step": 24356 }, { "epoch": 3.7164306640625e-05, "step": 24356, "training_step_time": 0.17566919326782227 }, { "epoch": 3.716583251953125e-05, "model_forward_time": 0.02512955665588379, "step": 24357 }, { "epoch": 3.716583251953125e-05, "step": 24357, "training_step_time": 0.12132096290588379 }, { "epoch": 3.71673583984375e-05, "model_forward_time": 0.024603605270385742, "step": 24358 }, { "epoch": 3.71673583984375e-05, "step": 24358, "training_step_time": 0.15294384956359863 }, { "epoch": 3.716888427734375e-05, "model_forward_time": 0.024913311004638672, "step": 24359 }, { "epoch": 3.716888427734375e-05, "step": 24359, "training_step_time": 0.10367631912231445 }, { "epoch": 3.717041015625e-05, "grad_norm": 0.06944193691015244, "learning_rate": 9.355659389184396e-06, "loss": 0.0049, "step": 24360 }, { "epoch": 3.717041015625e-05, "model_forward_time": 0.02496051788330078, "step": 24360 }, { "epoch": 3.717041015625e-05, "step": 24360, "training_step_time": 0.10398197174072266 }, { "epoch": 3.717193603515625e-05, "model_forward_time": 0.02520751953125, "step": 24361 }, { "epoch": 3.717193603515625e-05, "step": 24361, "training_step_time": 0.10443449020385742 }, { "epoch": 3.71734619140625e-05, "model_forward_time": 0.02516627311706543, "step": 24362 }, { "epoch": 3.71734619140625e-05, "step": 24362, "training_step_time": 0.10392260551452637 }, { "epoch": 3.717498779296875e-05, "model_forward_time": 0.02533245086669922, "step": 24363 }, { "epoch": 3.717498779296875e-05, "step": 24363, "training_step_time": 0.10829401016235352 }, { "epoch": 3.7176513671875e-05, "model_forward_time": 0.02538919448852539, "step": 24364 }, { "epoch": 3.7176513671875e-05, "step": 24364, "training_step_time": 0.10499691963195801 }, { "epoch": 3.717803955078125e-05, "model_forward_time": 0.024544715881347656, "step": 24365 }, { "epoch": 3.717803955078125e-05, "step": 24365, "training_step_time": 0.10553193092346191 }, { "epoch": 3.71795654296875e-05, "model_forward_time": 0.024393558502197266, "step": 24366 }, { "epoch": 3.71795654296875e-05, "step": 24366, "training_step_time": 0.10773301124572754 }, { "epoch": 3.718109130859375e-05, "model_forward_time": 0.02479386329650879, "step": 24367 }, { "epoch": 3.718109130859375e-05, "step": 24367, "training_step_time": 0.1049952507019043 }, { "epoch": 3.71826171875e-05, "model_forward_time": 0.025301694869995117, "step": 24368 }, { "epoch": 3.71826171875e-05, "step": 24368, "training_step_time": 0.1498570442199707 }, { "epoch": 3.718414306640625e-05, "model_forward_time": 0.02356433868408203, "step": 24369 }, { "epoch": 3.718414306640625e-05, "step": 24369, "training_step_time": 0.1030876636505127 }, { "epoch": 3.71856689453125e-05, "grad_norm": 0.12869904935359955, "learning_rate": 9.32358353259032e-06, "loss": 0.0062, "step": 24370 }, { "epoch": 3.71856689453125e-05, "model_forward_time": 0.02393651008605957, "step": 24370 }, { "epoch": 3.71856689453125e-05, "step": 24370, "training_step_time": 0.20751118659973145 }, { "epoch": 3.718719482421875e-05, "model_forward_time": 0.022573232650756836, "step": 24371 }, { "epoch": 3.718719482421875e-05, "step": 24371, "training_step_time": 0.12214946746826172 }, { "epoch": 3.7188720703125e-05, "model_forward_time": 0.023389101028442383, "step": 24372 }, { "epoch": 3.7188720703125e-05, "step": 24372, "training_step_time": 0.1640775203704834 }, { "epoch": 3.719024658203125e-05, "model_forward_time": 0.023114442825317383, "step": 24373 }, { "epoch": 3.719024658203125e-05, "step": 24373, "training_step_time": 0.10543179512023926 }, { "epoch": 3.71917724609375e-05, "model_forward_time": 0.023762941360473633, "step": 24374 }, { "epoch": 3.71917724609375e-05, "step": 24374, "training_step_time": 0.1037454605102539 }, { "epoch": 3.719329833984375e-05, "model_forward_time": 0.023972511291503906, "step": 24375 }, { "epoch": 3.719329833984375e-05, "step": 24375, "training_step_time": 0.10392212867736816 }, { "epoch": 3.719482421875e-05, "model_forward_time": 0.02437138557434082, "step": 24376 }, { "epoch": 3.719482421875e-05, "step": 24376, "training_step_time": 0.1049506664276123 }, { "epoch": 3.719635009765625e-05, "model_forward_time": 0.024208545684814453, "step": 24377 }, { "epoch": 3.719635009765625e-05, "step": 24377, "training_step_time": 0.1049339771270752 }, { "epoch": 3.71978759765625e-05, "model_forward_time": 0.024483203887939453, "step": 24378 }, { "epoch": 3.71978759765625e-05, "step": 24378, "training_step_time": 0.10791301727294922 }, { "epoch": 3.719940185546875e-05, "model_forward_time": 0.024637222290039062, "step": 24379 }, { "epoch": 3.719940185546875e-05, "step": 24379, "training_step_time": 0.11005735397338867 }, { "epoch": 3.7200927734375e-05, "grad_norm": 0.19114813208580017, "learning_rate": 9.291557101677784e-06, "loss": 0.0033, "step": 24380 }, { "epoch": 3.7200927734375e-05, "model_forward_time": 0.02460455894470215, "step": 24380 }, { "epoch": 3.7200927734375e-05, "step": 24380, "training_step_time": 0.10765218734741211 }, { "epoch": 3.720245361328125e-05, "model_forward_time": 0.023885726928710938, "step": 24381 }, { "epoch": 3.720245361328125e-05, "step": 24381, "training_step_time": 0.10705375671386719 }, { "epoch": 3.72039794921875e-05, "model_forward_time": 0.02405405044555664, "step": 24382 }, { "epoch": 3.72039794921875e-05, "step": 24382, "training_step_time": 0.10443878173828125 }, { "epoch": 3.720550537109375e-05, "model_forward_time": 0.02433943748474121, "step": 24383 }, { "epoch": 3.720550537109375e-05, "step": 24383, "training_step_time": 0.1049649715423584 }, { "epoch": 3.720703125e-05, "model_forward_time": 0.024621009826660156, "step": 24384 }, { "epoch": 3.720703125e-05, "step": 24384, "training_step_time": 0.10624170303344727 }, { "epoch": 3.720855712890625e-05, "model_forward_time": 0.024230003356933594, "step": 24385 }, { "epoch": 3.720855712890625e-05, "step": 24385, "training_step_time": 0.10561013221740723 }, { "epoch": 3.72100830078125e-05, "model_forward_time": 0.024133682250976562, "step": 24386 }, { "epoch": 3.72100830078125e-05, "step": 24386, "training_step_time": 0.18191909790039062 }, { "epoch": 3.721160888671875e-05, "model_forward_time": 0.02341628074645996, "step": 24387 }, { "epoch": 3.721160888671875e-05, "step": 24387, "training_step_time": 0.17897605895996094 }, { "epoch": 3.7213134765625e-05, "model_forward_time": 0.023186683654785156, "step": 24388 }, { "epoch": 3.7213134765625e-05, "step": 24388, "training_step_time": 0.10691070556640625 }, { "epoch": 3.721466064453125e-05, "model_forward_time": 0.023356914520263672, "step": 24389 }, { "epoch": 3.721466064453125e-05, "step": 24389, "training_step_time": 0.12597894668579102 }, { "epoch": 3.72161865234375e-05, "grad_norm": 0.08762897551059723, "learning_rate": 9.259580135361929e-06, "loss": 0.0032, "step": 24390 }, { "epoch": 3.72161865234375e-05, "model_forward_time": 0.023990392684936523, "step": 24390 }, { "epoch": 3.72161865234375e-05, "step": 24390, "training_step_time": 0.10661005973815918 }, { "epoch": 3.721771240234375e-05, "model_forward_time": 0.02408909797668457, "step": 24391 }, { "epoch": 3.721771240234375e-05, "step": 24391, "training_step_time": 0.11261367797851562 }, { "epoch": 3.721923828125e-05, "model_forward_time": 0.023883819580078125, "step": 24392 }, { "epoch": 3.721923828125e-05, "step": 24392, "training_step_time": 0.1110224723815918 }, { "epoch": 3.722076416015625e-05, "model_forward_time": 0.024221420288085938, "step": 24393 }, { "epoch": 3.722076416015625e-05, "step": 24393, "training_step_time": 0.10687875747680664 }, { "epoch": 3.72222900390625e-05, "model_forward_time": 0.023711681365966797, "step": 24394 }, { "epoch": 3.72222900390625e-05, "step": 24394, "training_step_time": 0.10729122161865234 }, { "epoch": 3.722381591796875e-05, "model_forward_time": 0.024107694625854492, "step": 24395 }, { "epoch": 3.722381591796875e-05, "step": 24395, "training_step_time": 0.10476994514465332 }, { "epoch": 3.7225341796875e-05, "model_forward_time": 0.024468660354614258, "step": 24396 }, { "epoch": 3.7225341796875e-05, "step": 24396, "training_step_time": 0.10322022438049316 }, { "epoch": 3.722686767578125e-05, "model_forward_time": 0.024061203002929688, "step": 24397 }, { "epoch": 3.722686767578125e-05, "step": 24397, "training_step_time": 0.19710946083068848 }, { "epoch": 3.72283935546875e-05, "model_forward_time": 0.02508068084716797, "step": 24398 }, { "epoch": 3.72283935546875e-05, "step": 24398, "training_step_time": 0.21164894104003906 }, { "epoch": 3.722991943359375e-05, "model_forward_time": 0.023173809051513672, "step": 24399 }, { "epoch": 3.722991943359375e-05, "step": 24399, "training_step_time": 0.13428568840026855 }, { "epoch": 3.72314453125e-05, "grad_norm": 0.20159612596035004, "learning_rate": 9.227652672497761e-06, "loss": 0.0052, "step": 24400 }, { "epoch": 3.72314453125e-05, "model_forward_time": 0.0236053466796875, "step": 24400 }, { "epoch": 3.72314453125e-05, "step": 24400, "training_step_time": 0.12832164764404297 }, { "epoch": 3.723297119140625e-05, "model_forward_time": 0.02382373809814453, "step": 24401 }, { "epoch": 3.723297119140625e-05, "step": 24401, "training_step_time": 0.1102597713470459 }, { "epoch": 3.72344970703125e-05, "model_forward_time": 0.024133920669555664, "step": 24402 }, { "epoch": 3.72344970703125e-05, "step": 24402, "training_step_time": 0.1150217056274414 }, { "epoch": 3.723602294921875e-05, "model_forward_time": 0.02417731285095215, "step": 24403 }, { "epoch": 3.723602294921875e-05, "step": 24403, "training_step_time": 0.2016441822052002 }, { "epoch": 3.7237548828125e-05, "model_forward_time": 0.024802446365356445, "step": 24404 }, { "epoch": 3.7237548828125e-05, "step": 24404, "training_step_time": 0.10185766220092773 }, { "epoch": 3.723907470703125e-05, "model_forward_time": 0.024893522262573242, "step": 24405 }, { "epoch": 3.723907470703125e-05, "step": 24405, "training_step_time": 0.1029655933380127 }, { "epoch": 3.72406005859375e-05, "model_forward_time": 0.025116920471191406, "step": 24406 }, { "epoch": 3.72406005859375e-05, "step": 24406, "training_step_time": 0.10986685752868652 }, { "epoch": 3.724212646484375e-05, "model_forward_time": 0.025323867797851562, "step": 24407 }, { "epoch": 3.724212646484375e-05, "step": 24407, "training_step_time": 0.10656118392944336 }, { "epoch": 3.724365234375e-05, "model_forward_time": 0.025153160095214844, "step": 24408 }, { "epoch": 3.724365234375e-05, "step": 24408, "training_step_time": 0.10987687110900879 }, { "epoch": 3.724517822265625e-05, "model_forward_time": 0.025244474411010742, "step": 24409 }, { "epoch": 3.724517822265625e-05, "step": 24409, "training_step_time": 0.10483217239379883 }, { "epoch": 3.72467041015625e-05, "grad_norm": 0.05615602433681488, "learning_rate": 9.195774751880198e-06, "loss": 0.0031, "step": 24410 }, { "epoch": 3.72467041015625e-05, "model_forward_time": 0.02749037742614746, "step": 24410 }, { "epoch": 3.72467041015625e-05, "step": 24410, "training_step_time": 0.10707354545593262 }, { "epoch": 3.724822998046875e-05, "model_forward_time": 0.025300264358520508, "step": 24411 }, { "epoch": 3.724822998046875e-05, "step": 24411, "training_step_time": 0.10661149024963379 }, { "epoch": 3.7249755859375e-05, "model_forward_time": 0.024798870086669922, "step": 24412 }, { "epoch": 3.7249755859375e-05, "step": 24412, "training_step_time": 0.10559797286987305 }, { "epoch": 3.725128173828125e-05, "model_forward_time": 0.025254249572753906, "step": 24413 }, { "epoch": 3.725128173828125e-05, "step": 24413, "training_step_time": 0.10337686538696289 }, { "epoch": 3.72528076171875e-05, "model_forward_time": 0.024353742599487305, "step": 24414 }, { "epoch": 3.72528076171875e-05, "step": 24414, "training_step_time": 0.14342904090881348 }, { "epoch": 3.725433349609375e-05, "model_forward_time": 0.02440476417541504, "step": 24415 }, { "epoch": 3.725433349609375e-05, "step": 24415, "training_step_time": 0.1732161045074463 }, { "epoch": 3.7255859375e-05, "model_forward_time": 0.025637388229370117, "step": 24416 }, { "epoch": 3.7255859375e-05, "step": 24416, "training_step_time": 0.10689473152160645 }, { "epoch": 3.725738525390625e-05, "model_forward_time": 0.02557682991027832, "step": 24417 }, { "epoch": 3.725738525390625e-05, "step": 24417, "training_step_time": 0.13231158256530762 }, { "epoch": 3.72589111328125e-05, "model_forward_time": 0.025146007537841797, "step": 24418 }, { "epoch": 3.72589111328125e-05, "step": 24418, "training_step_time": 0.1983506679534912 }, { "epoch": 3.726043701171875e-05, "model_forward_time": 0.02427077293395996, "step": 24419 }, { "epoch": 3.726043701171875e-05, "step": 24419, "training_step_time": 0.10168337821960449 }, { "epoch": 3.7261962890625e-05, "grad_norm": 0.06053365767002106, "learning_rate": 9.163946412243896e-06, "loss": 0.0046, "step": 24420 }, { "epoch": 3.7261962890625e-05, "model_forward_time": 0.023658275604248047, "step": 24420 }, { "epoch": 3.7261962890625e-05, "step": 24420, "training_step_time": 0.10434293746948242 }, { "epoch": 3.726348876953125e-05, "model_forward_time": 0.02411365509033203, "step": 24421 }, { "epoch": 3.726348876953125e-05, "step": 24421, "training_step_time": 0.10277986526489258 }, { "epoch": 3.72650146484375e-05, "model_forward_time": 0.02487945556640625, "step": 24422 }, { "epoch": 3.72650146484375e-05, "step": 24422, "training_step_time": 0.10518050193786621 }, { "epoch": 3.726654052734375e-05, "model_forward_time": 0.02506232261657715, "step": 24423 }, { "epoch": 3.726654052734375e-05, "step": 24423, "training_step_time": 0.1073160171508789 }, { "epoch": 3.726806640625e-05, "model_forward_time": 0.025388002395629883, "step": 24424 }, { "epoch": 3.726806640625e-05, "step": 24424, "training_step_time": 0.10558390617370605 }, { "epoch": 3.726959228515625e-05, "model_forward_time": 0.025506973266601562, "step": 24425 }, { "epoch": 3.726959228515625e-05, "step": 24425, "training_step_time": 0.10461640357971191 }, { "epoch": 3.72711181640625e-05, "model_forward_time": 0.02484297752380371, "step": 24426 }, { "epoch": 3.72711181640625e-05, "step": 24426, "training_step_time": 0.10875463485717773 }, { "epoch": 3.727264404296875e-05, "model_forward_time": 0.02524089813232422, "step": 24427 }, { "epoch": 3.727264404296875e-05, "step": 24427, "training_step_time": 0.10506010055541992 }, { "epoch": 3.7274169921875e-05, "model_forward_time": 0.025609731674194336, "step": 24428 }, { "epoch": 3.7274169921875e-05, "step": 24428, "training_step_time": 0.10543656349182129 }, { "epoch": 3.727569580078125e-05, "model_forward_time": 0.025098562240600586, "step": 24429 }, { "epoch": 3.727569580078125e-05, "step": 24429, "training_step_time": 0.11373162269592285 }, { "epoch": 3.72772216796875e-05, "grad_norm": 0.3164246678352356, "learning_rate": 9.132167692263289e-06, "loss": 0.0064, "step": 24430 }, { "epoch": 3.72772216796875e-05, "model_forward_time": 0.025788307189941406, "step": 24430 }, { "epoch": 3.72772216796875e-05, "step": 24430, "training_step_time": 0.13280701637268066 }, { "epoch": 3.727874755859375e-05, "model_forward_time": 0.025031566619873047, "step": 24431 }, { "epoch": 3.727874755859375e-05, "step": 24431, "training_step_time": 0.13015151023864746 }, { "epoch": 3.72802734375e-05, "model_forward_time": 0.024872303009033203, "step": 24432 }, { "epoch": 3.72802734375e-05, "step": 24432, "training_step_time": 0.21679997444152832 }, { "epoch": 3.728179931640625e-05, "model_forward_time": 0.024282217025756836, "step": 24433 }, { "epoch": 3.728179931640625e-05, "step": 24433, "training_step_time": 0.13921308517456055 }, { "epoch": 3.72833251953125e-05, "model_forward_time": 0.024581193923950195, "step": 24434 }, { "epoch": 3.72833251953125e-05, "step": 24434, "training_step_time": 0.11746644973754883 }, { "epoch": 3.728485107421875e-05, "model_forward_time": 0.024698495864868164, "step": 24435 }, { "epoch": 3.728485107421875e-05, "step": 24435, "training_step_time": 0.11534976959228516 }, { "epoch": 3.7286376953125e-05, "model_forward_time": 0.02514052391052246, "step": 24436 }, { "epoch": 3.7286376953125e-05, "step": 24436, "training_step_time": 0.17078328132629395 }, { "epoch": 3.728790283203125e-05, "model_forward_time": 0.024447202682495117, "step": 24437 }, { "epoch": 3.728790283203125e-05, "step": 24437, "training_step_time": 0.1314399242401123 }, { "epoch": 3.72894287109375e-05, "model_forward_time": 0.0242919921875, "step": 24438 }, { "epoch": 3.72894287109375e-05, "step": 24438, "training_step_time": 0.1101677417755127 }, { "epoch": 3.729095458984375e-05, "model_forward_time": 0.025096654891967773, "step": 24439 }, { "epoch": 3.729095458984375e-05, "step": 24439, "training_step_time": 0.11149907112121582 }, { "epoch": 3.729248046875e-05, "grad_norm": 0.16114254295825958, "learning_rate": 9.100438630552521e-06, "loss": 0.0027, "step": 24440 }, { "epoch": 3.729248046875e-05, "model_forward_time": 0.025304317474365234, "step": 24440 }, { "epoch": 3.729248046875e-05, "step": 24440, "training_step_time": 0.10672140121459961 }, { "epoch": 3.729400634765625e-05, "model_forward_time": 0.024938106536865234, "step": 24441 }, { "epoch": 3.729400634765625e-05, "step": 24441, "training_step_time": 0.10538053512573242 }, { "epoch": 3.72955322265625e-05, "model_forward_time": 0.025084495544433594, "step": 24442 }, { "epoch": 3.72955322265625e-05, "step": 24442, "training_step_time": 0.10523033142089844 }, { "epoch": 3.729705810546875e-05, "model_forward_time": 0.024934768676757812, "step": 24443 }, { "epoch": 3.729705810546875e-05, "step": 24443, "training_step_time": 0.11861705780029297 }, { "epoch": 3.7298583984375e-05, "model_forward_time": 0.024907588958740234, "step": 24444 }, { "epoch": 3.7298583984375e-05, "step": 24444, "training_step_time": 0.10475897789001465 }, { "epoch": 3.730010986328125e-05, "model_forward_time": 0.025435209274291992, "step": 24445 }, { "epoch": 3.730010986328125e-05, "step": 24445, "training_step_time": 0.12146592140197754 }, { "epoch": 3.73016357421875e-05, "model_forward_time": 0.024726152420043945, "step": 24446 }, { "epoch": 3.73016357421875e-05, "step": 24446, "training_step_time": 0.13011574745178223 }, { "epoch": 3.730316162109375e-05, "model_forward_time": 0.025139570236206055, "step": 24447 }, { "epoch": 3.730316162109375e-05, "step": 24447, "training_step_time": 0.1184072494506836 }, { "epoch": 3.73046875e-05, "model_forward_time": 0.025357723236083984, "step": 24448 }, { "epoch": 3.73046875e-05, "step": 24448, "training_step_time": 0.19695734977722168 }, { "epoch": 3.730621337890625e-05, "model_forward_time": 0.02479386329650879, "step": 24449 }, { "epoch": 3.730621337890625e-05, "step": 24449, "training_step_time": 0.13739514350891113 }, { "epoch": 3.73077392578125e-05, "grad_norm": 0.1747671365737915, "learning_rate": 9.068759265665384e-06, "loss": 0.0081, "step": 24450 }, { "epoch": 3.73077392578125e-05, "model_forward_time": 0.02449202537536621, "step": 24450 }, { "epoch": 3.73077392578125e-05, "step": 24450, "training_step_time": 0.17432641983032227 }, { "epoch": 3.730926513671875e-05, "model_forward_time": 0.024774789810180664, "step": 24451 }, { "epoch": 3.730926513671875e-05, "step": 24451, "training_step_time": 0.10655570030212402 }, { "epoch": 3.7310791015625e-05, "model_forward_time": 0.024790525436401367, "step": 24452 }, { "epoch": 3.7310791015625e-05, "step": 24452, "training_step_time": 0.10491299629211426 }, { "epoch": 3.731231689453125e-05, "model_forward_time": 0.024010896682739258, "step": 24453 }, { "epoch": 3.731231689453125e-05, "step": 24453, "training_step_time": 0.10352134704589844 }, { "epoch": 3.73138427734375e-05, "model_forward_time": 0.02620077133178711, "step": 24454 }, { "epoch": 3.73138427734375e-05, "step": 24454, "training_step_time": 0.1097109317779541 }, { "epoch": 3.731536865234375e-05, "model_forward_time": 0.025884389877319336, "step": 24455 }, { "epoch": 3.731536865234375e-05, "step": 24455, "training_step_time": 0.10816216468811035 }, { "epoch": 3.731689453125e-05, "model_forward_time": 0.02610182762145996, "step": 24456 }, { "epoch": 3.731689453125e-05, "step": 24456, "training_step_time": 0.10862469673156738 }, { "epoch": 3.731842041015625e-05, "model_forward_time": 0.025356292724609375, "step": 24457 }, { "epoch": 3.731842041015625e-05, "step": 24457, "training_step_time": 0.1099553108215332 }, { "epoch": 3.73199462890625e-05, "model_forward_time": 0.025036096572875977, "step": 24458 }, { "epoch": 3.73199462890625e-05, "step": 24458, "training_step_time": 0.10505843162536621 }, { "epoch": 3.732147216796875e-05, "model_forward_time": 0.024212360382080078, "step": 24459 }, { "epoch": 3.732147216796875e-05, "step": 24459, "training_step_time": 0.14887213706970215 }, { "epoch": 3.7322998046875e-05, "grad_norm": 0.14061211049556732, "learning_rate": 9.037129636095309e-06, "loss": 0.006, "step": 24460 }, { "epoch": 3.7322998046875e-05, "model_forward_time": 0.02426004409790039, "step": 24460 }, { "epoch": 3.7322998046875e-05, "step": 24460, "training_step_time": 0.15532779693603516 }, { "epoch": 3.732452392578125e-05, "model_forward_time": 0.026788949966430664, "step": 24461 }, { "epoch": 3.732452392578125e-05, "step": 24461, "training_step_time": 0.10747790336608887 }, { "epoch": 3.73260498046875e-05, "model_forward_time": 0.02523064613342285, "step": 24462 }, { "epoch": 3.73260498046875e-05, "step": 24462, "training_step_time": 0.14213895797729492 }, { "epoch": 3.732757568359375e-05, "model_forward_time": 0.02559971809387207, "step": 24463 }, { "epoch": 3.732757568359375e-05, "step": 24463, "training_step_time": 0.10694360733032227 }, { "epoch": 3.73291015625e-05, "model_forward_time": 0.02570652961730957, "step": 24464 }, { "epoch": 3.73291015625e-05, "step": 24464, "training_step_time": 0.11942481994628906 }, { "epoch": 3.733062744140625e-05, "model_forward_time": 0.02566814422607422, "step": 24465 }, { "epoch": 3.733062744140625e-05, "step": 24465, "training_step_time": 0.1065218448638916 }, { "epoch": 3.73321533203125e-05, "model_forward_time": 0.024989604949951172, "step": 24466 }, { "epoch": 3.73321533203125e-05, "step": 24466, "training_step_time": 0.10579180717468262 }, { "epoch": 3.733367919921875e-05, "model_forward_time": 0.025851011276245117, "step": 24467 }, { "epoch": 3.733367919921875e-05, "step": 24467, "training_step_time": 0.11101245880126953 }, { "epoch": 3.7335205078125e-05, "model_forward_time": 0.02535557746887207, "step": 24468 }, { "epoch": 3.7335205078125e-05, "step": 24468, "training_step_time": 0.10619950294494629 }, { "epoch": 3.733673095703125e-05, "model_forward_time": 0.02780938148498535, "step": 24469 }, { "epoch": 3.733673095703125e-05, "step": 24469, "training_step_time": 0.10766983032226562 }, { "epoch": 3.73382568359375e-05, "grad_norm": 0.08635987341403961, "learning_rate": 9.005549780275263e-06, "loss": 0.0076, "step": 24470 }, { "epoch": 3.73382568359375e-05, "model_forward_time": 0.02531123161315918, "step": 24470 }, { "epoch": 3.73382568359375e-05, "step": 24470, "training_step_time": 0.10678935050964355 }, { "epoch": 3.733978271484375e-05, "model_forward_time": 0.025258302688598633, "step": 24471 }, { "epoch": 3.733978271484375e-05, "step": 24471, "training_step_time": 0.10660767555236816 }, { "epoch": 3.734130859375e-05, "model_forward_time": 0.024765729904174805, "step": 24472 }, { "epoch": 3.734130859375e-05, "step": 24472, "training_step_time": 0.10633611679077148 }, { "epoch": 3.734283447265625e-05, "model_forward_time": 0.02553105354309082, "step": 24473 }, { "epoch": 3.734283447265625e-05, "step": 24473, "training_step_time": 0.1060178279876709 }, { "epoch": 3.73443603515625e-05, "model_forward_time": 0.02529764175415039, "step": 24474 }, { "epoch": 3.73443603515625e-05, "step": 24474, "training_step_time": 0.10556530952453613 }, { "epoch": 3.734588623046875e-05, "model_forward_time": 0.025002717971801758, "step": 24475 }, { "epoch": 3.734588623046875e-05, "step": 24475, "training_step_time": 0.1084604263305664 }, { "epoch": 3.7347412109375e-05, "model_forward_time": 0.025008440017700195, "step": 24476 }, { "epoch": 3.7347412109375e-05, "step": 24476, "training_step_time": 0.11102771759033203 }, { "epoch": 3.734893798828125e-05, "model_forward_time": 0.02500605583190918, "step": 24477 }, { "epoch": 3.734893798828125e-05, "step": 24477, "training_step_time": 0.10927844047546387 }, { "epoch": 3.73504638671875e-05, "model_forward_time": 0.02552509307861328, "step": 24478 }, { "epoch": 3.73504638671875e-05, "step": 24478, "training_step_time": 0.11022305488586426 }, { "epoch": 3.735198974609375e-05, "model_forward_time": 0.024997472763061523, "step": 24479 }, { "epoch": 3.735198974609375e-05, "step": 24479, "training_step_time": 0.16833901405334473 }, { "epoch": 3.7353515625e-05, "grad_norm": 0.11926314234733582, "learning_rate": 8.974019736577777e-06, "loss": 0.0054, "step": 24480 }, { "epoch": 3.7353515625e-05, "model_forward_time": 0.02425360679626465, "step": 24480 }, { "epoch": 3.7353515625e-05, "step": 24480, "training_step_time": 0.11188101768493652 }, { "epoch": 3.735504150390625e-05, "model_forward_time": 0.02490997314453125, "step": 24481 }, { "epoch": 3.735504150390625e-05, "step": 24481, "training_step_time": 0.10980653762817383 }, { "epoch": 3.73565673828125e-05, "model_forward_time": 0.025077342987060547, "step": 24482 }, { "epoch": 3.73565673828125e-05, "step": 24482, "training_step_time": 0.11562299728393555 }, { "epoch": 3.735809326171875e-05, "model_forward_time": 0.02558302879333496, "step": 24483 }, { "epoch": 3.735809326171875e-05, "step": 24483, "training_step_time": 0.11011409759521484 }, { "epoch": 3.7359619140625e-05, "model_forward_time": 0.024579286575317383, "step": 24484 }, { "epoch": 3.7359619140625e-05, "step": 24484, "training_step_time": 0.22366905212402344 }, { "epoch": 3.736114501953125e-05, "model_forward_time": 0.024477481842041016, "step": 24485 }, { "epoch": 3.736114501953125e-05, "step": 24485, "training_step_time": 0.10900163650512695 }, { "epoch": 3.73626708984375e-05, "model_forward_time": 0.024380922317504883, "step": 24486 }, { "epoch": 3.73626708984375e-05, "step": 24486, "training_step_time": 0.1042473316192627 }, { "epoch": 3.736419677734375e-05, "model_forward_time": 0.025431394577026367, "step": 24487 }, { "epoch": 3.736419677734375e-05, "step": 24487, "training_step_time": 0.10330891609191895 }, { "epoch": 3.736572265625e-05, "model_forward_time": 0.02518630027770996, "step": 24488 }, { "epoch": 3.736572265625e-05, "step": 24488, "training_step_time": 0.10445308685302734 }, { "epoch": 3.736724853515625e-05, "model_forward_time": 0.02534198760986328, "step": 24489 }, { "epoch": 3.736724853515625e-05, "step": 24489, "training_step_time": 0.2020111083984375 }, { "epoch": 3.73687744140625e-05, "grad_norm": 0.21885116398334503, "learning_rate": 8.9425395433148e-06, "loss": 0.0052, "step": 24490 }, { "epoch": 3.73687744140625e-05, "model_forward_time": 0.025732040405273438, "step": 24490 }, { "epoch": 3.73687744140625e-05, "step": 24490, "training_step_time": 0.11340045928955078 }, { "epoch": 3.737030029296875e-05, "model_forward_time": 0.02470111846923828, "step": 24491 }, { "epoch": 3.737030029296875e-05, "step": 24491, "training_step_time": 0.14742755889892578 }, { "epoch": 3.7371826171875e-05, "model_forward_time": 0.027136802673339844, "step": 24492 }, { "epoch": 3.7371826171875e-05, "step": 24492, "training_step_time": 0.214522123336792 }, { "epoch": 3.737335205078125e-05, "model_forward_time": 0.024012327194213867, "step": 24493 }, { "epoch": 3.737335205078125e-05, "step": 24493, "training_step_time": 0.14451360702514648 }, { "epoch": 3.73748779296875e-05, "model_forward_time": 0.024347543716430664, "step": 24494 }, { "epoch": 3.73748779296875e-05, "step": 24494, "training_step_time": 0.17842626571655273 }, { "epoch": 3.737640380859375e-05, "model_forward_time": 0.024528980255126953, "step": 24495 }, { "epoch": 3.737640380859375e-05, "step": 24495, "training_step_time": 0.13213634490966797 }, { "epoch": 3.73779296875e-05, "model_forward_time": 0.02420806884765625, "step": 24496 }, { "epoch": 3.73779296875e-05, "step": 24496, "training_step_time": 0.11410212516784668 }, { "epoch": 3.737945556640625e-05, "model_forward_time": 0.02490687370300293, "step": 24497 }, { "epoch": 3.737945556640625e-05, "step": 24497, "training_step_time": 0.11078190803527832 }, { "epoch": 3.73809814453125e-05, "model_forward_time": 0.02488422393798828, "step": 24498 }, { "epoch": 3.73809814453125e-05, "step": 24498, "training_step_time": 0.1129605770111084 }, { "epoch": 3.738250732421875e-05, "model_forward_time": 0.02515125274658203, "step": 24499 }, { "epoch": 3.738250732421875e-05, "step": 24499, "training_step_time": 0.11073613166809082 }, { "epoch": 3.7384033203125e-05, "grad_norm": 0.20034775137901306, "learning_rate": 8.911109238737747e-06, "loss": 0.0062, "step": 24500 }, { "epoch": 3.7384033203125e-05, "model_forward_time": 0.025101661682128906, "step": 24500 }, { "epoch": 3.7384033203125e-05, "step": 24500, "training_step_time": 0.11008238792419434 }, { "epoch": 3.738555908203125e-05, "model_forward_time": 0.025507688522338867, "step": 24501 }, { "epoch": 3.738555908203125e-05, "step": 24501, "training_step_time": 0.10880517959594727 }, { "epoch": 3.73870849609375e-05, "model_forward_time": 0.025183439254760742, "step": 24502 }, { "epoch": 3.73870849609375e-05, "step": 24502, "training_step_time": 0.11184215545654297 }, { "epoch": 3.738861083984375e-05, "model_forward_time": 0.025285959243774414, "step": 24503 }, { "epoch": 3.738861083984375e-05, "step": 24503, "training_step_time": 0.11021113395690918 }, { "epoch": 3.739013671875e-05, "model_forward_time": 0.025157690048217773, "step": 24504 }, { "epoch": 3.739013671875e-05, "step": 24504, "training_step_time": 0.10738492012023926 }, { "epoch": 3.739166259765625e-05, "model_forward_time": 0.025032758712768555, "step": 24505 }, { "epoch": 3.739166259765625e-05, "step": 24505, "training_step_time": 0.11628198623657227 }, { "epoch": 3.73931884765625e-05, "model_forward_time": 0.026024341583251953, "step": 24506 }, { "epoch": 3.73931884765625e-05, "step": 24506, "training_step_time": 0.161360502243042 }, { "epoch": 3.739471435546875e-05, "model_forward_time": 0.024524450302124023, "step": 24507 }, { "epoch": 3.739471435546875e-05, "step": 24507, "training_step_time": 0.13050246238708496 }, { "epoch": 3.7396240234375e-05, "model_forward_time": 0.024622678756713867, "step": 24508 }, { "epoch": 3.7396240234375e-05, "step": 24508, "training_step_time": 0.1219339370727539 }, { "epoch": 3.739776611328125e-05, "model_forward_time": 0.02512359619140625, "step": 24509 }, { "epoch": 3.739776611328125e-05, "step": 24509, "training_step_time": 0.10653972625732422 }, { "epoch": 3.73992919921875e-05, "grad_norm": 0.19504018127918243, "learning_rate": 8.879728861037384e-06, "loss": 0.0033, "step": 24510 }, { "epoch": 3.73992919921875e-05, "model_forward_time": 0.026160478591918945, "step": 24510 }, { "epoch": 3.73992919921875e-05, "step": 24510, "training_step_time": 0.13931560516357422 }, { "epoch": 3.740081787109375e-05, "model_forward_time": 0.0252988338470459, "step": 24511 }, { "epoch": 3.740081787109375e-05, "step": 24511, "training_step_time": 0.15154314041137695 }, { "epoch": 3.740234375e-05, "model_forward_time": 0.024866819381713867, "step": 24512 }, { "epoch": 3.740234375e-05, "step": 24512, "training_step_time": 0.14258217811584473 }, { "epoch": 3.740386962890625e-05, "model_forward_time": 0.027960777282714844, "step": 24513 }, { "epoch": 3.740386962890625e-05, "step": 24513, "training_step_time": 0.1492781639099121 }, { "epoch": 3.74053955078125e-05, "model_forward_time": 0.024449586868286133, "step": 24514 }, { "epoch": 3.74053955078125e-05, "step": 24514, "training_step_time": 0.13181233406066895 }, { "epoch": 3.740692138671875e-05, "model_forward_time": 0.02438640594482422, "step": 24515 }, { "epoch": 3.740692138671875e-05, "step": 24515, "training_step_time": 0.12447261810302734 }, { "epoch": 3.7408447265625e-05, "model_forward_time": 0.024675607681274414, "step": 24516 }, { "epoch": 3.7408447265625e-05, "step": 24516, "training_step_time": 0.12322807312011719 }, { "epoch": 3.740997314453125e-05, "model_forward_time": 0.025222063064575195, "step": 24517 }, { "epoch": 3.740997314453125e-05, "step": 24517, "training_step_time": 0.11993098258972168 }, { "epoch": 3.74114990234375e-05, "model_forward_time": 0.02517843246459961, "step": 24518 }, { "epoch": 3.74114990234375e-05, "step": 24518, "training_step_time": 0.11707425117492676 }, { "epoch": 3.741302490234375e-05, "model_forward_time": 0.02510547637939453, "step": 24519 }, { "epoch": 3.741302490234375e-05, "step": 24519, "training_step_time": 0.11530876159667969 }, { "epoch": 3.741455078125e-05, "grad_norm": 0.12681689858436584, "learning_rate": 8.848398448343859e-06, "loss": 0.0087, "step": 24520 }, { "epoch": 3.741455078125e-05, "model_forward_time": 0.02576756477355957, "step": 24520 }, { "epoch": 3.741455078125e-05, "step": 24520, "training_step_time": 0.11209583282470703 }, { "epoch": 3.741607666015625e-05, "model_forward_time": 0.025216341018676758, "step": 24521 }, { "epoch": 3.741607666015625e-05, "step": 24521, "training_step_time": 0.10951066017150879 }, { "epoch": 3.74176025390625e-05, "model_forward_time": 0.024831295013427734, "step": 24522 }, { "epoch": 3.74176025390625e-05, "step": 24522, "training_step_time": 0.11006712913513184 }, { "epoch": 3.741912841796875e-05, "model_forward_time": 0.02495098114013672, "step": 24523 }, { "epoch": 3.741912841796875e-05, "step": 24523, "training_step_time": 0.19521474838256836 }, { "epoch": 3.7420654296875e-05, "model_forward_time": 0.024242639541625977, "step": 24524 }, { "epoch": 3.7420654296875e-05, "step": 24524, "training_step_time": 0.12537288665771484 }, { "epoch": 3.742218017578125e-05, "model_forward_time": 0.02432560920715332, "step": 24525 }, { "epoch": 3.742218017578125e-05, "step": 24525, "training_step_time": 0.12476468086242676 }, { "epoch": 3.74237060546875e-05, "model_forward_time": 0.025181293487548828, "step": 24526 }, { "epoch": 3.74237060546875e-05, "step": 24526, "training_step_time": 0.11827707290649414 }, { "epoch": 3.742523193359375e-05, "model_forward_time": 0.02495574951171875, "step": 24527 }, { "epoch": 3.742523193359375e-05, "step": 24527, "training_step_time": 0.1655290126800537 }, { "epoch": 3.74267578125e-05, "model_forward_time": 0.024499177932739258, "step": 24528 }, { "epoch": 3.74267578125e-05, "step": 24528, "training_step_time": 0.12804722785949707 }, { "epoch": 3.742828369140625e-05, "model_forward_time": 0.024279356002807617, "step": 24529 }, { "epoch": 3.742828369140625e-05, "step": 24529, "training_step_time": 0.10935354232788086 }, { "epoch": 3.74298095703125e-05, "grad_norm": 0.09753312170505524, "learning_rate": 8.817118038726558e-06, "loss": 0.0037, "step": 24530 }, { "epoch": 3.74298095703125e-05, "model_forward_time": 0.025224924087524414, "step": 24530 }, { "epoch": 3.74298095703125e-05, "step": 24530, "training_step_time": 0.10779190063476562 }, { "epoch": 3.743133544921875e-05, "model_forward_time": 0.025062084197998047, "step": 24531 }, { "epoch": 3.743133544921875e-05, "step": 24531, "training_step_time": 0.10641813278198242 }, { "epoch": 3.7432861328125e-05, "model_forward_time": 0.024801015853881836, "step": 24532 }, { "epoch": 3.7432861328125e-05, "step": 24532, "training_step_time": 0.10737800598144531 }, { "epoch": 3.743438720703125e-05, "model_forward_time": 0.025140762329101562, "step": 24533 }, { "epoch": 3.743438720703125e-05, "step": 24533, "training_step_time": 0.20610356330871582 }, { "epoch": 3.74359130859375e-05, "model_forward_time": 0.024512767791748047, "step": 24534 }, { "epoch": 3.74359130859375e-05, "step": 24534, "training_step_time": 0.18795490264892578 }, { "epoch": 3.743743896484375e-05, "model_forward_time": 0.024355173110961914, "step": 24535 }, { "epoch": 3.743743896484375e-05, "step": 24535, "training_step_time": 0.16538214683532715 }, { "epoch": 3.743896484375e-05, "model_forward_time": 0.024393320083618164, "step": 24536 }, { "epoch": 3.743896484375e-05, "step": 24536, "training_step_time": 0.1935722827911377 }, { "epoch": 3.744049072265625e-05, "model_forward_time": 0.024545907974243164, "step": 24537 }, { "epoch": 3.744049072265625e-05, "step": 24537, "training_step_time": 0.17475152015686035 }, { "epoch": 3.74420166015625e-05, "model_forward_time": 0.024323701858520508, "step": 24538 }, { "epoch": 3.74420166015625e-05, "step": 24538, "training_step_time": 0.13945984840393066 }, { "epoch": 3.744354248046875e-05, "model_forward_time": 0.02463841438293457, "step": 24539 }, { "epoch": 3.744354248046875e-05, "step": 24539, "training_step_time": 0.11001372337341309 }, { "epoch": 3.7445068359375e-05, "grad_norm": 0.2660139799118042, "learning_rate": 8.785887670194138e-06, "loss": 0.0046, "step": 24540 }, { "epoch": 3.7445068359375e-05, "model_forward_time": 0.024878501892089844, "step": 24540 }, { "epoch": 3.7445068359375e-05, "step": 24540, "training_step_time": 0.11049199104309082 }, { "epoch": 3.744659423828125e-05, "model_forward_time": 0.0254669189453125, "step": 24541 }, { "epoch": 3.744659423828125e-05, "step": 24541, "training_step_time": 0.10457706451416016 }, { "epoch": 3.74481201171875e-05, "model_forward_time": 0.02501678466796875, "step": 24542 }, { "epoch": 3.74481201171875e-05, "step": 24542, "training_step_time": 0.10866785049438477 }, { "epoch": 3.744964599609375e-05, "model_forward_time": 0.02570652961730957, "step": 24543 }, { "epoch": 3.744964599609375e-05, "step": 24543, "training_step_time": 0.10526847839355469 }, { "epoch": 3.7451171875e-05, "model_forward_time": 0.025527000427246094, "step": 24544 }, { "epoch": 3.7451171875e-05, "step": 24544, "training_step_time": 0.10568881034851074 }, { "epoch": 3.745269775390625e-05, "model_forward_time": 0.025386810302734375, "step": 24545 }, { "epoch": 3.745269775390625e-05, "step": 24545, "training_step_time": 0.10553574562072754 }, { "epoch": 3.74542236328125e-05, "model_forward_time": 0.02509903907775879, "step": 24546 }, { "epoch": 3.74542236328125e-05, "step": 24546, "training_step_time": 0.10498642921447754 }, { "epoch": 3.745574951171875e-05, "model_forward_time": 0.025448322296142578, "step": 24547 }, { "epoch": 3.745574951171875e-05, "step": 24547, "training_step_time": 0.10721015930175781 }, { "epoch": 3.7457275390625e-05, "model_forward_time": 0.024565458297729492, "step": 24548 }, { "epoch": 3.7457275390625e-05, "step": 24548, "training_step_time": 0.11133456230163574 }, { "epoch": 3.745880126953125e-05, "model_forward_time": 0.026674509048461914, "step": 24549 }, { "epoch": 3.745880126953125e-05, "step": 24549, "training_step_time": 0.10602760314941406 }, { "epoch": 3.74603271484375e-05, "grad_norm": 0.17690864205360413, "learning_rate": 8.754707380694427e-06, "loss": 0.0065, "step": 24550 }, { "epoch": 3.74603271484375e-05, "model_forward_time": 0.02669501304626465, "step": 24550 }, { "epoch": 3.74603271484375e-05, "step": 24550, "training_step_time": 0.18921208381652832 }, { "epoch": 3.746185302734375e-05, "model_forward_time": 0.02469015121459961, "step": 24551 }, { "epoch": 3.746185302734375e-05, "step": 24551, "training_step_time": 0.14371609687805176 }, { "epoch": 3.746337890625e-05, "model_forward_time": 0.024575471878051758, "step": 24552 }, { "epoch": 3.746337890625e-05, "step": 24552, "training_step_time": 0.19556093215942383 }, { "epoch": 3.746490478515625e-05, "model_forward_time": 0.024895191192626953, "step": 24553 }, { "epoch": 3.746490478515625e-05, "step": 24553, "training_step_time": 0.10324525833129883 }, { "epoch": 3.74664306640625e-05, "model_forward_time": 0.024805307388305664, "step": 24554 }, { "epoch": 3.74664306640625e-05, "step": 24554, "training_step_time": 0.10402107238769531 }, { "epoch": 3.746795654296875e-05, "model_forward_time": 0.025465965270996094, "step": 24555 }, { "epoch": 3.746795654296875e-05, "step": 24555, "training_step_time": 0.10894393920898438 }, { "epoch": 3.7469482421875e-05, "model_forward_time": 0.02540302276611328, "step": 24556 }, { "epoch": 3.7469482421875e-05, "step": 24556, "training_step_time": 0.10485053062438965 }, { "epoch": 3.747100830078125e-05, "model_forward_time": 0.02529740333557129, "step": 24557 }, { "epoch": 3.747100830078125e-05, "step": 24557, "training_step_time": 0.10475683212280273 }, { "epoch": 3.74725341796875e-05, "model_forward_time": 0.025225400924682617, "step": 24558 }, { "epoch": 3.74725341796875e-05, "step": 24558, "training_step_time": 0.10404729843139648 }, { "epoch": 3.747406005859375e-05, "model_forward_time": 0.0252072811126709, "step": 24559 }, { "epoch": 3.747406005859375e-05, "step": 24559, "training_step_time": 0.10393953323364258 }, { "epoch": 3.74755859375e-05, "grad_norm": 0.07185178995132446, "learning_rate": 8.723577208114419e-06, "loss": 0.0031, "step": 24560 }, { "epoch": 3.74755859375e-05, "model_forward_time": 0.02567744255065918, "step": 24560 }, { "epoch": 3.74755859375e-05, "step": 24560, "training_step_time": 0.1043844223022461 }, { "epoch": 3.747711181640625e-05, "model_forward_time": 0.02534794807434082, "step": 24561 }, { "epoch": 3.747711181640625e-05, "step": 24561, "training_step_time": 0.10820889472961426 }, { "epoch": 3.74786376953125e-05, "model_forward_time": 0.0249326229095459, "step": 24562 }, { "epoch": 3.74786376953125e-05, "step": 24562, "training_step_time": 0.10959291458129883 }, { "epoch": 3.748016357421875e-05, "model_forward_time": 0.025355100631713867, "step": 24563 }, { "epoch": 3.748016357421875e-05, "step": 24563, "training_step_time": 0.10719919204711914 }, { "epoch": 3.7481689453125e-05, "model_forward_time": 0.02827930450439453, "step": 24564 }, { "epoch": 3.7481689453125e-05, "step": 24564, "training_step_time": 0.1087489128112793 }, { "epoch": 3.748321533203125e-05, "model_forward_time": 0.02530670166015625, "step": 24565 }, { "epoch": 3.748321533203125e-05, "step": 24565, "training_step_time": 0.10726213455200195 }, { "epoch": 3.74847412109375e-05, "model_forward_time": 0.024993896484375, "step": 24566 }, { "epoch": 3.74847412109375e-05, "step": 24566, "training_step_time": 0.1052238941192627 }, { "epoch": 3.748626708984375e-05, "model_forward_time": 0.02536606788635254, "step": 24567 }, { "epoch": 3.748626708984375e-05, "step": 24567, "training_step_time": 0.1067500114440918 }, { "epoch": 3.748779296875e-05, "model_forward_time": 0.024975299835205078, "step": 24568 }, { "epoch": 3.748779296875e-05, "step": 24568, "training_step_time": 0.1979060173034668 }, { "epoch": 3.748931884765625e-05, "model_forward_time": 0.024240970611572266, "step": 24569 }, { "epoch": 3.748931884765625e-05, "step": 24569, "training_step_time": 0.12394356727600098 }, { "epoch": 3.74908447265625e-05, "grad_norm": 0.0854315385222435, "learning_rate": 8.692497190280224e-06, "loss": 0.0034, "step": 24570 }, { "epoch": 3.74908447265625e-05, "model_forward_time": 0.02436995506286621, "step": 24570 }, { "epoch": 3.74908447265625e-05, "step": 24570, "training_step_time": 0.11355400085449219 }, { "epoch": 3.749237060546875e-05, "model_forward_time": 0.025146007537841797, "step": 24571 }, { "epoch": 3.749237060546875e-05, "step": 24571, "training_step_time": 0.12517523765563965 }, { "epoch": 3.7493896484375e-05, "model_forward_time": 0.02553391456604004, "step": 24572 }, { "epoch": 3.7493896484375e-05, "step": 24572, "training_step_time": 0.1638336181640625 }, { "epoch": 3.749542236328125e-05, "model_forward_time": 0.02447676658630371, "step": 24573 }, { "epoch": 3.749542236328125e-05, "step": 24573, "training_step_time": 0.1622469425201416 }, { "epoch": 3.74969482421875e-05, "model_forward_time": 0.024545669555664062, "step": 24574 }, { "epoch": 3.74969482421875e-05, "step": 24574, "training_step_time": 0.11072659492492676 }, { "epoch": 3.749847412109375e-05, "model_forward_time": 0.024540424346923828, "step": 24575 }, { "epoch": 3.749847412109375e-05, "step": 24575, "training_step_time": 0.10304594039916992 }, { "epoch": 3.75e-05, "model_forward_time": 0.02496647834777832, "step": 24576 }, { "epoch": 3.75e-05, "step": 24576, "training_step_time": 0.10583209991455078 }, { "epoch": 3.750152587890625e-05, "model_forward_time": 0.0249941349029541, "step": 24577 }, { "epoch": 3.750152587890625e-05, "step": 24577, "training_step_time": 0.10685205459594727 }, { "epoch": 3.75030517578125e-05, "model_forward_time": 0.025499582290649414, "step": 24578 }, { "epoch": 3.75030517578125e-05, "step": 24578, "training_step_time": 0.2000105381011963 }, { "epoch": 3.750457763671875e-05, "model_forward_time": 0.02456212043762207, "step": 24579 }, { "epoch": 3.750457763671875e-05, "step": 24579, "training_step_time": 0.1266946792602539 }, { "epoch": 3.7506103515625e-05, "grad_norm": 0.30930033326148987, "learning_rate": 8.661467364956993e-06, "loss": 0.0062, "step": 24580 }, { "epoch": 3.7506103515625e-05, "model_forward_time": 0.02426314353942871, "step": 24580 }, { "epoch": 3.7506103515625e-05, "step": 24580, "training_step_time": 0.2139286994934082 }, { "epoch": 3.750762939453125e-05, "model_forward_time": 0.024489641189575195, "step": 24581 }, { "epoch": 3.750762939453125e-05, "step": 24581, "training_step_time": 0.13774561882019043 }, { "epoch": 3.75091552734375e-05, "model_forward_time": 0.024532318115234375, "step": 24582 }, { "epoch": 3.75091552734375e-05, "step": 24582, "training_step_time": 0.11119675636291504 }, { "epoch": 3.751068115234375e-05, "model_forward_time": 0.02542281150817871, "step": 24583 }, { "epoch": 3.751068115234375e-05, "step": 24583, "training_step_time": 0.10898995399475098 }, { "epoch": 3.751220703125e-05, "model_forward_time": 0.0253450870513916, "step": 24584 }, { "epoch": 3.751220703125e-05, "step": 24584, "training_step_time": 0.19411587715148926 }, { "epoch": 3.751373291015625e-05, "model_forward_time": 0.0246124267578125, "step": 24585 }, { "epoch": 3.751373291015625e-05, "step": 24585, "training_step_time": 0.10469555854797363 }, { "epoch": 3.75152587890625e-05, "model_forward_time": 0.024412155151367188, "step": 24586 }, { "epoch": 3.75152587890625e-05, "step": 24586, "training_step_time": 0.10247659683227539 }, { "epoch": 3.751678466796875e-05, "model_forward_time": 0.024933815002441406, "step": 24587 }, { "epoch": 3.751678466796875e-05, "step": 24587, "training_step_time": 0.10512018203735352 }, { "epoch": 3.7518310546875e-05, "model_forward_time": 0.0258939266204834, "step": 24588 }, { "epoch": 3.7518310546875e-05, "step": 24588, "training_step_time": 0.10527467727661133 }, { "epoch": 3.751983642578125e-05, "model_forward_time": 0.02485203742980957, "step": 24589 }, { "epoch": 3.751983642578125e-05, "step": 24589, "training_step_time": 0.10344696044921875 }, { "epoch": 3.75213623046875e-05, "grad_norm": 0.07855116575956345, "learning_rate": 8.630487769848877e-06, "loss": 0.0042, "step": 24590 }, { "epoch": 3.75213623046875e-05, "model_forward_time": 0.025447845458984375, "step": 24590 }, { "epoch": 3.75213623046875e-05, "step": 24590, "training_step_time": 0.10543131828308105 }, { "epoch": 3.752288818359375e-05, "model_forward_time": 0.02537369728088379, "step": 24591 }, { "epoch": 3.752288818359375e-05, "step": 24591, "training_step_time": 0.1058201789855957 }, { "epoch": 3.75244140625e-05, "model_forward_time": 0.02550053596496582, "step": 24592 }, { "epoch": 3.75244140625e-05, "step": 24592, "training_step_time": 0.10565638542175293 }, { "epoch": 3.752593994140625e-05, "model_forward_time": 0.027561664581298828, "step": 24593 }, { "epoch": 3.752593994140625e-05, "step": 24593, "training_step_time": 0.10858559608459473 }, { "epoch": 3.75274658203125e-05, "model_forward_time": 0.025360107421875, "step": 24594 }, { "epoch": 3.75274658203125e-05, "step": 24594, "training_step_time": 0.10986328125 }, { "epoch": 3.752899169921875e-05, "model_forward_time": 0.025528669357299805, "step": 24595 }, { "epoch": 3.752899169921875e-05, "step": 24595, "training_step_time": 0.11755108833312988 }, { "epoch": 3.7530517578125e-05, "model_forward_time": 0.025484800338745117, "step": 24596 }, { "epoch": 3.7530517578125e-05, "step": 24596, "training_step_time": 0.11875677108764648 }, { "epoch": 3.753204345703125e-05, "model_forward_time": 0.025776147842407227, "step": 24597 }, { "epoch": 3.753204345703125e-05, "step": 24597, "training_step_time": 0.11156797409057617 }, { "epoch": 3.75335693359375e-05, "model_forward_time": 0.02546834945678711, "step": 24598 }, { "epoch": 3.75335693359375e-05, "step": 24598, "training_step_time": 0.12378120422363281 }, { "epoch": 3.753509521484375e-05, "model_forward_time": 0.025303125381469727, "step": 24599 }, { "epoch": 3.753509521484375e-05, "step": 24599, "training_step_time": 0.11845684051513672 }, { "epoch": 3.753662109375e-05, "grad_norm": 0.11178535968065262, "learning_rate": 8.599558442598998e-06, "loss": 0.0046, "step": 24600 }, { "epoch": 3.753662109375e-05, "model_forward_time": 0.025345325469970703, "step": 24600 }, { "epoch": 3.753662109375e-05, "step": 24600, "training_step_time": 0.10921835899353027 }, { "epoch": 3.753814697265625e-05, "model_forward_time": 0.02861499786376953, "step": 24601 }, { "epoch": 3.753814697265625e-05, "step": 24601, "training_step_time": 0.1116337776184082 }, { "epoch": 3.75396728515625e-05, "model_forward_time": 0.025134801864624023, "step": 24602 }, { "epoch": 3.75396728515625e-05, "step": 24602, "training_step_time": 0.11266422271728516 }, { "epoch": 3.754119873046875e-05, "model_forward_time": 0.02392745018005371, "step": 24603 }, { "epoch": 3.754119873046875e-05, "step": 24603, "training_step_time": 0.10880923271179199 }, { "epoch": 3.7542724609375e-05, "model_forward_time": 0.024075984954833984, "step": 24604 }, { "epoch": 3.7542724609375e-05, "step": 24604, "training_step_time": 0.10943150520324707 }, { "epoch": 3.754425048828125e-05, "model_forward_time": 0.025032997131347656, "step": 24605 }, { "epoch": 3.754425048828125e-05, "step": 24605, "training_step_time": 0.10854268074035645 }, { "epoch": 3.75457763671875e-05, "model_forward_time": 0.02542591094970703, "step": 24606 }, { "epoch": 3.75457763671875e-05, "step": 24606, "training_step_time": 0.11033391952514648 }, { "epoch": 3.754730224609375e-05, "model_forward_time": 0.0251157283782959, "step": 24607 }, { "epoch": 3.754730224609375e-05, "step": 24607, "training_step_time": 0.10775351524353027 }, { "epoch": 3.7548828125e-05, "model_forward_time": 0.025029420852661133, "step": 24608 }, { "epoch": 3.7548828125e-05, "step": 24608, "training_step_time": 0.10602116584777832 }, { "epoch": 3.755035400390625e-05, "model_forward_time": 0.025055885314941406, "step": 24609 }, { "epoch": 3.755035400390625e-05, "step": 24609, "training_step_time": 0.10934710502624512 }, { "epoch": 3.75518798828125e-05, "grad_norm": 0.09628903865814209, "learning_rate": 8.568679420789433e-06, "loss": 0.01, "step": 24610 }, { "epoch": 3.75518798828125e-05, "model_forward_time": 0.024932861328125, "step": 24610 }, { "epoch": 3.75518798828125e-05, "step": 24610, "training_step_time": 0.10542559623718262 }, { "epoch": 3.755340576171875e-05, "model_forward_time": 0.02511429786682129, "step": 24611 }, { "epoch": 3.755340576171875e-05, "step": 24611, "training_step_time": 0.10642290115356445 }, { "epoch": 3.7554931640625e-05, "model_forward_time": 0.027379274368286133, "step": 24612 }, { "epoch": 3.7554931640625e-05, "step": 24612, "training_step_time": 0.10973906517028809 }, { "epoch": 3.755645751953125e-05, "model_forward_time": 0.02495551109313965, "step": 24613 }, { "epoch": 3.755645751953125e-05, "step": 24613, "training_step_time": 0.10364937782287598 }, { "epoch": 3.75579833984375e-05, "model_forward_time": 0.025000333786010742, "step": 24614 }, { "epoch": 3.75579833984375e-05, "step": 24614, "training_step_time": 0.10458111763000488 }, { "epoch": 3.755950927734375e-05, "model_forward_time": 0.02641916275024414, "step": 24615 }, { "epoch": 3.755950927734375e-05, "step": 24615, "training_step_time": 0.15184259414672852 }, { "epoch": 3.756103515625e-05, "model_forward_time": 0.024826526641845703, "step": 24616 }, { "epoch": 3.756103515625e-05, "step": 24616, "training_step_time": 0.11342358589172363 }, { "epoch": 3.756256103515625e-05, "model_forward_time": 0.024573564529418945, "step": 24617 }, { "epoch": 3.756256103515625e-05, "step": 24617, "training_step_time": 0.12888240814208984 }, { "epoch": 3.75640869140625e-05, "model_forward_time": 0.025684833526611328, "step": 24618 }, { "epoch": 3.75640869140625e-05, "step": 24618, "training_step_time": 0.1049187183380127 }, { "epoch": 3.756561279296875e-05, "model_forward_time": 0.02555990219116211, "step": 24619 }, { "epoch": 3.756561279296875e-05, "step": 24619, "training_step_time": 0.18158793449401855 }, { "epoch": 3.7567138671875e-05, "grad_norm": 0.11766628921031952, "learning_rate": 8.537850741941073e-06, "loss": 0.003, "step": 24620 }, { "epoch": 3.7567138671875e-05, "model_forward_time": 0.02450847625732422, "step": 24620 }, { "epoch": 3.7567138671875e-05, "step": 24620, "training_step_time": 0.1171262264251709 }, { "epoch": 3.756866455078125e-05, "model_forward_time": 0.024476289749145508, "step": 24621 }, { "epoch": 3.756866455078125e-05, "step": 24621, "training_step_time": 0.11860322952270508 }, { "epoch": 3.75701904296875e-05, "model_forward_time": 0.025623559951782227, "step": 24622 }, { "epoch": 3.75701904296875e-05, "step": 24622, "training_step_time": 0.10486555099487305 }, { "epoch": 3.757171630859375e-05, "model_forward_time": 0.024907350540161133, "step": 24623 }, { "epoch": 3.757171630859375e-05, "step": 24623, "training_step_time": 0.1063544750213623 }, { "epoch": 3.75732421875e-05, "model_forward_time": 0.025706052780151367, "step": 24624 }, { "epoch": 3.75732421875e-05, "step": 24624, "training_step_time": 0.10533499717712402 }, { "epoch": 3.757476806640625e-05, "model_forward_time": 0.025083065032958984, "step": 24625 }, { "epoch": 3.757476806640625e-05, "step": 24625, "training_step_time": 0.20092034339904785 }, { "epoch": 3.75762939453125e-05, "model_forward_time": 0.023831605911254883, "step": 24626 }, { "epoch": 3.75762939453125e-05, "step": 24626, "training_step_time": 0.11516332626342773 }, { "epoch": 3.757781982421875e-05, "model_forward_time": 0.024568557739257812, "step": 24627 }, { "epoch": 3.757781982421875e-05, "step": 24627, "training_step_time": 0.13889455795288086 }, { "epoch": 3.7579345703125e-05, "model_forward_time": 0.02492380142211914, "step": 24628 }, { "epoch": 3.7579345703125e-05, "step": 24628, "training_step_time": 0.1076345443725586 }, { "epoch": 3.758087158203125e-05, "model_forward_time": 0.02507615089416504, "step": 24629 }, { "epoch": 3.758087158203125e-05, "step": 24629, "training_step_time": 0.1330573558807373 }, { "epoch": 3.75823974609375e-05, "grad_norm": 0.07475856691598892, "learning_rate": 8.507072443513702e-06, "loss": 0.0116, "step": 24630 }, { "epoch": 3.75823974609375e-05, "model_forward_time": 0.025316238403320312, "step": 24630 }, { "epoch": 3.75823974609375e-05, "step": 24630, "training_step_time": 0.19191956520080566 }, { "epoch": 3.758392333984375e-05, "model_forward_time": 0.024086952209472656, "step": 24631 }, { "epoch": 3.758392333984375e-05, "step": 24631, "training_step_time": 0.19752264022827148 }, { "epoch": 3.758544921875e-05, "model_forward_time": 0.02460503578186035, "step": 24632 }, { "epoch": 3.758544921875e-05, "step": 24632, "training_step_time": 0.13787627220153809 }, { "epoch": 3.758697509765625e-05, "model_forward_time": 0.02401256561279297, "step": 24633 }, { "epoch": 3.758697509765625e-05, "step": 24633, "training_step_time": 0.14313602447509766 }, { "epoch": 3.75885009765625e-05, "model_forward_time": 0.024225234985351562, "step": 24634 }, { "epoch": 3.75885009765625e-05, "step": 24634, "training_step_time": 0.13921570777893066 }, { "epoch": 3.759002685546875e-05, "model_forward_time": 0.024692058563232422, "step": 24635 }, { "epoch": 3.759002685546875e-05, "step": 24635, "training_step_time": 0.13478803634643555 }, { "epoch": 3.7591552734375e-05, "model_forward_time": 0.024507999420166016, "step": 24636 }, { "epoch": 3.7591552734375e-05, "step": 24636, "training_step_time": 0.12508273124694824 }, { "epoch": 3.759307861328125e-05, "model_forward_time": 0.02461862564086914, "step": 24637 }, { "epoch": 3.759307861328125e-05, "step": 24637, "training_step_time": 0.12484216690063477 }, { "epoch": 3.75946044921875e-05, "model_forward_time": 0.02584218978881836, "step": 24638 }, { "epoch": 3.75946044921875e-05, "step": 24638, "training_step_time": 0.1053304672241211 }, { "epoch": 3.759613037109375e-05, "model_forward_time": 0.024356603622436523, "step": 24639 }, { "epoch": 3.759613037109375e-05, "step": 24639, "training_step_time": 0.11115026473999023 }, { "epoch": 3.759765625e-05, "grad_norm": 0.12253975868225098, "learning_rate": 8.476344562905841e-06, "loss": 0.0063, "step": 24640 }, { "epoch": 3.759765625e-05, "model_forward_time": 0.025294065475463867, "step": 24640 }, { "epoch": 3.759765625e-05, "step": 24640, "training_step_time": 0.11259293556213379 }, { "epoch": 3.759918212890625e-05, "model_forward_time": 0.025040864944458008, "step": 24641 }, { "epoch": 3.759918212890625e-05, "step": 24641, "training_step_time": 0.10966968536376953 }, { "epoch": 3.76007080078125e-05, "model_forward_time": 0.0253143310546875, "step": 24642 }, { "epoch": 3.76007080078125e-05, "step": 24642, "training_step_time": 0.10686635971069336 }, { "epoch": 3.760223388671875e-05, "model_forward_time": 0.025565624237060547, "step": 24643 }, { "epoch": 3.760223388671875e-05, "step": 24643, "training_step_time": 0.1854720115661621 }, { "epoch": 3.7603759765625e-05, "model_forward_time": 0.024907350540161133, "step": 24644 }, { "epoch": 3.7603759765625e-05, "step": 24644, "training_step_time": 0.10588550567626953 }, { "epoch": 3.760528564453125e-05, "model_forward_time": 0.02510666847229004, "step": 24645 }, { "epoch": 3.760528564453125e-05, "step": 24645, "training_step_time": 0.1080477237701416 }, { "epoch": 3.76068115234375e-05, "model_forward_time": 0.025553464889526367, "step": 24646 }, { "epoch": 3.76068115234375e-05, "step": 24646, "training_step_time": 0.10716819763183594 }, { "epoch": 3.760833740234375e-05, "model_forward_time": 0.025299549102783203, "step": 24647 }, { "epoch": 3.760833740234375e-05, "step": 24647, "training_step_time": 0.10970139503479004 }, { "epoch": 3.760986328125e-05, "model_forward_time": 0.025234460830688477, "step": 24648 }, { "epoch": 3.760986328125e-05, "step": 24648, "training_step_time": 0.10617804527282715 }, { "epoch": 3.761138916015625e-05, "model_forward_time": 0.025331735610961914, "step": 24649 }, { "epoch": 3.761138916015625e-05, "step": 24649, "training_step_time": 0.10564613342285156 }, { "epoch": 3.76129150390625e-05, "grad_norm": 0.08911236375570297, "learning_rate": 8.445667137454761e-06, "loss": 0.0045, "step": 24650 }, { "epoch": 3.76129150390625e-05, "model_forward_time": 0.02532219886779785, "step": 24650 }, { "epoch": 3.76129150390625e-05, "step": 24650, "training_step_time": 0.10675811767578125 }, { "epoch": 3.761444091796875e-05, "model_forward_time": 0.025267839431762695, "step": 24651 }, { "epoch": 3.761444091796875e-05, "step": 24651, "training_step_time": 0.10497117042541504 }, { "epoch": 3.7615966796875e-05, "model_forward_time": 0.0250399112701416, "step": 24652 }, { "epoch": 3.7615966796875e-05, "step": 24652, "training_step_time": 0.10841941833496094 }, { "epoch": 3.761749267578125e-05, "model_forward_time": 0.02491593360900879, "step": 24653 }, { "epoch": 3.761749267578125e-05, "step": 24653, "training_step_time": 0.10384988784790039 }, { "epoch": 3.76190185546875e-05, "model_forward_time": 0.028411388397216797, "step": 24654 }, { "epoch": 3.76190185546875e-05, "step": 24654, "training_step_time": 0.10800600051879883 }, { "epoch": 3.762054443359375e-05, "model_forward_time": 0.02501678466796875, "step": 24655 }, { "epoch": 3.762054443359375e-05, "step": 24655, "training_step_time": 0.10379433631896973 }, { "epoch": 3.76220703125e-05, "model_forward_time": 0.025246858596801758, "step": 24656 }, { "epoch": 3.76220703125e-05, "step": 24656, "training_step_time": 0.10580968856811523 }, { "epoch": 3.762359619140625e-05, "model_forward_time": 0.025104045867919922, "step": 24657 }, { "epoch": 3.762359619140625e-05, "step": 24657, "training_step_time": 0.10452532768249512 }, { "epoch": 3.76251220703125e-05, "model_forward_time": 0.025057554244995117, "step": 24658 }, { "epoch": 3.76251220703125e-05, "step": 24658, "training_step_time": 0.10411739349365234 }, { "epoch": 3.762664794921875e-05, "model_forward_time": 0.024983644485473633, "step": 24659 }, { "epoch": 3.762664794921875e-05, "step": 24659, "training_step_time": 0.10465526580810547 }, { "epoch": 3.7628173828125e-05, "grad_norm": 0.1461312621831894, "learning_rate": 8.415040204436426e-06, "loss": 0.008, "step": 24660 }, { "epoch": 3.7628173828125e-05, "model_forward_time": 0.02513408660888672, "step": 24660 }, { "epoch": 3.7628173828125e-05, "step": 24660, "training_step_time": 0.1800389289855957 }, { "epoch": 3.762969970703125e-05, "model_forward_time": 0.024550676345825195, "step": 24661 }, { "epoch": 3.762969970703125e-05, "step": 24661, "training_step_time": 0.17178106307983398 }, { "epoch": 3.76312255859375e-05, "model_forward_time": 0.02426433563232422, "step": 24662 }, { "epoch": 3.76312255859375e-05, "step": 24662, "training_step_time": 0.13344168663024902 }, { "epoch": 3.763275146484375e-05, "model_forward_time": 0.024158954620361328, "step": 24663 }, { "epoch": 3.763275146484375e-05, "step": 24663, "training_step_time": 0.1519787311553955 }, { "epoch": 3.763427734375e-05, "model_forward_time": 0.024387121200561523, "step": 24664 }, { "epoch": 3.763427734375e-05, "step": 24664, "training_step_time": 0.10198736190795898 }, { "epoch": 3.763580322265625e-05, "model_forward_time": 0.02516317367553711, "step": 24665 }, { "epoch": 3.763580322265625e-05, "step": 24665, "training_step_time": 0.11841869354248047 }, { "epoch": 3.76373291015625e-05, "model_forward_time": 0.02498149871826172, "step": 24666 }, { "epoch": 3.76373291015625e-05, "step": 24666, "training_step_time": 0.1155397891998291 }, { "epoch": 3.763885498046875e-05, "model_forward_time": 0.025063514709472656, "step": 24667 }, { "epoch": 3.763885498046875e-05, "step": 24667, "training_step_time": 0.10306620597839355 }, { "epoch": 3.7640380859375e-05, "model_forward_time": 0.025312423706054688, "step": 24668 }, { "epoch": 3.7640380859375e-05, "step": 24668, "training_step_time": 0.10446715354919434 }, { "epoch": 3.764190673828125e-05, "model_forward_time": 0.02540874481201172, "step": 24669 }, { "epoch": 3.764190673828125e-05, "step": 24669, "training_step_time": 0.1122593879699707 }, { "epoch": 3.76434326171875e-05, "grad_norm": 0.11139220744371414, "learning_rate": 8.384463801065434e-06, "loss": 0.0091, "step": 24670 }, { "epoch": 3.76434326171875e-05, "model_forward_time": 0.025043487548828125, "step": 24670 }, { "epoch": 3.76434326171875e-05, "step": 24670, "training_step_time": 0.12331867218017578 }, { "epoch": 3.764495849609375e-05, "model_forward_time": 0.02539825439453125, "step": 24671 }, { "epoch": 3.764495849609375e-05, "step": 24671, "training_step_time": 0.20186948776245117 }, { "epoch": 3.7646484375e-05, "model_forward_time": 0.024463891983032227, "step": 24672 }, { "epoch": 3.7646484375e-05, "step": 24672, "training_step_time": 0.13306069374084473 }, { "epoch": 3.764801025390625e-05, "model_forward_time": 0.024277448654174805, "step": 24673 }, { "epoch": 3.764801025390625e-05, "step": 24673, "training_step_time": 0.1932995319366455 }, { "epoch": 3.76495361328125e-05, "model_forward_time": 0.02436518669128418, "step": 24674 }, { "epoch": 3.76495361328125e-05, "step": 24674, "training_step_time": 0.16255617141723633 }, { "epoch": 3.765106201171875e-05, "model_forward_time": 0.024561166763305664, "step": 24675 }, { "epoch": 3.765106201171875e-05, "step": 24675, "training_step_time": 0.2116239070892334 }, { "epoch": 3.7652587890625e-05, "model_forward_time": 0.024524927139282227, "step": 24676 }, { "epoch": 3.7652587890625e-05, "step": 24676, "training_step_time": 0.1497359275817871 }, { "epoch": 3.765411376953125e-05, "model_forward_time": 0.024440526962280273, "step": 24677 }, { "epoch": 3.765411376953125e-05, "step": 24677, "training_step_time": 0.10516738891601562 }, { "epoch": 3.76556396484375e-05, "model_forward_time": 0.024184226989746094, "step": 24678 }, { "epoch": 3.76556396484375e-05, "step": 24678, "training_step_time": 0.10623979568481445 }, { "epoch": 3.765716552734375e-05, "model_forward_time": 0.025327682495117188, "step": 24679 }, { "epoch": 3.765716552734375e-05, "step": 24679, "training_step_time": 0.10723495483398438 }, { "epoch": 3.765869140625e-05, "grad_norm": 0.1620771437883377, "learning_rate": 8.353937964495029e-06, "loss": 0.0058, "step": 24680 }, { "epoch": 3.765869140625e-05, "model_forward_time": 0.025048017501831055, "step": 24680 }, { "epoch": 3.765869140625e-05, "step": 24680, "training_step_time": 0.10497760772705078 }, { "epoch": 3.766021728515625e-05, "model_forward_time": 0.025429248809814453, "step": 24681 }, { "epoch": 3.766021728515625e-05, "step": 24681, "training_step_time": 0.11380624771118164 }, { "epoch": 3.76617431640625e-05, "model_forward_time": 0.025442123413085938, "step": 24682 }, { "epoch": 3.76617431640625e-05, "step": 24682, "training_step_time": 0.10442304611206055 }, { "epoch": 3.766326904296875e-05, "model_forward_time": 0.027236461639404297, "step": 24683 }, { "epoch": 3.766326904296875e-05, "step": 24683, "training_step_time": 0.10898351669311523 }, { "epoch": 3.7664794921875e-05, "model_forward_time": 0.025313138961791992, "step": 24684 }, { "epoch": 3.7664794921875e-05, "step": 24684, "training_step_time": 0.10648298263549805 }, { "epoch": 3.766632080078125e-05, "model_forward_time": 0.025181293487548828, "step": 24685 }, { "epoch": 3.766632080078125e-05, "step": 24685, "training_step_time": 0.10473275184631348 }, { "epoch": 3.76678466796875e-05, "model_forward_time": 0.02634143829345703, "step": 24686 }, { "epoch": 3.76678466796875e-05, "step": 24686, "training_step_time": 0.10670804977416992 }, { "epoch": 3.766937255859375e-05, "model_forward_time": 0.025484323501586914, "step": 24687 }, { "epoch": 3.766937255859375e-05, "step": 24687, "training_step_time": 0.11006927490234375 }, { "epoch": 3.76708984375e-05, "model_forward_time": 0.02533698081970215, "step": 24688 }, { "epoch": 3.76708984375e-05, "step": 24688, "training_step_time": 0.11202621459960938 }, { "epoch": 3.767242431640625e-05, "model_forward_time": 0.0254364013671875, "step": 24689 }, { "epoch": 3.767242431640625e-05, "step": 24689, "training_step_time": 0.10852289199829102 }, { "epoch": 3.76739501953125e-05, "grad_norm": 0.3249269127845764, "learning_rate": 8.323462731816961e-06, "loss": 0.0065, "step": 24690 }, { "epoch": 3.76739501953125e-05, "model_forward_time": 0.025275707244873047, "step": 24690 }, { "epoch": 3.76739501953125e-05, "step": 24690, "training_step_time": 0.11052441596984863 }, { "epoch": 3.767547607421875e-05, "model_forward_time": 0.026571273803710938, "step": 24691 }, { "epoch": 3.767547607421875e-05, "step": 24691, "training_step_time": 0.11166548728942871 }, { "epoch": 3.7677001953125e-05, "model_forward_time": 0.025646209716796875, "step": 24692 }, { "epoch": 3.7677001953125e-05, "step": 24692, "training_step_time": 0.10544109344482422 }, { "epoch": 3.767852783203125e-05, "model_forward_time": 0.025808334350585938, "step": 24693 }, { "epoch": 3.767852783203125e-05, "step": 24693, "training_step_time": 0.10573482513427734 }, { "epoch": 3.76800537109375e-05, "model_forward_time": 0.025771141052246094, "step": 24694 }, { "epoch": 3.76800537109375e-05, "step": 24694, "training_step_time": 0.10361909866333008 }, { "epoch": 3.768157958984375e-05, "model_forward_time": 0.027199268341064453, "step": 24695 }, { "epoch": 3.768157958984375e-05, "step": 24695, "training_step_time": 0.10750150680541992 }, { "epoch": 3.768310546875e-05, "model_forward_time": 0.025446414947509766, "step": 24696 }, { "epoch": 3.768310546875e-05, "step": 24696, "training_step_time": 0.10345077514648438 }, { "epoch": 3.768463134765625e-05, "model_forward_time": 0.02542877197265625, "step": 24697 }, { "epoch": 3.768463134765625e-05, "step": 24697, "training_step_time": 0.1033928394317627 }, { "epoch": 3.76861572265625e-05, "model_forward_time": 0.025301456451416016, "step": 24698 }, { "epoch": 3.76861572265625e-05, "step": 24698, "training_step_time": 0.10829472541809082 }, { "epoch": 3.768768310546875e-05, "model_forward_time": 0.025367021560668945, "step": 24699 }, { "epoch": 3.768768310546875e-05, "step": 24699, "training_step_time": 0.10846209526062012 }, { "epoch": 3.7689208984375e-05, "grad_norm": 0.20272240042686462, "learning_rate": 8.293038140061515e-06, "loss": 0.0069, "step": 24700 }, { "epoch": 3.7689208984375e-05, "model_forward_time": 0.02530694007873535, "step": 24700 }, { "epoch": 3.7689208984375e-05, "step": 24700, "training_step_time": 0.10509347915649414 }, { "epoch": 3.769073486328125e-05, "model_forward_time": 0.02567458152770996, "step": 24701 }, { "epoch": 3.769073486328125e-05, "step": 24701, "training_step_time": 0.10825347900390625 }, { "epoch": 3.76922607421875e-05, "model_forward_time": 0.026160478591918945, "step": 24702 }, { "epoch": 3.76922607421875e-05, "step": 24702, "training_step_time": 0.104736328125 }, { "epoch": 3.769378662109375e-05, "model_forward_time": 0.02494978904724121, "step": 24703 }, { "epoch": 3.769378662109375e-05, "step": 24703, "training_step_time": 0.10441446304321289 }, { "epoch": 3.76953125e-05, "model_forward_time": 0.025556325912475586, "step": 24704 }, { "epoch": 3.76953125e-05, "step": 24704, "training_step_time": 0.11941909790039062 }, { "epoch": 3.769683837890625e-05, "model_forward_time": 0.024611234664916992, "step": 24705 }, { "epoch": 3.769683837890625e-05, "step": 24705, "training_step_time": 0.15364336967468262 }, { "epoch": 3.76983642578125e-05, "model_forward_time": 0.024694204330444336, "step": 24706 }, { "epoch": 3.76983642578125e-05, "step": 24706, "training_step_time": 0.19009661674499512 }, { "epoch": 3.769989013671875e-05, "model_forward_time": 0.028205394744873047, "step": 24707 }, { "epoch": 3.769989013671875e-05, "step": 24707, "training_step_time": 0.17749595642089844 }, { "epoch": 3.7701416015625e-05, "model_forward_time": 0.024264097213745117, "step": 24708 }, { "epoch": 3.7701416015625e-05, "step": 24708, "training_step_time": 0.14220094680786133 }, { "epoch": 3.770294189453125e-05, "model_forward_time": 0.024566173553466797, "step": 24709 }, { "epoch": 3.770294189453125e-05, "step": 24709, "training_step_time": 0.1421966552734375 }, { "epoch": 3.77044677734375e-05, "grad_norm": 0.2357063889503479, "learning_rate": 8.262664226197436e-06, "loss": 0.0064, "step": 24710 }, { "epoch": 3.77044677734375e-05, "model_forward_time": 0.024228572845458984, "step": 24710 }, { "epoch": 3.77044677734375e-05, "step": 24710, "training_step_time": 0.21212506294250488 }, { "epoch": 3.770599365234375e-05, "model_forward_time": 0.025084972381591797, "step": 24711 }, { "epoch": 3.770599365234375e-05, "step": 24711, "training_step_time": 0.1172323226928711 }, { "epoch": 3.770751953125e-05, "model_forward_time": 0.024891138076782227, "step": 24712 }, { "epoch": 3.770751953125e-05, "step": 24712, "training_step_time": 0.11734604835510254 }, { "epoch": 3.770904541015625e-05, "model_forward_time": 0.025814056396484375, "step": 24713 }, { "epoch": 3.770904541015625e-05, "step": 24713, "training_step_time": 0.113189697265625 }, { "epoch": 3.77105712890625e-05, "model_forward_time": 0.02547168731689453, "step": 24714 }, { "epoch": 3.77105712890625e-05, "step": 24714, "training_step_time": 0.11117315292358398 }, { "epoch": 3.771209716796875e-05, "model_forward_time": 0.024678707122802734, "step": 24715 }, { "epoch": 3.771209716796875e-05, "step": 24715, "training_step_time": 0.10904335975646973 }, { "epoch": 3.7713623046875e-05, "model_forward_time": 0.02523016929626465, "step": 24716 }, { "epoch": 3.7713623046875e-05, "step": 24716, "training_step_time": 0.11055374145507812 }, { "epoch": 3.771514892578125e-05, "model_forward_time": 0.025364398956298828, "step": 24717 }, { "epoch": 3.771514892578125e-05, "step": 24717, "training_step_time": 0.1768040657043457 }, { "epoch": 3.77166748046875e-05, "model_forward_time": 0.025030851364135742, "step": 24718 }, { "epoch": 3.77166748046875e-05, "step": 24718, "training_step_time": 0.11713981628417969 }, { "epoch": 3.771820068359375e-05, "model_forward_time": 0.02494072914123535, "step": 24719 }, { "epoch": 3.771820068359375e-05, "step": 24719, "training_step_time": 0.11198687553405762 }, { "epoch": 3.77197265625e-05, "grad_norm": 0.17401650547981262, "learning_rate": 8.232341027131885e-06, "loss": 0.0067, "step": 24720 }, { "epoch": 3.77197265625e-05, "model_forward_time": 0.025256633758544922, "step": 24720 }, { "epoch": 3.77197265625e-05, "step": 24720, "training_step_time": 0.11959171295166016 }, { "epoch": 3.772125244140625e-05, "model_forward_time": 0.025823116302490234, "step": 24721 }, { "epoch": 3.772125244140625e-05, "step": 24721, "training_step_time": 0.1282958984375 }, { "epoch": 3.77227783203125e-05, "model_forward_time": 0.025454282760620117, "step": 24722 }, { "epoch": 3.77227783203125e-05, "step": 24722, "training_step_time": 0.11265850067138672 }, { "epoch": 3.772430419921875e-05, "model_forward_time": 0.0255277156829834, "step": 24723 }, { "epoch": 3.772430419921875e-05, "step": 24723, "training_step_time": 0.10761475563049316 }, { "epoch": 3.7725830078125e-05, "model_forward_time": 0.025624513626098633, "step": 24724 }, { "epoch": 3.7725830078125e-05, "step": 24724, "training_step_time": 0.10893058776855469 }, { "epoch": 3.772735595703125e-05, "model_forward_time": 0.025362253189086914, "step": 24725 }, { "epoch": 3.772735595703125e-05, "step": 24725, "training_step_time": 0.10471963882446289 }, { "epoch": 3.77288818359375e-05, "model_forward_time": 0.025725841522216797, "step": 24726 }, { "epoch": 3.77288818359375e-05, "step": 24726, "training_step_time": 0.10526204109191895 }, { "epoch": 3.773040771484375e-05, "model_forward_time": 0.025378704071044922, "step": 24727 }, { "epoch": 3.773040771484375e-05, "step": 24727, "training_step_time": 0.10561084747314453 }, { "epoch": 3.773193359375e-05, "model_forward_time": 0.025200366973876953, "step": 24728 }, { "epoch": 3.773193359375e-05, "step": 24728, "training_step_time": 0.10489869117736816 }, { "epoch": 3.773345947265625e-05, "model_forward_time": 0.024952173233032227, "step": 24729 }, { "epoch": 3.773345947265625e-05, "step": 24729, "training_step_time": 0.1089940071105957 }, { "epoch": 3.77349853515625e-05, "grad_norm": 0.09289814531803131, "learning_rate": 8.202068579710431e-06, "loss": 0.0054, "step": 24730 }, { "epoch": 3.77349853515625e-05, "model_forward_time": 0.025407075881958008, "step": 24730 }, { "epoch": 3.77349853515625e-05, "step": 24730, "training_step_time": 0.1092367172241211 }, { "epoch": 3.773651123046875e-05, "model_forward_time": 0.02545022964477539, "step": 24731 }, { "epoch": 3.773651123046875e-05, "step": 24731, "training_step_time": 0.10315895080566406 }, { "epoch": 3.7738037109375e-05, "model_forward_time": 0.02464437484741211, "step": 24732 }, { "epoch": 3.7738037109375e-05, "step": 24732, "training_step_time": 0.14650726318359375 }, { "epoch": 3.773956298828125e-05, "model_forward_time": 0.02463507652282715, "step": 24733 }, { "epoch": 3.773956298828125e-05, "step": 24733, "training_step_time": 0.11372923851013184 }, { "epoch": 3.77410888671875e-05, "model_forward_time": 0.02526712417602539, "step": 24734 }, { "epoch": 3.77410888671875e-05, "step": 24734, "training_step_time": 0.11133694648742676 }, { "epoch": 3.774261474609375e-05, "model_forward_time": 0.02531719207763672, "step": 24735 }, { "epoch": 3.774261474609375e-05, "step": 24735, "training_step_time": 0.11350131034851074 }, { "epoch": 3.7744140625e-05, "model_forward_time": 0.02557826042175293, "step": 24736 }, { "epoch": 3.7744140625e-05, "step": 24736, "training_step_time": 0.19521737098693848 }, { "epoch": 3.774566650390625e-05, "model_forward_time": 0.02521228790283203, "step": 24737 }, { "epoch": 3.774566650390625e-05, "step": 24737, "training_step_time": 0.13204026222229004 }, { "epoch": 3.77471923828125e-05, "model_forward_time": 0.027198314666748047, "step": 24738 }, { "epoch": 3.77471923828125e-05, "step": 24738, "training_step_time": 0.10526013374328613 }, { "epoch": 3.774871826171875e-05, "model_forward_time": 0.025277376174926758, "step": 24739 }, { "epoch": 3.774871826171875e-05, "step": 24739, "training_step_time": 0.10287141799926758 }, { "epoch": 3.7750244140625e-05, "grad_norm": 0.11223476380109787, "learning_rate": 8.17184692071694e-06, "loss": 0.0061, "step": 24740 }, { "epoch": 3.7750244140625e-05, "model_forward_time": 0.025348424911499023, "step": 24740 }, { "epoch": 3.7750244140625e-05, "step": 24740, "training_step_time": 0.1029973030090332 }, { "epoch": 3.775177001953125e-05, "model_forward_time": 0.025543212890625, "step": 24741 }, { "epoch": 3.775177001953125e-05, "step": 24741, "training_step_time": 0.10318160057067871 }, { "epoch": 3.77532958984375e-05, "model_forward_time": 0.025265216827392578, "step": 24742 }, { "epoch": 3.77532958984375e-05, "step": 24742, "training_step_time": 0.1047062873840332 }, { "epoch": 3.775482177734375e-05, "model_forward_time": 0.025072813034057617, "step": 24743 }, { "epoch": 3.775482177734375e-05, "step": 24743, "training_step_time": 0.10767769813537598 }, { "epoch": 3.775634765625e-05, "model_forward_time": 0.02535700798034668, "step": 24744 }, { "epoch": 3.775634765625e-05, "step": 24744, "training_step_time": 0.11373639106750488 }, { "epoch": 3.775787353515625e-05, "model_forward_time": 0.025676488876342773, "step": 24745 }, { "epoch": 3.775787353515625e-05, "step": 24745, "training_step_time": 0.12296128273010254 }, { "epoch": 3.77593994140625e-05, "model_forward_time": 0.02505350112915039, "step": 24746 }, { "epoch": 3.77593994140625e-05, "step": 24746, "training_step_time": 0.11015033721923828 }, { "epoch": 3.776092529296875e-05, "model_forward_time": 0.025101900100708008, "step": 24747 }, { "epoch": 3.776092529296875e-05, "step": 24747, "training_step_time": 0.11037921905517578 }, { "epoch": 3.7762451171875e-05, "model_forward_time": 0.02519989013671875, "step": 24748 }, { "epoch": 3.7762451171875e-05, "step": 24748, "training_step_time": 0.11372828483581543 }, { "epoch": 3.776397705078125e-05, "model_forward_time": 0.025419235229492188, "step": 24749 }, { "epoch": 3.776397705078125e-05, "step": 24749, "training_step_time": 0.1130373477935791 }, { "epoch": 3.77655029296875e-05, "grad_norm": 0.22124172747135162, "learning_rate": 8.141676086873572e-06, "loss": 0.0076, "step": 24750 }, { "epoch": 3.77655029296875e-05, "model_forward_time": 0.02508831024169922, "step": 24750 }, { "epoch": 3.77655029296875e-05, "step": 24750, "training_step_time": 0.11056804656982422 }, { "epoch": 3.776702880859375e-05, "model_forward_time": 0.025234222412109375, "step": 24751 }, { "epoch": 3.776702880859375e-05, "step": 24751, "training_step_time": 0.11130142211914062 }, { "epoch": 3.77685546875e-05, "model_forward_time": 0.025200843811035156, "step": 24752 }, { "epoch": 3.77685546875e-05, "step": 24752, "training_step_time": 0.2108454704284668 }, { "epoch": 3.777008056640625e-05, "model_forward_time": 0.024896860122680664, "step": 24753 }, { "epoch": 3.777008056640625e-05, "step": 24753, "training_step_time": 0.1269211769104004 }, { "epoch": 3.77716064453125e-05, "model_forward_time": 0.02443552017211914, "step": 24754 }, { "epoch": 3.77716064453125e-05, "step": 24754, "training_step_time": 0.12057995796203613 }, { "epoch": 3.777313232421875e-05, "model_forward_time": 0.026112794876098633, "step": 24755 }, { "epoch": 3.777313232421875e-05, "step": 24755, "training_step_time": 0.15973544120788574 }, { "epoch": 3.7774658203125e-05, "model_forward_time": 0.024621009826660156, "step": 24756 }, { "epoch": 3.7774658203125e-05, "step": 24756, "training_step_time": 0.1714780330657959 }, { "epoch": 3.777618408203125e-05, "model_forward_time": 0.02417612075805664, "step": 24757 }, { "epoch": 3.777618408203125e-05, "step": 24757, "training_step_time": 0.16345715522766113 }, { "epoch": 3.77777099609375e-05, "model_forward_time": 0.024393081665039062, "step": 24758 }, { "epoch": 3.77777099609375e-05, "step": 24758, "training_step_time": 0.1044621467590332 }, { "epoch": 3.777923583984375e-05, "model_forward_time": 0.0247037410736084, "step": 24759 }, { "epoch": 3.777923583984375e-05, "step": 24759, "training_step_time": 0.10347580909729004 }, { "epoch": 3.778076171875e-05, "grad_norm": 0.2569579482078552, "learning_rate": 8.111556114840746e-06, "loss": 0.0091, "step": 24760 }, { "epoch": 3.778076171875e-05, "model_forward_time": 0.025109529495239258, "step": 24760 }, { "epoch": 3.778076171875e-05, "step": 24760, "training_step_time": 0.1094667911529541 }, { "epoch": 3.778228759765625e-05, "model_forward_time": 0.025603771209716797, "step": 24761 }, { "epoch": 3.778228759765625e-05, "step": 24761, "training_step_time": 0.10563373565673828 }, { "epoch": 3.77838134765625e-05, "model_forward_time": 0.024660110473632812, "step": 24762 }, { "epoch": 3.77838134765625e-05, "step": 24762, "training_step_time": 0.10436439514160156 }, { "epoch": 3.778533935546875e-05, "model_forward_time": 0.025661706924438477, "step": 24763 }, { "epoch": 3.778533935546875e-05, "step": 24763, "training_step_time": 0.13325762748718262 }, { "epoch": 3.7786865234375e-05, "model_forward_time": 0.025525331497192383, "step": 24764 }, { "epoch": 3.7786865234375e-05, "step": 24764, "training_step_time": 0.14995694160461426 }, { "epoch": 3.778839111328125e-05, "model_forward_time": 0.025220155715942383, "step": 24765 }, { "epoch": 3.778839111328125e-05, "step": 24765, "training_step_time": 0.14374899864196777 }, { "epoch": 3.77899169921875e-05, "model_forward_time": 0.025132179260253906, "step": 24766 }, { "epoch": 3.77899169921875e-05, "step": 24766, "training_step_time": 0.18506383895874023 }, { "epoch": 3.779144287109375e-05, "model_forward_time": 0.024749279022216797, "step": 24767 }, { "epoch": 3.779144287109375e-05, "step": 24767, "training_step_time": 0.1897413730621338 }, { "epoch": 3.779296875e-05, "model_forward_time": 0.02484416961669922, "step": 24768 }, { "epoch": 3.779296875e-05, "step": 24768, "training_step_time": 0.17188239097595215 }, { "epoch": 3.779449462890625e-05, "model_forward_time": 0.024226903915405273, "step": 24769 }, { "epoch": 3.779449462890625e-05, "step": 24769, "training_step_time": 0.10722899436950684 }, { "epoch": 3.77960205078125e-05, "grad_norm": 0.1255207508802414, "learning_rate": 8.08148704121705e-06, "loss": 0.0067, "step": 24770 }, { "epoch": 3.77960205078125e-05, "model_forward_time": 0.024807214736938477, "step": 24770 }, { "epoch": 3.77960205078125e-05, "step": 24770, "training_step_time": 0.1028585433959961 }, { "epoch": 3.779754638671875e-05, "model_forward_time": 0.02544236183166504, "step": 24771 }, { "epoch": 3.779754638671875e-05, "step": 24771, "training_step_time": 0.1083834171295166 }, { "epoch": 3.7799072265625e-05, "model_forward_time": 0.02590322494506836, "step": 24772 }, { "epoch": 3.7799072265625e-05, "step": 24772, "training_step_time": 0.10726642608642578 }, { "epoch": 3.780059814453125e-05, "model_forward_time": 0.028682708740234375, "step": 24773 }, { "epoch": 3.780059814453125e-05, "step": 24773, "training_step_time": 0.10828852653503418 }, { "epoch": 3.78021240234375e-05, "model_forward_time": 0.025840282440185547, "step": 24774 }, { "epoch": 3.78021240234375e-05, "step": 24774, "training_step_time": 0.10555505752563477 }, { "epoch": 3.780364990234375e-05, "model_forward_time": 0.025646448135375977, "step": 24775 }, { "epoch": 3.780364990234375e-05, "step": 24775, "training_step_time": 0.1038515567779541 }, { "epoch": 3.780517578125e-05, "model_forward_time": 0.02525782585144043, "step": 24776 }, { "epoch": 3.780517578125e-05, "step": 24776, "training_step_time": 0.11771941184997559 }, { "epoch": 3.780670166015625e-05, "model_forward_time": 0.02422308921813965, "step": 24777 }, { "epoch": 3.780670166015625e-05, "step": 24777, "training_step_time": 0.16733479499816895 }, { "epoch": 3.78082275390625e-05, "model_forward_time": 0.02463507652282715, "step": 24778 }, { "epoch": 3.78082275390625e-05, "step": 24778, "training_step_time": 0.17283844947814941 }, { "epoch": 3.780975341796875e-05, "model_forward_time": 0.024823904037475586, "step": 24779 }, { "epoch": 3.780975341796875e-05, "step": 24779, "training_step_time": 0.13217663764953613 }, { "epoch": 3.7811279296875e-05, "grad_norm": 0.19120188057422638, "learning_rate": 8.051468902539272e-06, "loss": 0.0061, "step": 24780 }, { "epoch": 3.7811279296875e-05, "model_forward_time": 0.02465653419494629, "step": 24780 }, { "epoch": 3.7811279296875e-05, "step": 24780, "training_step_time": 0.11967062950134277 }, { "epoch": 3.781280517578125e-05, "model_forward_time": 0.025326967239379883, "step": 24781 }, { "epoch": 3.781280517578125e-05, "step": 24781, "training_step_time": 0.18398427963256836 }, { "epoch": 3.78143310546875e-05, "model_forward_time": 0.024626731872558594, "step": 24782 }, { "epoch": 3.78143310546875e-05, "step": 24782, "training_step_time": 0.1132512092590332 }, { "epoch": 3.781585693359375e-05, "model_forward_time": 0.025081634521484375, "step": 24783 }, { "epoch": 3.781585693359375e-05, "step": 24783, "training_step_time": 0.10939168930053711 }, { "epoch": 3.78173828125e-05, "model_forward_time": 0.025606393814086914, "step": 24784 }, { "epoch": 3.78173828125e-05, "step": 24784, "training_step_time": 0.1085500717163086 }, { "epoch": 3.781890869140625e-05, "model_forward_time": 0.024557113647460938, "step": 24785 }, { "epoch": 3.781890869140625e-05, "step": 24785, "training_step_time": 0.11060738563537598 }, { "epoch": 3.78204345703125e-05, "model_forward_time": 0.024101734161376953, "step": 24786 }, { "epoch": 3.78204345703125e-05, "step": 24786, "training_step_time": 0.10655021667480469 }, { "epoch": 3.782196044921875e-05, "model_forward_time": 0.025232315063476562, "step": 24787 }, { "epoch": 3.782196044921875e-05, "step": 24787, "training_step_time": 0.10672950744628906 }, { "epoch": 3.7823486328125e-05, "model_forward_time": 0.025208234786987305, "step": 24788 }, { "epoch": 3.7823486328125e-05, "step": 24788, "training_step_time": 0.10477900505065918 }, { "epoch": 3.782501220703125e-05, "model_forward_time": 0.024916887283325195, "step": 24789 }, { "epoch": 3.782501220703125e-05, "step": 24789, "training_step_time": 0.10869503021240234 }, { "epoch": 3.78265380859375e-05, "grad_norm": 0.0718858391046524, "learning_rate": 8.021501735282266e-06, "loss": 0.0065, "step": 24790 }, { "epoch": 3.78265380859375e-05, "model_forward_time": 0.025175809860229492, "step": 24790 }, { "epoch": 3.78265380859375e-05, "step": 24790, "training_step_time": 0.10774946212768555 }, { "epoch": 3.782806396484375e-05, "model_forward_time": 0.02553534507751465, "step": 24791 }, { "epoch": 3.782806396484375e-05, "step": 24791, "training_step_time": 0.10527801513671875 }, { "epoch": 3.782958984375e-05, "model_forward_time": 0.025774002075195312, "step": 24792 }, { "epoch": 3.782958984375e-05, "step": 24792, "training_step_time": 0.10697340965270996 }, { "epoch": 3.783111572265625e-05, "model_forward_time": 0.0253450870513916, "step": 24793 }, { "epoch": 3.783111572265625e-05, "step": 24793, "training_step_time": 0.10596108436584473 }, { "epoch": 3.78326416015625e-05, "model_forward_time": 0.025180339813232422, "step": 24794 }, { "epoch": 3.78326416015625e-05, "step": 24794, "training_step_time": 0.10549402236938477 }, { "epoch": 3.783416748046875e-05, "model_forward_time": 0.025147438049316406, "step": 24795 }, { "epoch": 3.783416748046875e-05, "step": 24795, "training_step_time": 0.10689115524291992 }, { "epoch": 3.7835693359375e-05, "model_forward_time": 0.02551126480102539, "step": 24796 }, { "epoch": 3.7835693359375e-05, "step": 24796, "training_step_time": 0.1756894588470459 }, { "epoch": 3.783721923828125e-05, "model_forward_time": 0.024276256561279297, "step": 24797 }, { "epoch": 3.783721923828125e-05, "step": 24797, "training_step_time": 0.11551117897033691 }, { "epoch": 3.78387451171875e-05, "model_forward_time": 0.024549245834350586, "step": 24798 }, { "epoch": 3.78387451171875e-05, "step": 24798, "training_step_time": 0.13073468208312988 }, { "epoch": 3.784027099609375e-05, "model_forward_time": 0.024800539016723633, "step": 24799 }, { "epoch": 3.784027099609375e-05, "step": 24799, "training_step_time": 0.15263056755065918 }, { "epoch": 3.7841796875e-05, "grad_norm": 0.09488678723573685, "learning_rate": 7.991585575858961e-06, "loss": 0.0029, "step": 24800 }, { "epoch": 3.7841796875e-05, "model_forward_time": 0.024598121643066406, "step": 24800 }, { "epoch": 3.7841796875e-05, "step": 24800, "training_step_time": 0.11114120483398438 }, { "epoch": 3.784332275390625e-05, "model_forward_time": 0.024631500244140625, "step": 24801 }, { "epoch": 3.784332275390625e-05, "step": 24801, "training_step_time": 0.11336207389831543 }, { "epoch": 3.78448486328125e-05, "model_forward_time": 0.025830745697021484, "step": 24802 }, { "epoch": 3.78448486328125e-05, "step": 24802, "training_step_time": 0.11868858337402344 }, { "epoch": 3.784637451171875e-05, "model_forward_time": 0.02573108673095703, "step": 24803 }, { "epoch": 3.784637451171875e-05, "step": 24803, "training_step_time": 0.10431504249572754 }, { "epoch": 3.7847900390625e-05, "model_forward_time": 0.025104284286499023, "step": 24804 }, { "epoch": 3.7847900390625e-05, "step": 24804, "training_step_time": 0.10731983184814453 }, { "epoch": 3.784942626953125e-05, "model_forward_time": 0.025099992752075195, "step": 24805 }, { "epoch": 3.784942626953125e-05, "step": 24805, "training_step_time": 0.10547733306884766 }, { "epoch": 3.78509521484375e-05, "model_forward_time": 0.02505350112915039, "step": 24806 }, { "epoch": 3.78509521484375e-05, "step": 24806, "training_step_time": 0.10595989227294922 }, { "epoch": 3.785247802734375e-05, "model_forward_time": 0.0246126651763916, "step": 24807 }, { "epoch": 3.785247802734375e-05, "step": 24807, "training_step_time": 0.10457277297973633 }, { "epoch": 3.785400390625e-05, "model_forward_time": 0.02515435218811035, "step": 24808 }, { "epoch": 3.785400390625e-05, "step": 24808, "training_step_time": 0.12071609497070312 }, { "epoch": 3.785552978515625e-05, "model_forward_time": 0.025447368621826172, "step": 24809 }, { "epoch": 3.785552978515625e-05, "step": 24809, "training_step_time": 0.14785480499267578 }, { "epoch": 3.78570556640625e-05, "grad_norm": 0.1033230870962143, "learning_rate": 7.96172046062032e-06, "loss": 0.0049, "step": 24810 }, { "epoch": 3.78570556640625e-05, "model_forward_time": 0.024608850479125977, "step": 24810 }, { "epoch": 3.78570556640625e-05, "step": 24810, "training_step_time": 0.17712163925170898 }, { "epoch": 3.785858154296875e-05, "model_forward_time": 0.025165557861328125, "step": 24811 }, { "epoch": 3.785858154296875e-05, "step": 24811, "training_step_time": 0.17335987091064453 }, { "epoch": 3.7860107421875e-05, "model_forward_time": 0.024616241455078125, "step": 24812 }, { "epoch": 3.7860107421875e-05, "step": 24812, "training_step_time": 0.19646239280700684 }, { "epoch": 3.786163330078125e-05, "model_forward_time": 0.02452993392944336, "step": 24813 }, { "epoch": 3.786163330078125e-05, "step": 24813, "training_step_time": 0.13982915878295898 }, { "epoch": 3.78631591796875e-05, "model_forward_time": 0.025112152099609375, "step": 24814 }, { "epoch": 3.78631591796875e-05, "step": 24814, "training_step_time": 0.2377030849456787 }, { "epoch": 3.786468505859375e-05, "model_forward_time": 0.02540302276611328, "step": 24815 }, { "epoch": 3.786468505859375e-05, "step": 24815, "training_step_time": 0.10286259651184082 }, { "epoch": 3.78662109375e-05, "model_forward_time": 0.024864673614501953, "step": 24816 }, { "epoch": 3.78662109375e-05, "step": 24816, "training_step_time": 0.10845470428466797 }, { "epoch": 3.786773681640625e-05, "model_forward_time": 0.025322675704956055, "step": 24817 }, { "epoch": 3.786773681640625e-05, "step": 24817, "training_step_time": 0.10558915138244629 }, { "epoch": 3.78692626953125e-05, "model_forward_time": 0.025572776794433594, "step": 24818 }, { "epoch": 3.78692626953125e-05, "step": 24818, "training_step_time": 0.10982322692871094 }, { "epoch": 3.787078857421875e-05, "model_forward_time": 0.025274038314819336, "step": 24819 }, { "epoch": 3.787078857421875e-05, "step": 24819, "training_step_time": 0.11100578308105469 }, { "epoch": 3.7872314453125e-05, "grad_norm": 0.10404244065284729, "learning_rate": 7.931906425855268e-06, "loss": 0.0054, "step": 24820 }, { "epoch": 3.7872314453125e-05, "model_forward_time": 0.02564859390258789, "step": 24820 }, { "epoch": 3.7872314453125e-05, "step": 24820, "training_step_time": 0.1286334991455078 }, { "epoch": 3.787384033203125e-05, "model_forward_time": 0.025827407836914062, "step": 24821 }, { "epoch": 3.787384033203125e-05, "step": 24821, "training_step_time": 0.11369776725769043 }, { "epoch": 3.78753662109375e-05, "model_forward_time": 0.024773597717285156, "step": 24822 }, { "epoch": 3.78753662109375e-05, "step": 24822, "training_step_time": 0.15262794494628906 }, { "epoch": 3.787689208984375e-05, "model_forward_time": 0.02492666244506836, "step": 24823 }, { "epoch": 3.787689208984375e-05, "step": 24823, "training_step_time": 0.15572428703308105 }, { "epoch": 3.787841796875e-05, "model_forward_time": 0.024875640869140625, "step": 24824 }, { "epoch": 3.787841796875e-05, "step": 24824, "training_step_time": 0.1204080581665039 }, { "epoch": 3.787994384765625e-05, "model_forward_time": 0.024516582489013672, "step": 24825 }, { "epoch": 3.787994384765625e-05, "step": 24825, "training_step_time": 0.11985611915588379 }, { "epoch": 3.78814697265625e-05, "model_forward_time": 0.025489330291748047, "step": 24826 }, { "epoch": 3.78814697265625e-05, "step": 24826, "training_step_time": 0.19942951202392578 }, { "epoch": 3.788299560546875e-05, "model_forward_time": 0.024477005004882812, "step": 24827 }, { "epoch": 3.788299560546875e-05, "step": 24827, "training_step_time": 0.10553121566772461 }, { "epoch": 3.7884521484375e-05, "model_forward_time": 0.024403810501098633, "step": 24828 }, { "epoch": 3.7884521484375e-05, "step": 24828, "training_step_time": 0.10562300682067871 }, { "epoch": 3.788604736328125e-05, "model_forward_time": 0.025500059127807617, "step": 24829 }, { "epoch": 3.788604736328125e-05, "step": 24829, "training_step_time": 0.11027669906616211 }, { "epoch": 3.78875732421875e-05, "grad_norm": 0.07293904572725296, "learning_rate": 7.902143507790661e-06, "loss": 0.0077, "step": 24830 }, { "epoch": 3.78875732421875e-05, "model_forward_time": 0.02542424201965332, "step": 24830 }, { "epoch": 3.78875732421875e-05, "step": 24830, "training_step_time": 0.10589122772216797 }, { "epoch": 3.788909912109375e-05, "model_forward_time": 0.025682687759399414, "step": 24831 }, { "epoch": 3.788909912109375e-05, "step": 24831, "training_step_time": 0.10745906829833984 }, { "epoch": 3.7890625e-05, "model_forward_time": 0.025574207305908203, "step": 24832 }, { "epoch": 3.7890625e-05, "step": 24832, "training_step_time": 0.10569071769714355 }, { "epoch": 3.789215087890625e-05, "model_forward_time": 0.025380849838256836, "step": 24833 }, { "epoch": 3.789215087890625e-05, "step": 24833, "training_step_time": 0.10640430450439453 }, { "epoch": 3.78936767578125e-05, "model_forward_time": 0.02699446678161621, "step": 24834 }, { "epoch": 3.78936767578125e-05, "step": 24834, "training_step_time": 0.10876202583312988 }, { "epoch": 3.789520263671875e-05, "model_forward_time": 0.0253145694732666, "step": 24835 }, { "epoch": 3.789520263671875e-05, "step": 24835, "training_step_time": 0.10648202896118164 }, { "epoch": 3.7896728515625e-05, "model_forward_time": 0.025691986083984375, "step": 24836 }, { "epoch": 3.7896728515625e-05, "step": 24836, "training_step_time": 0.10921978950500488 }, { "epoch": 3.789825439453125e-05, "model_forward_time": 0.025574922561645508, "step": 24837 }, { "epoch": 3.789825439453125e-05, "step": 24837, "training_step_time": 0.10442423820495605 }, { "epoch": 3.78997802734375e-05, "model_forward_time": 0.02602386474609375, "step": 24838 }, { "epoch": 3.78997802734375e-05, "step": 24838, "training_step_time": 0.10700273513793945 }, { "epoch": 3.790130615234375e-05, "model_forward_time": 0.025583982467651367, "step": 24839 }, { "epoch": 3.790130615234375e-05, "step": 24839, "training_step_time": 0.1078481674194336 }, { "epoch": 3.790283203125e-05, "grad_norm": 0.2304941564798355, "learning_rate": 7.872431742591268e-06, "loss": 0.0055, "step": 24840 }, { "epoch": 3.790283203125e-05, "model_forward_time": 0.025130748748779297, "step": 24840 }, { "epoch": 3.790283203125e-05, "step": 24840, "training_step_time": 0.10458827018737793 }, { "epoch": 3.790435791015625e-05, "model_forward_time": 0.025232791900634766, "step": 24841 }, { "epoch": 3.790435791015625e-05, "step": 24841, "training_step_time": 0.16073036193847656 }, { "epoch": 3.79058837890625e-05, "model_forward_time": 0.02534031867980957, "step": 24842 }, { "epoch": 3.79058837890625e-05, "step": 24842, "training_step_time": 0.12326836585998535 }, { "epoch": 3.790740966796875e-05, "model_forward_time": 0.024576425552368164, "step": 24843 }, { "epoch": 3.790740966796875e-05, "step": 24843, "training_step_time": 0.10553526878356934 }, { "epoch": 3.7908935546875e-05, "model_forward_time": 0.025333642959594727, "step": 24844 }, { "epoch": 3.7908935546875e-05, "step": 24844, "training_step_time": 0.1187744140625 }, { "epoch": 3.791046142578125e-05, "model_forward_time": 0.025060653686523438, "step": 24845 }, { "epoch": 3.791046142578125e-05, "step": 24845, "training_step_time": 0.17016363143920898 }, { "epoch": 3.79119873046875e-05, "model_forward_time": 0.02486729621887207, "step": 24846 }, { "epoch": 3.79119873046875e-05, "step": 24846, "training_step_time": 0.12769484519958496 }, { "epoch": 3.791351318359375e-05, "model_forward_time": 0.024884939193725586, "step": 24847 }, { "epoch": 3.791351318359375e-05, "step": 24847, "training_step_time": 0.10581326484680176 }, { "epoch": 3.79150390625e-05, "model_forward_time": 0.025372743606567383, "step": 24848 }, { "epoch": 3.79150390625e-05, "step": 24848, "training_step_time": 0.10676956176757812 }, { "epoch": 3.791656494140625e-05, "model_forward_time": 0.025522708892822266, "step": 24849 }, { "epoch": 3.791656494140625e-05, "step": 24849, "training_step_time": 0.10846567153930664 }, { "epoch": 3.79180908203125e-05, "grad_norm": 0.10245202481746674, "learning_rate": 7.842771166359681e-06, "loss": 0.0037, "step": 24850 }, { "epoch": 3.79180908203125e-05, "model_forward_time": 0.025577783584594727, "step": 24850 }, { "epoch": 3.79180908203125e-05, "step": 24850, "training_step_time": 0.1047210693359375 }, { "epoch": 3.791961669921875e-05, "model_forward_time": 0.025493144989013672, "step": 24851 }, { "epoch": 3.791961669921875e-05, "step": 24851, "training_step_time": 0.10752224922180176 }, { "epoch": 3.7921142578125e-05, "model_forward_time": 0.025140762329101562, "step": 24852 }, { "epoch": 3.7921142578125e-05, "step": 24852, "training_step_time": 0.11283659934997559 }, { "epoch": 3.792266845703125e-05, "model_forward_time": 0.025024890899658203, "step": 24853 }, { "epoch": 3.792266845703125e-05, "step": 24853, "training_step_time": 0.15247511863708496 }, { "epoch": 3.79241943359375e-05, "model_forward_time": 0.025197505950927734, "step": 24854 }, { "epoch": 3.79241943359375e-05, "step": 24854, "training_step_time": 0.13590192794799805 }, { "epoch": 3.792572021484375e-05, "model_forward_time": 0.02521491050720215, "step": 24855 }, { "epoch": 3.792572021484375e-05, "step": 24855, "training_step_time": 0.15951108932495117 }, { "epoch": 3.792724609375e-05, "model_forward_time": 0.02447032928466797, "step": 24856 }, { "epoch": 3.792724609375e-05, "step": 24856, "training_step_time": 0.1755228042602539 }, { "epoch": 3.792877197265625e-05, "model_forward_time": 0.024471282958984375, "step": 24857 }, { "epoch": 3.792877197265625e-05, "step": 24857, "training_step_time": 0.17865204811096191 }, { "epoch": 3.79302978515625e-05, "model_forward_time": 0.025138378143310547, "step": 24858 }, { "epoch": 3.79302978515625e-05, "step": 24858, "training_step_time": 0.13460373878479004 }, { "epoch": 3.793182373046875e-05, "model_forward_time": 0.023801088333129883, "step": 24859 }, { "epoch": 3.793182373046875e-05, "step": 24859, "training_step_time": 0.10778236389160156 }, { "epoch": 3.7933349609375e-05, "grad_norm": 0.06984902173280716, "learning_rate": 7.813161815136294e-06, "loss": 0.0047, "step": 24860 }, { "epoch": 3.7933349609375e-05, "model_forward_time": 0.0252077579498291, "step": 24860 }, { "epoch": 3.7933349609375e-05, "step": 24860, "training_step_time": 0.11913013458251953 }, { "epoch": 3.793487548828125e-05, "model_forward_time": 0.025884628295898438, "step": 24861 }, { "epoch": 3.793487548828125e-05, "step": 24861, "training_step_time": 0.11512041091918945 }, { "epoch": 3.79364013671875e-05, "model_forward_time": 0.025649070739746094, "step": 24862 }, { "epoch": 3.79364013671875e-05, "step": 24862, "training_step_time": 0.12096667289733887 }, { "epoch": 3.793792724609375e-05, "model_forward_time": 0.025758981704711914, "step": 24863 }, { "epoch": 3.793792724609375e-05, "step": 24863, "training_step_time": 0.14333343505859375 }, { "epoch": 3.7939453125e-05, "model_forward_time": 0.025099515914916992, "step": 24864 }, { "epoch": 3.7939453125e-05, "step": 24864, "training_step_time": 0.12896156311035156 }, { "epoch": 3.794097900390625e-05, "model_forward_time": 0.02467060089111328, "step": 24865 }, { "epoch": 3.794097900390625e-05, "step": 24865, "training_step_time": 0.1246025562286377 }, { "epoch": 3.79425048828125e-05, "model_forward_time": 0.025817394256591797, "step": 24866 }, { "epoch": 3.79425048828125e-05, "step": 24866, "training_step_time": 0.10906815528869629 }, { "epoch": 3.794403076171875e-05, "model_forward_time": 0.024901390075683594, "step": 24867 }, { "epoch": 3.794403076171875e-05, "step": 24867, "training_step_time": 0.15019822120666504 }, { "epoch": 3.7945556640625e-05, "model_forward_time": 0.025325298309326172, "step": 24868 }, { "epoch": 3.7945556640625e-05, "step": 24868, "training_step_time": 0.1845095157623291 }, { "epoch": 3.794708251953125e-05, "model_forward_time": 0.02500748634338379, "step": 24869 }, { "epoch": 3.794708251953125e-05, "step": 24869, "training_step_time": 0.11902189254760742 }, { "epoch": 3.79486083984375e-05, "grad_norm": 0.3862765431404114, "learning_rate": 7.783603724899257e-06, "loss": 0.0124, "step": 24870 }, { "epoch": 3.79486083984375e-05, "model_forward_time": 0.02477431297302246, "step": 24870 }, { "epoch": 3.79486083984375e-05, "step": 24870, "training_step_time": 0.11454105377197266 }, { "epoch": 3.795013427734375e-05, "model_forward_time": 0.025516510009765625, "step": 24871 }, { "epoch": 3.795013427734375e-05, "step": 24871, "training_step_time": 0.11250638961791992 }, { "epoch": 3.795166015625e-05, "model_forward_time": 0.02574324607849121, "step": 24872 }, { "epoch": 3.795166015625e-05, "step": 24872, "training_step_time": 0.121307373046875 }, { "epoch": 3.795318603515625e-05, "model_forward_time": 0.027637243270874023, "step": 24873 }, { "epoch": 3.795318603515625e-05, "step": 24873, "training_step_time": 0.110382080078125 }, { "epoch": 3.79547119140625e-05, "model_forward_time": 0.02527165412902832, "step": 24874 }, { "epoch": 3.79547119140625e-05, "step": 24874, "training_step_time": 0.10771608352661133 }, { "epoch": 3.795623779296875e-05, "model_forward_time": 0.025479793548583984, "step": 24875 }, { "epoch": 3.795623779296875e-05, "step": 24875, "training_step_time": 0.11657094955444336 }, { "epoch": 3.7957763671875e-05, "model_forward_time": 0.025415897369384766, "step": 24876 }, { "epoch": 3.7957763671875e-05, "step": 24876, "training_step_time": 0.10892415046691895 }, { "epoch": 3.795928955078125e-05, "model_forward_time": 0.0256345272064209, "step": 24877 }, { "epoch": 3.795928955078125e-05, "step": 24877, "training_step_time": 0.10950374603271484 }, { "epoch": 3.79608154296875e-05, "model_forward_time": 0.02517533302307129, "step": 24878 }, { "epoch": 3.79608154296875e-05, "step": 24878, "training_step_time": 0.10645461082458496 }, { "epoch": 3.796234130859375e-05, "model_forward_time": 0.025511741638183594, "step": 24879 }, { "epoch": 3.796234130859375e-05, "step": 24879, "training_step_time": 0.10791182518005371 }, { "epoch": 3.79638671875e-05, "grad_norm": 0.10975080728530884, "learning_rate": 7.754096931564431e-06, "loss": 0.0035, "step": 24880 }, { "epoch": 3.79638671875e-05, "model_forward_time": 0.025007247924804688, "step": 24880 }, { "epoch": 3.79638671875e-05, "step": 24880, "training_step_time": 0.10908365249633789 }, { "epoch": 3.796539306640625e-05, "model_forward_time": 0.025380611419677734, "step": 24881 }, { "epoch": 3.796539306640625e-05, "step": 24881, "training_step_time": 0.10821056365966797 }, { "epoch": 3.79669189453125e-05, "model_forward_time": 0.025385379791259766, "step": 24882 }, { "epoch": 3.79669189453125e-05, "step": 24882, "training_step_time": 0.10691094398498535 }, { "epoch": 3.796844482421875e-05, "model_forward_time": 0.025415658950805664, "step": 24883 }, { "epoch": 3.796844482421875e-05, "step": 24883, "training_step_time": 0.10950255393981934 }, { "epoch": 3.7969970703125e-05, "model_forward_time": 0.02545762062072754, "step": 24884 }, { "epoch": 3.7969970703125e-05, "step": 24884, "training_step_time": 0.10799217224121094 }, { "epoch": 3.797149658203125e-05, "model_forward_time": 0.025293588638305664, "step": 24885 }, { "epoch": 3.797149658203125e-05, "step": 24885, "training_step_time": 0.107025146484375 }, { "epoch": 3.79730224609375e-05, "model_forward_time": 0.02547144889831543, "step": 24886 }, { "epoch": 3.79730224609375e-05, "step": 24886, "training_step_time": 0.17642974853515625 }, { "epoch": 3.797454833984375e-05, "model_forward_time": 0.025175809860229492, "step": 24887 }, { "epoch": 3.797454833984375e-05, "step": 24887, "training_step_time": 0.12482285499572754 }, { "epoch": 3.797607421875e-05, "model_forward_time": 0.025087833404541016, "step": 24888 }, { "epoch": 3.797607421875e-05, "step": 24888, "training_step_time": 0.1306302547454834 }, { "epoch": 3.797760009765625e-05, "model_forward_time": 0.02541947364807129, "step": 24889 }, { "epoch": 3.797760009765625e-05, "step": 24889, "training_step_time": 0.10664749145507812 }, { "epoch": 3.79791259765625e-05, "grad_norm": 0.14474695920944214, "learning_rate": 7.724641470985378e-06, "loss": 0.0063, "step": 24890 }, { "epoch": 3.79791259765625e-05, "model_forward_time": 0.02539348602294922, "step": 24890 }, { "epoch": 3.79791259765625e-05, "step": 24890, "training_step_time": 0.15142178535461426 }, { "epoch": 3.798065185546875e-05, "model_forward_time": 0.024908781051635742, "step": 24891 }, { "epoch": 3.798065185546875e-05, "step": 24891, "training_step_time": 0.12293601036071777 }, { "epoch": 3.7982177734375e-05, "model_forward_time": 0.024709463119506836, "step": 24892 }, { "epoch": 3.7982177734375e-05, "step": 24892, "training_step_time": 0.11086010932922363 }, { "epoch": 3.798370361328125e-05, "model_forward_time": 0.025502920150756836, "step": 24893 }, { "epoch": 3.798370361328125e-05, "step": 24893, "training_step_time": 0.10486412048339844 }, { "epoch": 3.79852294921875e-05, "model_forward_time": 0.025340557098388672, "step": 24894 }, { "epoch": 3.79852294921875e-05, "step": 24894, "training_step_time": 0.10611915588378906 }, { "epoch": 3.798675537109375e-05, "model_forward_time": 0.025301694869995117, "step": 24895 }, { "epoch": 3.798675537109375e-05, "step": 24895, "training_step_time": 0.10987544059753418 }, { "epoch": 3.798828125e-05, "model_forward_time": 0.02521967887878418, "step": 24896 }, { "epoch": 3.798828125e-05, "step": 24896, "training_step_time": 0.10521864891052246 }, { "epoch": 3.798980712890625e-05, "model_forward_time": 0.025878190994262695, "step": 24897 }, { "epoch": 3.798980712890625e-05, "step": 24897, "training_step_time": 0.11138153076171875 }, { "epoch": 3.79913330078125e-05, "model_forward_time": 0.0255584716796875, "step": 24898 }, { "epoch": 3.79913330078125e-05, "step": 24898, "training_step_time": 0.14584064483642578 }, { "epoch": 3.799285888671875e-05, "model_forward_time": 0.025267362594604492, "step": 24899 }, { "epoch": 3.799285888671875e-05, "step": 24899, "training_step_time": 0.14218854904174805 }, { "epoch": 3.7994384765625e-05, "grad_norm": 0.14371325075626373, "learning_rate": 7.695237378953223e-06, "loss": 0.0077, "step": 24900 }, { "epoch": 3.7994384765625e-05, "model_forward_time": 0.02547931671142578, "step": 24900 }, { "epoch": 3.7994384765625e-05, "step": 24900, "training_step_time": 0.14834284782409668 }, { "epoch": 3.799591064453125e-05, "model_forward_time": 0.024954795837402344, "step": 24901 }, { "epoch": 3.799591064453125e-05, "step": 24901, "training_step_time": 0.20576214790344238 }, { "epoch": 3.79974365234375e-05, "model_forward_time": 0.024922847747802734, "step": 24902 }, { "epoch": 3.79974365234375e-05, "step": 24902, "training_step_time": 0.2338714599609375 }, { "epoch": 3.799896240234375e-05, "model_forward_time": 0.02437114715576172, "step": 24903 }, { "epoch": 3.799896240234375e-05, "step": 24903, "training_step_time": 0.14308381080627441 }, { "epoch": 3.800048828125e-05, "model_forward_time": 0.024288177490234375, "step": 24904 }, { "epoch": 3.800048828125e-05, "step": 24904, "training_step_time": 0.16642093658447266 }, { "epoch": 3.800201416015625e-05, "model_forward_time": 0.02491283416748047, "step": 24905 }, { "epoch": 3.800201416015625e-05, "step": 24905, "training_step_time": 0.13856101036071777 }, { "epoch": 3.80035400390625e-05, "model_forward_time": 0.024410724639892578, "step": 24906 }, { "epoch": 3.80035400390625e-05, "step": 24906, "training_step_time": 0.10593867301940918 }, { "epoch": 3.800506591796875e-05, "model_forward_time": 0.025135278701782227, "step": 24907 }, { "epoch": 3.800506591796875e-05, "step": 24907, "training_step_time": 0.10287213325500488 }, { "epoch": 3.8006591796875e-05, "model_forward_time": 0.025495290756225586, "step": 24908 }, { "epoch": 3.8006591796875e-05, "step": 24908, "training_step_time": 0.10518527030944824 }, { "epoch": 3.800811767578125e-05, "model_forward_time": 0.025587797164916992, "step": 24909 }, { "epoch": 3.800811767578125e-05, "step": 24909, "training_step_time": 0.10477018356323242 }, { "epoch": 3.80096435546875e-05, "grad_norm": 0.06774347275495529, "learning_rate": 7.66588469119675e-06, "loss": 0.0034, "step": 24910 }, { "epoch": 3.80096435546875e-05, "model_forward_time": 0.025551795959472656, "step": 24910 }, { "epoch": 3.80096435546875e-05, "step": 24910, "training_step_time": 0.10431957244873047 }, { "epoch": 3.801116943359375e-05, "model_forward_time": 0.025641918182373047, "step": 24911 }, { "epoch": 3.801116943359375e-05, "step": 24911, "training_step_time": 0.10320639610290527 }, { "epoch": 3.80126953125e-05, "model_forward_time": 0.025409221649169922, "step": 24912 }, { "epoch": 3.80126953125e-05, "step": 24912, "training_step_time": 0.1884145736694336 }, { "epoch": 3.801422119140625e-05, "model_forward_time": 0.025036334991455078, "step": 24913 }, { "epoch": 3.801422119140625e-05, "step": 24913, "training_step_time": 0.10158181190490723 }, { "epoch": 3.80157470703125e-05, "model_forward_time": 0.024612903594970703, "step": 24914 }, { "epoch": 3.80157470703125e-05, "step": 24914, "training_step_time": 0.10265064239501953 }, { "epoch": 3.801727294921875e-05, "model_forward_time": 0.02495288848876953, "step": 24915 }, { "epoch": 3.801727294921875e-05, "step": 24915, "training_step_time": 0.1779017448425293 }, { "epoch": 3.8018798828125e-05, "model_forward_time": 0.025072097778320312, "step": 24916 }, { "epoch": 3.8018798828125e-05, "step": 24916, "training_step_time": 0.10364437103271484 }, { "epoch": 3.802032470703125e-05, "model_forward_time": 0.02512192726135254, "step": 24917 }, { "epoch": 3.802032470703125e-05, "step": 24917, "training_step_time": 0.102691650390625 }, { "epoch": 3.80218505859375e-05, "model_forward_time": 0.024658203125, "step": 24918 }, { "epoch": 3.80218505859375e-05, "step": 24918, "training_step_time": 0.10444974899291992 }, { "epoch": 3.802337646484375e-05, "model_forward_time": 0.024409055709838867, "step": 24919 }, { "epoch": 3.802337646484375e-05, "step": 24919, "training_step_time": 0.10645794868469238 }, { "epoch": 3.802490234375e-05, "grad_norm": 0.06481382250785828, "learning_rate": 7.636583443382223e-06, "loss": 0.0039, "step": 24920 }, { "epoch": 3.802490234375e-05, "model_forward_time": 0.025545358657836914, "step": 24920 }, { "epoch": 3.802490234375e-05, "step": 24920, "training_step_time": 0.12274575233459473 }, { "epoch": 3.802642822265625e-05, "model_forward_time": 0.02527475357055664, "step": 24921 }, { "epoch": 3.802642822265625e-05, "step": 24921, "training_step_time": 0.16277265548706055 }, { "epoch": 3.80279541015625e-05, "model_forward_time": 0.024474620819091797, "step": 24922 }, { "epoch": 3.80279541015625e-05, "step": 24922, "training_step_time": 0.1651322841644287 }, { "epoch": 3.802947998046875e-05, "model_forward_time": 0.02408003807067871, "step": 24923 }, { "epoch": 3.802947998046875e-05, "step": 24923, "training_step_time": 0.15870404243469238 }, { "epoch": 3.8031005859375e-05, "model_forward_time": 0.024092435836791992, "step": 24924 }, { "epoch": 3.8031005859375e-05, "step": 24924, "training_step_time": 0.14549851417541504 }, { "epoch": 3.803253173828125e-05, "model_forward_time": 0.024658679962158203, "step": 24925 }, { "epoch": 3.803253173828125e-05, "step": 24925, "training_step_time": 0.14201974868774414 }, { "epoch": 3.80340576171875e-05, "model_forward_time": 0.024497270584106445, "step": 24926 }, { "epoch": 3.80340576171875e-05, "step": 24926, "training_step_time": 0.12845873832702637 }, { "epoch": 3.803558349609375e-05, "model_forward_time": 0.024453401565551758, "step": 24927 }, { "epoch": 3.803558349609375e-05, "step": 24927, "training_step_time": 0.12379312515258789 }, { "epoch": 3.8037109375e-05, "model_forward_time": 0.02467799186706543, "step": 24928 }, { "epoch": 3.8037109375e-05, "step": 24928, "training_step_time": 0.11667633056640625 }, { "epoch": 3.803863525390625e-05, "model_forward_time": 0.02508091926574707, "step": 24929 }, { "epoch": 3.803863525390625e-05, "step": 24929, "training_step_time": 0.17278504371643066 }, { "epoch": 3.80401611328125e-05, "grad_norm": 0.06115950644016266, "learning_rate": 7.607333671113409e-06, "loss": 0.0036, "step": 24930 }, { "epoch": 3.80401611328125e-05, "model_forward_time": 0.02344369888305664, "step": 24930 }, { "epoch": 3.80401611328125e-05, "step": 24930, "training_step_time": 0.11387872695922852 }, { "epoch": 3.804168701171875e-05, "model_forward_time": 0.02404499053955078, "step": 24931 }, { "epoch": 3.804168701171875e-05, "step": 24931, "training_step_time": 0.13034701347351074 }, { "epoch": 3.8043212890625e-05, "model_forward_time": 0.025043249130249023, "step": 24932 }, { "epoch": 3.8043212890625e-05, "step": 24932, "training_step_time": 0.1595907211303711 }, { "epoch": 3.804473876953125e-05, "model_forward_time": 0.024644136428833008, "step": 24933 }, { "epoch": 3.804473876953125e-05, "step": 24933, "training_step_time": 0.21171975135803223 }, { "epoch": 3.80462646484375e-05, "model_forward_time": 0.02386307716369629, "step": 24934 }, { "epoch": 3.80462646484375e-05, "step": 24934, "training_step_time": 0.128248929977417 }, { "epoch": 3.804779052734375e-05, "model_forward_time": 0.024129629135131836, "step": 24935 }, { "epoch": 3.804779052734375e-05, "step": 24935, "training_step_time": 0.10307908058166504 }, { "epoch": 3.804931640625e-05, "model_forward_time": 0.025177001953125, "step": 24936 }, { "epoch": 3.804931640625e-05, "step": 24936, "training_step_time": 0.10569930076599121 }, { "epoch": 3.805084228515625e-05, "model_forward_time": 0.025166034698486328, "step": 24937 }, { "epoch": 3.805084228515625e-05, "step": 24937, "training_step_time": 0.10778021812438965 }, { "epoch": 3.80523681640625e-05, "model_forward_time": 0.02506113052368164, "step": 24938 }, { "epoch": 3.80523681640625e-05, "step": 24938, "training_step_time": 0.10543990135192871 }, { "epoch": 3.805389404296875e-05, "model_forward_time": 0.025112152099609375, "step": 24939 }, { "epoch": 3.805389404296875e-05, "step": 24939, "training_step_time": 0.10551738739013672 }, { "epoch": 3.8055419921875e-05, "grad_norm": 0.08124295622110367, "learning_rate": 7.578135409931558e-06, "loss": 0.0064, "step": 24940 }, { "epoch": 3.8055419921875e-05, "model_forward_time": 0.02514934539794922, "step": 24940 }, { "epoch": 3.8055419921875e-05, "step": 24940, "training_step_time": 0.18249154090881348 }, { "epoch": 3.805694580078125e-05, "model_forward_time": 0.024329662322998047, "step": 24941 }, { "epoch": 3.805694580078125e-05, "step": 24941, "training_step_time": 0.12040019035339355 }, { "epoch": 3.80584716796875e-05, "model_forward_time": 0.02434563636779785, "step": 24942 }, { "epoch": 3.80584716796875e-05, "step": 24942, "training_step_time": 0.10678720474243164 }, { "epoch": 3.805999755859375e-05, "model_forward_time": 0.025267362594604492, "step": 24943 }, { "epoch": 3.805999755859375e-05, "step": 24943, "training_step_time": 0.19119977951049805 }, { "epoch": 3.80615234375e-05, "model_forward_time": 0.02421402931213379, "step": 24944 }, { "epoch": 3.80615234375e-05, "step": 24944, "training_step_time": 0.14198565483093262 }, { "epoch": 3.806304931640625e-05, "model_forward_time": 0.024152517318725586, "step": 24945 }, { "epoch": 3.806304931640625e-05, "step": 24945, "training_step_time": 0.2109529972076416 }, { "epoch": 3.80645751953125e-05, "model_forward_time": 0.024147987365722656, "step": 24946 }, { "epoch": 3.80645751953125e-05, "step": 24946, "training_step_time": 0.12692499160766602 }, { "epoch": 3.806610107421875e-05, "model_forward_time": 0.02456974983215332, "step": 24947 }, { "epoch": 3.806610107421875e-05, "step": 24947, "training_step_time": 0.11321187019348145 }, { "epoch": 3.8067626953125e-05, "model_forward_time": 0.02545475959777832, "step": 24948 }, { "epoch": 3.8067626953125e-05, "step": 24948, "training_step_time": 0.11681842803955078 }, { "epoch": 3.806915283203125e-05, "model_forward_time": 0.02518749237060547, "step": 24949 }, { "epoch": 3.806915283203125e-05, "step": 24949, "training_step_time": 0.10446286201477051 }, { "epoch": 3.80706787109375e-05, "grad_norm": 0.28254634141921997, "learning_rate": 7.5489886953153125e-06, "loss": 0.0096, "step": 24950 }, { "epoch": 3.80706787109375e-05, "model_forward_time": 0.02513575553894043, "step": 24950 }, { "epoch": 3.80706787109375e-05, "step": 24950, "training_step_time": 0.1038975715637207 }, { "epoch": 3.807220458984375e-05, "model_forward_time": 0.025346040725708008, "step": 24951 }, { "epoch": 3.807220458984375e-05, "step": 24951, "training_step_time": 0.11147212982177734 }, { "epoch": 3.807373046875e-05, "model_forward_time": 0.024932384490966797, "step": 24952 }, { "epoch": 3.807373046875e-05, "step": 24952, "training_step_time": 0.11800408363342285 }, { "epoch": 3.807525634765625e-05, "model_forward_time": 0.025098323822021484, "step": 24953 }, { "epoch": 3.807525634765625e-05, "step": 24953, "training_step_time": 0.10401368141174316 }, { "epoch": 3.80767822265625e-05, "model_forward_time": 0.024615049362182617, "step": 24954 }, { "epoch": 3.80767822265625e-05, "step": 24954, "training_step_time": 0.15782952308654785 }, { "epoch": 3.807830810546875e-05, "model_forward_time": 0.024782896041870117, "step": 24955 }, { "epoch": 3.807830810546875e-05, "step": 24955, "training_step_time": 0.16702771186828613 }, { "epoch": 3.8079833984375e-05, "model_forward_time": 0.024484872817993164, "step": 24956 }, { "epoch": 3.8079833984375e-05, "step": 24956, "training_step_time": 0.10866475105285645 }, { "epoch": 3.808135986328125e-05, "model_forward_time": 0.02648019790649414, "step": 24957 }, { "epoch": 3.808135986328125e-05, "step": 24957, "training_step_time": 0.1665642261505127 }, { "epoch": 3.80828857421875e-05, "model_forward_time": 0.02428150177001953, "step": 24958 }, { "epoch": 3.80828857421875e-05, "step": 24958, "training_step_time": 0.16640949249267578 }, { "epoch": 3.808441162109375e-05, "model_forward_time": 0.02436995506286621, "step": 24959 }, { "epoch": 3.808441162109375e-05, "step": 24959, "training_step_time": 0.10438036918640137 }, { "epoch": 3.80859375e-05, "grad_norm": 0.4482848048210144, "learning_rate": 7.519893562680663e-06, "loss": 0.005, "step": 24960 }, { "epoch": 3.80859375e-05, "model_forward_time": 0.024839401245117188, "step": 24960 }, { "epoch": 3.80859375e-05, "step": 24960, "training_step_time": 0.10334157943725586 }, { "epoch": 3.808746337890625e-05, "model_forward_time": 0.025321245193481445, "step": 24961 }, { "epoch": 3.808746337890625e-05, "step": 24961, "training_step_time": 0.11086511611938477 }, { "epoch": 3.80889892578125e-05, "model_forward_time": 0.025043725967407227, "step": 24962 }, { "epoch": 3.80889892578125e-05, "step": 24962, "training_step_time": 0.1073462963104248 }, { "epoch": 3.809051513671875e-05, "model_forward_time": 0.025263309478759766, "step": 24963 }, { "epoch": 3.809051513671875e-05, "step": 24963, "training_step_time": 0.10519695281982422 }, { "epoch": 3.8092041015625e-05, "model_forward_time": 0.026035308837890625, "step": 24964 }, { "epoch": 3.8092041015625e-05, "step": 24964, "training_step_time": 0.1066734790802002 }, { "epoch": 3.809356689453125e-05, "model_forward_time": 0.0254669189453125, "step": 24965 }, { "epoch": 3.809356689453125e-05, "step": 24965, "training_step_time": 0.1047217845916748 }, { "epoch": 3.80950927734375e-05, "model_forward_time": 0.025545358657836914, "step": 24966 }, { "epoch": 3.80950927734375e-05, "step": 24966, "training_step_time": 0.10612916946411133 }, { "epoch": 3.809661865234375e-05, "model_forward_time": 0.026499271392822266, "step": 24967 }, { "epoch": 3.809661865234375e-05, "step": 24967, "training_step_time": 0.10616946220397949 }, { "epoch": 3.809814453125e-05, "model_forward_time": 0.02509617805480957, "step": 24968 }, { "epoch": 3.809814453125e-05, "step": 24968, "training_step_time": 0.10578656196594238 }, { "epoch": 3.809967041015625e-05, "model_forward_time": 0.026165008544921875, "step": 24969 }, { "epoch": 3.809967041015625e-05, "step": 24969, "training_step_time": 0.10547590255737305 }, { "epoch": 3.81011962890625e-05, "grad_norm": 0.0776326060295105, "learning_rate": 7.490850047380954e-06, "loss": 0.0056, "step": 24970 }, { "epoch": 3.81011962890625e-05, "model_forward_time": 0.025587797164916992, "step": 24970 }, { "epoch": 3.81011962890625e-05, "step": 24970, "training_step_time": 0.10755610466003418 }, { "epoch": 3.810272216796875e-05, "model_forward_time": 0.024991273880004883, "step": 24971 }, { "epoch": 3.810272216796875e-05, "step": 24971, "training_step_time": 0.10566306114196777 }, { "epoch": 3.8104248046875e-05, "model_forward_time": 0.025124549865722656, "step": 24972 }, { "epoch": 3.8104248046875e-05, "step": 24972, "training_step_time": 0.10666823387145996 }, { "epoch": 3.810577392578125e-05, "model_forward_time": 0.024968624114990234, "step": 24973 }, { "epoch": 3.810577392578125e-05, "step": 24973, "training_step_time": 0.10543656349182129 }, { "epoch": 3.81072998046875e-05, "model_forward_time": 0.02499532699584961, "step": 24974 }, { "epoch": 3.81072998046875e-05, "step": 24974, "training_step_time": 0.1470661163330078 }, { "epoch": 3.810882568359375e-05, "model_forward_time": 0.024958372116088867, "step": 24975 }, { "epoch": 3.810882568359375e-05, "step": 24975, "training_step_time": 0.1149139404296875 }, { "epoch": 3.81103515625e-05, "model_forward_time": 0.024538278579711914, "step": 24976 }, { "epoch": 3.81103515625e-05, "step": 24976, "training_step_time": 0.12495040893554688 }, { "epoch": 3.811187744140625e-05, "model_forward_time": 0.025423288345336914, "step": 24977 }, { "epoch": 3.811187744140625e-05, "step": 24977, "training_step_time": 0.14210271835327148 }, { "epoch": 3.81134033203125e-05, "model_forward_time": 0.025188922882080078, "step": 24978 }, { "epoch": 3.81134033203125e-05, "step": 24978, "training_step_time": 0.11399030685424805 }, { "epoch": 3.811492919921875e-05, "model_forward_time": 0.024793386459350586, "step": 24979 }, { "epoch": 3.811492919921875e-05, "step": 24979, "training_step_time": 0.12336969375610352 }, { "epoch": 3.8116455078125e-05, "grad_norm": 0.24275687336921692, "learning_rate": 7.461858184706777e-06, "loss": 0.004, "step": 24980 }, { "epoch": 3.8116455078125e-05, "model_forward_time": 0.025673866271972656, "step": 24980 }, { "epoch": 3.8116455078125e-05, "step": 24980, "training_step_time": 0.11063647270202637 }, { "epoch": 3.811798095703125e-05, "model_forward_time": 0.025603532791137695, "step": 24981 }, { "epoch": 3.811798095703125e-05, "step": 24981, "training_step_time": 0.10349082946777344 }, { "epoch": 3.81195068359375e-05, "model_forward_time": 0.0253140926361084, "step": 24982 }, { "epoch": 3.81195068359375e-05, "step": 24982, "training_step_time": 0.10505938529968262 }, { "epoch": 3.812103271484375e-05, "model_forward_time": 0.02457404136657715, "step": 24983 }, { "epoch": 3.812103271484375e-05, "step": 24983, "training_step_time": 0.10200023651123047 }, { "epoch": 3.812255859375e-05, "model_forward_time": 0.025217533111572266, "step": 24984 }, { "epoch": 3.812255859375e-05, "step": 24984, "training_step_time": 0.1066584587097168 }, { "epoch": 3.812408447265625e-05, "model_forward_time": 0.025299787521362305, "step": 24985 }, { "epoch": 3.812408447265625e-05, "step": 24985, "training_step_time": 0.10512661933898926 }, { "epoch": 3.81256103515625e-05, "model_forward_time": 0.025567054748535156, "step": 24986 }, { "epoch": 3.81256103515625e-05, "step": 24986, "training_step_time": 0.10823464393615723 }, { "epoch": 3.812713623046875e-05, "model_forward_time": 0.02527451515197754, "step": 24987 }, { "epoch": 3.812713623046875e-05, "step": 24987, "training_step_time": 0.1365799903869629 }, { "epoch": 3.8128662109375e-05, "model_forward_time": 0.02547430992126465, "step": 24988 }, { "epoch": 3.8128662109375e-05, "step": 24988, "training_step_time": 0.15511178970336914 }, { "epoch": 3.813018798828125e-05, "model_forward_time": 0.02442479133605957, "step": 24989 }, { "epoch": 3.813018798828125e-05, "step": 24989, "training_step_time": 0.1694643497467041 }, { "epoch": 3.81317138671875e-05, "grad_norm": 0.14347414672374725, "learning_rate": 7.432918009885997e-06, "loss": 0.0141, "step": 24990 }, { "epoch": 3.81317138671875e-05, "model_forward_time": 0.024274587631225586, "step": 24990 }, { "epoch": 3.81317138671875e-05, "step": 24990, "training_step_time": 0.2005767822265625 }, { "epoch": 3.813323974609375e-05, "model_forward_time": 0.024074077606201172, "step": 24991 }, { "epoch": 3.813323974609375e-05, "step": 24991, "training_step_time": 0.14263916015625 }, { "epoch": 3.8134765625e-05, "model_forward_time": 0.024479389190673828, "step": 24992 }, { "epoch": 3.8134765625e-05, "step": 24992, "training_step_time": 0.20880579948425293 }, { "epoch": 3.813629150390625e-05, "model_forward_time": 0.024613380432128906, "step": 24993 }, { "epoch": 3.813629150390625e-05, "step": 24993, "training_step_time": 0.14316463470458984 }, { "epoch": 3.81378173828125e-05, "model_forward_time": 0.024414777755737305, "step": 24994 }, { "epoch": 3.81378173828125e-05, "step": 24994, "training_step_time": 0.1896836757659912 }, { "epoch": 3.813934326171875e-05, "model_forward_time": 0.024456262588500977, "step": 24995 }, { "epoch": 3.813934326171875e-05, "step": 24995, "training_step_time": 0.12531328201293945 }, { "epoch": 3.8140869140625e-05, "model_forward_time": 0.02366161346435547, "step": 24996 }, { "epoch": 3.8140869140625e-05, "step": 24996, "training_step_time": 0.11908125877380371 }, { "epoch": 3.814239501953125e-05, "model_forward_time": 0.024873733520507812, "step": 24997 }, { "epoch": 3.814239501953125e-05, "step": 24997, "training_step_time": 0.11833357810974121 }, { "epoch": 3.81439208984375e-05, "model_forward_time": 0.025428295135498047, "step": 24998 }, { "epoch": 3.81439208984375e-05, "step": 24998, "training_step_time": 0.18886041641235352 }, { "epoch": 3.814544677734375e-05, "model_forward_time": 0.02479076385498047, "step": 24999 }, { "epoch": 3.814544677734375e-05, "step": 24999, "training_step_time": 0.11611771583557129 }, { "epoch": 3.814697265625e-05, "grad_norm": 0.24925269186496735, "learning_rate": 7.404029558083653e-06, "loss": 0.0049, "step": 25000 }, { "epoch": 3.814697265625e-05, "model_forward_time": 0.02887892723083496, "step": 25000 }, { "epoch": 3.814697265625e-05, "step": 25000, "training_step_time": 0.10677766799926758 }, { "epoch": 3.814849853515625e-05, "model_forward_time": 0.023868560791015625, "step": 25001 }, { "epoch": 3.814849853515625e-05, "step": 25001, "training_step_time": 0.10685253143310547 }, { "epoch": 3.81500244140625e-05, "model_forward_time": 0.024806499481201172, "step": 25002 }, { "epoch": 3.81500244140625e-05, "step": 25002, "training_step_time": 0.10273003578186035 }, { "epoch": 3.815155029296875e-05, "model_forward_time": 0.02677750587463379, "step": 25003 }, { "epoch": 3.815155029296875e-05, "step": 25003, "training_step_time": 0.1894700527191162 }, { "epoch": 3.8153076171875e-05, "model_forward_time": 0.024010658264160156, "step": 25004 }, { "epoch": 3.8153076171875e-05, "step": 25004, "training_step_time": 0.1422407627105713 }, { "epoch": 3.815460205078125e-05, "model_forward_time": 0.024502038955688477, "step": 25005 }, { "epoch": 3.815460205078125e-05, "step": 25005, "training_step_time": 0.10293221473693848 }, { "epoch": 3.81561279296875e-05, "model_forward_time": 0.02557826042175293, "step": 25006 }, { "epoch": 3.81561279296875e-05, "step": 25006, "training_step_time": 0.10453414916992188 }, { "epoch": 3.815765380859375e-05, "model_forward_time": 0.025261878967285156, "step": 25007 }, { "epoch": 3.815765380859375e-05, "step": 25007, "training_step_time": 0.10351347923278809 }, { "epoch": 3.81591796875e-05, "model_forward_time": 0.025469303131103516, "step": 25008 }, { "epoch": 3.81591796875e-05, "step": 25008, "training_step_time": 0.10572147369384766 }, { "epoch": 3.816070556640625e-05, "model_forward_time": 0.02524566650390625, "step": 25009 }, { "epoch": 3.816070556640625e-05, "step": 25009, "training_step_time": 0.10544347763061523 }, { "epoch": 3.81622314453125e-05, "grad_norm": 0.09094515442848206, "learning_rate": 7.375192864401931e-06, "loss": 0.0079, "step": 25010 }, { "epoch": 3.81622314453125e-05, "model_forward_time": 0.02510523796081543, "step": 25010 }, { "epoch": 3.81622314453125e-05, "step": 25010, "training_step_time": 0.10578727722167969 }, { "epoch": 3.816375732421875e-05, "model_forward_time": 0.024981975555419922, "step": 25011 }, { "epoch": 3.816375732421875e-05, "step": 25011, "training_step_time": 0.10748124122619629 }, { "epoch": 3.8165283203125e-05, "model_forward_time": 0.025338172912597656, "step": 25012 }, { "epoch": 3.8165283203125e-05, "step": 25012, "training_step_time": 0.10608077049255371 }, { "epoch": 3.816680908203125e-05, "model_forward_time": 0.025037527084350586, "step": 25013 }, { "epoch": 3.816680908203125e-05, "step": 25013, "training_step_time": 0.10314059257507324 }, { "epoch": 3.81683349609375e-05, "model_forward_time": 0.02476811408996582, "step": 25014 }, { "epoch": 3.81683349609375e-05, "step": 25014, "training_step_time": 0.13168883323669434 }, { "epoch": 3.816986083984375e-05, "model_forward_time": 0.02413010597229004, "step": 25015 }, { "epoch": 3.816986083984375e-05, "step": 25015, "training_step_time": 0.15410351753234863 }, { "epoch": 3.817138671875e-05, "model_forward_time": 0.023876428604125977, "step": 25016 }, { "epoch": 3.817138671875e-05, "step": 25016, "training_step_time": 0.1416473388671875 }, { "epoch": 3.817291259765625e-05, "model_forward_time": 0.02338433265686035, "step": 25017 }, { "epoch": 3.817291259765625e-05, "step": 25017, "training_step_time": 0.129685640335083 }, { "epoch": 3.81744384765625e-05, "model_forward_time": 0.02310633659362793, "step": 25018 }, { "epoch": 3.81744384765625e-05, "step": 25018, "training_step_time": 0.12094259262084961 }, { "epoch": 3.817596435546875e-05, "model_forward_time": 0.02378988265991211, "step": 25019 }, { "epoch": 3.817596435546875e-05, "step": 25019, "training_step_time": 0.11940789222717285 }, { "epoch": 3.8177490234375e-05, "grad_norm": 0.4444446265697479, "learning_rate": 7.3464079638801365e-06, "loss": 0.0072, "step": 25020 }, { "epoch": 3.8177490234375e-05, "model_forward_time": 0.02408909797668457, "step": 25020 }, { "epoch": 3.8177490234375e-05, "step": 25020, "training_step_time": 0.15147876739501953 }, { "epoch": 3.817901611328125e-05, "model_forward_time": 0.024954557418823242, "step": 25021 }, { "epoch": 3.817901611328125e-05, "step": 25021, "training_step_time": 0.11741471290588379 }, { "epoch": 3.81805419921875e-05, "model_forward_time": 0.025370121002197266, "step": 25022 }, { "epoch": 3.81805419921875e-05, "step": 25022, "training_step_time": 0.11539936065673828 }, { "epoch": 3.818206787109375e-05, "model_forward_time": 0.025111675262451172, "step": 25023 }, { "epoch": 3.818206787109375e-05, "step": 25023, "training_step_time": 0.12031149864196777 }, { "epoch": 3.818359375e-05, "model_forward_time": 0.025614500045776367, "step": 25024 }, { "epoch": 3.818359375e-05, "step": 25024, "training_step_time": 0.20996356010437012 }, { "epoch": 3.818511962890625e-05, "model_forward_time": 0.024765729904174805, "step": 25025 }, { "epoch": 3.818511962890625e-05, "step": 25025, "training_step_time": 0.11164402961730957 }, { "epoch": 3.81866455078125e-05, "model_forward_time": 0.024080514907836914, "step": 25026 }, { "epoch": 3.81866455078125e-05, "step": 25026, "training_step_time": 0.10087704658508301 }, { "epoch": 3.818817138671875e-05, "model_forward_time": 0.02563333511352539, "step": 25027 }, { "epoch": 3.818817138671875e-05, "step": 25027, "training_step_time": 0.10654091835021973 }, { "epoch": 3.8189697265625e-05, "model_forward_time": 0.025554418563842773, "step": 25028 }, { "epoch": 3.8189697265625e-05, "step": 25028, "training_step_time": 0.10500931739807129 }, { "epoch": 3.819122314453125e-05, "model_forward_time": 0.026189327239990234, "step": 25029 }, { "epoch": 3.819122314453125e-05, "step": 25029, "training_step_time": 0.10583662986755371 }, { "epoch": 3.81927490234375e-05, "grad_norm": 0.3237009644508362, "learning_rate": 7.317674891494625e-06, "loss": 0.0146, "step": 25030 }, { "epoch": 3.81927490234375e-05, "model_forward_time": 0.026188135147094727, "step": 25030 }, { "epoch": 3.81927490234375e-05, "step": 25030, "training_step_time": 0.10995769500732422 }, { "epoch": 3.819427490234375e-05, "model_forward_time": 0.025097131729125977, "step": 25031 }, { "epoch": 3.819427490234375e-05, "step": 25031, "training_step_time": 0.10294198989868164 }, { "epoch": 3.819580078125e-05, "model_forward_time": 0.025513887405395508, "step": 25032 }, { "epoch": 3.819580078125e-05, "step": 25032, "training_step_time": 0.10381102561950684 }, { "epoch": 3.819732666015625e-05, "model_forward_time": 0.025522232055664062, "step": 25033 }, { "epoch": 3.819732666015625e-05, "step": 25033, "training_step_time": 0.10345840454101562 }, { "epoch": 3.81988525390625e-05, "model_forward_time": 0.025183677673339844, "step": 25034 }, { "epoch": 3.81988525390625e-05, "step": 25034, "training_step_time": 0.17425274848937988 }, { "epoch": 3.820037841796875e-05, "model_forward_time": 0.024954557418823242, "step": 25035 }, { "epoch": 3.820037841796875e-05, "step": 25035, "training_step_time": 0.10679125785827637 }, { "epoch": 3.8201904296875e-05, "model_forward_time": 0.025027990341186523, "step": 25036 }, { "epoch": 3.8201904296875e-05, "step": 25036, "training_step_time": 0.1847996711730957 }, { "epoch": 3.820343017578125e-05, "model_forward_time": 0.02515578269958496, "step": 25037 }, { "epoch": 3.820343017578125e-05, "step": 25037, "training_step_time": 0.1349475383758545 }, { "epoch": 3.82049560546875e-05, "model_forward_time": 0.028076648712158203, "step": 25038 }, { "epoch": 3.82049560546875e-05, "step": 25038, "training_step_time": 0.2188117504119873 }, { "epoch": 3.820648193359375e-05, "model_forward_time": 0.02476954460144043, "step": 25039 }, { "epoch": 3.820648193359375e-05, "step": 25039, "training_step_time": 0.22105002403259277 }, { "epoch": 3.82080078125e-05, "grad_norm": 0.12376312166452408, "learning_rate": 7.2889936821588125e-06, "loss": 0.0047, "step": 25040 }, { "epoch": 3.82080078125e-05, "model_forward_time": 0.024588346481323242, "step": 25040 }, { "epoch": 3.82080078125e-05, "step": 25040, "training_step_time": 0.12495565414428711 }, { "epoch": 3.820953369140625e-05, "model_forward_time": 0.024629831314086914, "step": 25041 }, { "epoch": 3.820953369140625e-05, "step": 25041, "training_step_time": 0.12820649147033691 }, { "epoch": 3.82110595703125e-05, "model_forward_time": 0.0247347354888916, "step": 25042 }, { "epoch": 3.82110595703125e-05, "step": 25042, "training_step_time": 0.11042594909667969 }, { "epoch": 3.821258544921875e-05, "model_forward_time": 0.025394201278686523, "step": 25043 }, { "epoch": 3.821258544921875e-05, "step": 25043, "training_step_time": 0.10480952262878418 }, { "epoch": 3.8214111328125e-05, "model_forward_time": 0.02598094940185547, "step": 25044 }, { "epoch": 3.8214111328125e-05, "step": 25044, "training_step_time": 0.10991525650024414 }, { "epoch": 3.821563720703125e-05, "model_forward_time": 0.02616095542907715, "step": 25045 }, { "epoch": 3.821563720703125e-05, "step": 25045, "training_step_time": 0.10643768310546875 }, { "epoch": 3.82171630859375e-05, "model_forward_time": 0.025621891021728516, "step": 25046 }, { "epoch": 3.82171630859375e-05, "step": 25046, "training_step_time": 0.12440085411071777 }, { "epoch": 3.821868896484375e-05, "model_forward_time": 0.02585124969482422, "step": 25047 }, { "epoch": 3.821868896484375e-05, "step": 25047, "training_step_time": 0.1076042652130127 }, { "epoch": 3.822021484375e-05, "model_forward_time": 0.025577545166015625, "step": 25048 }, { "epoch": 3.822021484375e-05, "step": 25048, "training_step_time": 0.2098526954650879 }, { "epoch": 3.822174072265625e-05, "model_forward_time": 0.02441716194152832, "step": 25049 }, { "epoch": 3.822174072265625e-05, "step": 25049, "training_step_time": 0.12787818908691406 }, { "epoch": 3.82232666015625e-05, "grad_norm": 0.1526920348405838, "learning_rate": 7.260364370723044e-06, "loss": 0.0064, "step": 25050 }, { "epoch": 3.82232666015625e-05, "model_forward_time": 0.02455925941467285, "step": 25050 }, { "epoch": 3.82232666015625e-05, "step": 25050, "training_step_time": 0.21976304054260254 }, { "epoch": 3.822479248046875e-05, "model_forward_time": 0.024837970733642578, "step": 25051 }, { "epoch": 3.822479248046875e-05, "step": 25051, "training_step_time": 0.10841727256774902 }, { "epoch": 3.8226318359375e-05, "model_forward_time": 0.02497386932373047, "step": 25052 }, { "epoch": 3.8226318359375e-05, "step": 25052, "training_step_time": 0.11075544357299805 }, { "epoch": 3.822784423828125e-05, "model_forward_time": 0.0252230167388916, "step": 25053 }, { "epoch": 3.822784423828125e-05, "step": 25053, "training_step_time": 0.10830831527709961 }, { "epoch": 3.82293701171875e-05, "model_forward_time": 0.025828838348388672, "step": 25054 }, { "epoch": 3.82293701171875e-05, "step": 25054, "training_step_time": 0.11139225959777832 }, { "epoch": 3.823089599609375e-05, "model_forward_time": 0.025738000869750977, "step": 25055 }, { "epoch": 3.823089599609375e-05, "step": 25055, "training_step_time": 0.1082918643951416 }, { "epoch": 3.8232421875e-05, "model_forward_time": 0.025001049041748047, "step": 25056 }, { "epoch": 3.8232421875e-05, "step": 25056, "training_step_time": 0.10857510566711426 }, { "epoch": 3.823394775390625e-05, "model_forward_time": 0.025480985641479492, "step": 25057 }, { "epoch": 3.823394775390625e-05, "step": 25057, "training_step_time": 0.10761666297912598 }, { "epoch": 3.82354736328125e-05, "model_forward_time": 0.025412321090698242, "step": 25058 }, { "epoch": 3.82354736328125e-05, "step": 25058, "training_step_time": 0.10713672637939453 }, { "epoch": 3.823699951171875e-05, "model_forward_time": 0.025943756103515625, "step": 25059 }, { "epoch": 3.823699951171875e-05, "step": 25059, "training_step_time": 0.11415624618530273 }, { "epoch": 3.8238525390625e-05, "grad_norm": 0.11616542935371399, "learning_rate": 7.2317869919746705e-06, "loss": 0.0057, "step": 25060 }, { "epoch": 3.8238525390625e-05, "model_forward_time": 0.025337934494018555, "step": 25060 }, { "epoch": 3.8238525390625e-05, "step": 25060, "training_step_time": 0.11009097099304199 }, { "epoch": 3.824005126953125e-05, "model_forward_time": 0.025023221969604492, "step": 25061 }, { "epoch": 3.824005126953125e-05, "step": 25061, "training_step_time": 0.10696959495544434 }, { "epoch": 3.82415771484375e-05, "model_forward_time": 0.025115489959716797, "step": 25062 }, { "epoch": 3.82415771484375e-05, "step": 25062, "training_step_time": 0.10770344734191895 }, { "epoch": 3.824310302734375e-05, "model_forward_time": 0.02571845054626465, "step": 25063 }, { "epoch": 3.824310302734375e-05, "step": 25063, "training_step_time": 0.10859918594360352 }, { "epoch": 3.824462890625e-05, "model_forward_time": 0.025569677352905273, "step": 25064 }, { "epoch": 3.824462890625e-05, "step": 25064, "training_step_time": 0.1079866886138916 }, { "epoch": 3.824615478515625e-05, "model_forward_time": 0.025261402130126953, "step": 25065 }, { "epoch": 3.824615478515625e-05, "step": 25065, "training_step_time": 0.20171332359313965 }, { "epoch": 3.82476806640625e-05, "model_forward_time": 0.0255277156829834, "step": 25066 }, { "epoch": 3.82476806640625e-05, "step": 25066, "training_step_time": 0.16324639320373535 }, { "epoch": 3.824920654296875e-05, "model_forward_time": 0.024769306182861328, "step": 25067 }, { "epoch": 3.824920654296875e-05, "step": 25067, "training_step_time": 0.1580650806427002 }, { "epoch": 3.8250732421875e-05, "model_forward_time": 0.024834156036376953, "step": 25068 }, { "epoch": 3.8250732421875e-05, "step": 25068, "training_step_time": 0.1171274185180664 }, { "epoch": 3.825225830078125e-05, "model_forward_time": 0.025124549865722656, "step": 25069 }, { "epoch": 3.825225830078125e-05, "step": 25069, "training_step_time": 0.19052863121032715 }, { "epoch": 3.82537841796875e-05, "grad_norm": 0.07350145280361176, "learning_rate": 7.203261580637877e-06, "loss": 0.0042, "step": 25070 }, { "epoch": 3.82537841796875e-05, "model_forward_time": 0.0254669189453125, "step": 25070 }, { "epoch": 3.82537841796875e-05, "step": 25070, "training_step_time": 0.10476255416870117 }, { "epoch": 3.825531005859375e-05, "model_forward_time": 0.024896621704101562, "step": 25071 }, { "epoch": 3.825531005859375e-05, "step": 25071, "training_step_time": 0.10831308364868164 }, { "epoch": 3.82568359375e-05, "model_forward_time": 0.025502920150756836, "step": 25072 }, { "epoch": 3.82568359375e-05, "step": 25072, "training_step_time": 0.11693692207336426 }, { "epoch": 3.825836181640625e-05, "model_forward_time": 0.025450468063354492, "step": 25073 }, { "epoch": 3.825836181640625e-05, "step": 25073, "training_step_time": 0.11407184600830078 }, { "epoch": 3.82598876953125e-05, "model_forward_time": 0.0255887508392334, "step": 25074 }, { "epoch": 3.82598876953125e-05, "step": 25074, "training_step_time": 0.1074974536895752 }, { "epoch": 3.826141357421875e-05, "model_forward_time": 0.02521038055419922, "step": 25075 }, { "epoch": 3.826141357421875e-05, "step": 25075, "training_step_time": 0.10771012306213379 }, { "epoch": 3.8262939453125e-05, "model_forward_time": 0.02560257911682129, "step": 25076 }, { "epoch": 3.8262939453125e-05, "step": 25076, "training_step_time": 0.1079399585723877 }, { "epoch": 3.826446533203125e-05, "model_forward_time": 0.025557279586791992, "step": 25077 }, { "epoch": 3.826446533203125e-05, "step": 25077, "training_step_time": 0.1669304370880127 }, { "epoch": 3.82659912109375e-05, "model_forward_time": 0.02476668357849121, "step": 25078 }, { "epoch": 3.82659912109375e-05, "step": 25078, "training_step_time": 0.17777013778686523 }, { "epoch": 3.826751708984375e-05, "model_forward_time": 0.02395343780517578, "step": 25079 }, { "epoch": 3.826751708984375e-05, "step": 25079, "training_step_time": 0.19781708717346191 }, { "epoch": 3.826904296875e-05, "grad_norm": 0.09397833794355392, "learning_rate": 7.174788171373731e-06, "loss": 0.0061, "step": 25080 }, { "epoch": 3.826904296875e-05, "model_forward_time": 0.02518463134765625, "step": 25080 }, { "epoch": 3.826904296875e-05, "step": 25080, "training_step_time": 0.17037034034729004 }, { "epoch": 3.827056884765625e-05, "model_forward_time": 0.02469348907470703, "step": 25081 }, { "epoch": 3.827056884765625e-05, "step": 25081, "training_step_time": 0.16231226921081543 }, { "epoch": 3.82720947265625e-05, "model_forward_time": 0.02478766441345215, "step": 25082 }, { "epoch": 3.82720947265625e-05, "step": 25082, "training_step_time": 0.19936633110046387 }, { "epoch": 3.827362060546875e-05, "model_forward_time": 0.024800777435302734, "step": 25083 }, { "epoch": 3.827362060546875e-05, "step": 25083, "training_step_time": 0.1291823387145996 }, { "epoch": 3.8275146484375e-05, "model_forward_time": 0.023612022399902344, "step": 25084 }, { "epoch": 3.8275146484375e-05, "step": 25084, "training_step_time": 0.11693453788757324 }, { "epoch": 3.827667236328125e-05, "model_forward_time": 0.0256500244140625, "step": 25085 }, { "epoch": 3.827667236328125e-05, "step": 25085, "training_step_time": 0.11795973777770996 }, { "epoch": 3.82781982421875e-05, "model_forward_time": 0.025600433349609375, "step": 25086 }, { "epoch": 3.82781982421875e-05, "step": 25086, "training_step_time": 0.11570882797241211 }, { "epoch": 3.827972412109375e-05, "model_forward_time": 0.025560855865478516, "step": 25087 }, { "epoch": 3.827972412109375e-05, "step": 25087, "training_step_time": 0.11034107208251953 }, { "epoch": 3.828125e-05, "model_forward_time": 0.025393009185791016, "step": 25088 }, { "epoch": 3.828125e-05, "step": 25088, "training_step_time": 0.1136167049407959 }, { "epoch": 3.828277587890625e-05, "model_forward_time": 0.025386810302734375, "step": 25089 }, { "epoch": 3.828277587890625e-05, "step": 25089, "training_step_time": 0.15917563438415527 }, { "epoch": 3.82843017578125e-05, "grad_norm": 0.29310888051986694, "learning_rate": 7.146366798780096e-06, "loss": 0.0061, "step": 25090 }, { "epoch": 3.82843017578125e-05, "model_forward_time": 0.025224685668945312, "step": 25090 }, { "epoch": 3.82843017578125e-05, "step": 25090, "training_step_time": 0.1576242446899414 }, { "epoch": 3.828582763671875e-05, "model_forward_time": 0.024659156799316406, "step": 25091 }, { "epoch": 3.828582763671875e-05, "step": 25091, "training_step_time": 0.11840939521789551 }, { "epoch": 3.8287353515625e-05, "model_forward_time": 0.025058984756469727, "step": 25092 }, { "epoch": 3.8287353515625e-05, "step": 25092, "training_step_time": 0.10769271850585938 }, { "epoch": 3.828887939453125e-05, "model_forward_time": 0.0273892879486084, "step": 25093 }, { "epoch": 3.828887939453125e-05, "step": 25093, "training_step_time": 0.12745928764343262 }, { "epoch": 3.82904052734375e-05, "model_forward_time": 0.025051593780517578, "step": 25094 }, { "epoch": 3.82904052734375e-05, "step": 25094, "training_step_time": 0.1198267936706543 }, { "epoch": 3.829193115234375e-05, "model_forward_time": 0.025104284286499023, "step": 25095 }, { "epoch": 3.829193115234375e-05, "step": 25095, "training_step_time": 0.1038351058959961 }, { "epoch": 3.829345703125e-05, "model_forward_time": 0.025053977966308594, "step": 25096 }, { "epoch": 3.829345703125e-05, "step": 25096, "training_step_time": 0.10745882987976074 }, { "epoch": 3.829498291015625e-05, "model_forward_time": 0.025100231170654297, "step": 25097 }, { "epoch": 3.829498291015625e-05, "step": 25097, "training_step_time": 0.1049656867980957 }, { "epoch": 3.82965087890625e-05, "model_forward_time": 0.024740219116210938, "step": 25098 }, { "epoch": 3.82965087890625e-05, "step": 25098, "training_step_time": 0.10576200485229492 }, { "epoch": 3.829803466796875e-05, "model_forward_time": 0.02411651611328125, "step": 25099 }, { "epoch": 3.829803466796875e-05, "step": 25099, "training_step_time": 0.10433840751647949 }, { "epoch": 3.8299560546875e-05, "grad_norm": 0.09635431319475174, "learning_rate": 7.1179974973916486e-06, "loss": 0.0055, "step": 25100 }, { "epoch": 3.8299560546875e-05, "model_forward_time": 0.024544954299926758, "step": 25100 }, { "epoch": 3.8299560546875e-05, "step": 25100, "training_step_time": 0.10450315475463867 }, { "epoch": 3.830108642578125e-05, "model_forward_time": 0.02492213249206543, "step": 25101 }, { "epoch": 3.830108642578125e-05, "step": 25101, "training_step_time": 0.10894417762756348 }, { "epoch": 3.83026123046875e-05, "model_forward_time": 0.024777650833129883, "step": 25102 }, { "epoch": 3.83026123046875e-05, "step": 25102, "training_step_time": 0.10705733299255371 }, { "epoch": 3.830413818359375e-05, "model_forward_time": 0.02514505386352539, "step": 25103 }, { "epoch": 3.830413818359375e-05, "step": 25103, "training_step_time": 0.10733366012573242 }, { "epoch": 3.83056640625e-05, "model_forward_time": 0.02443099021911621, "step": 25104 }, { "epoch": 3.83056640625e-05, "step": 25104, "training_step_time": 0.10692095756530762 }, { "epoch": 3.830718994140625e-05, "model_forward_time": 0.0247344970703125, "step": 25105 }, { "epoch": 3.830718994140625e-05, "step": 25105, "training_step_time": 0.10637307167053223 }, { "epoch": 3.83087158203125e-05, "model_forward_time": 0.024736881256103516, "step": 25106 }, { "epoch": 3.83087158203125e-05, "step": 25106, "training_step_time": 0.10445284843444824 }, { "epoch": 3.831024169921875e-05, "model_forward_time": 0.024989843368530273, "step": 25107 }, { "epoch": 3.831024169921875e-05, "step": 25107, "training_step_time": 0.11058211326599121 }, { "epoch": 3.8311767578125e-05, "model_forward_time": 0.02555108070373535, "step": 25108 }, { "epoch": 3.8311767578125e-05, "step": 25108, "training_step_time": 0.18748998641967773 }, { "epoch": 3.831329345703125e-05, "model_forward_time": 0.026265382766723633, "step": 25109 }, { "epoch": 3.831329345703125e-05, "step": 25109, "training_step_time": 0.16661643981933594 }, { "epoch": 3.83148193359375e-05, "grad_norm": 0.09016188234090805, "learning_rate": 7.089680301679752e-06, "loss": 0.0049, "step": 25110 }, { "epoch": 3.83148193359375e-05, "model_forward_time": 0.023697853088378906, "step": 25110 }, { "epoch": 3.83148193359375e-05, "step": 25110, "training_step_time": 0.14166927337646484 }, { "epoch": 3.831634521484375e-05, "model_forward_time": 0.024077892303466797, "step": 25111 }, { "epoch": 3.831634521484375e-05, "step": 25111, "training_step_time": 0.12889647483825684 }, { "epoch": 3.831787109375e-05, "model_forward_time": 0.024435043334960938, "step": 25112 }, { "epoch": 3.831787109375e-05, "step": 25112, "training_step_time": 0.18923521041870117 }, { "epoch": 3.831939697265625e-05, "model_forward_time": 0.02459430694580078, "step": 25113 }, { "epoch": 3.831939697265625e-05, "step": 25113, "training_step_time": 0.11600804328918457 }, { "epoch": 3.83209228515625e-05, "model_forward_time": 0.02452397346496582, "step": 25114 }, { "epoch": 3.83209228515625e-05, "step": 25114, "training_step_time": 0.1154024600982666 }, { "epoch": 3.832244873046875e-05, "model_forward_time": 0.024894237518310547, "step": 25115 }, { "epoch": 3.832244873046875e-05, "step": 25115, "training_step_time": 0.11747336387634277 }, { "epoch": 3.8323974609375e-05, "model_forward_time": 0.025285959243774414, "step": 25116 }, { "epoch": 3.8323974609375e-05, "step": 25116, "training_step_time": 0.11216425895690918 }, { "epoch": 3.832550048828125e-05, "model_forward_time": 0.02533102035522461, "step": 25117 }, { "epoch": 3.832550048828125e-05, "step": 25117, "training_step_time": 0.12253761291503906 }, { "epoch": 3.83270263671875e-05, "model_forward_time": 0.02374267578125, "step": 25118 }, { "epoch": 3.83270263671875e-05, "step": 25118, "training_step_time": 0.11725926399230957 }, { "epoch": 3.832855224609375e-05, "model_forward_time": 0.0248870849609375, "step": 25119 }, { "epoch": 3.832855224609375e-05, "step": 25119, "training_step_time": 0.11341476440429688 }, { "epoch": 3.8330078125e-05, "grad_norm": 0.20135025680065155, "learning_rate": 7.061415246052466e-06, "loss": 0.0069, "step": 25120 }, { "epoch": 3.8330078125e-05, "model_forward_time": 0.024172067642211914, "step": 25120 }, { "epoch": 3.8330078125e-05, "step": 25120, "training_step_time": 0.16681456565856934 }, { "epoch": 3.833160400390625e-05, "model_forward_time": 0.02413463592529297, "step": 25121 }, { "epoch": 3.833160400390625e-05, "step": 25121, "training_step_time": 0.1462693214416504 }, { "epoch": 3.83331298828125e-05, "model_forward_time": 0.024616003036499023, "step": 25122 }, { "epoch": 3.83331298828125e-05, "step": 25122, "training_step_time": 0.10736346244812012 }, { "epoch": 3.833465576171875e-05, "model_forward_time": 0.02481245994567871, "step": 25123 }, { "epoch": 3.833465576171875e-05, "step": 25123, "training_step_time": 0.1087954044342041 }, { "epoch": 3.8336181640625e-05, "model_forward_time": 0.02494955062866211, "step": 25124 }, { "epoch": 3.8336181640625e-05, "step": 25124, "training_step_time": 0.1054372787475586 }, { "epoch": 3.833770751953125e-05, "model_forward_time": 0.024939775466918945, "step": 25125 }, { "epoch": 3.833770751953125e-05, "step": 25125, "training_step_time": 0.13022804260253906 }, { "epoch": 3.83392333984375e-05, "model_forward_time": 0.024874448776245117, "step": 25126 }, { "epoch": 3.83392333984375e-05, "step": 25126, "training_step_time": 0.23420286178588867 }, { "epoch": 3.834075927734375e-05, "model_forward_time": 0.02402496337890625, "step": 25127 }, { "epoch": 3.834075927734375e-05, "step": 25127, "training_step_time": 0.1067955493927002 }, { "epoch": 3.834228515625e-05, "model_forward_time": 0.02367877960205078, "step": 25128 }, { "epoch": 3.834228515625e-05, "step": 25128, "training_step_time": 0.13083958625793457 }, { "epoch": 3.834381103515625e-05, "model_forward_time": 0.02477741241455078, "step": 25129 }, { "epoch": 3.834381103515625e-05, "step": 25129, "training_step_time": 0.12528634071350098 }, { "epoch": 3.83453369140625e-05, "grad_norm": 0.08731285482645035, "learning_rate": 7.0332023648544965e-06, "loss": 0.0131, "step": 25130 }, { "epoch": 3.83453369140625e-05, "model_forward_time": 0.02522420883178711, "step": 25130 }, { "epoch": 3.83453369140625e-05, "step": 25130, "training_step_time": 0.11361551284790039 }, { "epoch": 3.834686279296875e-05, "model_forward_time": 0.027456283569335938, "step": 25131 }, { "epoch": 3.834686279296875e-05, "step": 25131, "training_step_time": 0.13729524612426758 }, { "epoch": 3.8348388671875e-05, "model_forward_time": 0.025107145309448242, "step": 25132 }, { "epoch": 3.8348388671875e-05, "step": 25132, "training_step_time": 0.10460591316223145 }, { "epoch": 3.834991455078125e-05, "model_forward_time": 0.02473306655883789, "step": 25133 }, { "epoch": 3.834991455078125e-05, "step": 25133, "training_step_time": 0.10378789901733398 }, { "epoch": 3.83514404296875e-05, "model_forward_time": 0.024318456649780273, "step": 25134 }, { "epoch": 3.83514404296875e-05, "step": 25134, "training_step_time": 0.14612698554992676 }, { "epoch": 3.835296630859375e-05, "model_forward_time": 0.024158000946044922, "step": 25135 }, { "epoch": 3.835296630859375e-05, "step": 25135, "training_step_time": 0.10166192054748535 }, { "epoch": 3.83544921875e-05, "model_forward_time": 0.02513265609741211, "step": 25136 }, { "epoch": 3.83544921875e-05, "step": 25136, "training_step_time": 0.20757484436035156 }, { "epoch": 3.835601806640625e-05, "model_forward_time": 0.02434396743774414, "step": 25137 }, { "epoch": 3.835601806640625e-05, "step": 25137, "training_step_time": 0.131483793258667 }, { "epoch": 3.83575439453125e-05, "model_forward_time": 0.024017333984375, "step": 25138 }, { "epoch": 3.83575439453125e-05, "step": 25138, "training_step_time": 0.10367035865783691 }, { "epoch": 3.835906982421875e-05, "model_forward_time": 0.024717092514038086, "step": 25139 }, { "epoch": 3.835906982421875e-05, "step": 25139, "training_step_time": 0.11057567596435547 }, { "epoch": 3.8360595703125e-05, "grad_norm": 0.12092097848653793, "learning_rate": 7.005041692367154e-06, "loss": 0.0064, "step": 25140 }, { "epoch": 3.8360595703125e-05, "model_forward_time": 0.02500295639038086, "step": 25140 }, { "epoch": 3.8360595703125e-05, "step": 25140, "training_step_time": 0.10316228866577148 }, { "epoch": 3.836212158203125e-05, "model_forward_time": 0.027331113815307617, "step": 25141 }, { "epoch": 3.836212158203125e-05, "step": 25141, "training_step_time": 0.1068110466003418 }, { "epoch": 3.83636474609375e-05, "model_forward_time": 0.025352001190185547, "step": 25142 }, { "epoch": 3.83636474609375e-05, "step": 25142, "training_step_time": 0.10509109497070312 }, { "epoch": 3.836517333984375e-05, "model_forward_time": 0.02638864517211914, "step": 25143 }, { "epoch": 3.836517333984375e-05, "step": 25143, "training_step_time": 0.10861968994140625 }, { "epoch": 3.836669921875e-05, "model_forward_time": 0.02500152587890625, "step": 25144 }, { "epoch": 3.836669921875e-05, "step": 25144, "training_step_time": 0.10393834114074707 }, { "epoch": 3.836822509765625e-05, "model_forward_time": 0.025623083114624023, "step": 25145 }, { "epoch": 3.836822509765625e-05, "step": 25145, "training_step_time": 0.10502266883850098 }, { "epoch": 3.83697509765625e-05, "model_forward_time": 0.025543689727783203, "step": 25146 }, { "epoch": 3.83697509765625e-05, "step": 25146, "training_step_time": 0.10497379302978516 }, { "epoch": 3.837127685546875e-05, "model_forward_time": 0.02489328384399414, "step": 25147 }, { "epoch": 3.837127685546875e-05, "step": 25147, "training_step_time": 0.10405993461608887 }, { "epoch": 3.8372802734375e-05, "model_forward_time": 0.025033235549926758, "step": 25148 }, { "epoch": 3.8372802734375e-05, "step": 25148, "training_step_time": 0.1046895980834961 }, { "epoch": 3.837432861328125e-05, "model_forward_time": 0.024989843368530273, "step": 25149 }, { "epoch": 3.837432861328125e-05, "step": 25149, "training_step_time": 0.10675525665283203 }, { "epoch": 3.83758544921875e-05, "grad_norm": 0.09404527395963669, "learning_rate": 6.976933262808322e-06, "loss": 0.0044, "step": 25150 }, { "epoch": 3.83758544921875e-05, "model_forward_time": 0.024916410446166992, "step": 25150 }, { "epoch": 3.83758544921875e-05, "step": 25150, "training_step_time": 0.10892200469970703 }, { "epoch": 3.837738037109375e-05, "model_forward_time": 0.024976730346679688, "step": 25151 }, { "epoch": 3.837738037109375e-05, "step": 25151, "training_step_time": 0.13535356521606445 }, { "epoch": 3.837890625e-05, "model_forward_time": 0.024065494537353516, "step": 25152 }, { "epoch": 3.837890625e-05, "step": 25152, "training_step_time": 0.1938610076904297 }, { "epoch": 3.838043212890625e-05, "model_forward_time": 0.02413201332092285, "step": 25153 }, { "epoch": 3.838043212890625e-05, "step": 25153, "training_step_time": 0.21370458602905273 }, { "epoch": 3.83819580078125e-05, "model_forward_time": 0.024088144302368164, "step": 25154 }, { "epoch": 3.83819580078125e-05, "step": 25154, "training_step_time": 0.11835098266601562 }, { "epoch": 3.838348388671875e-05, "model_forward_time": 0.023360490798950195, "step": 25155 }, { "epoch": 3.838348388671875e-05, "step": 25155, "training_step_time": 0.12659215927124023 }, { "epoch": 3.8385009765625e-05, "model_forward_time": 0.024137020111083984, "step": 25156 }, { "epoch": 3.8385009765625e-05, "step": 25156, "training_step_time": 0.16567277908325195 }, { "epoch": 3.838653564453125e-05, "model_forward_time": 0.024781465530395508, "step": 25157 }, { "epoch": 3.838653564453125e-05, "step": 25157, "training_step_time": 0.11098098754882812 }, { "epoch": 3.83880615234375e-05, "model_forward_time": 0.023736238479614258, "step": 25158 }, { "epoch": 3.83880615234375e-05, "step": 25158, "training_step_time": 0.11285543441772461 }, { "epoch": 3.838958740234375e-05, "model_forward_time": 0.02384042739868164, "step": 25159 }, { "epoch": 3.838958740234375e-05, "step": 25159, "training_step_time": 0.11149430274963379 }, { "epoch": 3.839111328125e-05, "grad_norm": 0.1139388158917427, "learning_rate": 6.948877110332386e-06, "loss": 0.0043, "step": 25160 }, { "epoch": 3.839111328125e-05, "model_forward_time": 0.02417922019958496, "step": 25160 }, { "epoch": 3.839111328125e-05, "step": 25160, "training_step_time": 0.10627007484436035 }, { "epoch": 3.839263916015625e-05, "model_forward_time": 0.0244596004486084, "step": 25161 }, { "epoch": 3.839263916015625e-05, "step": 25161, "training_step_time": 0.10705280303955078 }, { "epoch": 3.83941650390625e-05, "model_forward_time": 0.02525639533996582, "step": 25162 }, { "epoch": 3.83941650390625e-05, "step": 25162, "training_step_time": 0.10651969909667969 }, { "epoch": 3.839569091796875e-05, "model_forward_time": 0.024994850158691406, "step": 25163 }, { "epoch": 3.839569091796875e-05, "step": 25163, "training_step_time": 0.10958194732666016 }, { "epoch": 3.8397216796875e-05, "model_forward_time": 0.024828433990478516, "step": 25164 }, { "epoch": 3.8397216796875e-05, "step": 25164, "training_step_time": 0.10748767852783203 }, { "epoch": 3.839874267578125e-05, "model_forward_time": 0.02532339096069336, "step": 25165 }, { "epoch": 3.839874267578125e-05, "step": 25165, "training_step_time": 0.1090538501739502 }, { "epoch": 3.84002685546875e-05, "model_forward_time": 0.025115489959716797, "step": 25166 }, { "epoch": 3.84002685546875e-05, "step": 25166, "training_step_time": 0.14368319511413574 }, { "epoch": 3.840179443359375e-05, "model_forward_time": 0.02502155303955078, "step": 25167 }, { "epoch": 3.840179443359375e-05, "step": 25167, "training_step_time": 0.11168575286865234 }, { "epoch": 3.84033203125e-05, "model_forward_time": 0.02512502670288086, "step": 25168 }, { "epoch": 3.84033203125e-05, "step": 25168, "training_step_time": 0.10748672485351562 }, { "epoch": 3.840484619140625e-05, "model_forward_time": 0.025332927703857422, "step": 25169 }, { "epoch": 3.840484619140625e-05, "step": 25169, "training_step_time": 0.11197352409362793 }, { "epoch": 3.84063720703125e-05, "grad_norm": 0.08475989103317261, "learning_rate": 6.92087326903022e-06, "loss": 0.0028, "step": 25170 }, { "epoch": 3.84063720703125e-05, "model_forward_time": 0.026821374893188477, "step": 25170 }, { "epoch": 3.84063720703125e-05, "step": 25170, "training_step_time": 0.1554727554321289 }, { "epoch": 3.840789794921875e-05, "model_forward_time": 0.02486419677734375, "step": 25171 }, { "epoch": 3.840789794921875e-05, "step": 25171, "training_step_time": 0.15167880058288574 }, { "epoch": 3.8409423828125e-05, "model_forward_time": 0.023644447326660156, "step": 25172 }, { "epoch": 3.8409423828125e-05, "step": 25172, "training_step_time": 0.17271733283996582 }, { "epoch": 3.841094970703125e-05, "model_forward_time": 0.024074316024780273, "step": 25173 }, { "epoch": 3.841094970703125e-05, "step": 25173, "training_step_time": 0.13364219665527344 }, { "epoch": 3.84124755859375e-05, "model_forward_time": 0.024051904678344727, "step": 25174 }, { "epoch": 3.84124755859375e-05, "step": 25174, "training_step_time": 0.1359097957611084 }, { "epoch": 3.841400146484375e-05, "model_forward_time": 0.02477550506591797, "step": 25175 }, { "epoch": 3.841400146484375e-05, "step": 25175, "training_step_time": 0.12213563919067383 }, { "epoch": 3.841552734375e-05, "model_forward_time": 0.024684667587280273, "step": 25176 }, { "epoch": 3.841552734375e-05, "step": 25176, "training_step_time": 0.12748503684997559 }, { "epoch": 3.841705322265625e-05, "model_forward_time": 0.02458786964416504, "step": 25177 }, { "epoch": 3.841705322265625e-05, "step": 25177, "training_step_time": 0.11606669425964355 }, { "epoch": 3.84185791015625e-05, "model_forward_time": 0.025003910064697266, "step": 25178 }, { "epoch": 3.84185791015625e-05, "step": 25178, "training_step_time": 0.10651397705078125 }, { "epoch": 3.842010498046875e-05, "model_forward_time": 0.026062726974487305, "step": 25179 }, { "epoch": 3.842010498046875e-05, "step": 25179, "training_step_time": 0.10550117492675781 }, { "epoch": 3.8421630859375e-05, "grad_norm": 0.1294359266757965, "learning_rate": 6.892921772929112e-06, "loss": 0.0046, "step": 25180 }, { "epoch": 3.8421630859375e-05, "model_forward_time": 0.02583479881286621, "step": 25180 }, { "epoch": 3.8421630859375e-05, "step": 25180, "training_step_time": 0.11344361305236816 }, { "epoch": 3.842315673828125e-05, "model_forward_time": 0.025258302688598633, "step": 25181 }, { "epoch": 3.842315673828125e-05, "step": 25181, "training_step_time": 0.1184835433959961 }, { "epoch": 3.84246826171875e-05, "model_forward_time": 0.025079011917114258, "step": 25182 }, { "epoch": 3.84246826171875e-05, "step": 25182, "training_step_time": 0.13540220260620117 }, { "epoch": 3.842620849609375e-05, "model_forward_time": 0.02540755271911621, "step": 25183 }, { "epoch": 3.842620849609375e-05, "step": 25183, "training_step_time": 0.2040727138519287 }, { "epoch": 3.8427734375e-05, "model_forward_time": 0.024396181106567383, "step": 25184 }, { "epoch": 3.8427734375e-05, "step": 25184, "training_step_time": 0.14046525955200195 }, { "epoch": 3.842926025390625e-05, "model_forward_time": 0.024919509887695312, "step": 25185 }, { "epoch": 3.842926025390625e-05, "step": 25185, "training_step_time": 0.13116455078125 }, { "epoch": 3.84307861328125e-05, "model_forward_time": 0.02443552017211914, "step": 25186 }, { "epoch": 3.84307861328125e-05, "step": 25186, "training_step_time": 0.12222766876220703 }, { "epoch": 3.843231201171875e-05, "model_forward_time": 0.024930477142333984, "step": 25187 }, { "epoch": 3.843231201171875e-05, "step": 25187, "training_step_time": 0.11912012100219727 }, { "epoch": 3.8433837890625e-05, "model_forward_time": 0.024785757064819336, "step": 25188 }, { "epoch": 3.8433837890625e-05, "step": 25188, "training_step_time": 0.11849308013916016 }, { "epoch": 3.843536376953125e-05, "model_forward_time": 0.025221824645996094, "step": 25189 }, { "epoch": 3.843536376953125e-05, "step": 25189, "training_step_time": 0.10950469970703125 }, { "epoch": 3.84368896484375e-05, "grad_norm": 0.09333091974258423, "learning_rate": 6.865022655992798e-06, "loss": 0.0083, "step": 25190 }, { "epoch": 3.84368896484375e-05, "model_forward_time": 0.024535417556762695, "step": 25190 }, { "epoch": 3.84368896484375e-05, "step": 25190, "training_step_time": 0.11505532264709473 }, { "epoch": 3.843841552734375e-05, "model_forward_time": 0.02514815330505371, "step": 25191 }, { "epoch": 3.843841552734375e-05, "step": 25191, "training_step_time": 0.11290836334228516 }, { "epoch": 3.843994140625e-05, "model_forward_time": 0.024916410446166992, "step": 25192 }, { "epoch": 3.843994140625e-05, "step": 25192, "training_step_time": 0.8118109703063965 }, { "epoch": 3.844146728515625e-05, "model_forward_time": 0.02252984046936035, "step": 25193 }, { "epoch": 3.844146728515625e-05, "step": 25193, "training_step_time": 0.10952901840209961 }, { "epoch": 3.84429931640625e-05, "model_forward_time": 0.02465200424194336, "step": 25194 }, { "epoch": 3.84429931640625e-05, "step": 25194, "training_step_time": 0.14066648483276367 }, { "epoch": 3.844451904296875e-05, "model_forward_time": 0.024449586868286133, "step": 25195 }, { "epoch": 3.844451904296875e-05, "step": 25195, "training_step_time": 0.1685500144958496 }, { "epoch": 3.8446044921875e-05, "model_forward_time": 0.024286746978759766, "step": 25196 }, { "epoch": 3.8446044921875e-05, "step": 25196, "training_step_time": 0.10648798942565918 }, { "epoch": 3.844757080078125e-05, "model_forward_time": 0.02438497543334961, "step": 25197 }, { "epoch": 3.844757080078125e-05, "step": 25197, "training_step_time": 0.10839629173278809 }, { "epoch": 3.84490966796875e-05, "model_forward_time": 0.024999618530273438, "step": 25198 }, { "epoch": 3.84490966796875e-05, "step": 25198, "training_step_time": 0.10742521286010742 }, { "epoch": 3.845062255859375e-05, "model_forward_time": 0.028119325637817383, "step": 25199 }, { "epoch": 3.845062255859375e-05, "step": 25199, "training_step_time": 0.10874581336975098 }, { "epoch": 3.84521484375e-05, "grad_norm": 0.10074328631162643, "learning_rate": 6.837175952121306e-06, "loss": 0.0033, "step": 25200 }, { "epoch": 3.84521484375e-05, "model_forward_time": 0.025171279907226562, "step": 25200 }, { "epoch": 3.84521484375e-05, "step": 25200, "training_step_time": 0.11043524742126465 }, { "epoch": 3.845367431640625e-05, "model_forward_time": 0.024697065353393555, "step": 25201 }, { "epoch": 3.845367431640625e-05, "step": 25201, "training_step_time": 0.11094307899475098 }, { "epoch": 3.84552001953125e-05, "model_forward_time": 0.024790287017822266, "step": 25202 }, { "epoch": 3.84552001953125e-05, "step": 25202, "training_step_time": 0.10855364799499512 }, { "epoch": 3.845672607421875e-05, "model_forward_time": 0.024943113327026367, "step": 25203 }, { "epoch": 3.845672607421875e-05, "step": 25203, "training_step_time": 0.10452532768249512 }, { "epoch": 3.8458251953125e-05, "model_forward_time": 0.025376319885253906, "step": 25204 }, { "epoch": 3.8458251953125e-05, "step": 25204, "training_step_time": 0.10694217681884766 }, { "epoch": 3.845977783203125e-05, "model_forward_time": 0.02469921112060547, "step": 25205 }, { "epoch": 3.845977783203125e-05, "step": 25205, "training_step_time": 0.10993647575378418 }, { "epoch": 3.84613037109375e-05, "model_forward_time": 0.02429652214050293, "step": 25206 }, { "epoch": 3.84613037109375e-05, "step": 25206, "training_step_time": 0.11123275756835938 }, { "epoch": 3.846282958984375e-05, "model_forward_time": 0.024782657623291016, "step": 25207 }, { "epoch": 3.846282958984375e-05, "step": 25207, "training_step_time": 0.1198720932006836 }, { "epoch": 3.846435546875e-05, "model_forward_time": 0.02489304542541504, "step": 25208 }, { "epoch": 3.846435546875e-05, "step": 25208, "training_step_time": 0.1151118278503418 }, { "epoch": 3.846588134765625e-05, "model_forward_time": 0.02565455436706543, "step": 25209 }, { "epoch": 3.846588134765625e-05, "step": 25209, "training_step_time": 0.10985040664672852 }, { "epoch": 3.84674072265625e-05, "grad_norm": 0.14441217482089996, "learning_rate": 6.809381695151029e-06, "loss": 0.0081, "step": 25210 }, { "epoch": 3.84674072265625e-05, "model_forward_time": 0.024967432022094727, "step": 25210 }, { "epoch": 3.84674072265625e-05, "step": 25210, "training_step_time": 0.10529351234436035 }, { "epoch": 3.846893310546875e-05, "model_forward_time": 0.02514958381652832, "step": 25211 }, { "epoch": 3.846893310546875e-05, "step": 25211, "training_step_time": 0.16489481925964355 }, { "epoch": 3.8470458984375e-05, "model_forward_time": 0.024161815643310547, "step": 25212 }, { "epoch": 3.8470458984375e-05, "step": 25212, "training_step_time": 0.16814017295837402 }, { "epoch": 3.847198486328125e-05, "model_forward_time": 0.024060964584350586, "step": 25213 }, { "epoch": 3.847198486328125e-05, "step": 25213, "training_step_time": 0.10434556007385254 }, { "epoch": 3.84735107421875e-05, "model_forward_time": 0.02449941635131836, "step": 25214 }, { "epoch": 3.84735107421875e-05, "step": 25214, "training_step_time": 0.11175775527954102 }, { "epoch": 3.847503662109375e-05, "model_forward_time": 0.025772809982299805, "step": 25215 }, { "epoch": 3.847503662109375e-05, "step": 25215, "training_step_time": 0.12450742721557617 }, { "epoch": 3.84765625e-05, "model_forward_time": 0.025485992431640625, "step": 25216 }, { "epoch": 3.84765625e-05, "step": 25216, "training_step_time": 0.12683820724487305 }, { "epoch": 3.847808837890625e-05, "model_forward_time": 0.024663448333740234, "step": 25217 }, { "epoch": 3.847808837890625e-05, "step": 25217, "training_step_time": 0.1242976188659668 }, { "epoch": 3.84796142578125e-05, "model_forward_time": 0.02523517608642578, "step": 25218 }, { "epoch": 3.84796142578125e-05, "step": 25218, "training_step_time": 0.11219596862792969 }, { "epoch": 3.848114013671875e-05, "model_forward_time": 0.025315523147583008, "step": 25219 }, { "epoch": 3.848114013671875e-05, "step": 25219, "training_step_time": 0.10870885848999023 }, { "epoch": 3.8482666015625e-05, "grad_norm": 0.0839950442314148, "learning_rate": 6.781639918854604e-06, "loss": 0.0044, "step": 25220 }, { "epoch": 3.8482666015625e-05, "model_forward_time": 0.024003982543945312, "step": 25220 }, { "epoch": 3.8482666015625e-05, "step": 25220, "training_step_time": 0.1184072494506836 }, { "epoch": 3.848419189453125e-05, "model_forward_time": 0.02411937713623047, "step": 25221 }, { "epoch": 3.848419189453125e-05, "step": 25221, "training_step_time": 0.21102666854858398 }, { "epoch": 3.84857177734375e-05, "model_forward_time": 0.026324987411499023, "step": 25222 }, { "epoch": 3.84857177734375e-05, "step": 25222, "training_step_time": 0.16436362266540527 }, { "epoch": 3.848724365234375e-05, "model_forward_time": 0.024543285369873047, "step": 25223 }, { "epoch": 3.848724365234375e-05, "step": 25223, "training_step_time": 0.17293238639831543 }, { "epoch": 3.848876953125e-05, "model_forward_time": 0.024868488311767578, "step": 25224 }, { "epoch": 3.848876953125e-05, "step": 25224, "training_step_time": 0.21651887893676758 }, { "epoch": 3.849029541015625e-05, "model_forward_time": 0.02461862564086914, "step": 25225 }, { "epoch": 3.849029541015625e-05, "step": 25225, "training_step_time": 0.14304709434509277 }, { "epoch": 3.84918212890625e-05, "model_forward_time": 0.02400660514831543, "step": 25226 }, { "epoch": 3.84918212890625e-05, "step": 25226, "training_step_time": 0.12431192398071289 }, { "epoch": 3.849334716796875e-05, "model_forward_time": 0.02480006217956543, "step": 25227 }, { "epoch": 3.849334716796875e-05, "step": 25227, "training_step_time": 0.12487483024597168 }, { "epoch": 3.8494873046875e-05, "model_forward_time": 0.027973413467407227, "step": 25228 }, { "epoch": 3.8494873046875e-05, "step": 25228, "training_step_time": 0.12360429763793945 }, { "epoch": 3.849639892578125e-05, "model_forward_time": 0.025107383728027344, "step": 25229 }, { "epoch": 3.849639892578125e-05, "step": 25229, "training_step_time": 0.11370325088500977 }, { "epoch": 3.84979248046875e-05, "grad_norm": 0.16888341307640076, "learning_rate": 6.753950656940905e-06, "loss": 0.0071, "step": 25230 }, { "epoch": 3.84979248046875e-05, "model_forward_time": 0.025272130966186523, "step": 25230 }, { "epoch": 3.84979248046875e-05, "step": 25230, "training_step_time": 0.11623501777648926 }, { "epoch": 3.849945068359375e-05, "model_forward_time": 0.02508234977722168, "step": 25231 }, { "epoch": 3.849945068359375e-05, "step": 25231, "training_step_time": 0.1121664047241211 }, { "epoch": 3.85009765625e-05, "model_forward_time": 0.02481555938720703, "step": 25232 }, { "epoch": 3.85009765625e-05, "step": 25232, "training_step_time": 0.10651111602783203 }, { "epoch": 3.850250244140625e-05, "model_forward_time": 0.02503061294555664, "step": 25233 }, { "epoch": 3.850250244140625e-05, "step": 25233, "training_step_time": 0.10639071464538574 }, { "epoch": 3.85040283203125e-05, "model_forward_time": 0.02515244483947754, "step": 25234 }, { "epoch": 3.85040283203125e-05, "step": 25234, "training_step_time": 0.1109929084777832 }, { "epoch": 3.850555419921875e-05, "model_forward_time": 0.025001049041748047, "step": 25235 }, { "epoch": 3.850555419921875e-05, "step": 25235, "training_step_time": 0.15361475944519043 }, { "epoch": 3.8507080078125e-05, "model_forward_time": 0.02449822425842285, "step": 25236 }, { "epoch": 3.8507080078125e-05, "step": 25236, "training_step_time": 0.11384963989257812 }, { "epoch": 3.850860595703125e-05, "model_forward_time": 0.02476048469543457, "step": 25237 }, { "epoch": 3.850860595703125e-05, "step": 25237, "training_step_time": 0.2113199234008789 }, { "epoch": 3.85101318359375e-05, "model_forward_time": 0.023433446884155273, "step": 25238 }, { "epoch": 3.85101318359375e-05, "step": 25238, "training_step_time": 0.10644245147705078 }, { "epoch": 3.851165771484375e-05, "model_forward_time": 0.024072647094726562, "step": 25239 }, { "epoch": 3.851165771484375e-05, "step": 25239, "training_step_time": 0.12456560134887695 }, { "epoch": 3.851318359375e-05, "grad_norm": 0.20325633883476257, "learning_rate": 6.726313943054991e-06, "loss": 0.0037, "step": 25240 }, { "epoch": 3.851318359375e-05, "model_forward_time": 0.024942398071289062, "step": 25240 }, { "epoch": 3.851318359375e-05, "step": 25240, "training_step_time": 0.1884911060333252 }, { "epoch": 3.851470947265625e-05, "model_forward_time": 0.023912429809570312, "step": 25241 }, { "epoch": 3.851470947265625e-05, "step": 25241, "training_step_time": 0.10571026802062988 }, { "epoch": 3.85162353515625e-05, "model_forward_time": 0.02435302734375, "step": 25242 }, { "epoch": 3.85162353515625e-05, "step": 25242, "training_step_time": 0.10498785972595215 }, { "epoch": 3.851776123046875e-05, "model_forward_time": 0.02514195442199707, "step": 25243 }, { "epoch": 3.851776123046875e-05, "step": 25243, "training_step_time": 0.10512852668762207 }, { "epoch": 3.8519287109375e-05, "model_forward_time": 0.02498173713684082, "step": 25244 }, { "epoch": 3.8519287109375e-05, "step": 25244, "training_step_time": 0.1062934398651123 }, { "epoch": 3.852081298828125e-05, "model_forward_time": 0.025046586990356445, "step": 25245 }, { "epoch": 3.852081298828125e-05, "step": 25245, "training_step_time": 0.10586977005004883 }, { "epoch": 3.85223388671875e-05, "model_forward_time": 0.025201797485351562, "step": 25246 }, { "epoch": 3.85223388671875e-05, "step": 25246, "training_step_time": 0.11025691032409668 }, { "epoch": 3.852386474609375e-05, "model_forward_time": 0.02483844757080078, "step": 25247 }, { "epoch": 3.852386474609375e-05, "step": 25247, "training_step_time": 0.10923433303833008 }, { "epoch": 3.8525390625e-05, "model_forward_time": 0.025164127349853516, "step": 25248 }, { "epoch": 3.8525390625e-05, "step": 25248, "training_step_time": 0.1074984073638916 }, { "epoch": 3.852691650390625e-05, "model_forward_time": 0.025119543075561523, "step": 25249 }, { "epoch": 3.852691650390625e-05, "step": 25249, "training_step_time": 0.10442519187927246 }, { "epoch": 3.85284423828125e-05, "grad_norm": 0.14675350487232208, "learning_rate": 6.698729810778065e-06, "loss": 0.0026, "step": 25250 }, { "epoch": 3.85284423828125e-05, "model_forward_time": 0.024845600128173828, "step": 25250 }, { "epoch": 3.85284423828125e-05, "step": 25250, "training_step_time": 0.18504643440246582 }, { "epoch": 3.852996826171875e-05, "model_forward_time": 0.024434328079223633, "step": 25251 }, { "epoch": 3.852996826171875e-05, "step": 25251, "training_step_time": 0.1173238754272461 }, { "epoch": 3.8531494140625e-05, "model_forward_time": 0.025635957717895508, "step": 25252 }, { "epoch": 3.8531494140625e-05, "step": 25252, "training_step_time": 0.11297369003295898 }, { "epoch": 3.853302001953125e-05, "model_forward_time": 0.024103879928588867, "step": 25253 }, { "epoch": 3.853302001953125e-05, "step": 25253, "training_step_time": 0.1129603385925293 }, { "epoch": 3.85345458984375e-05, "model_forward_time": 0.024142026901245117, "step": 25254 }, { "epoch": 3.85345458984375e-05, "step": 25254, "training_step_time": 0.11487889289855957 }, { "epoch": 3.853607177734375e-05, "model_forward_time": 0.02368330955505371, "step": 25255 }, { "epoch": 3.853607177734375e-05, "step": 25255, "training_step_time": 0.1786808967590332 }, { "epoch": 3.853759765625e-05, "model_forward_time": 0.024399757385253906, "step": 25256 }, { "epoch": 3.853759765625e-05, "step": 25256, "training_step_time": 0.16494536399841309 }, { "epoch": 3.853912353515625e-05, "model_forward_time": 0.024187564849853516, "step": 25257 }, { "epoch": 3.853912353515625e-05, "step": 25257, "training_step_time": 0.18324923515319824 }, { "epoch": 3.85406494140625e-05, "model_forward_time": 0.024520397186279297, "step": 25258 }, { "epoch": 3.85406494140625e-05, "step": 25258, "training_step_time": 0.1116330623626709 }, { "epoch": 3.854217529296875e-05, "model_forward_time": 0.02411341667175293, "step": 25259 }, { "epoch": 3.854217529296875e-05, "step": 25259, "training_step_time": 0.12581205368041992 }, { "epoch": 3.8543701171875e-05, "grad_norm": 0.0906037762761116, "learning_rate": 6.671198293627479e-06, "loss": 0.0033, "step": 25260 }, { "epoch": 3.8543701171875e-05, "model_forward_time": 0.024939298629760742, "step": 25260 }, { "epoch": 3.8543701171875e-05, "step": 25260, "training_step_time": 0.12609195709228516 }, { "epoch": 3.854522705078125e-05, "model_forward_time": 0.025319337844848633, "step": 25261 }, { "epoch": 3.854522705078125e-05, "step": 25261, "training_step_time": 0.11149287223815918 }, { "epoch": 3.85467529296875e-05, "model_forward_time": 0.02513265609741211, "step": 25262 }, { "epoch": 3.85467529296875e-05, "step": 25262, "training_step_time": 0.12021136283874512 }, { "epoch": 3.854827880859375e-05, "model_forward_time": 0.025261402130126953, "step": 25263 }, { "epoch": 3.854827880859375e-05, "step": 25263, "training_step_time": 0.10793924331665039 }, { "epoch": 3.85498046875e-05, "model_forward_time": 0.02543020248413086, "step": 25264 }, { "epoch": 3.85498046875e-05, "step": 25264, "training_step_time": 0.10606551170349121 }, { "epoch": 3.855133056640625e-05, "model_forward_time": 0.0254666805267334, "step": 25265 }, { "epoch": 3.855133056640625e-05, "step": 25265, "training_step_time": 0.10589218139648438 }, { "epoch": 3.85528564453125e-05, "model_forward_time": 0.025512218475341797, "step": 25266 }, { "epoch": 3.85528564453125e-05, "step": 25266, "training_step_time": 0.1088101863861084 }, { "epoch": 3.855438232421875e-05, "model_forward_time": 0.02538442611694336, "step": 25267 }, { "epoch": 3.855438232421875e-05, "step": 25267, "training_step_time": 0.10999250411987305 }, { "epoch": 3.8555908203125e-05, "model_forward_time": 0.02529740333557129, "step": 25268 }, { "epoch": 3.8555908203125e-05, "step": 25268, "training_step_time": 0.1115717887878418 }, { "epoch": 3.855743408203125e-05, "model_forward_time": 0.02535414695739746, "step": 25269 }, { "epoch": 3.855743408203125e-05, "step": 25269, "training_step_time": 0.2054903507232666 }, { "epoch": 3.85589599609375e-05, "grad_norm": 0.08070753514766693, "learning_rate": 6.6437194250566e-06, "loss": 0.0056, "step": 25270 }, { "epoch": 3.85589599609375e-05, "model_forward_time": 0.0259246826171875, "step": 25270 }, { "epoch": 3.85589599609375e-05, "step": 25270, "training_step_time": 0.12135863304138184 }, { "epoch": 3.856048583984375e-05, "model_forward_time": 0.024018287658691406, "step": 25271 }, { "epoch": 3.856048583984375e-05, "step": 25271, "training_step_time": 0.10303616523742676 }, { "epoch": 3.856201171875e-05, "model_forward_time": 0.024944543838500977, "step": 25272 }, { "epoch": 3.856201171875e-05, "step": 25272, "training_step_time": 0.10857033729553223 }, { "epoch": 3.856353759765625e-05, "model_forward_time": 0.02508831024169922, "step": 25273 }, { "epoch": 3.856353759765625e-05, "step": 25273, "training_step_time": 0.10518026351928711 }, { "epoch": 3.85650634765625e-05, "model_forward_time": 0.025967836380004883, "step": 25274 }, { "epoch": 3.85650634765625e-05, "step": 25274, "training_step_time": 0.1077573299407959 }, { "epoch": 3.856658935546875e-05, "model_forward_time": 0.02474665641784668, "step": 25275 }, { "epoch": 3.856658935546875e-05, "step": 25275, "training_step_time": 0.10894632339477539 }, { "epoch": 3.8568115234375e-05, "model_forward_time": 0.02567911148071289, "step": 25276 }, { "epoch": 3.8568115234375e-05, "step": 25276, "training_step_time": 0.10602593421936035 }, { "epoch": 3.856964111328125e-05, "model_forward_time": 0.02532672882080078, "step": 25277 }, { "epoch": 3.856964111328125e-05, "step": 25277, "training_step_time": 0.10677409172058105 }, { "epoch": 3.85711669921875e-05, "model_forward_time": 0.02521967887878418, "step": 25278 }, { "epoch": 3.85711669921875e-05, "step": 25278, "training_step_time": 0.10509228706359863 }, { "epoch": 3.857269287109375e-05, "model_forward_time": 0.0249483585357666, "step": 25279 }, { "epoch": 3.857269287109375e-05, "step": 25279, "training_step_time": 0.10787010192871094 }, { "epoch": 3.857421875e-05, "grad_norm": 0.11665597558021545, "learning_rate": 6.6162932384548515e-06, "loss": 0.0056, "step": 25280 }, { "epoch": 3.857421875e-05, "model_forward_time": 0.02484130859375, "step": 25280 }, { "epoch": 3.857421875e-05, "step": 25280, "training_step_time": 0.10541749000549316 }, { "epoch": 3.857574462890625e-05, "model_forward_time": 0.025354862213134766, "step": 25281 }, { "epoch": 3.857574462890625e-05, "step": 25281, "training_step_time": 0.19835305213928223 }, { "epoch": 3.85772705078125e-05, "model_forward_time": 0.02431964874267578, "step": 25282 }, { "epoch": 3.85772705078125e-05, "step": 25282, "training_step_time": 0.1661357879638672 }, { "epoch": 3.857879638671875e-05, "model_forward_time": 0.024013280868530273, "step": 25283 }, { "epoch": 3.857879638671875e-05, "step": 25283, "training_step_time": 0.1451733112335205 }, { "epoch": 3.8580322265625e-05, "model_forward_time": 0.024252891540527344, "step": 25284 }, { "epoch": 3.8580322265625e-05, "step": 25284, "training_step_time": 0.12817168235778809 }, { "epoch": 3.858184814453125e-05, "model_forward_time": 0.02437138557434082, "step": 25285 }, { "epoch": 3.858184814453125e-05, "step": 25285, "training_step_time": 0.15700125694274902 }, { "epoch": 3.85833740234375e-05, "model_forward_time": 0.02458333969116211, "step": 25286 }, { "epoch": 3.85833740234375e-05, "step": 25286, "training_step_time": 0.10454750061035156 }, { "epoch": 3.858489990234375e-05, "model_forward_time": 0.025320768356323242, "step": 25287 }, { "epoch": 3.858489990234375e-05, "step": 25287, "training_step_time": 0.10701179504394531 }, { "epoch": 3.858642578125e-05, "model_forward_time": 0.02560734748840332, "step": 25288 }, { "epoch": 3.858642578125e-05, "step": 25288, "training_step_time": 0.1059112548828125 }, { "epoch": 3.858795166015625e-05, "model_forward_time": 0.028051376342773438, "step": 25289 }, { "epoch": 3.858795166015625e-05, "step": 25289, "training_step_time": 0.10999107360839844 }, { "epoch": 3.85894775390625e-05, "grad_norm": 0.059240199625492096, "learning_rate": 6.588919767147639e-06, "loss": 0.0036, "step": 25290 }, { "epoch": 3.85894775390625e-05, "model_forward_time": 0.02502155303955078, "step": 25290 }, { "epoch": 3.85894775390625e-05, "step": 25290, "training_step_time": 0.10929417610168457 }, { "epoch": 3.859100341796875e-05, "model_forward_time": 0.025186777114868164, "step": 25291 }, { "epoch": 3.859100341796875e-05, "step": 25291, "training_step_time": 0.10497212409973145 }, { "epoch": 3.8592529296875e-05, "model_forward_time": 0.02509284019470215, "step": 25292 }, { "epoch": 3.8592529296875e-05, "step": 25292, "training_step_time": 0.10451841354370117 }, { "epoch": 3.859405517578125e-05, "model_forward_time": 0.02517223358154297, "step": 25293 }, { "epoch": 3.859405517578125e-05, "step": 25293, "training_step_time": 0.10543155670166016 }, { "epoch": 3.85955810546875e-05, "model_forward_time": 0.025140047073364258, "step": 25294 }, { "epoch": 3.85955810546875e-05, "step": 25294, "training_step_time": 0.11189889907836914 }, { "epoch": 3.859710693359375e-05, "model_forward_time": 0.02509140968322754, "step": 25295 }, { "epoch": 3.859710693359375e-05, "step": 25295, "training_step_time": 0.10634517669677734 }, { "epoch": 3.85986328125e-05, "model_forward_time": 0.026823997497558594, "step": 25296 }, { "epoch": 3.85986328125e-05, "step": 25296, "training_step_time": 0.1382431983947754 }, { "epoch": 3.860015869140625e-05, "model_forward_time": 0.025195598602294922, "step": 25297 }, { "epoch": 3.860015869140625e-05, "step": 25297, "training_step_time": 0.1555635929107666 }, { "epoch": 3.86016845703125e-05, "model_forward_time": 0.024409055709838867, "step": 25298 }, { "epoch": 3.86016845703125e-05, "step": 25298, "training_step_time": 0.13388967514038086 }, { "epoch": 3.860321044921875e-05, "model_forward_time": 0.024042606353759766, "step": 25299 }, { "epoch": 3.860321044921875e-05, "step": 25299, "training_step_time": 0.11905717849731445 }, { "epoch": 3.8604736328125e-05, "grad_norm": 0.1335275024175644, "learning_rate": 6.561599044396288e-06, "loss": 0.0055, "step": 25300 }, { "epoch": 3.8604736328125e-05, "model_forward_time": 0.025087594985961914, "step": 25300 }, { "epoch": 3.8604736328125e-05, "step": 25300, "training_step_time": 0.18134593963623047 }, { "epoch": 3.860626220703125e-05, "model_forward_time": 0.025032520294189453, "step": 25301 }, { "epoch": 3.860626220703125e-05, "step": 25301, "training_step_time": 0.1632080078125 }, { "epoch": 3.86077880859375e-05, "model_forward_time": 0.024095773696899414, "step": 25302 }, { "epoch": 3.86077880859375e-05, "step": 25302, "training_step_time": 0.17557477951049805 }, { "epoch": 3.860931396484375e-05, "model_forward_time": 0.024164676666259766, "step": 25303 }, { "epoch": 3.860931396484375e-05, "step": 25303, "training_step_time": 0.10519266128540039 }, { "epoch": 3.861083984375e-05, "model_forward_time": 0.024523258209228516, "step": 25304 }, { "epoch": 3.861083984375e-05, "step": 25304, "training_step_time": 0.11417293548583984 }, { "epoch": 3.861236572265625e-05, "model_forward_time": 0.024652719497680664, "step": 25305 }, { "epoch": 3.861236572265625e-05, "step": 25305, "training_step_time": 0.12977242469787598 }, { "epoch": 3.86138916015625e-05, "model_forward_time": 0.02606821060180664, "step": 25306 }, { "epoch": 3.86138916015625e-05, "step": 25306, "training_step_time": 0.12696194648742676 }, { "epoch": 3.861541748046875e-05, "model_forward_time": 0.024944305419921875, "step": 25307 }, { "epoch": 3.861541748046875e-05, "step": 25307, "training_step_time": 0.11117339134216309 }, { "epoch": 3.8616943359375e-05, "model_forward_time": 0.02504730224609375, "step": 25308 }, { "epoch": 3.8616943359375e-05, "step": 25308, "training_step_time": 0.1170191764831543 }, { "epoch": 3.861846923828125e-05, "model_forward_time": 0.025201082229614258, "step": 25309 }, { "epoch": 3.861846923828125e-05, "step": 25309, "training_step_time": 0.10639405250549316 }, { "epoch": 3.86199951171875e-05, "grad_norm": 0.3475443720817566, "learning_rate": 6.5343311033980895e-06, "loss": 0.0055, "step": 25310 }, { "epoch": 3.86199951171875e-05, "model_forward_time": 0.025807619094848633, "step": 25310 }, { "epoch": 3.86199951171875e-05, "step": 25310, "training_step_time": 0.10708856582641602 }, { "epoch": 3.862152099609375e-05, "model_forward_time": 0.02544879913330078, "step": 25311 }, { "epoch": 3.862152099609375e-05, "step": 25311, "training_step_time": 0.15946292877197266 }, { "epoch": 3.8623046875e-05, "model_forward_time": 0.024806737899780273, "step": 25312 }, { "epoch": 3.8623046875e-05, "step": 25312, "training_step_time": 0.15897512435913086 }, { "epoch": 3.862457275390625e-05, "model_forward_time": 0.024178504943847656, "step": 25313 }, { "epoch": 3.862457275390625e-05, "step": 25313, "training_step_time": 0.11198568344116211 }, { "epoch": 3.86260986328125e-05, "model_forward_time": 0.02472519874572754, "step": 25314 }, { "epoch": 3.86260986328125e-05, "step": 25314, "training_step_time": 0.16807842254638672 }, { "epoch": 3.862762451171875e-05, "model_forward_time": 0.02388763427734375, "step": 25315 }, { "epoch": 3.862762451171875e-05, "step": 25315, "training_step_time": 0.16779780387878418 }, { "epoch": 3.8629150390625e-05, "model_forward_time": 0.02494359016418457, "step": 25316 }, { "epoch": 3.8629150390625e-05, "step": 25316, "training_step_time": 0.10834145545959473 }, { "epoch": 3.863067626953125e-05, "model_forward_time": 0.02557682991027832, "step": 25317 }, { "epoch": 3.863067626953125e-05, "step": 25317, "training_step_time": 0.10547757148742676 }, { "epoch": 3.86322021484375e-05, "model_forward_time": 0.024999380111694336, "step": 25318 }, { "epoch": 3.86322021484375e-05, "step": 25318, "training_step_time": 0.10612010955810547 }, { "epoch": 3.863372802734375e-05, "model_forward_time": 0.025054931640625, "step": 25319 }, { "epoch": 3.863372802734375e-05, "step": 25319, "training_step_time": 0.10550165176391602 }, { "epoch": 3.863525390625e-05, "grad_norm": 0.07553213089704514, "learning_rate": 6.5071159772861436e-06, "loss": 0.0032, "step": 25320 }, { "epoch": 3.863525390625e-05, "model_forward_time": 0.025118589401245117, "step": 25320 }, { "epoch": 3.863525390625e-05, "step": 25320, "training_step_time": 0.10319638252258301 }, { "epoch": 3.863677978515625e-05, "model_forward_time": 0.02546977996826172, "step": 25321 }, { "epoch": 3.863677978515625e-05, "step": 25321, "training_step_time": 0.10753941535949707 }, { "epoch": 3.86383056640625e-05, "model_forward_time": 0.025127649307250977, "step": 25322 }, { "epoch": 3.86383056640625e-05, "step": 25322, "training_step_time": 0.10435867309570312 }, { "epoch": 3.863983154296875e-05, "model_forward_time": 0.025287628173828125, "step": 25323 }, { "epoch": 3.863983154296875e-05, "step": 25323, "training_step_time": 0.11372256278991699 }, { "epoch": 3.8641357421875e-05, "model_forward_time": 0.02526235580444336, "step": 25324 }, { "epoch": 3.8641357421875e-05, "step": 25324, "training_step_time": 0.10542845726013184 }, { "epoch": 3.864288330078125e-05, "model_forward_time": 0.028254270553588867, "step": 25325 }, { "epoch": 3.864288330078125e-05, "step": 25325, "training_step_time": 0.17860078811645508 }, { "epoch": 3.86444091796875e-05, "model_forward_time": 0.02472853660583496, "step": 25326 }, { "epoch": 3.86444091796875e-05, "step": 25326, "training_step_time": 0.1298069953918457 }, { "epoch": 3.864593505859375e-05, "model_forward_time": 0.02404642105102539, "step": 25327 }, { "epoch": 3.864593505859375e-05, "step": 25327, "training_step_time": 0.13077878952026367 }, { "epoch": 3.86474609375e-05, "model_forward_time": 0.024319887161254883, "step": 25328 }, { "epoch": 3.86474609375e-05, "step": 25328, "training_step_time": 0.10719943046569824 }, { "epoch": 3.864898681640625e-05, "model_forward_time": 0.024889230728149414, "step": 25329 }, { "epoch": 3.864898681640625e-05, "step": 25329, "training_step_time": 0.17375946044921875 }, { "epoch": 3.86505126953125e-05, "grad_norm": 0.16404885053634644, "learning_rate": 6.479953699129382e-06, "loss": 0.0111, "step": 25330 }, { "epoch": 3.86505126953125e-05, "model_forward_time": 0.024220705032348633, "step": 25330 }, { "epoch": 3.86505126953125e-05, "step": 25330, "training_step_time": 0.13584685325622559 }, { "epoch": 3.865203857421875e-05, "model_forward_time": 0.02403426170349121, "step": 25331 }, { "epoch": 3.865203857421875e-05, "step": 25331, "training_step_time": 0.11289763450622559 }, { "epoch": 3.8653564453125e-05, "model_forward_time": 0.025214433670043945, "step": 25332 }, { "epoch": 3.8653564453125e-05, "step": 25332, "training_step_time": 0.10968136787414551 }, { "epoch": 3.865509033203125e-05, "model_forward_time": 0.02488541603088379, "step": 25333 }, { "epoch": 3.865509033203125e-05, "step": 25333, "training_step_time": 0.10695290565490723 }, { "epoch": 3.86566162109375e-05, "model_forward_time": 0.025032520294189453, "step": 25334 }, { "epoch": 3.86566162109375e-05, "step": 25334, "training_step_time": 0.10447812080383301 }, { "epoch": 3.865814208984375e-05, "model_forward_time": 0.02508997917175293, "step": 25335 }, { "epoch": 3.865814208984375e-05, "step": 25335, "training_step_time": 0.1040501594543457 }, { "epoch": 3.865966796875e-05, "model_forward_time": 0.025412321090698242, "step": 25336 }, { "epoch": 3.865966796875e-05, "step": 25336, "training_step_time": 0.10371184349060059 }, { "epoch": 3.866119384765625e-05, "model_forward_time": 0.02505183219909668, "step": 25337 }, { "epoch": 3.866119384765625e-05, "step": 25337, "training_step_time": 0.10430741310119629 }, { "epoch": 3.86627197265625e-05, "model_forward_time": 0.024888992309570312, "step": 25338 }, { "epoch": 3.86627197265625e-05, "step": 25338, "training_step_time": 0.10526514053344727 }, { "epoch": 3.866424560546875e-05, "model_forward_time": 0.02501511573791504, "step": 25339 }, { "epoch": 3.866424560546875e-05, "step": 25339, "training_step_time": 0.10430669784545898 }, { "epoch": 3.8665771484375e-05, "grad_norm": 0.061237893998622894, "learning_rate": 6.452844301932559e-06, "loss": 0.0028, "step": 25340 }, { "epoch": 3.8665771484375e-05, "model_forward_time": 0.02504134178161621, "step": 25340 }, { "epoch": 3.8665771484375e-05, "step": 25340, "training_step_time": 0.10606074333190918 }, { "epoch": 3.866729736328125e-05, "model_forward_time": 0.024821043014526367, "step": 25341 }, { "epoch": 3.866729736328125e-05, "step": 25341, "training_step_time": 0.16702795028686523 }, { "epoch": 3.86688232421875e-05, "model_forward_time": 0.0244293212890625, "step": 25342 }, { "epoch": 3.86688232421875e-05, "step": 25342, "training_step_time": 0.24446725845336914 }, { "epoch": 3.867034912109375e-05, "model_forward_time": 0.02311396598815918, "step": 25343 }, { "epoch": 3.867034912109375e-05, "step": 25343, "training_step_time": 0.2396857738494873 }, { "epoch": 3.8671875e-05, "model_forward_time": 0.023109912872314453, "step": 25344 }, { "epoch": 3.8671875e-05, "step": 25344, "training_step_time": 0.23199129104614258 }, { "epoch": 3.867340087890625e-05, "model_forward_time": 0.0237734317779541, "step": 25345 }, { "epoch": 3.867340087890625e-05, "step": 25345, "training_step_time": 0.603271484375 }, { "epoch": 3.86749267578125e-05, "model_forward_time": 0.021698713302612305, "step": 25346 }, { "epoch": 3.86749267578125e-05, "step": 25346, "training_step_time": 0.17925190925598145 }, { "epoch": 3.867645263671875e-05, "model_forward_time": 0.022940397262573242, "step": 25347 }, { "epoch": 3.867645263671875e-05, "step": 25347, "training_step_time": 0.1144556999206543 }, { "epoch": 3.8677978515625e-05, "model_forward_time": 0.023488998413085938, "step": 25348 }, { "epoch": 3.8677978515625e-05, "step": 25348, "training_step_time": 0.11192727088928223 }, { "epoch": 3.867950439453125e-05, "model_forward_time": 0.02436518669128418, "step": 25349 }, { "epoch": 3.867950439453125e-05, "step": 25349, "training_step_time": 0.10484552383422852 }, { "epoch": 3.86810302734375e-05, "grad_norm": 0.14866124093532562, "learning_rate": 6.425787818636131e-06, "loss": 0.005, "step": 25350 }, { "epoch": 3.86810302734375e-05, "model_forward_time": 0.024731159210205078, "step": 25350 }, { "epoch": 3.86810302734375e-05, "step": 25350, "training_step_time": 0.10678625106811523 }, { "epoch": 3.868255615234375e-05, "model_forward_time": 0.02425098419189453, "step": 25351 }, { "epoch": 3.868255615234375e-05, "step": 25351, "training_step_time": 0.22017335891723633 }, { "epoch": 3.868408203125e-05, "model_forward_time": 0.02360820770263672, "step": 25352 }, { "epoch": 3.868408203125e-05, "step": 25352, "training_step_time": 0.13580703735351562 }, { "epoch": 3.868560791015625e-05, "model_forward_time": 0.02344369888305664, "step": 25353 }, { "epoch": 3.868560791015625e-05, "step": 25353, "training_step_time": 0.1740431785583496 }, { "epoch": 3.86871337890625e-05, "model_forward_time": 0.023654937744140625, "step": 25354 }, { "epoch": 3.86871337890625e-05, "step": 25354, "training_step_time": 0.13831400871276855 }, { "epoch": 3.868865966796875e-05, "model_forward_time": 0.024185895919799805, "step": 25355 }, { "epoch": 3.868865966796875e-05, "step": 25355, "training_step_time": 0.12133073806762695 }, { "epoch": 3.8690185546875e-05, "model_forward_time": 0.02361750602722168, "step": 25356 }, { "epoch": 3.8690185546875e-05, "step": 25356, "training_step_time": 0.11653017997741699 }, { "epoch": 3.869171142578125e-05, "model_forward_time": 0.024273395538330078, "step": 25357 }, { "epoch": 3.869171142578125e-05, "step": 25357, "training_step_time": 0.11455297470092773 }, { "epoch": 3.86932373046875e-05, "model_forward_time": 0.02433919906616211, "step": 25358 }, { "epoch": 3.86932373046875e-05, "step": 25358, "training_step_time": 0.11362361907958984 }, { "epoch": 3.869476318359375e-05, "model_forward_time": 0.023983001708984375, "step": 25359 }, { "epoch": 3.869476318359375e-05, "step": 25359, "training_step_time": 0.11241531372070312 }, { "epoch": 3.86962890625e-05, "grad_norm": 0.1651516705751419, "learning_rate": 6.398784282116293e-06, "loss": 0.0034, "step": 25360 }, { "epoch": 3.86962890625e-05, "model_forward_time": 0.02460503578186035, "step": 25360 }, { "epoch": 3.86962890625e-05, "step": 25360, "training_step_time": 0.11083292961120605 }, { "epoch": 3.869781494140625e-05, "model_forward_time": 0.024419784545898438, "step": 25361 }, { "epoch": 3.869781494140625e-05, "step": 25361, "training_step_time": 0.10853338241577148 }, { "epoch": 3.86993408203125e-05, "model_forward_time": 0.024364948272705078, "step": 25362 }, { "epoch": 3.86993408203125e-05, "step": 25362, "training_step_time": 0.11388635635375977 }, { "epoch": 3.870086669921875e-05, "model_forward_time": 0.024256229400634766, "step": 25363 }, { "epoch": 3.870086669921875e-05, "step": 25363, "training_step_time": 0.1103055477142334 }, { "epoch": 3.8702392578125e-05, "model_forward_time": 0.024222612380981445, "step": 25364 }, { "epoch": 3.8702392578125e-05, "step": 25364, "training_step_time": 0.11123013496398926 }, { "epoch": 3.870391845703125e-05, "model_forward_time": 0.024135828018188477, "step": 25365 }, { "epoch": 3.870391845703125e-05, "step": 25365, "training_step_time": 0.174058198928833 }, { "epoch": 3.87054443359375e-05, "model_forward_time": 0.023679733276367188, "step": 25366 }, { "epoch": 3.87054443359375e-05, "step": 25366, "training_step_time": 0.12102866172790527 }, { "epoch": 3.870697021484375e-05, "model_forward_time": 0.023659467697143555, "step": 25367 }, { "epoch": 3.870697021484375e-05, "step": 25367, "training_step_time": 0.12207388877868652 }, { "epoch": 3.870849609375e-05, "model_forward_time": 0.024297714233398438, "step": 25368 }, { "epoch": 3.870849609375e-05, "step": 25368, "training_step_time": 0.1427316665649414 }, { "epoch": 3.871002197265625e-05, "model_forward_time": 0.024331331253051758, "step": 25369 }, { "epoch": 3.871002197265625e-05, "step": 25369, "training_step_time": 0.11813926696777344 }, { "epoch": 3.87115478515625e-05, "grad_norm": 0.3118550777435303, "learning_rate": 6.3718337251848785e-06, "loss": 0.0062, "step": 25370 }, { "epoch": 3.87115478515625e-05, "model_forward_time": 0.02426457405090332, "step": 25370 }, { "epoch": 3.87115478515625e-05, "step": 25370, "training_step_time": 0.12220478057861328 }, { "epoch": 3.871307373046875e-05, "model_forward_time": 0.02439427375793457, "step": 25371 }, { "epoch": 3.871307373046875e-05, "step": 25371, "training_step_time": 0.11439251899719238 }, { "epoch": 3.8714599609375e-05, "model_forward_time": 0.024241924285888672, "step": 25372 }, { "epoch": 3.8714599609375e-05, "step": 25372, "training_step_time": 0.11199736595153809 }, { "epoch": 3.871612548828125e-05, "model_forward_time": 0.02438664436340332, "step": 25373 }, { "epoch": 3.871612548828125e-05, "step": 25373, "training_step_time": 0.1089169979095459 }, { "epoch": 3.87176513671875e-05, "model_forward_time": 0.023924827575683594, "step": 25374 }, { "epoch": 3.87176513671875e-05, "step": 25374, "training_step_time": 0.11027193069458008 }, { "epoch": 3.871917724609375e-05, "model_forward_time": 0.024034738540649414, "step": 25375 }, { "epoch": 3.871917724609375e-05, "step": 25375, "training_step_time": 0.10641932487487793 }, { "epoch": 3.8720703125e-05, "model_forward_time": 0.024310588836669922, "step": 25376 }, { "epoch": 3.8720703125e-05, "step": 25376, "training_step_time": 0.10937619209289551 }, { "epoch": 3.872222900390625e-05, "model_forward_time": 0.02431774139404297, "step": 25377 }, { "epoch": 3.872222900390625e-05, "step": 25377, "training_step_time": 0.10970902442932129 }, { "epoch": 3.87237548828125e-05, "model_forward_time": 0.023674726486206055, "step": 25378 }, { "epoch": 3.87237548828125e-05, "step": 25378, "training_step_time": 0.1079556941986084 }, { "epoch": 3.872528076171875e-05, "model_forward_time": 0.02441573143005371, "step": 25379 }, { "epoch": 3.872528076171875e-05, "step": 25379, "training_step_time": 0.1071014404296875 }, { "epoch": 3.8726806640625e-05, "grad_norm": 0.05735749006271362, "learning_rate": 6.344936180589351e-06, "loss": 0.0028, "step": 25380 }, { "epoch": 3.8726806640625e-05, "model_forward_time": 0.023977994918823242, "step": 25380 }, { "epoch": 3.8726806640625e-05, "step": 25380, "training_step_time": 0.17162346839904785 }, { "epoch": 3.872833251953125e-05, "model_forward_time": 0.023584365844726562, "step": 25381 }, { "epoch": 3.872833251953125e-05, "step": 25381, "training_step_time": 0.11606383323669434 }, { "epoch": 3.87298583984375e-05, "model_forward_time": 0.023459911346435547, "step": 25382 }, { "epoch": 3.87298583984375e-05, "step": 25382, "training_step_time": 0.10790681838989258 }, { "epoch": 3.873138427734375e-05, "model_forward_time": 0.024460792541503906, "step": 25383 }, { "epoch": 3.873138427734375e-05, "step": 25383, "training_step_time": 0.12716245651245117 }, { "epoch": 3.873291015625e-05, "model_forward_time": 0.024287939071655273, "step": 25384 }, { "epoch": 3.873291015625e-05, "step": 25384, "training_step_time": 0.12456107139587402 }, { "epoch": 3.873443603515625e-05, "model_forward_time": 0.02418804168701172, "step": 25385 }, { "epoch": 3.873443603515625e-05, "step": 25385, "training_step_time": 0.11058545112609863 }, { "epoch": 3.87359619140625e-05, "model_forward_time": 0.02461099624633789, "step": 25386 }, { "epoch": 3.87359619140625e-05, "step": 25386, "training_step_time": 0.11049628257751465 }, { "epoch": 3.873748779296875e-05, "model_forward_time": 0.025989532470703125, "step": 25387 }, { "epoch": 3.873748779296875e-05, "step": 25387, "training_step_time": 0.11275649070739746 }, { "epoch": 3.8739013671875e-05, "model_forward_time": 0.024090051651000977, "step": 25388 }, { "epoch": 3.8739013671875e-05, "step": 25388, "training_step_time": 0.1157078742980957 }, { "epoch": 3.874053955078125e-05, "model_forward_time": 0.02395319938659668, "step": 25389 }, { "epoch": 3.874053955078125e-05, "step": 25389, "training_step_time": 0.15762114524841309 }, { "epoch": 3.87420654296875e-05, "grad_norm": 0.04693985730409622, "learning_rate": 6.318091681012772e-06, "loss": 0.0033, "step": 25390 }, { "epoch": 3.87420654296875e-05, "model_forward_time": 0.024863004684448242, "step": 25390 }, { "epoch": 3.87420654296875e-05, "step": 25390, "training_step_time": 0.10880446434020996 }, { "epoch": 3.874359130859375e-05, "model_forward_time": 0.023578882217407227, "step": 25391 }, { "epoch": 3.874359130859375e-05, "step": 25391, "training_step_time": 0.11078977584838867 }, { "epoch": 3.87451171875e-05, "model_forward_time": 0.024204254150390625, "step": 25392 }, { "epoch": 3.87451171875e-05, "step": 25392, "training_step_time": 0.12035489082336426 }, { "epoch": 3.874664306640625e-05, "model_forward_time": 0.025246620178222656, "step": 25393 }, { "epoch": 3.874664306640625e-05, "step": 25393, "training_step_time": 0.1287531852722168 }, { "epoch": 3.87481689453125e-05, "model_forward_time": 0.02458643913269043, "step": 25394 }, { "epoch": 3.87481689453125e-05, "step": 25394, "training_step_time": 0.1159520149230957 }, { "epoch": 3.874969482421875e-05, "model_forward_time": 0.024595260620117188, "step": 25395 }, { "epoch": 3.874969482421875e-05, "step": 25395, "training_step_time": 0.11445164680480957 }, { "epoch": 3.8751220703125e-05, "model_forward_time": 0.02393341064453125, "step": 25396 }, { "epoch": 3.8751220703125e-05, "step": 25396, "training_step_time": 0.10309743881225586 }, { "epoch": 3.875274658203125e-05, "model_forward_time": 0.023143291473388672, "step": 25397 }, { "epoch": 3.875274658203125e-05, "step": 25397, "training_step_time": 0.14821481704711914 }, { "epoch": 3.87542724609375e-05, "model_forward_time": 0.02362966537475586, "step": 25398 }, { "epoch": 3.87542724609375e-05, "step": 25398, "training_step_time": 0.10236740112304688 }, { "epoch": 3.875579833984375e-05, "model_forward_time": 0.024408578872680664, "step": 25399 }, { "epoch": 3.875579833984375e-05, "step": 25399, "training_step_time": 0.19769644737243652 }, { "epoch": 3.875732421875e-05, "grad_norm": 0.07606486231088638, "learning_rate": 6.291300259073724e-06, "loss": 0.0031, "step": 25400 }, { "epoch": 3.875732421875e-05, "model_forward_time": 0.023204565048217773, "step": 25400 }, { "epoch": 3.875732421875e-05, "step": 25400, "training_step_time": 0.13474082946777344 }, { "epoch": 3.875885009765625e-05, "model_forward_time": 0.023603200912475586, "step": 25401 }, { "epoch": 3.875885009765625e-05, "step": 25401, "training_step_time": 0.10623502731323242 }, { "epoch": 3.87603759765625e-05, "model_forward_time": 0.02403569221496582, "step": 25402 }, { "epoch": 3.87603759765625e-05, "step": 25402, "training_step_time": 0.11485481262207031 }, { "epoch": 3.876190185546875e-05, "model_forward_time": 0.024296283721923828, "step": 25403 }, { "epoch": 3.876190185546875e-05, "step": 25403, "training_step_time": 0.10331153869628906 }, { "epoch": 3.8763427734375e-05, "model_forward_time": 0.024227380752563477, "step": 25404 }, { "epoch": 3.8763427734375e-05, "step": 25404, "training_step_time": 0.10404515266418457 }, { "epoch": 3.876495361328125e-05, "model_forward_time": 0.024018526077270508, "step": 25405 }, { "epoch": 3.876495361328125e-05, "step": 25405, "training_step_time": 0.10932493209838867 }, { "epoch": 3.87664794921875e-05, "model_forward_time": 0.02414703369140625, "step": 25406 }, { "epoch": 3.87664794921875e-05, "step": 25406, "training_step_time": 0.10357952117919922 }, { "epoch": 3.876800537109375e-05, "model_forward_time": 0.025178909301757812, "step": 25407 }, { "epoch": 3.876800537109375e-05, "step": 25407, "training_step_time": 0.1069800853729248 }, { "epoch": 3.876953125e-05, "model_forward_time": 0.023879051208496094, "step": 25408 }, { "epoch": 3.876953125e-05, "step": 25408, "training_step_time": 0.10327744483947754 }, { "epoch": 3.877105712890625e-05, "model_forward_time": 0.024163246154785156, "step": 25409 }, { "epoch": 3.877105712890625e-05, "step": 25409, "training_step_time": 0.1080482006072998 }, { "epoch": 3.87725830078125e-05, "grad_norm": 0.39555227756500244, "learning_rate": 6.264561947326331e-06, "loss": 0.0053, "step": 25410 }, { "epoch": 3.87725830078125e-05, "model_forward_time": 0.0241546630859375, "step": 25410 }, { "epoch": 3.87725830078125e-05, "step": 25410, "training_step_time": 0.10388040542602539 }, { "epoch": 3.877410888671875e-05, "model_forward_time": 0.024187326431274414, "step": 25411 }, { "epoch": 3.877410888671875e-05, "step": 25411, "training_step_time": 0.20867228507995605 }, { "epoch": 3.8775634765625e-05, "model_forward_time": 0.02392411231994629, "step": 25412 }, { "epoch": 3.8775634765625e-05, "step": 25412, "training_step_time": 0.1299741268157959 }, { "epoch": 3.877716064453125e-05, "model_forward_time": 0.023581981658935547, "step": 25413 }, { "epoch": 3.877716064453125e-05, "step": 25413, "training_step_time": 0.10932707786560059 }, { "epoch": 3.87786865234375e-05, "model_forward_time": 0.024120330810546875, "step": 25414 }, { "epoch": 3.87786865234375e-05, "step": 25414, "training_step_time": 0.11785531044006348 }, { "epoch": 3.878021240234375e-05, "model_forward_time": 0.024335145950317383, "step": 25415 }, { "epoch": 3.878021240234375e-05, "step": 25415, "training_step_time": 0.10770893096923828 }, { "epoch": 3.878173828125e-05, "model_forward_time": 0.024138927459716797, "step": 25416 }, { "epoch": 3.878173828125e-05, "step": 25416, "training_step_time": 0.12378740310668945 }, { "epoch": 3.878326416015625e-05, "model_forward_time": 0.024194955825805664, "step": 25417 }, { "epoch": 3.878326416015625e-05, "step": 25417, "training_step_time": 0.11513447761535645 }, { "epoch": 3.87847900390625e-05, "model_forward_time": 0.024196863174438477, "step": 25418 }, { "epoch": 3.87847900390625e-05, "step": 25418, "training_step_time": 0.11105608940124512 }, { "epoch": 3.878631591796875e-05, "model_forward_time": 0.024331092834472656, "step": 25419 }, { "epoch": 3.878631591796875e-05, "step": 25419, "training_step_time": 0.11199045181274414 }, { "epoch": 3.8787841796875e-05, "grad_norm": 0.40201613306999207, "learning_rate": 6.237876778260155e-06, "loss": 0.0041, "step": 25420 }, { "epoch": 3.8787841796875e-05, "model_forward_time": 0.02438807487487793, "step": 25420 }, { "epoch": 3.8787841796875e-05, "step": 25420, "training_step_time": 0.10882282257080078 }, { "epoch": 3.878936767578125e-05, "model_forward_time": 0.024309873580932617, "step": 25421 }, { "epoch": 3.878936767578125e-05, "step": 25421, "training_step_time": 0.10506296157836914 }, { "epoch": 3.87908935546875e-05, "model_forward_time": 0.0242612361907959, "step": 25422 }, { "epoch": 3.87908935546875e-05, "step": 25422, "training_step_time": 0.10626745223999023 }, { "epoch": 3.879241943359375e-05, "model_forward_time": 0.0245208740234375, "step": 25423 }, { "epoch": 3.879241943359375e-05, "step": 25423, "training_step_time": 0.10762453079223633 }, { "epoch": 3.87939453125e-05, "model_forward_time": 0.024312734603881836, "step": 25424 }, { "epoch": 3.87939453125e-05, "step": 25424, "training_step_time": 0.10991978645324707 }, { "epoch": 3.879547119140625e-05, "model_forward_time": 0.02463364601135254, "step": 25425 }, { "epoch": 3.879547119140625e-05, "step": 25425, "training_step_time": 0.11229228973388672 }, { "epoch": 3.87969970703125e-05, "model_forward_time": 0.024311065673828125, "step": 25426 }, { "epoch": 3.87969970703125e-05, "step": 25426, "training_step_time": 0.10521245002746582 }, { "epoch": 3.879852294921875e-05, "model_forward_time": 0.02433466911315918, "step": 25427 }, { "epoch": 3.879852294921875e-05, "step": 25427, "training_step_time": 0.18108463287353516 }, { "epoch": 3.8800048828125e-05, "model_forward_time": 0.023327112197875977, "step": 25428 }, { "epoch": 3.8800048828125e-05, "step": 25428, "training_step_time": 0.10900735855102539 }, { "epoch": 3.880157470703125e-05, "model_forward_time": 0.02354598045349121, "step": 25429 }, { "epoch": 3.880157470703125e-05, "step": 25429, "training_step_time": 0.11237430572509766 }, { "epoch": 3.88031005859375e-05, "grad_norm": 0.3016217052936554, "learning_rate": 6.211244784300197e-06, "loss": 0.0086, "step": 25430 }, { "epoch": 3.88031005859375e-05, "model_forward_time": 0.0246126651763916, "step": 25430 }, { "epoch": 3.88031005859375e-05, "step": 25430, "training_step_time": 0.10767698287963867 }, { "epoch": 3.880462646484375e-05, "model_forward_time": 0.02469348907470703, "step": 25431 }, { "epoch": 3.880462646484375e-05, "step": 25431, "training_step_time": 0.12374615669250488 }, { "epoch": 3.880615234375e-05, "model_forward_time": 0.024221181869506836, "step": 25432 }, { "epoch": 3.880615234375e-05, "step": 25432, "training_step_time": 0.10997200012207031 }, { "epoch": 3.880767822265625e-05, "model_forward_time": 0.02444601058959961, "step": 25433 }, { "epoch": 3.880767822265625e-05, "step": 25433, "training_step_time": 0.12539339065551758 }, { "epoch": 3.88092041015625e-05, "model_forward_time": 0.024117469787597656, "step": 25434 }, { "epoch": 3.88092041015625e-05, "step": 25434, "training_step_time": 0.11922788619995117 }, { "epoch": 3.881072998046875e-05, "model_forward_time": 0.024095535278320312, "step": 25435 }, { "epoch": 3.881072998046875e-05, "step": 25435, "training_step_time": 0.10279631614685059 }, { "epoch": 3.8812255859375e-05, "model_forward_time": 0.024552345275878906, "step": 25436 }, { "epoch": 3.8812255859375e-05, "step": 25436, "training_step_time": 0.14910650253295898 }, { "epoch": 3.881378173828125e-05, "model_forward_time": 0.025129079818725586, "step": 25437 }, { "epoch": 3.881378173828125e-05, "step": 25437, "training_step_time": 0.10879397392272949 }, { "epoch": 3.88153076171875e-05, "model_forward_time": 0.02491164207458496, "step": 25438 }, { "epoch": 3.88153076171875e-05, "step": 25438, "training_step_time": 0.10946846008300781 }, { "epoch": 3.881683349609375e-05, "model_forward_time": 0.027214765548706055, "step": 25439 }, { "epoch": 3.881683349609375e-05, "step": 25439, "training_step_time": 0.12307286262512207 }, { "epoch": 3.8818359375e-05, "grad_norm": 0.13172994554042816, "learning_rate": 6.184665997806832e-06, "loss": 0.0059, "step": 25440 }, { "epoch": 3.8818359375e-05, "model_forward_time": 0.025621652603149414, "step": 25440 }, { "epoch": 3.8818359375e-05, "step": 25440, "training_step_time": 0.12520098686218262 }, { "epoch": 3.881988525390625e-05, "model_forward_time": 0.025380373001098633, "step": 25441 }, { "epoch": 3.881988525390625e-05, "step": 25441, "training_step_time": 0.11148405075073242 }, { "epoch": 3.88214111328125e-05, "model_forward_time": 0.02640247344970703, "step": 25442 }, { "epoch": 3.88214111328125e-05, "step": 25442, "training_step_time": 0.12137484550476074 }, { "epoch": 3.882293701171875e-05, "model_forward_time": 0.02528095245361328, "step": 25443 }, { "epoch": 3.882293701171875e-05, "step": 25443, "training_step_time": 0.10832333564758301 }, { "epoch": 3.8824462890625e-05, "model_forward_time": 0.025427579879760742, "step": 25444 }, { "epoch": 3.8824462890625e-05, "step": 25444, "training_step_time": 0.10382676124572754 }, { "epoch": 3.882598876953125e-05, "model_forward_time": 0.025367021560668945, "step": 25445 }, { "epoch": 3.882598876953125e-05, "step": 25445, "training_step_time": 0.11740756034851074 }, { "epoch": 3.88275146484375e-05, "model_forward_time": 0.025234222412109375, "step": 25446 }, { "epoch": 3.88275146484375e-05, "step": 25446, "training_step_time": 0.11193680763244629 }, { "epoch": 3.882904052734375e-05, "model_forward_time": 0.025371074676513672, "step": 25447 }, { "epoch": 3.882904052734375e-05, "step": 25447, "training_step_time": 0.11610627174377441 }, { "epoch": 3.883056640625e-05, "model_forward_time": 0.025132179260253906, "step": 25448 }, { "epoch": 3.883056640625e-05, "step": 25448, "training_step_time": 0.1174323558807373 }, { "epoch": 3.883209228515625e-05, "model_forward_time": 0.025345563888549805, "step": 25449 }, { "epoch": 3.883209228515625e-05, "step": 25449, "training_step_time": 0.1506328582763672 }, { "epoch": 3.88336181640625e-05, "grad_norm": 0.10546907037496567, "learning_rate": 6.158140451075795e-06, "loss": 0.0098, "step": 25450 }, { "epoch": 3.88336181640625e-05, "model_forward_time": 0.025005578994750977, "step": 25450 }, { "epoch": 3.88336181640625e-05, "step": 25450, "training_step_time": 0.2106471061706543 }, { "epoch": 3.883514404296875e-05, "model_forward_time": 0.0242156982421875, "step": 25451 }, { "epoch": 3.883514404296875e-05, "step": 25451, "training_step_time": 0.1814403533935547 }, { "epoch": 3.8836669921875e-05, "model_forward_time": 0.023371458053588867, "step": 25452 }, { "epoch": 3.8836669921875e-05, "step": 25452, "training_step_time": 0.15460419654846191 }, { "epoch": 3.883819580078125e-05, "model_forward_time": 0.024232149124145508, "step": 25453 }, { "epoch": 3.883819580078125e-05, "step": 25453, "training_step_time": 0.14319753646850586 }, { "epoch": 3.88397216796875e-05, "model_forward_time": 0.024031400680541992, "step": 25454 }, { "epoch": 3.88397216796875e-05, "step": 25454, "training_step_time": 0.13707470893859863 }, { "epoch": 3.884124755859375e-05, "model_forward_time": 0.024137258529663086, "step": 25455 }, { "epoch": 3.884124755859375e-05, "step": 25455, "training_step_time": 0.12358450889587402 }, { "epoch": 3.88427734375e-05, "model_forward_time": 0.024290084838867188, "step": 25456 }, { "epoch": 3.88427734375e-05, "step": 25456, "training_step_time": 0.12165403366088867 }, { "epoch": 3.884429931640625e-05, "model_forward_time": 0.024755239486694336, "step": 25457 }, { "epoch": 3.884429931640625e-05, "step": 25457, "training_step_time": 0.15129709243774414 }, { "epoch": 3.88458251953125e-05, "model_forward_time": 0.02418971061706543, "step": 25458 }, { "epoch": 3.88458251953125e-05, "step": 25458, "training_step_time": 0.1242375373840332 }, { "epoch": 3.884735107421875e-05, "model_forward_time": 0.02429056167602539, "step": 25459 }, { "epoch": 3.884735107421875e-05, "step": 25459, "training_step_time": 0.19938230514526367 }, { "epoch": 3.8848876953125e-05, "grad_norm": 0.13312338292598724, "learning_rate": 6.131668176338118e-06, "loss": 0.004, "step": 25460 }, { "epoch": 3.8848876953125e-05, "model_forward_time": 0.02457737922668457, "step": 25460 }, { "epoch": 3.8848876953125e-05, "step": 25460, "training_step_time": 0.10796260833740234 }, { "epoch": 3.885040283203125e-05, "model_forward_time": 0.02477264404296875, "step": 25461 }, { "epoch": 3.885040283203125e-05, "step": 25461, "training_step_time": 0.11083841323852539 }, { "epoch": 3.88519287109375e-05, "model_forward_time": 0.0251312255859375, "step": 25462 }, { "epoch": 3.88519287109375e-05, "step": 25462, "training_step_time": 0.1939094066619873 }, { "epoch": 3.885345458984375e-05, "model_forward_time": 0.024007797241210938, "step": 25463 }, { "epoch": 3.885345458984375e-05, "step": 25463, "training_step_time": 0.10380721092224121 }, { "epoch": 3.885498046875e-05, "model_forward_time": 0.024460315704345703, "step": 25464 }, { "epoch": 3.885498046875e-05, "step": 25464, "training_step_time": 0.10663843154907227 }, { "epoch": 3.885650634765625e-05, "model_forward_time": 0.025180578231811523, "step": 25465 }, { "epoch": 3.885650634765625e-05, "step": 25465, "training_step_time": 0.10714197158813477 }, { "epoch": 3.88580322265625e-05, "model_forward_time": 0.024775028228759766, "step": 25466 }, { "epoch": 3.88580322265625e-05, "step": 25466, "training_step_time": 0.1098780632019043 }, { "epoch": 3.885955810546875e-05, "model_forward_time": 0.024919986724853516, "step": 25467 }, { "epoch": 3.885955810546875e-05, "step": 25467, "training_step_time": 0.10839533805847168 }, { "epoch": 3.8861083984375e-05, "model_forward_time": 0.024760007858276367, "step": 25468 }, { "epoch": 3.8861083984375e-05, "step": 25468, "training_step_time": 0.10966777801513672 }, { "epoch": 3.886260986328125e-05, "model_forward_time": 0.025433778762817383, "step": 25469 }, { "epoch": 3.886260986328125e-05, "step": 25469, "training_step_time": 0.10736584663391113 }, { "epoch": 3.88641357421875e-05, "grad_norm": 0.1521390676498413, "learning_rate": 6.1052492057601275e-06, "loss": 0.0096, "step": 25470 }, { "epoch": 3.88641357421875e-05, "model_forward_time": 0.02476644515991211, "step": 25470 }, { "epoch": 3.88641357421875e-05, "step": 25470, "training_step_time": 0.11431407928466797 }, { "epoch": 3.886566162109375e-05, "model_forward_time": 0.02544426918029785, "step": 25471 }, { "epoch": 3.886566162109375e-05, "step": 25471, "training_step_time": 0.10705018043518066 }, { "epoch": 3.88671875e-05, "model_forward_time": 0.02576446533203125, "step": 25472 }, { "epoch": 3.88671875e-05, "step": 25472, "training_step_time": 0.15487051010131836 }, { "epoch": 3.886871337890625e-05, "model_forward_time": 0.02484726905822754, "step": 25473 }, { "epoch": 3.886871337890625e-05, "step": 25473, "training_step_time": 0.10788321495056152 }, { "epoch": 3.88702392578125e-05, "model_forward_time": 0.024809598922729492, "step": 25474 }, { "epoch": 3.88702392578125e-05, "step": 25474, "training_step_time": 0.20707416534423828 }, { "epoch": 3.887176513671875e-05, "model_forward_time": 0.02425360679626465, "step": 25475 }, { "epoch": 3.887176513671875e-05, "step": 25475, "training_step_time": 0.1291813850402832 }, { "epoch": 3.8873291015625e-05, "model_forward_time": 0.02449941635131836, "step": 25476 }, { "epoch": 3.8873291015625e-05, "step": 25476, "training_step_time": 0.10614562034606934 }, { "epoch": 3.887481689453125e-05, "model_forward_time": 0.025359392166137695, "step": 25477 }, { "epoch": 3.887481689453125e-05, "step": 25477, "training_step_time": 0.11648726463317871 }, { "epoch": 3.88763427734375e-05, "model_forward_time": 0.024828672409057617, "step": 25478 }, { "epoch": 3.88763427734375e-05, "step": 25478, "training_step_time": 0.11557388305664062 }, { "epoch": 3.887786865234375e-05, "model_forward_time": 0.024878501892089844, "step": 25479 }, { "epoch": 3.887786865234375e-05, "step": 25479, "training_step_time": 0.10682368278503418 }, { "epoch": 3.887939453125e-05, "grad_norm": 0.09279409795999527, "learning_rate": 6.07888357144335e-06, "loss": 0.0043, "step": 25480 }, { "epoch": 3.887939453125e-05, "model_forward_time": 0.024669170379638672, "step": 25480 }, { "epoch": 3.887939453125e-05, "step": 25480, "training_step_time": 0.18503689765930176 }, { "epoch": 3.888092041015625e-05, "model_forward_time": 0.024567604064941406, "step": 25481 }, { "epoch": 3.888092041015625e-05, "step": 25481, "training_step_time": 0.11229681968688965 }, { "epoch": 3.88824462890625e-05, "model_forward_time": 0.024555206298828125, "step": 25482 }, { "epoch": 3.88824462890625e-05, "step": 25482, "training_step_time": 0.10936594009399414 }, { "epoch": 3.888397216796875e-05, "model_forward_time": 0.024868488311767578, "step": 25483 }, { "epoch": 3.888397216796875e-05, "step": 25483, "training_step_time": 0.1266651153564453 }, { "epoch": 3.8885498046875e-05, "model_forward_time": 0.025046825408935547, "step": 25484 }, { "epoch": 3.8885498046875e-05, "step": 25484, "training_step_time": 0.12912607192993164 }, { "epoch": 3.888702392578125e-05, "model_forward_time": 0.02477574348449707, "step": 25485 }, { "epoch": 3.888702392578125e-05, "step": 25485, "training_step_time": 0.11734819412231445 }, { "epoch": 3.88885498046875e-05, "model_forward_time": 0.02499842643737793, "step": 25486 }, { "epoch": 3.88885498046875e-05, "step": 25486, "training_step_time": 0.10732221603393555 }, { "epoch": 3.889007568359375e-05, "model_forward_time": 0.02551555633544922, "step": 25487 }, { "epoch": 3.889007568359375e-05, "step": 25487, "training_step_time": 0.11947464942932129 }, { "epoch": 3.88916015625e-05, "model_forward_time": 0.02565455436706543, "step": 25488 }, { "epoch": 3.88916015625e-05, "step": 25488, "training_step_time": 0.10640478134155273 }, { "epoch": 3.889312744140625e-05, "model_forward_time": 0.02460479736328125, "step": 25489 }, { "epoch": 3.889312744140625e-05, "step": 25489, "training_step_time": 0.14508295059204102 }, { "epoch": 3.88946533203125e-05, "grad_norm": 0.05056336522102356, "learning_rate": 6.052571305424531e-06, "loss": 0.0042, "step": 25490 }, { "epoch": 3.88946533203125e-05, "model_forward_time": 0.024528980255126953, "step": 25490 }, { "epoch": 3.88946533203125e-05, "step": 25490, "training_step_time": 0.10434603691101074 }, { "epoch": 3.889617919921875e-05, "model_forward_time": 0.02530503273010254, "step": 25491 }, { "epoch": 3.889617919921875e-05, "step": 25491, "training_step_time": 0.20235967636108398 }, { "epoch": 3.8897705078125e-05, "model_forward_time": 0.023984193801879883, "step": 25492 }, { "epoch": 3.8897705078125e-05, "step": 25492, "training_step_time": 0.170928955078125 }, { "epoch": 3.889923095703125e-05, "model_forward_time": 0.023818016052246094, "step": 25493 }, { "epoch": 3.889923095703125e-05, "step": 25493, "training_step_time": 0.19007015228271484 }, { "epoch": 3.89007568359375e-05, "model_forward_time": 0.024364233016967773, "step": 25494 }, { "epoch": 3.89007568359375e-05, "step": 25494, "training_step_time": 0.15430808067321777 }, { "epoch": 3.890228271484375e-05, "model_forward_time": 0.02402782440185547, "step": 25495 }, { "epoch": 3.890228271484375e-05, "step": 25495, "training_step_time": 0.1305985450744629 }, { "epoch": 3.890380859375e-05, "model_forward_time": 0.024447917938232422, "step": 25496 }, { "epoch": 3.890380859375e-05, "step": 25496, "training_step_time": 0.12353014945983887 }, { "epoch": 3.890533447265625e-05, "model_forward_time": 0.025117874145507812, "step": 25497 }, { "epoch": 3.890533447265625e-05, "step": 25497, "training_step_time": 0.12730169296264648 }, { "epoch": 3.89068603515625e-05, "model_forward_time": 0.024924755096435547, "step": 25498 }, { "epoch": 3.89068603515625e-05, "step": 25498, "training_step_time": 0.11938858032226562 }, { "epoch": 3.890838623046875e-05, "model_forward_time": 0.028336763381958008, "step": 25499 }, { "epoch": 3.890838623046875e-05, "step": 25499, "training_step_time": 0.11535835266113281 }, { "epoch": 3.8909912109375e-05, "grad_norm": 0.18776196241378784, "learning_rate": 6.026312439675552e-06, "loss": 0.0063, "step": 25500 }, { "epoch": 3.8909912109375e-05, "model_forward_time": 0.02557992935180664, "step": 25500 }, { "epoch": 3.8909912109375e-05, "step": 25500, "training_step_time": 0.1474306583404541 }, { "epoch": 3.891143798828125e-05, "model_forward_time": 0.024645328521728516, "step": 25501 }, { "epoch": 3.891143798828125e-05, "step": 25501, "training_step_time": 0.11099815368652344 }, { "epoch": 3.89129638671875e-05, "model_forward_time": 0.024839401245117188, "step": 25502 }, { "epoch": 3.89129638671875e-05, "step": 25502, "training_step_time": 0.19975566864013672 }, { "epoch": 3.891448974609375e-05, "model_forward_time": 0.024172067642211914, "step": 25503 }, { "epoch": 3.891448974609375e-05, "step": 25503, "training_step_time": 0.1842026710510254 }, { "epoch": 3.8916015625e-05, "model_forward_time": 0.024082422256469727, "step": 25504 }, { "epoch": 3.8916015625e-05, "step": 25504, "training_step_time": 0.13479351997375488 }, { "epoch": 3.891754150390625e-05, "model_forward_time": 0.023392200469970703, "step": 25505 }, { "epoch": 3.891754150390625e-05, "step": 25505, "training_step_time": 0.11596822738647461 }, { "epoch": 3.89190673828125e-05, "model_forward_time": 0.02487778663635254, "step": 25506 }, { "epoch": 3.89190673828125e-05, "step": 25506, "training_step_time": 0.10736489295959473 }, { "epoch": 3.892059326171875e-05, "model_forward_time": 0.02492833137512207, "step": 25507 }, { "epoch": 3.892059326171875e-05, "step": 25507, "training_step_time": 0.10467791557312012 }, { "epoch": 3.8922119140625e-05, "model_forward_time": 0.025055885314941406, "step": 25508 }, { "epoch": 3.8922119140625e-05, "step": 25508, "training_step_time": 0.10467839241027832 }, { "epoch": 3.892364501953125e-05, "model_forward_time": 0.024783611297607422, "step": 25509 }, { "epoch": 3.892364501953125e-05, "step": 25509, "training_step_time": 0.10506153106689453 }, { "epoch": 3.89251708984375e-05, "grad_norm": 0.10060159116983414, "learning_rate": 6.0001070061033945e-06, "loss": 0.0056, "step": 25510 }, { "epoch": 3.89251708984375e-05, "model_forward_time": 0.02523636817932129, "step": 25510 }, { "epoch": 3.89251708984375e-05, "step": 25510, "training_step_time": 0.10757756233215332 }, { "epoch": 3.892669677734375e-05, "model_forward_time": 0.0253751277923584, "step": 25511 }, { "epoch": 3.892669677734375e-05, "step": 25511, "training_step_time": 0.10904908180236816 }, { "epoch": 3.892822265625e-05, "model_forward_time": 0.025452375411987305, "step": 25512 }, { "epoch": 3.892822265625e-05, "step": 25512, "training_step_time": 0.10541844367980957 }, { "epoch": 3.892974853515625e-05, "model_forward_time": 0.02606201171875, "step": 25513 }, { "epoch": 3.892974853515625e-05, "step": 25513, "training_step_time": 0.10569357872009277 }, { "epoch": 3.89312744140625e-05, "model_forward_time": 0.025310277938842773, "step": 25514 }, { "epoch": 3.89312744140625e-05, "step": 25514, "training_step_time": 0.10470843315124512 }, { "epoch": 3.893280029296875e-05, "model_forward_time": 0.02553725242614746, "step": 25515 }, { "epoch": 3.893280029296875e-05, "step": 25515, "training_step_time": 0.10432934761047363 }, { "epoch": 3.8934326171875e-05, "model_forward_time": 0.025233983993530273, "step": 25516 }, { "epoch": 3.8934326171875e-05, "step": 25516, "training_step_time": 0.1124420166015625 }, { "epoch": 3.893585205078125e-05, "model_forward_time": 0.025113344192504883, "step": 25517 }, { "epoch": 3.893585205078125e-05, "step": 25517, "training_step_time": 0.1388993263244629 }, { "epoch": 3.89373779296875e-05, "model_forward_time": 0.025104522705078125, "step": 25518 }, { "epoch": 3.89373779296875e-05, "step": 25518, "training_step_time": 0.10932540893554688 }, { "epoch": 3.893890380859375e-05, "model_forward_time": 0.02524733543395996, "step": 25519 }, { "epoch": 3.893890380859375e-05, "step": 25519, "training_step_time": 0.1075446605682373 }, { "epoch": 3.89404296875e-05, "grad_norm": 0.16142567992210388, "learning_rate": 5.9739550365501494e-06, "loss": 0.0043, "step": 25520 }, { "epoch": 3.89404296875e-05, "model_forward_time": 0.02544569969177246, "step": 25520 }, { "epoch": 3.89404296875e-05, "step": 25520, "training_step_time": 0.18267250061035156 }, { "epoch": 3.894195556640625e-05, "model_forward_time": 0.024279117584228516, "step": 25521 }, { "epoch": 3.894195556640625e-05, "step": 25521, "training_step_time": 0.11330366134643555 }, { "epoch": 3.89434814453125e-05, "model_forward_time": 0.024857759475708008, "step": 25522 }, { "epoch": 3.89434814453125e-05, "step": 25522, "training_step_time": 0.10360121726989746 }, { "epoch": 3.894500732421875e-05, "model_forward_time": 0.024950742721557617, "step": 25523 }, { "epoch": 3.894500732421875e-05, "step": 25523, "training_step_time": 0.10548281669616699 }, { "epoch": 3.8946533203125e-05, "model_forward_time": 0.02507638931274414, "step": 25524 }, { "epoch": 3.8946533203125e-05, "step": 25524, "training_step_time": 0.10666632652282715 }, { "epoch": 3.894805908203125e-05, "model_forward_time": 0.024740219116210938, "step": 25525 }, { "epoch": 3.894805908203125e-05, "step": 25525, "training_step_time": 0.17696356773376465 }, { "epoch": 3.89495849609375e-05, "model_forward_time": 0.024800777435302734, "step": 25526 }, { "epoch": 3.89495849609375e-05, "step": 25526, "training_step_time": 0.11639523506164551 }, { "epoch": 3.895111083984375e-05, "model_forward_time": 0.0248258113861084, "step": 25527 }, { "epoch": 3.895111083984375e-05, "step": 25527, "training_step_time": 0.10590386390686035 }, { "epoch": 3.895263671875e-05, "model_forward_time": 0.025141239166259766, "step": 25528 }, { "epoch": 3.895263671875e-05, "step": 25528, "training_step_time": 0.12512445449829102 }, { "epoch": 3.895416259765625e-05, "model_forward_time": 0.025000810623168945, "step": 25529 }, { "epoch": 3.895416259765625e-05, "step": 25529, "training_step_time": 0.12553691864013672 }, { "epoch": 3.89556884765625e-05, "grad_norm": 0.19904199242591858, "learning_rate": 5.947856562792925e-06, "loss": 0.0089, "step": 25530 }, { "epoch": 3.89556884765625e-05, "model_forward_time": 0.025087594985961914, "step": 25530 }, { "epoch": 3.89556884765625e-05, "step": 25530, "training_step_time": 0.11814475059509277 }, { "epoch": 3.895721435546875e-05, "model_forward_time": 0.025214195251464844, "step": 25531 }, { "epoch": 3.895721435546875e-05, "step": 25531, "training_step_time": 0.10726809501647949 }, { "epoch": 3.8958740234375e-05, "model_forward_time": 0.024995088577270508, "step": 25532 }, { "epoch": 3.8958740234375e-05, "step": 25532, "training_step_time": 0.10777974128723145 }, { "epoch": 3.896026611328125e-05, "model_forward_time": 0.025814533233642578, "step": 25533 }, { "epoch": 3.896026611328125e-05, "step": 25533, "training_step_time": 0.10593247413635254 }, { "epoch": 3.89617919921875e-05, "model_forward_time": 0.02538609504699707, "step": 25534 }, { "epoch": 3.89617919921875e-05, "step": 25534, "training_step_time": 0.12034368515014648 }, { "epoch": 3.896331787109375e-05, "model_forward_time": 0.025116443634033203, "step": 25535 }, { "epoch": 3.896331787109375e-05, "step": 25535, "training_step_time": 0.10885787010192871 }, { "epoch": 3.896484375e-05, "model_forward_time": 0.02543926239013672, "step": 25536 }, { "epoch": 3.896484375e-05, "step": 25536, "training_step_time": 0.11658310890197754 }, { "epoch": 3.896636962890625e-05, "model_forward_time": 0.025649547576904297, "step": 25537 }, { "epoch": 3.896636962890625e-05, "step": 25537, "training_step_time": 0.14539098739624023 }, { "epoch": 3.89678955078125e-05, "model_forward_time": 0.025517702102661133, "step": 25538 }, { "epoch": 3.89678955078125e-05, "step": 25538, "training_step_time": 0.21758008003234863 }, { "epoch": 3.896942138671875e-05, "model_forward_time": 0.024441957473754883, "step": 25539 }, { "epoch": 3.896942138671875e-05, "step": 25539, "training_step_time": 0.1700425148010254 }, { "epoch": 3.8970947265625e-05, "grad_norm": 0.22975318133831024, "learning_rate": 5.921811616543821e-06, "loss": 0.0053, "step": 25540 }, { "epoch": 3.8970947265625e-05, "model_forward_time": 0.024410009384155273, "step": 25540 }, { "epoch": 3.8970947265625e-05, "step": 25540, "training_step_time": 0.1416919231414795 }, { "epoch": 3.897247314453125e-05, "model_forward_time": 0.024623394012451172, "step": 25541 }, { "epoch": 3.897247314453125e-05, "step": 25541, "training_step_time": 0.14141368865966797 }, { "epoch": 3.89739990234375e-05, "model_forward_time": 0.02434563636779785, "step": 25542 }, { "epoch": 3.89739990234375e-05, "step": 25542, "training_step_time": 0.126176118850708 }, { "epoch": 3.897552490234375e-05, "model_forward_time": 0.02461528778076172, "step": 25543 }, { "epoch": 3.897552490234375e-05, "step": 25543, "training_step_time": 0.1281261444091797 }, { "epoch": 3.897705078125e-05, "model_forward_time": 0.024849653244018555, "step": 25544 }, { "epoch": 3.897705078125e-05, "step": 25544, "training_step_time": 0.11936807632446289 }, { "epoch": 3.897857666015625e-05, "model_forward_time": 0.025206565856933594, "step": 25545 }, { "epoch": 3.897857666015625e-05, "step": 25545, "training_step_time": 0.13366270065307617 }, { "epoch": 3.89801025390625e-05, "model_forward_time": 0.02492046356201172, "step": 25546 }, { "epoch": 3.89801025390625e-05, "step": 25546, "training_step_time": 0.12001824378967285 }, { "epoch": 3.898162841796875e-05, "model_forward_time": 0.025002002716064453, "step": 25547 }, { "epoch": 3.898162841796875e-05, "step": 25547, "training_step_time": 0.2169508934020996 }, { "epoch": 3.8983154296875e-05, "model_forward_time": 0.024854421615600586, "step": 25548 }, { "epoch": 3.8983154296875e-05, "step": 25548, "training_step_time": 0.10827016830444336 }, { "epoch": 3.898468017578125e-05, "model_forward_time": 0.024919748306274414, "step": 25549 }, { "epoch": 3.898468017578125e-05, "step": 25549, "training_step_time": 0.11370134353637695 }, { "epoch": 3.89862060546875e-05, "grad_norm": 0.18529093265533447, "learning_rate": 5.895820229449906e-06, "loss": 0.0036, "step": 25550 }, { "epoch": 3.89862060546875e-05, "model_forward_time": 0.025145769119262695, "step": 25550 }, { "epoch": 3.89862060546875e-05, "step": 25550, "training_step_time": 0.11251330375671387 }, { "epoch": 3.898773193359375e-05, "model_forward_time": 0.02513885498046875, "step": 25551 }, { "epoch": 3.898773193359375e-05, "step": 25551, "training_step_time": 0.10723400115966797 }, { "epoch": 3.89892578125e-05, "model_forward_time": 0.02491307258605957, "step": 25552 }, { "epoch": 3.89892578125e-05, "step": 25552, "training_step_time": 0.10551571846008301 }, { "epoch": 3.899078369140625e-05, "model_forward_time": 0.025411367416381836, "step": 25553 }, { "epoch": 3.899078369140625e-05, "step": 25553, "training_step_time": 0.10664844512939453 }, { "epoch": 3.89923095703125e-05, "model_forward_time": 0.025348663330078125, "step": 25554 }, { "epoch": 3.89923095703125e-05, "step": 25554, "training_step_time": 0.11042571067810059 }, { "epoch": 3.899383544921875e-05, "model_forward_time": 0.02509307861328125, "step": 25555 }, { "epoch": 3.899383544921875e-05, "step": 25555, "training_step_time": 0.10487008094787598 }, { "epoch": 3.8995361328125e-05, "model_forward_time": 0.025470972061157227, "step": 25556 }, { "epoch": 3.8995361328125e-05, "step": 25556, "training_step_time": 0.10513949394226074 }, { "epoch": 3.899688720703125e-05, "model_forward_time": 0.02498602867126465, "step": 25557 }, { "epoch": 3.899688720703125e-05, "step": 25557, "training_step_time": 0.10964155197143555 }, { "epoch": 3.89984130859375e-05, "model_forward_time": 0.02649235725402832, "step": 25558 }, { "epoch": 3.89984130859375e-05, "step": 25558, "training_step_time": 0.11021280288696289 }, { "epoch": 3.899993896484375e-05, "model_forward_time": 0.025105714797973633, "step": 25559 }, { "epoch": 3.899993896484375e-05, "step": 25559, "training_step_time": 0.1058199405670166 }, { "epoch": 3.900146484375e-05, "grad_norm": 0.29988551139831543, "learning_rate": 5.869882433093155e-06, "loss": 0.0076, "step": 25560 }, { "epoch": 3.900146484375e-05, "model_forward_time": 0.025360822677612305, "step": 25560 }, { "epoch": 3.900146484375e-05, "step": 25560, "training_step_time": 0.10655856132507324 }, { "epoch": 3.900299072265625e-05, "model_forward_time": 0.02500176429748535, "step": 25561 }, { "epoch": 3.900299072265625e-05, "step": 25561, "training_step_time": 0.13875365257263184 }, { "epoch": 3.90045166015625e-05, "model_forward_time": 0.02505636215209961, "step": 25562 }, { "epoch": 3.90045166015625e-05, "step": 25562, "training_step_time": 0.1727886199951172 }, { "epoch": 3.900604248046875e-05, "model_forward_time": 0.02421259880065918, "step": 25563 }, { "epoch": 3.900604248046875e-05, "step": 25563, "training_step_time": 0.10332989692687988 }, { "epoch": 3.9007568359375e-05, "model_forward_time": 0.024941682815551758, "step": 25564 }, { "epoch": 3.9007568359375e-05, "step": 25564, "training_step_time": 0.14249873161315918 }, { "epoch": 3.900909423828125e-05, "model_forward_time": 0.024708986282348633, "step": 25565 }, { "epoch": 3.900909423828125e-05, "step": 25565, "training_step_time": 0.14078879356384277 }, { "epoch": 3.90106201171875e-05, "model_forward_time": 0.024506807327270508, "step": 25566 }, { "epoch": 3.90106201171875e-05, "step": 25566, "training_step_time": 0.10442519187927246 }, { "epoch": 3.901214599609375e-05, "model_forward_time": 0.0277099609375, "step": 25567 }, { "epoch": 3.901214599609375e-05, "step": 25567, "training_step_time": 0.10838723182678223 }, { "epoch": 3.9013671875e-05, "model_forward_time": 0.02527618408203125, "step": 25568 }, { "epoch": 3.9013671875e-05, "step": 25568, "training_step_time": 0.10336041450500488 }, { "epoch": 3.901519775390625e-05, "model_forward_time": 0.025417804718017578, "step": 25569 }, { "epoch": 3.901519775390625e-05, "step": 25569, "training_step_time": 0.10367798805236816 }, { "epoch": 3.90167236328125e-05, "grad_norm": 0.16870911419391632, "learning_rate": 5.843998258990452e-06, "loss": 0.0053, "step": 25570 }, { "epoch": 3.90167236328125e-05, "model_forward_time": 0.02520155906677246, "step": 25570 }, { "epoch": 3.90167236328125e-05, "step": 25570, "training_step_time": 0.10549402236938477 }, { "epoch": 3.901824951171875e-05, "model_forward_time": 0.026612281799316406, "step": 25571 }, { "epoch": 3.901824951171875e-05, "step": 25571, "training_step_time": 0.12080693244934082 }, { "epoch": 3.9019775390625e-05, "model_forward_time": 0.02537369728088379, "step": 25572 }, { "epoch": 3.9019775390625e-05, "step": 25572, "training_step_time": 0.11647725105285645 }, { "epoch": 3.902130126953125e-05, "model_forward_time": 0.025553226470947266, "step": 25573 }, { "epoch": 3.902130126953125e-05, "step": 25573, "training_step_time": 0.11553478240966797 }, { "epoch": 3.90228271484375e-05, "model_forward_time": 0.025547504425048828, "step": 25574 }, { "epoch": 3.90228271484375e-05, "step": 25574, "training_step_time": 0.11660146713256836 }, { "epoch": 3.902435302734375e-05, "model_forward_time": 0.025110960006713867, "step": 25575 }, { "epoch": 3.902435302734375e-05, "step": 25575, "training_step_time": 0.13137245178222656 }, { "epoch": 3.902587890625e-05, "model_forward_time": 0.025641679763793945, "step": 25576 }, { "epoch": 3.902587890625e-05, "step": 25576, "training_step_time": 0.13186287879943848 }, { "epoch": 3.902740478515625e-05, "model_forward_time": 0.0248410701751709, "step": 25577 }, { "epoch": 3.902740478515625e-05, "step": 25577, "training_step_time": 0.1991100311279297 }, { "epoch": 3.90289306640625e-05, "model_forward_time": 0.025206804275512695, "step": 25578 }, { "epoch": 3.90289306640625e-05, "step": 25578, "training_step_time": 0.10902571678161621 }, { "epoch": 3.903045654296875e-05, "model_forward_time": 0.024801254272460938, "step": 25579 }, { "epoch": 3.903045654296875e-05, "step": 25579, "training_step_time": 0.11234736442565918 }, { "epoch": 3.9031982421875e-05, "grad_norm": 0.07595854252576828, "learning_rate": 5.818167738593505e-06, "loss": 0.0044, "step": 25580 }, { "epoch": 3.9031982421875e-05, "model_forward_time": 0.025113344192504883, "step": 25580 }, { "epoch": 3.9031982421875e-05, "step": 25580, "training_step_time": 0.11252951622009277 }, { "epoch": 3.903350830078125e-05, "model_forward_time": 0.02551102638244629, "step": 25581 }, { "epoch": 3.903350830078125e-05, "step": 25581, "training_step_time": 0.11149215698242188 }, { "epoch": 3.90350341796875e-05, "model_forward_time": 0.02556443214416504, "step": 25582 }, { "epoch": 3.90350341796875e-05, "step": 25582, "training_step_time": 0.15487217903137207 }, { "epoch": 3.903656005859375e-05, "model_forward_time": 0.024904489517211914, "step": 25583 }, { "epoch": 3.903656005859375e-05, "step": 25583, "training_step_time": 0.15715956687927246 }, { "epoch": 3.90380859375e-05, "model_forward_time": 0.02442646026611328, "step": 25584 }, { "epoch": 3.90380859375e-05, "step": 25584, "training_step_time": 0.12434697151184082 }, { "epoch": 3.903961181640625e-05, "model_forward_time": 0.024468183517456055, "step": 25585 }, { "epoch": 3.903961181640625e-05, "step": 25585, "training_step_time": 0.10962200164794922 }, { "epoch": 3.90411376953125e-05, "model_forward_time": 0.025383949279785156, "step": 25586 }, { "epoch": 3.90411376953125e-05, "step": 25586, "training_step_time": 0.10750842094421387 }, { "epoch": 3.904266357421875e-05, "model_forward_time": 0.0253145694732666, "step": 25587 }, { "epoch": 3.904266357421875e-05, "step": 25587, "training_step_time": 0.10890340805053711 }, { "epoch": 3.9044189453125e-05, "model_forward_time": 0.024791955947875977, "step": 25588 }, { "epoch": 3.9044189453125e-05, "step": 25588, "training_step_time": 0.10819697380065918 }, { "epoch": 3.904571533203125e-05, "model_forward_time": 0.024837732315063477, "step": 25589 }, { "epoch": 3.904571533203125e-05, "step": 25589, "training_step_time": 0.10453629493713379 }, { "epoch": 3.90472412109375e-05, "grad_norm": 0.0564974881708622, "learning_rate": 5.79239090328883e-06, "loss": 0.0061, "step": 25590 }, { "epoch": 3.90472412109375e-05, "model_forward_time": 0.025051355361938477, "step": 25590 }, { "epoch": 3.90472412109375e-05, "step": 25590, "training_step_time": 0.10926365852355957 }, { "epoch": 3.904876708984375e-05, "model_forward_time": 0.02535700798034668, "step": 25591 }, { "epoch": 3.904876708984375e-05, "step": 25591, "training_step_time": 0.17145252227783203 }, { "epoch": 3.905029296875e-05, "model_forward_time": 0.023957490921020508, "step": 25592 }, { "epoch": 3.905029296875e-05, "step": 25592, "training_step_time": 0.1307966709136963 }, { "epoch": 3.905181884765625e-05, "model_forward_time": 0.024610519409179688, "step": 25593 }, { "epoch": 3.905181884765625e-05, "step": 25593, "training_step_time": 0.20780324935913086 }, { "epoch": 3.90533447265625e-05, "model_forward_time": 0.023279190063476562, "step": 25594 }, { "epoch": 3.90533447265625e-05, "step": 25594, "training_step_time": 0.10392260551452637 }, { "epoch": 3.905487060546875e-05, "model_forward_time": 0.024003267288208008, "step": 25595 }, { "epoch": 3.905487060546875e-05, "step": 25595, "training_step_time": 0.10835456848144531 }, { "epoch": 3.9056396484375e-05, "model_forward_time": 0.02535080909729004, "step": 25596 }, { "epoch": 3.9056396484375e-05, "step": 25596, "training_step_time": 0.16095876693725586 }, { "epoch": 3.905792236328125e-05, "model_forward_time": 0.024729013442993164, "step": 25597 }, { "epoch": 3.905792236328125e-05, "step": 25597, "training_step_time": 0.10477471351623535 }, { "epoch": 3.90594482421875e-05, "model_forward_time": 0.024656295776367188, "step": 25598 }, { "epoch": 3.90594482421875e-05, "step": 25598, "training_step_time": 0.11065888404846191 }, { "epoch": 3.906097412109375e-05, "model_forward_time": 0.025243282318115234, "step": 25599 }, { "epoch": 3.906097412109375e-05, "step": 25599, "training_step_time": 0.10509681701660156 }, { "epoch": 3.90625e-05, "grad_norm": 0.07542379200458527, "learning_rate": 5.766667784397706e-06, "loss": 0.0044, "step": 25600 }, { "epoch": 3.90625e-05, "model_forward_time": 0.025211334228515625, "step": 25600 }, { "epoch": 3.90625e-05, "step": 25600, "training_step_time": 0.10764813423156738 }, { "epoch": 3.906402587890625e-05, "model_forward_time": 0.025035858154296875, "step": 25601 }, { "epoch": 3.906402587890625e-05, "step": 25601, "training_step_time": 0.10495615005493164 }, { "epoch": 3.90655517578125e-05, "model_forward_time": 0.024874210357666016, "step": 25602 }, { "epoch": 3.90655517578125e-05, "step": 25602, "training_step_time": 0.10628414154052734 }, { "epoch": 3.906707763671875e-05, "model_forward_time": 0.025064706802368164, "step": 25603 }, { "epoch": 3.906707763671875e-05, "step": 25603, "training_step_time": 0.10490751266479492 }, { "epoch": 3.9068603515625e-05, "model_forward_time": 0.0250551700592041, "step": 25604 }, { "epoch": 3.9068603515625e-05, "step": 25604, "training_step_time": 0.10753703117370605 }, { "epoch": 3.907012939453125e-05, "model_forward_time": 0.025205612182617188, "step": 25605 }, { "epoch": 3.907012939453125e-05, "step": 25605, "training_step_time": 0.1823282241821289 }, { "epoch": 3.90716552734375e-05, "model_forward_time": 0.024515628814697266, "step": 25606 }, { "epoch": 3.90716552734375e-05, "step": 25606, "training_step_time": 0.16070127487182617 }, { "epoch": 3.907318115234375e-05, "model_forward_time": 0.024546384811401367, "step": 25607 }, { "epoch": 3.907318115234375e-05, "step": 25607, "training_step_time": 0.15581202507019043 }, { "epoch": 3.907470703125e-05, "model_forward_time": 0.024753332138061523, "step": 25608 }, { "epoch": 3.907470703125e-05, "step": 25608, "training_step_time": 0.11556077003479004 }, { "epoch": 3.907623291015625e-05, "model_forward_time": 0.02471303939819336, "step": 25609 }, { "epoch": 3.907623291015625e-05, "step": 25609, "training_step_time": 0.15000581741333008 }, { "epoch": 3.90777587890625e-05, "grad_norm": 0.06577904522418976, "learning_rate": 5.740998413176163e-06, "loss": 0.0073, "step": 25610 }, { "epoch": 3.90777587890625e-05, "model_forward_time": 0.02368474006652832, "step": 25610 }, { "epoch": 3.90777587890625e-05, "step": 25610, "training_step_time": 0.1308121681213379 }, { "epoch": 3.907928466796875e-05, "model_forward_time": 0.024390220642089844, "step": 25611 }, { "epoch": 3.907928466796875e-05, "step": 25611, "training_step_time": 0.13128447532653809 }, { "epoch": 3.9080810546875e-05, "model_forward_time": 0.023720741271972656, "step": 25612 }, { "epoch": 3.9080810546875e-05, "step": 25612, "training_step_time": 0.12599420547485352 }, { "epoch": 3.908233642578125e-05, "model_forward_time": 0.023225784301757812, "step": 25613 }, { "epoch": 3.908233642578125e-05, "step": 25613, "training_step_time": 0.12048888206481934 }, { "epoch": 3.90838623046875e-05, "model_forward_time": 0.023950576782226562, "step": 25614 }, { "epoch": 3.90838623046875e-05, "step": 25614, "training_step_time": 0.1151275634765625 }, { "epoch": 3.908538818359375e-05, "model_forward_time": 0.024353504180908203, "step": 25615 }, { "epoch": 3.908538818359375e-05, "step": 25615, "training_step_time": 0.19266080856323242 }, { "epoch": 3.90869140625e-05, "model_forward_time": 0.024433612823486328, "step": 25616 }, { "epoch": 3.90869140625e-05, "step": 25616, "training_step_time": 0.12280988693237305 }, { "epoch": 3.908843994140625e-05, "model_forward_time": 0.02443528175354004, "step": 25617 }, { "epoch": 3.908843994140625e-05, "step": 25617, "training_step_time": 0.10584902763366699 }, { "epoch": 3.90899658203125e-05, "model_forward_time": 0.025110483169555664, "step": 25618 }, { "epoch": 3.90899658203125e-05, "step": 25618, "training_step_time": 0.12384200096130371 }, { "epoch": 3.909149169921875e-05, "model_forward_time": 0.025412321090698242, "step": 25619 }, { "epoch": 3.909149169921875e-05, "step": 25619, "training_step_time": 0.12585735321044922 }, { "epoch": 3.9093017578125e-05, "grad_norm": 0.11997129768133163, "learning_rate": 5.715382820814885e-06, "loss": 0.0072, "step": 25620 }, { "epoch": 3.9093017578125e-05, "model_forward_time": 0.024902820587158203, "step": 25620 }, { "epoch": 3.9093017578125e-05, "step": 25620, "training_step_time": 0.11174440383911133 }, { "epoch": 3.909454345703125e-05, "model_forward_time": 0.02500295639038086, "step": 25621 }, { "epoch": 3.909454345703125e-05, "step": 25621, "training_step_time": 0.11805343627929688 }, { "epoch": 3.90960693359375e-05, "model_forward_time": 0.02550220489501953, "step": 25622 }, { "epoch": 3.90960693359375e-05, "step": 25622, "training_step_time": 0.10918116569519043 }, { "epoch": 3.909759521484375e-05, "model_forward_time": 0.026175260543823242, "step": 25623 }, { "epoch": 3.909759521484375e-05, "step": 25623, "training_step_time": 0.10597038269042969 }, { "epoch": 3.909912109375e-05, "model_forward_time": 0.025219202041625977, "step": 25624 }, { "epoch": 3.909912109375e-05, "step": 25624, "training_step_time": 0.1372206211090088 }, { "epoch": 3.910064697265625e-05, "model_forward_time": 0.025004148483276367, "step": 25625 }, { "epoch": 3.910064697265625e-05, "step": 25625, "training_step_time": 0.18448543548583984 }, { "epoch": 3.91021728515625e-05, "model_forward_time": 0.024839162826538086, "step": 25626 }, { "epoch": 3.91021728515625e-05, "step": 25626, "training_step_time": 0.1083991527557373 }, { "epoch": 3.910369873046875e-05, "model_forward_time": 0.024603843688964844, "step": 25627 }, { "epoch": 3.910369873046875e-05, "step": 25627, "training_step_time": 0.104400634765625 }, { "epoch": 3.9105224609375e-05, "model_forward_time": 0.02505183219909668, "step": 25628 }, { "epoch": 3.9105224609375e-05, "step": 25628, "training_step_time": 0.12014245986938477 }, { "epoch": 3.910675048828125e-05, "model_forward_time": 0.025826454162597656, "step": 25629 }, { "epoch": 3.910675048828125e-05, "step": 25629, "training_step_time": 0.12395095825195312 }, { "epoch": 3.91082763671875e-05, "grad_norm": 0.15930169820785522, "learning_rate": 5.689821038439263e-06, "loss": 0.0073, "step": 25630 }, { "epoch": 3.91082763671875e-05, "model_forward_time": 0.027637243270874023, "step": 25630 }, { "epoch": 3.91082763671875e-05, "step": 25630, "training_step_time": 0.11075472831726074 }, { "epoch": 3.910980224609375e-05, "model_forward_time": 0.02682781219482422, "step": 25631 }, { "epoch": 3.910980224609375e-05, "step": 25631, "training_step_time": 0.11002898216247559 }, { "epoch": 3.9111328125e-05, "model_forward_time": 0.026536941528320312, "step": 25632 }, { "epoch": 3.9111328125e-05, "step": 25632, "training_step_time": 0.10537457466125488 }, { "epoch": 3.911285400390625e-05, "model_forward_time": 0.02462172508239746, "step": 25633 }, { "epoch": 3.911285400390625e-05, "step": 25633, "training_step_time": 0.10932016372680664 }, { "epoch": 3.91143798828125e-05, "model_forward_time": 0.025548696517944336, "step": 25634 }, { "epoch": 3.91143798828125e-05, "step": 25634, "training_step_time": 0.10496759414672852 }, { "epoch": 3.911590576171875e-05, "model_forward_time": 0.02543187141418457, "step": 25635 }, { "epoch": 3.911590576171875e-05, "step": 25635, "training_step_time": 0.10525250434875488 }, { "epoch": 3.9117431640625e-05, "model_forward_time": 0.025232315063476562, "step": 25636 }, { "epoch": 3.9117431640625e-05, "step": 25636, "training_step_time": 0.15641379356384277 }, { "epoch": 3.911895751953125e-05, "model_forward_time": 0.025319814682006836, "step": 25637 }, { "epoch": 3.911895751953125e-05, "step": 25637, "training_step_time": 0.11387395858764648 }, { "epoch": 3.91204833984375e-05, "model_forward_time": 0.025980472564697266, "step": 25638 }, { "epoch": 3.91204833984375e-05, "step": 25638, "training_step_time": 0.21283435821533203 }, { "epoch": 3.912200927734375e-05, "model_forward_time": 0.025949716567993164, "step": 25639 }, { "epoch": 3.912200927734375e-05, "step": 25639, "training_step_time": 0.10436105728149414 }, { "epoch": 3.912353515625e-05, "grad_norm": 0.07371170818805695, "learning_rate": 5.6643130971092525e-06, "loss": 0.0029, "step": 25640 }, { "epoch": 3.912353515625e-05, "model_forward_time": 0.025353670120239258, "step": 25640 }, { "epoch": 3.912353515625e-05, "step": 25640, "training_step_time": 0.11340928077697754 }, { "epoch": 3.912506103515625e-05, "model_forward_time": 0.02543354034423828, "step": 25641 }, { "epoch": 3.912506103515625e-05, "step": 25641, "training_step_time": 0.19071650505065918 }, { "epoch": 3.91265869140625e-05, "model_forward_time": 0.024455785751342773, "step": 25642 }, { "epoch": 3.91265869140625e-05, "step": 25642, "training_step_time": 0.10558080673217773 }, { "epoch": 3.912811279296875e-05, "model_forward_time": 0.0236358642578125, "step": 25643 }, { "epoch": 3.912811279296875e-05, "step": 25643, "training_step_time": 0.10583257675170898 }, { "epoch": 3.9129638671875e-05, "model_forward_time": 0.025152921676635742, "step": 25644 }, { "epoch": 3.9129638671875e-05, "step": 25644, "training_step_time": 0.10885071754455566 }, { "epoch": 3.913116455078125e-05, "model_forward_time": 0.02553868293762207, "step": 25645 }, { "epoch": 3.913116455078125e-05, "step": 25645, "training_step_time": 0.10665583610534668 }, { "epoch": 3.91326904296875e-05, "model_forward_time": 0.02538466453552246, "step": 25646 }, { "epoch": 3.91326904296875e-05, "step": 25646, "training_step_time": 0.1088404655456543 }, { "epoch": 3.913421630859375e-05, "model_forward_time": 0.025075435638427734, "step": 25647 }, { "epoch": 3.913421630859375e-05, "step": 25647, "training_step_time": 0.10593247413635254 }, { "epoch": 3.91357421875e-05, "model_forward_time": 0.02521657943725586, "step": 25648 }, { "epoch": 3.91357421875e-05, "step": 25648, "training_step_time": 0.10477113723754883 }, { "epoch": 3.913726806640625e-05, "model_forward_time": 0.02536940574645996, "step": 25649 }, { "epoch": 3.913726806640625e-05, "step": 25649, "training_step_time": 0.12700748443603516 }, { "epoch": 3.91387939453125e-05, "grad_norm": 0.16840949654579163, "learning_rate": 5.6388590278194096e-06, "loss": 0.0073, "step": 25650 }, { "epoch": 3.91387939453125e-05, "model_forward_time": 0.02490377426147461, "step": 25650 }, { "epoch": 3.91387939453125e-05, "step": 25650, "training_step_time": 0.15639615058898926 }, { "epoch": 3.914031982421875e-05, "model_forward_time": 0.024352073669433594, "step": 25651 }, { "epoch": 3.914031982421875e-05, "step": 25651, "training_step_time": 0.1851494312286377 }, { "epoch": 3.9141845703125e-05, "model_forward_time": 0.02477884292602539, "step": 25652 }, { "epoch": 3.9141845703125e-05, "step": 25652, "training_step_time": 0.1990673542022705 }, { "epoch": 3.914337158203125e-05, "model_forward_time": 0.024531841278076172, "step": 25653 }, { "epoch": 3.914337158203125e-05, "step": 25653, "training_step_time": 0.10471057891845703 }, { "epoch": 3.91448974609375e-05, "model_forward_time": 0.026381492614746094, "step": 25654 }, { "epoch": 3.91448974609375e-05, "step": 25654, "training_step_time": 0.10498905181884766 }, { "epoch": 3.914642333984375e-05, "model_forward_time": 0.025449037551879883, "step": 25655 }, { "epoch": 3.914642333984375e-05, "step": 25655, "training_step_time": 0.10742378234863281 }, { "epoch": 3.914794921875e-05, "model_forward_time": 0.025290489196777344, "step": 25656 }, { "epoch": 3.914794921875e-05, "step": 25656, "training_step_time": 0.10626339912414551 }, { "epoch": 3.914947509765625e-05, "model_forward_time": 0.028519153594970703, "step": 25657 }, { "epoch": 3.914947509765625e-05, "step": 25657, "training_step_time": 0.11069893836975098 }, { "epoch": 3.91510009765625e-05, "model_forward_time": 0.025428295135498047, "step": 25658 }, { "epoch": 3.91510009765625e-05, "step": 25658, "training_step_time": 0.10828733444213867 }, { "epoch": 3.915252685546875e-05, "model_forward_time": 0.025597095489501953, "step": 25659 }, { "epoch": 3.915252685546875e-05, "step": 25659, "training_step_time": 0.10703015327453613 }, { "epoch": 3.9154052734375e-05, "grad_norm": 0.08039369434118271, "learning_rate": 5.613458861498832e-06, "loss": 0.0021, "step": 25660 }, { "epoch": 3.9154052734375e-05, "model_forward_time": 0.026313304901123047, "step": 25660 }, { "epoch": 3.9154052734375e-05, "step": 25660, "training_step_time": 0.1084744930267334 }, { "epoch": 3.915557861328125e-05, "model_forward_time": 0.025353193283081055, "step": 25661 }, { "epoch": 3.915557861328125e-05, "step": 25661, "training_step_time": 0.19197702407836914 }, { "epoch": 3.91571044921875e-05, "model_forward_time": 0.02449178695678711, "step": 25662 }, { "epoch": 3.91571044921875e-05, "step": 25662, "training_step_time": 0.11492657661437988 }, { "epoch": 3.915863037109375e-05, "model_forward_time": 0.02454972267150879, "step": 25663 }, { "epoch": 3.915863037109375e-05, "step": 25663, "training_step_time": 0.10592961311340332 }, { "epoch": 3.916015625e-05, "model_forward_time": 0.025347471237182617, "step": 25664 }, { "epoch": 3.916015625e-05, "step": 25664, "training_step_time": 0.12412405014038086 }, { "epoch": 3.916168212890625e-05, "model_forward_time": 0.026282548904418945, "step": 25665 }, { "epoch": 3.916168212890625e-05, "step": 25665, "training_step_time": 0.13102340698242188 }, { "epoch": 3.91632080078125e-05, "model_forward_time": 0.024775981903076172, "step": 25666 }, { "epoch": 3.91632080078125e-05, "step": 25666, "training_step_time": 0.11069440841674805 }, { "epoch": 3.916473388671875e-05, "model_forward_time": 0.0256500244140625, "step": 25667 }, { "epoch": 3.916473388671875e-05, "step": 25667, "training_step_time": 0.14937353134155273 }, { "epoch": 3.9166259765625e-05, "model_forward_time": 0.024778366088867188, "step": 25668 }, { "epoch": 3.9166259765625e-05, "step": 25668, "training_step_time": 0.1075131893157959 }, { "epoch": 3.916778564453125e-05, "model_forward_time": 0.024269819259643555, "step": 25669 }, { "epoch": 3.916778564453125e-05, "step": 25669, "training_step_time": 0.10494184494018555 }, { "epoch": 3.91693115234375e-05, "grad_norm": 0.10808060318231583, "learning_rate": 5.58811262901111e-06, "loss": 0.0138, "step": 25670 }, { "epoch": 3.91693115234375e-05, "model_forward_time": 0.024515390396118164, "step": 25670 }, { "epoch": 3.91693115234375e-05, "step": 25670, "training_step_time": 0.10735058784484863 }, { "epoch": 3.917083740234375e-05, "model_forward_time": 0.02536463737487793, "step": 25671 }, { "epoch": 3.917083740234375e-05, "step": 25671, "training_step_time": 0.1108241081237793 }, { "epoch": 3.917236328125e-05, "model_forward_time": 0.024974346160888672, "step": 25672 }, { "epoch": 3.917236328125e-05, "step": 25672, "training_step_time": 0.11455512046813965 }, { "epoch": 3.917388916015625e-05, "model_forward_time": 0.0255124568939209, "step": 25673 }, { "epoch": 3.917388916015625e-05, "step": 25673, "training_step_time": 0.21761584281921387 }, { "epoch": 3.91754150390625e-05, "model_forward_time": 0.024854183197021484, "step": 25674 }, { "epoch": 3.91754150390625e-05, "step": 25674, "training_step_time": 0.11709213256835938 }, { "epoch": 3.917694091796875e-05, "model_forward_time": 0.024358510971069336, "step": 25675 }, { "epoch": 3.917694091796875e-05, "step": 25675, "training_step_time": 0.1076667308807373 }, { "epoch": 3.9178466796875e-05, "model_forward_time": 0.02534341812133789, "step": 25676 }, { "epoch": 3.9178466796875e-05, "step": 25676, "training_step_time": 0.10744547843933105 }, { "epoch": 3.917999267578125e-05, "model_forward_time": 0.02511453628540039, "step": 25677 }, { "epoch": 3.917999267578125e-05, "step": 25677, "training_step_time": 0.11441755294799805 }, { "epoch": 3.91815185546875e-05, "model_forward_time": 0.025025129318237305, "step": 25678 }, { "epoch": 3.91815185546875e-05, "step": 25678, "training_step_time": 0.10583353042602539 }, { "epoch": 3.918304443359375e-05, "model_forward_time": 0.024213552474975586, "step": 25679 }, { "epoch": 3.918304443359375e-05, "step": 25679, "training_step_time": 0.10575151443481445 }, { "epoch": 3.91845703125e-05, "grad_norm": 0.062384042888879776, "learning_rate": 5.562820361154314e-06, "loss": 0.0027, "step": 25680 }, { "epoch": 3.91845703125e-05, "model_forward_time": 0.024627685546875, "step": 25680 }, { "epoch": 3.91845703125e-05, "step": 25680, "training_step_time": 0.11226844787597656 }, { "epoch": 3.918609619140625e-05, "model_forward_time": 0.024151086807250977, "step": 25681 }, { "epoch": 3.918609619140625e-05, "step": 25681, "training_step_time": 0.1999814510345459 }, { "epoch": 3.91876220703125e-05, "model_forward_time": 0.02432727813720703, "step": 25682 }, { "epoch": 3.91876220703125e-05, "step": 25682, "training_step_time": 0.21282196044921875 }, { "epoch": 3.918914794921875e-05, "model_forward_time": 0.02435016632080078, "step": 25683 }, { "epoch": 3.918914794921875e-05, "step": 25683, "training_step_time": 0.1020662784576416 }, { "epoch": 3.9190673828125e-05, "model_forward_time": 0.02452397346496582, "step": 25684 }, { "epoch": 3.9190673828125e-05, "step": 25684, "training_step_time": 0.12163519859313965 }, { "epoch": 3.919219970703125e-05, "model_forward_time": 0.024925947189331055, "step": 25685 }, { "epoch": 3.919219970703125e-05, "step": 25685, "training_step_time": 0.19746088981628418 }, { "epoch": 3.91937255859375e-05, "model_forward_time": 0.023735523223876953, "step": 25686 }, { "epoch": 3.91937255859375e-05, "step": 25686, "training_step_time": 0.10606765747070312 }, { "epoch": 3.919525146484375e-05, "model_forward_time": 0.02452254295349121, "step": 25687 }, { "epoch": 3.919525146484375e-05, "step": 25687, "training_step_time": 0.1051628589630127 }, { "epoch": 3.919677734375e-05, "model_forward_time": 0.025292396545410156, "step": 25688 }, { "epoch": 3.919677734375e-05, "step": 25688, "training_step_time": 0.10851192474365234 }, { "epoch": 3.919830322265625e-05, "model_forward_time": 0.025179147720336914, "step": 25689 }, { "epoch": 3.919830322265625e-05, "step": 25689, "training_step_time": 0.1076967716217041 }, { "epoch": 3.91998291015625e-05, "grad_norm": 0.056697502732276917, "learning_rate": 5.537582088660937e-06, "loss": 0.0038, "step": 25690 }, { "epoch": 3.91998291015625e-05, "model_forward_time": 0.024643898010253906, "step": 25690 }, { "epoch": 3.91998291015625e-05, "step": 25690, "training_step_time": 0.10825562477111816 }, { "epoch": 3.920135498046875e-05, "model_forward_time": 0.02487349510192871, "step": 25691 }, { "epoch": 3.920135498046875e-05, "step": 25691, "training_step_time": 0.10646200180053711 }, { "epoch": 3.9202880859375e-05, "model_forward_time": 0.024039030075073242, "step": 25692 }, { "epoch": 3.9202880859375e-05, "step": 25692, "training_step_time": 0.14775347709655762 }, { "epoch": 3.920440673828125e-05, "model_forward_time": 0.025356769561767578, "step": 25693 }, { "epoch": 3.920440673828125e-05, "step": 25693, "training_step_time": 0.1188511848449707 }, { "epoch": 3.92059326171875e-05, "model_forward_time": 0.025005817413330078, "step": 25694 }, { "epoch": 3.92059326171875e-05, "step": 25694, "training_step_time": 0.11051774024963379 }, { "epoch": 3.920745849609375e-05, "model_forward_time": 0.025234699249267578, "step": 25695 }, { "epoch": 3.920745849609375e-05, "step": 25695, "training_step_time": 0.11675667762756348 }, { "epoch": 3.9208984375e-05, "model_forward_time": 0.02503037452697754, "step": 25696 }, { "epoch": 3.9208984375e-05, "step": 25696, "training_step_time": 0.13229918479919434 }, { "epoch": 3.921051025390625e-05, "model_forward_time": 0.027141332626342773, "step": 25697 }, { "epoch": 3.921051025390625e-05, "step": 25697, "training_step_time": 0.11215829849243164 }, { "epoch": 3.92120361328125e-05, "model_forward_time": 0.025089263916015625, "step": 25698 }, { "epoch": 3.92120361328125e-05, "step": 25698, "training_step_time": 0.10865283012390137 }, { "epoch": 3.921356201171875e-05, "model_forward_time": 0.025014638900756836, "step": 25699 }, { "epoch": 3.921356201171875e-05, "step": 25699, "training_step_time": 0.11489462852478027 }, { "epoch": 3.9215087890625e-05, "grad_norm": 0.22654645144939423, "learning_rate": 5.512397842197847e-06, "loss": 0.0035, "step": 25700 }, { "epoch": 3.9215087890625e-05, "model_forward_time": 0.025211811065673828, "step": 25700 }, { "epoch": 3.9215087890625e-05, "step": 25700, "training_step_time": 0.10718441009521484 }, { "epoch": 3.921661376953125e-05, "model_forward_time": 0.025472640991210938, "step": 25701 }, { "epoch": 3.921661376953125e-05, "step": 25701, "training_step_time": 0.10580039024353027 }, { "epoch": 3.92181396484375e-05, "model_forward_time": 0.025278806686401367, "step": 25702 }, { "epoch": 3.92181396484375e-05, "step": 25702, "training_step_time": 0.10976624488830566 }, { "epoch": 3.921966552734375e-05, "model_forward_time": 0.0254058837890625, "step": 25703 }, { "epoch": 3.921966552734375e-05, "step": 25703, "training_step_time": 0.10440897941589355 }, { "epoch": 3.922119140625e-05, "model_forward_time": 0.025019407272338867, "step": 25704 }, { "epoch": 3.922119140625e-05, "step": 25704, "training_step_time": 0.10904145240783691 }, { "epoch": 3.922271728515625e-05, "model_forward_time": 0.024916410446166992, "step": 25705 }, { "epoch": 3.922271728515625e-05, "step": 25705, "training_step_time": 0.10468459129333496 }, { "epoch": 3.92242431640625e-05, "model_forward_time": 0.0260467529296875, "step": 25706 }, { "epoch": 3.92242431640625e-05, "step": 25706, "training_step_time": 0.10792136192321777 }, { "epoch": 3.922576904296875e-05, "model_forward_time": 0.02516460418701172, "step": 25707 }, { "epoch": 3.922576904296875e-05, "step": 25707, "training_step_time": 0.1550905704498291 }, { "epoch": 3.9227294921875e-05, "model_forward_time": 0.02447032928466797, "step": 25708 }, { "epoch": 3.9227294921875e-05, "step": 25708, "training_step_time": 0.11077237129211426 }, { "epoch": 3.922882080078125e-05, "model_forward_time": 0.0246274471282959, "step": 25709 }, { "epoch": 3.922882080078125e-05, "step": 25709, "training_step_time": 0.11900806427001953 }, { "epoch": 3.92303466796875e-05, "grad_norm": 0.09287601709365845, "learning_rate": 5.48726765236629e-06, "loss": 0.0045, "step": 25710 }, { "epoch": 3.92303466796875e-05, "model_forward_time": 0.024851322174072266, "step": 25710 }, { "epoch": 3.92303466796875e-05, "step": 25710, "training_step_time": 0.10952281951904297 }, { "epoch": 3.923187255859375e-05, "model_forward_time": 0.025232315063476562, "step": 25711 }, { "epoch": 3.923187255859375e-05, "step": 25711, "training_step_time": 0.13521337509155273 }, { "epoch": 3.92333984375e-05, "model_forward_time": 0.025053977966308594, "step": 25712 }, { "epoch": 3.92333984375e-05, "step": 25712, "training_step_time": 0.10973072052001953 }, { "epoch": 3.923492431640625e-05, "model_forward_time": 0.024811744689941406, "step": 25713 }, { "epoch": 3.923492431640625e-05, "step": 25713, "training_step_time": 0.12299489974975586 }, { "epoch": 3.92364501953125e-05, "model_forward_time": 0.025449275970458984, "step": 25714 }, { "epoch": 3.92364501953125e-05, "step": 25714, "training_step_time": 0.12157297134399414 }, { "epoch": 3.923797607421875e-05, "model_forward_time": 0.025534629821777344, "step": 25715 }, { "epoch": 3.923797607421875e-05, "step": 25715, "training_step_time": 0.1352531909942627 }, { "epoch": 3.9239501953125e-05, "model_forward_time": 0.024773120880126953, "step": 25716 }, { "epoch": 3.9239501953125e-05, "step": 25716, "training_step_time": 0.1955418586730957 }, { "epoch": 3.924102783203125e-05, "model_forward_time": 0.025046586990356445, "step": 25717 }, { "epoch": 3.924102783203125e-05, "step": 25717, "training_step_time": 0.15355563163757324 }, { "epoch": 3.92425537109375e-05, "model_forward_time": 0.024736642837524414, "step": 25718 }, { "epoch": 3.92425537109375e-05, "step": 25718, "training_step_time": 0.1703050136566162 }, { "epoch": 3.924407958984375e-05, "model_forward_time": 0.025574445724487305, "step": 25719 }, { "epoch": 3.924407958984375e-05, "step": 25719, "training_step_time": 0.13472294807434082 }, { "epoch": 3.924560546875e-05, "grad_norm": 0.08618535101413727, "learning_rate": 5.462191549701806e-06, "loss": 0.0028, "step": 25720 }, { "epoch": 3.924560546875e-05, "model_forward_time": 0.024643421173095703, "step": 25720 }, { "epoch": 3.924560546875e-05, "step": 25720, "training_step_time": 0.12391018867492676 }, { "epoch": 3.924713134765625e-05, "model_forward_time": 0.024260759353637695, "step": 25721 }, { "epoch": 3.924713134765625e-05, "step": 25721, "training_step_time": 0.11868810653686523 }, { "epoch": 3.92486572265625e-05, "model_forward_time": 0.025470495223999023, "step": 25722 }, { "epoch": 3.92486572265625e-05, "step": 25722, "training_step_time": 0.11401057243347168 }, { "epoch": 3.925018310546875e-05, "model_forward_time": 0.02627086639404297, "step": 25723 }, { "epoch": 3.925018310546875e-05, "step": 25723, "training_step_time": 0.11565279960632324 }, { "epoch": 3.9251708984375e-05, "model_forward_time": 0.025832653045654297, "step": 25724 }, { "epoch": 3.9251708984375e-05, "step": 25724, "training_step_time": 0.11051440238952637 }, { "epoch": 3.925323486328125e-05, "model_forward_time": 0.025552034378051758, "step": 25725 }, { "epoch": 3.925323486328125e-05, "step": 25725, "training_step_time": 0.20847535133361816 }, { "epoch": 3.92547607421875e-05, "model_forward_time": 0.025653600692749023, "step": 25726 }, { "epoch": 3.92547607421875e-05, "step": 25726, "training_step_time": 0.12891721725463867 }, { "epoch": 3.925628662109375e-05, "model_forward_time": 0.02463674545288086, "step": 25727 }, { "epoch": 3.925628662109375e-05, "step": 25727, "training_step_time": 0.1246955394744873 }, { "epoch": 3.92578125e-05, "model_forward_time": 0.025015592575073242, "step": 25728 }, { "epoch": 3.92578125e-05, "step": 25728, "training_step_time": 0.10704970359802246 }, { "epoch": 3.925933837890625e-05, "model_forward_time": 0.025545358657836914, "step": 25729 }, { "epoch": 3.925933837890625e-05, "step": 25729, "training_step_time": 0.17906618118286133 }, { "epoch": 3.92608642578125e-05, "grad_norm": 0.4258580505847931, "learning_rate": 5.437169564674233e-06, "loss": 0.0074, "step": 25730 }, { "epoch": 3.92608642578125e-05, "model_forward_time": 0.02471303939819336, "step": 25730 }, { "epoch": 3.92608642578125e-05, "step": 25730, "training_step_time": 0.1317582130432129 }, { "epoch": 3.926239013671875e-05, "model_forward_time": 0.024985551834106445, "step": 25731 }, { "epoch": 3.926239013671875e-05, "step": 25731, "training_step_time": 0.12200403213500977 }, { "epoch": 3.9263916015625e-05, "model_forward_time": 0.025798797607421875, "step": 25732 }, { "epoch": 3.9263916015625e-05, "step": 25732, "training_step_time": 0.10599493980407715 }, { "epoch": 3.926544189453125e-05, "model_forward_time": 0.02905583381652832, "step": 25733 }, { "epoch": 3.926544189453125e-05, "step": 25733, "training_step_time": 0.11008810997009277 }, { "epoch": 3.92669677734375e-05, "model_forward_time": 0.025323867797851562, "step": 25734 }, { "epoch": 3.92669677734375e-05, "step": 25734, "training_step_time": 0.10917782783508301 }, { "epoch": 3.926849365234375e-05, "model_forward_time": 0.025103330612182617, "step": 25735 }, { "epoch": 3.926849365234375e-05, "step": 25735, "training_step_time": 0.10687589645385742 }, { "epoch": 3.927001953125e-05, "model_forward_time": 0.02430582046508789, "step": 25736 }, { "epoch": 3.927001953125e-05, "step": 25736, "training_step_time": 0.10477185249328613 }, { "epoch": 3.927154541015625e-05, "model_forward_time": 0.024344205856323242, "step": 25737 }, { "epoch": 3.927154541015625e-05, "step": 25737, "training_step_time": 0.10497522354125977 }, { "epoch": 3.92730712890625e-05, "model_forward_time": 0.024689435958862305, "step": 25738 }, { "epoch": 3.92730712890625e-05, "step": 25738, "training_step_time": 0.1224520206451416 }, { "epoch": 3.927459716796875e-05, "model_forward_time": 0.025063037872314453, "step": 25739 }, { "epoch": 3.927459716796875e-05, "step": 25739, "training_step_time": 0.14628863334655762 }, { "epoch": 3.9276123046875e-05, "grad_norm": 0.1529744565486908, "learning_rate": 5.412201727687644e-06, "loss": 0.0041, "step": 25740 }, { "epoch": 3.9276123046875e-05, "model_forward_time": 0.02437138557434082, "step": 25740 }, { "epoch": 3.9276123046875e-05, "step": 25740, "training_step_time": 0.10451459884643555 }, { "epoch": 3.927764892578125e-05, "model_forward_time": 0.02879023551940918, "step": 25741 }, { "epoch": 3.927764892578125e-05, "step": 25741, "training_step_time": 0.10657334327697754 }, { "epoch": 3.92791748046875e-05, "model_forward_time": 0.02557826042175293, "step": 25742 }, { "epoch": 3.92791748046875e-05, "step": 25742, "training_step_time": 0.11675786972045898 }, { "epoch": 3.928070068359375e-05, "model_forward_time": 0.025258541107177734, "step": 25743 }, { "epoch": 3.928070068359375e-05, "step": 25743, "training_step_time": 0.1812269687652588 }, { "epoch": 3.92822265625e-05, "model_forward_time": 0.024876832962036133, "step": 25744 }, { "epoch": 3.92822265625e-05, "step": 25744, "training_step_time": 0.12037539482116699 }, { "epoch": 3.928375244140625e-05, "model_forward_time": 0.02480792999267578, "step": 25745 }, { "epoch": 3.928375244140625e-05, "step": 25745, "training_step_time": 0.10139083862304688 }, { "epoch": 3.92852783203125e-05, "model_forward_time": 0.025223493576049805, "step": 25746 }, { "epoch": 3.92852783203125e-05, "step": 25746, "training_step_time": 0.1031332015991211 }, { "epoch": 3.928680419921875e-05, "model_forward_time": 0.025554656982421875, "step": 25747 }, { "epoch": 3.928680419921875e-05, "step": 25747, "training_step_time": 0.10414552688598633 }, { "epoch": 3.9288330078125e-05, "model_forward_time": 0.0257260799407959, "step": 25748 }, { "epoch": 3.9288330078125e-05, "step": 25748, "training_step_time": 0.10771751403808594 }, { "epoch": 3.928985595703125e-05, "model_forward_time": 0.029303789138793945, "step": 25749 }, { "epoch": 3.928985595703125e-05, "step": 25749, "training_step_time": 0.11090660095214844 }, { "epoch": 3.92913818359375e-05, "grad_norm": 0.10374678671360016, "learning_rate": 5.387288069080299e-06, "loss": 0.0034, "step": 25750 }, { "epoch": 3.92913818359375e-05, "model_forward_time": 0.026171207427978516, "step": 25750 }, { "epoch": 3.92913818359375e-05, "step": 25750, "training_step_time": 0.11107182502746582 }, { "epoch": 3.929290771484375e-05, "model_forward_time": 0.026156902313232422, "step": 25751 }, { "epoch": 3.929290771484375e-05, "step": 25751, "training_step_time": 0.1349201202392578 }, { "epoch": 3.929443359375e-05, "model_forward_time": 0.02539992332458496, "step": 25752 }, { "epoch": 3.929443359375e-05, "step": 25752, "training_step_time": 0.18234872817993164 }, { "epoch": 3.929595947265625e-05, "model_forward_time": 0.026047468185424805, "step": 25753 }, { "epoch": 3.929595947265625e-05, "step": 25753, "training_step_time": 0.1540982723236084 }, { "epoch": 3.92974853515625e-05, "model_forward_time": 0.024721145629882812, "step": 25754 }, { "epoch": 3.92974853515625e-05, "step": 25754, "training_step_time": 0.18913793563842773 }, { "epoch": 3.929901123046875e-05, "model_forward_time": 0.024372339248657227, "step": 25755 }, { "epoch": 3.929901123046875e-05, "step": 25755, "training_step_time": 0.1396317481994629 }, { "epoch": 3.9300537109375e-05, "model_forward_time": 0.025102615356445312, "step": 25756 }, { "epoch": 3.9300537109375e-05, "step": 25756, "training_step_time": 0.12041687965393066 }, { "epoch": 3.930206298828125e-05, "model_forward_time": 0.02565765380859375, "step": 25757 }, { "epoch": 3.930206298828125e-05, "step": 25757, "training_step_time": 0.15949535369873047 }, { "epoch": 3.93035888671875e-05, "model_forward_time": 0.025142431259155273, "step": 25758 }, { "epoch": 3.93035888671875e-05, "step": 25758, "training_step_time": 0.10625123977661133 }, { "epoch": 3.930511474609375e-05, "model_forward_time": 0.02498149871826172, "step": 25759 }, { "epoch": 3.930511474609375e-05, "step": 25759, "training_step_time": 0.20183086395263672 }, { "epoch": 3.9306640625e-05, "grad_norm": 0.16779078543186188, "learning_rate": 5.362428619124666e-06, "loss": 0.0043, "step": 25760 }, { "epoch": 3.9306640625e-05, "model_forward_time": 0.024631977081298828, "step": 25760 }, { "epoch": 3.9306640625e-05, "step": 25760, "training_step_time": 0.11922073364257812 }, { "epoch": 3.930816650390625e-05, "model_forward_time": 0.025808334350585938, "step": 25761 }, { "epoch": 3.930816650390625e-05, "step": 25761, "training_step_time": 0.10827970504760742 }, { "epoch": 3.93096923828125e-05, "model_forward_time": 0.026005029678344727, "step": 25762 }, { "epoch": 3.93096923828125e-05, "step": 25762, "training_step_time": 0.17543554306030273 }, { "epoch": 3.931121826171875e-05, "model_forward_time": 0.024573087692260742, "step": 25763 }, { "epoch": 3.931121826171875e-05, "step": 25763, "training_step_time": 0.15593242645263672 }, { "epoch": 3.9312744140625e-05, "model_forward_time": 0.024667739868164062, "step": 25764 }, { "epoch": 3.9312744140625e-05, "step": 25764, "training_step_time": 0.10363101959228516 }, { "epoch": 3.931427001953125e-05, "model_forward_time": 0.025342702865600586, "step": 25765 }, { "epoch": 3.931427001953125e-05, "step": 25765, "training_step_time": 0.10560822486877441 }, { "epoch": 3.93157958984375e-05, "model_forward_time": 0.025417327880859375, "step": 25766 }, { "epoch": 3.93157958984375e-05, "step": 25766, "training_step_time": 0.10665202140808105 }, { "epoch": 3.931732177734375e-05, "model_forward_time": 0.025538206100463867, "step": 25767 }, { "epoch": 3.931732177734375e-05, "step": 25767, "training_step_time": 0.10861325263977051 }, { "epoch": 3.931884765625e-05, "model_forward_time": 0.025404930114746094, "step": 25768 }, { "epoch": 3.931884765625e-05, "step": 25768, "training_step_time": 0.10567378997802734 }, { "epoch": 3.932037353515625e-05, "model_forward_time": 0.02545166015625, "step": 25769 }, { "epoch": 3.932037353515625e-05, "step": 25769, "training_step_time": 0.19520998001098633 }, { "epoch": 3.93218994140625e-05, "grad_norm": 0.06318707764148712, "learning_rate": 5.337623408027293e-06, "loss": 0.0036, "step": 25770 }, { "epoch": 3.93218994140625e-05, "model_forward_time": 0.02448296546936035, "step": 25770 }, { "epoch": 3.93218994140625e-05, "step": 25770, "training_step_time": 0.11513018608093262 }, { "epoch": 3.932342529296875e-05, "model_forward_time": 0.024393796920776367, "step": 25771 }, { "epoch": 3.932342529296875e-05, "step": 25771, "training_step_time": 0.10962867736816406 }, { "epoch": 3.9324951171875e-05, "model_forward_time": 0.025110244750976562, "step": 25772 }, { "epoch": 3.9324951171875e-05, "step": 25772, "training_step_time": 0.12342977523803711 }, { "epoch": 3.932647705078125e-05, "model_forward_time": 0.025315284729003906, "step": 25773 }, { "epoch": 3.932647705078125e-05, "step": 25773, "training_step_time": 0.10531783103942871 }, { "epoch": 3.93280029296875e-05, "model_forward_time": 0.024760007858276367, "step": 25774 }, { "epoch": 3.93280029296875e-05, "step": 25774, "training_step_time": 0.10808610916137695 }, { "epoch": 3.932952880859375e-05, "model_forward_time": 0.025420427322387695, "step": 25775 }, { "epoch": 3.932952880859375e-05, "step": 25775, "training_step_time": 0.11801028251647949 }, { "epoch": 3.93310546875e-05, "model_forward_time": 0.025914430618286133, "step": 25776 }, { "epoch": 3.93310546875e-05, "step": 25776, "training_step_time": 0.11122965812683105 }, { "epoch": 3.933258056640625e-05, "model_forward_time": 0.02548813819885254, "step": 25777 }, { "epoch": 3.933258056640625e-05, "step": 25777, "training_step_time": 0.10440516471862793 }, { "epoch": 3.93341064453125e-05, "model_forward_time": 0.025194644927978516, "step": 25778 }, { "epoch": 3.93341064453125e-05, "step": 25778, "training_step_time": 0.10861372947692871 }, { "epoch": 3.933563232421875e-05, "model_forward_time": 0.024774789810180664, "step": 25779 }, { "epoch": 3.933563232421875e-05, "step": 25779, "training_step_time": 0.10414600372314453 }, { "epoch": 3.9337158203125e-05, "grad_norm": 0.08112119138240814, "learning_rate": 5.312872465928881e-06, "loss": 0.0025, "step": 25780 }, { "epoch": 3.9337158203125e-05, "model_forward_time": 0.024657011032104492, "step": 25780 }, { "epoch": 3.9337158203125e-05, "step": 25780, "training_step_time": 0.10488772392272949 }, { "epoch": 3.933868408203125e-05, "model_forward_time": 0.025719404220581055, "step": 25781 }, { "epoch": 3.933868408203125e-05, "step": 25781, "training_step_time": 0.10438919067382812 }, { "epoch": 3.93402099609375e-05, "model_forward_time": 0.025116920471191406, "step": 25782 }, { "epoch": 3.93402099609375e-05, "step": 25782, "training_step_time": 0.16245317459106445 }, { "epoch": 3.934173583984375e-05, "model_forward_time": 0.02440667152404785, "step": 25783 }, { "epoch": 3.934173583984375e-05, "step": 25783, "training_step_time": 0.1704566478729248 }, { "epoch": 3.934326171875e-05, "model_forward_time": 0.025737762451171875, "step": 25784 }, { "epoch": 3.934326171875e-05, "step": 25784, "training_step_time": 0.10982155799865723 }, { "epoch": 3.934478759765625e-05, "model_forward_time": 0.02428412437438965, "step": 25785 }, { "epoch": 3.934478759765625e-05, "step": 25785, "training_step_time": 0.10641980171203613 }, { "epoch": 3.93463134765625e-05, "model_forward_time": 0.02510356903076172, "step": 25786 }, { "epoch": 3.93463134765625e-05, "step": 25786, "training_step_time": 0.12202334403991699 }, { "epoch": 3.934783935546875e-05, "model_forward_time": 0.024773120880126953, "step": 25787 }, { "epoch": 3.934783935546875e-05, "step": 25787, "training_step_time": 0.11218762397766113 }, { "epoch": 3.9349365234375e-05, "model_forward_time": 0.025330781936645508, "step": 25788 }, { "epoch": 3.9349365234375e-05, "step": 25788, "training_step_time": 0.18675541877746582 }, { "epoch": 3.935089111328125e-05, "model_forward_time": 0.024500370025634766, "step": 25789 }, { "epoch": 3.935089111328125e-05, "step": 25789, "training_step_time": 0.11127114295959473 }, { "epoch": 3.93524169921875e-05, "grad_norm": 0.12586982548236847, "learning_rate": 5.28817582290414e-06, "loss": 0.0038, "step": 25790 }, { "epoch": 3.93524169921875e-05, "model_forward_time": 0.024554014205932617, "step": 25790 }, { "epoch": 3.93524169921875e-05, "step": 25790, "training_step_time": 0.1174323558807373 }, { "epoch": 3.935394287109375e-05, "model_forward_time": 0.024805545806884766, "step": 25791 }, { "epoch": 3.935394287109375e-05, "step": 25791, "training_step_time": 0.11386752128601074 }, { "epoch": 3.935546875e-05, "model_forward_time": 0.02553391456604004, "step": 25792 }, { "epoch": 3.935546875e-05, "step": 25792, "training_step_time": 0.10772323608398438 }, { "epoch": 3.935699462890625e-05, "model_forward_time": 0.025073766708374023, "step": 25793 }, { "epoch": 3.935699462890625e-05, "step": 25793, "training_step_time": 0.11549782752990723 }, { "epoch": 3.93585205078125e-05, "model_forward_time": 0.024329423904418945, "step": 25794 }, { "epoch": 3.93585205078125e-05, "step": 25794, "training_step_time": 0.11139535903930664 }, { "epoch": 3.936004638671875e-05, "model_forward_time": 0.024079322814941406, "step": 25795 }, { "epoch": 3.936004638671875e-05, "step": 25795, "training_step_time": 0.11285805702209473 }, { "epoch": 3.9361572265625e-05, "model_forward_time": 0.023992538452148438, "step": 25796 }, { "epoch": 3.9361572265625e-05, "step": 25796, "training_step_time": 0.19027233123779297 }, { "epoch": 3.936309814453125e-05, "model_forward_time": 0.024167299270629883, "step": 25797 }, { "epoch": 3.936309814453125e-05, "step": 25797, "training_step_time": 0.10805416107177734 }, { "epoch": 3.93646240234375e-05, "model_forward_time": 0.02456951141357422, "step": 25798 }, { "epoch": 3.93646240234375e-05, "step": 25798, "training_step_time": 0.11515474319458008 }, { "epoch": 3.936614990234375e-05, "model_forward_time": 0.02531719207763672, "step": 25799 }, { "epoch": 3.936614990234375e-05, "step": 25799, "training_step_time": 0.12576889991760254 }, { "epoch": 3.936767578125e-05, "grad_norm": 0.06786226481199265, "learning_rate": 5.263533508961827e-06, "loss": 0.0037, "step": 25800 }, { "epoch": 3.936767578125e-05, "model_forward_time": 0.025504112243652344, "step": 25800 }, { "epoch": 3.936767578125e-05, "step": 25800, "training_step_time": 0.12991046905517578 }, { "epoch": 3.936920166015625e-05, "model_forward_time": 0.024959087371826172, "step": 25801 }, { "epoch": 3.936920166015625e-05, "step": 25801, "training_step_time": 0.11065268516540527 }, { "epoch": 3.93707275390625e-05, "model_forward_time": 0.025199174880981445, "step": 25802 }, { "epoch": 3.93707275390625e-05, "step": 25802, "training_step_time": 0.1100606918334961 }, { "epoch": 3.937225341796875e-05, "model_forward_time": 0.025185108184814453, "step": 25803 }, { "epoch": 3.937225341796875e-05, "step": 25803, "training_step_time": 0.10994124412536621 }, { "epoch": 3.9373779296875e-05, "model_forward_time": 0.024806737899780273, "step": 25804 }, { "epoch": 3.9373779296875e-05, "step": 25804, "training_step_time": 0.10913348197937012 }, { "epoch": 3.937530517578125e-05, "model_forward_time": 0.025193214416503906, "step": 25805 }, { "epoch": 3.937530517578125e-05, "step": 25805, "training_step_time": 0.10763764381408691 }, { "epoch": 3.93768310546875e-05, "model_forward_time": 0.024234294891357422, "step": 25806 }, { "epoch": 3.93768310546875e-05, "step": 25806, "training_step_time": 0.1584615707397461 }, { "epoch": 3.937835693359375e-05, "model_forward_time": 0.02449822425842285, "step": 25807 }, { "epoch": 3.937835693359375e-05, "step": 25807, "training_step_time": 0.15854668617248535 }, { "epoch": 3.93798828125e-05, "model_forward_time": 0.024201154708862305, "step": 25808 }, { "epoch": 3.93798828125e-05, "step": 25808, "training_step_time": 0.11106729507446289 }, { "epoch": 3.938140869140625e-05, "model_forward_time": 0.0257720947265625, "step": 25809 }, { "epoch": 3.938140869140625e-05, "step": 25809, "training_step_time": 0.13876628875732422 }, { "epoch": 3.93829345703125e-05, "grad_norm": 0.16554300487041473, "learning_rate": 5.238945554044672e-06, "loss": 0.0029, "step": 25810 }, { "epoch": 3.93829345703125e-05, "model_forward_time": 0.02499842643737793, "step": 25810 }, { "epoch": 3.93829345703125e-05, "step": 25810, "training_step_time": 0.10503816604614258 }, { "epoch": 3.938446044921875e-05, "model_forward_time": 0.02539682388305664, "step": 25811 }, { "epoch": 3.938446044921875e-05, "step": 25811, "training_step_time": 0.10937714576721191 }, { "epoch": 3.9385986328125e-05, "model_forward_time": 0.025429248809814453, "step": 25812 }, { "epoch": 3.9385986328125e-05, "step": 25812, "training_step_time": 0.10881733894348145 }, { "epoch": 3.938751220703125e-05, "model_forward_time": 0.024729013442993164, "step": 25813 }, { "epoch": 3.938751220703125e-05, "step": 25813, "training_step_time": 0.11156845092773438 }, { "epoch": 3.93890380859375e-05, "model_forward_time": 0.02475428581237793, "step": 25814 }, { "epoch": 3.93890380859375e-05, "step": 25814, "training_step_time": 0.10944437980651855 }, { "epoch": 3.939056396484375e-05, "model_forward_time": 0.025429248809814453, "step": 25815 }, { "epoch": 3.939056396484375e-05, "step": 25815, "training_step_time": 0.10462164878845215 }, { "epoch": 3.939208984375e-05, "model_forward_time": 0.02531886100769043, "step": 25816 }, { "epoch": 3.939208984375e-05, "step": 25816, "training_step_time": 0.11170005798339844 }, { "epoch": 3.939361572265625e-05, "model_forward_time": 0.025026559829711914, "step": 25817 }, { "epoch": 3.939361572265625e-05, "step": 25817, "training_step_time": 0.11561369895935059 }, { "epoch": 3.93951416015625e-05, "model_forward_time": 0.024882078170776367, "step": 25818 }, { "epoch": 3.93951416015625e-05, "step": 25818, "training_step_time": 0.1978001594543457 }, { "epoch": 3.939666748046875e-05, "model_forward_time": 0.023891210556030273, "step": 25819 }, { "epoch": 3.939666748046875e-05, "step": 25819, "training_step_time": 0.10515642166137695 }, { "epoch": 3.9398193359375e-05, "grad_norm": 0.0756898820400238, "learning_rate": 5.214411988029355e-06, "loss": 0.0034, "step": 25820 }, { "epoch": 3.9398193359375e-05, "model_forward_time": 0.02410435676574707, "step": 25820 }, { "epoch": 3.9398193359375e-05, "step": 25820, "training_step_time": 0.1097557544708252 }, { "epoch": 3.939971923828125e-05, "model_forward_time": 0.024553298950195312, "step": 25821 }, { "epoch": 3.939971923828125e-05, "step": 25821, "training_step_time": 0.1083064079284668 }, { "epoch": 3.94012451171875e-05, "model_forward_time": 0.025244712829589844, "step": 25822 }, { "epoch": 3.94012451171875e-05, "step": 25822, "training_step_time": 0.10894370079040527 }, { "epoch": 3.940277099609375e-05, "model_forward_time": 0.025018692016601562, "step": 25823 }, { "epoch": 3.940277099609375e-05, "step": 25823, "training_step_time": 0.11373591423034668 }, { "epoch": 3.9404296875e-05, "model_forward_time": 0.024715423583984375, "step": 25824 }, { "epoch": 3.9404296875e-05, "step": 25824, "training_step_time": 0.10692095756530762 }, { "epoch": 3.940582275390625e-05, "model_forward_time": 0.024602413177490234, "step": 25825 }, { "epoch": 3.940582275390625e-05, "step": 25825, "training_step_time": 0.10637497901916504 }, { "epoch": 3.94073486328125e-05, "model_forward_time": 0.025034427642822266, "step": 25826 }, { "epoch": 3.94073486328125e-05, "step": 25826, "training_step_time": 0.10526394844055176 }, { "epoch": 3.940887451171875e-05, "model_forward_time": 0.025151491165161133, "step": 25827 }, { "epoch": 3.940887451171875e-05, "step": 25827, "training_step_time": 0.10574555397033691 }, { "epoch": 3.9410400390625e-05, "model_forward_time": 0.02508687973022461, "step": 25828 }, { "epoch": 3.9410400390625e-05, "step": 25828, "training_step_time": 0.10580277442932129 }, { "epoch": 3.941192626953125e-05, "model_forward_time": 0.02500295639038086, "step": 25829 }, { "epoch": 3.941192626953125e-05, "step": 25829, "training_step_time": 0.1128387451171875 }, { "epoch": 3.94134521484375e-05, "grad_norm": 0.06646464765071869, "learning_rate": 5.1899328407264855e-06, "loss": 0.0031, "step": 25830 }, { "epoch": 3.94134521484375e-05, "model_forward_time": 0.024760961532592773, "step": 25830 }, { "epoch": 3.94134521484375e-05, "step": 25830, "training_step_time": 0.13074183464050293 }, { "epoch": 3.941497802734375e-05, "model_forward_time": 0.02509331703186035, "step": 25831 }, { "epoch": 3.941497802734375e-05, "step": 25831, "training_step_time": 0.10674715042114258 }, { "epoch": 3.941650390625e-05, "model_forward_time": 0.025125503540039062, "step": 25832 }, { "epoch": 3.941650390625e-05, "step": 25832, "training_step_time": 0.10644292831420898 }, { "epoch": 3.941802978515625e-05, "model_forward_time": 0.024548768997192383, "step": 25833 }, { "epoch": 3.941802978515625e-05, "step": 25833, "training_step_time": 0.11115407943725586 }, { "epoch": 3.94195556640625e-05, "model_forward_time": 0.027149438858032227, "step": 25834 }, { "epoch": 3.94195556640625e-05, "step": 25834, "training_step_time": 0.11219453811645508 }, { "epoch": 3.942108154296875e-05, "model_forward_time": 0.0251617431640625, "step": 25835 }, { "epoch": 3.942108154296875e-05, "step": 25835, "training_step_time": 0.19274139404296875 }, { "epoch": 3.9422607421875e-05, "model_forward_time": 0.024132490158081055, "step": 25836 }, { "epoch": 3.9422607421875e-05, "step": 25836, "training_step_time": 0.10600972175598145 }, { "epoch": 3.942413330078125e-05, "model_forward_time": 0.024664878845214844, "step": 25837 }, { "epoch": 3.942413330078125e-05, "step": 25837, "training_step_time": 0.10587453842163086 }, { "epoch": 3.94256591796875e-05, "model_forward_time": 0.02506852149963379, "step": 25838 }, { "epoch": 3.94256591796875e-05, "step": 25838, "training_step_time": 0.10826420783996582 }, { "epoch": 3.942718505859375e-05, "model_forward_time": 0.02536296844482422, "step": 25839 }, { "epoch": 3.942718505859375e-05, "step": 25839, "training_step_time": 0.10845065116882324 }, { "epoch": 3.94287109375e-05, "grad_norm": 0.12488628923892975, "learning_rate": 5.165508141880526e-06, "loss": 0.0079, "step": 25840 }, { "epoch": 3.94287109375e-05, "model_forward_time": 0.025051116943359375, "step": 25840 }, { "epoch": 3.94287109375e-05, "step": 25840, "training_step_time": 0.10573840141296387 }, { "epoch": 3.943023681640625e-05, "model_forward_time": 0.02532649040222168, "step": 25841 }, { "epoch": 3.943023681640625e-05, "step": 25841, "training_step_time": 0.10563445091247559 }, { "epoch": 3.94317626953125e-05, "model_forward_time": 0.025229692459106445, "step": 25842 }, { "epoch": 3.94317626953125e-05, "step": 25842, "training_step_time": 0.10614538192749023 }, { "epoch": 3.943328857421875e-05, "model_forward_time": 0.025068998336791992, "step": 25843 }, { "epoch": 3.943328857421875e-05, "step": 25843, "training_step_time": 0.1072382926940918 }, { "epoch": 3.9434814453125e-05, "model_forward_time": 0.02489781379699707, "step": 25844 }, { "epoch": 3.9434814453125e-05, "step": 25844, "training_step_time": 0.17466282844543457 }, { "epoch": 3.943634033203125e-05, "model_forward_time": 0.0245053768157959, "step": 25845 }, { "epoch": 3.943634033203125e-05, "step": 25845, "training_step_time": 0.1095881462097168 }, { "epoch": 3.94378662109375e-05, "model_forward_time": 0.024741411209106445, "step": 25846 }, { "epoch": 3.94378662109375e-05, "step": 25846, "training_step_time": 0.11125016212463379 }, { "epoch": 3.943939208984375e-05, "model_forward_time": 0.025305986404418945, "step": 25847 }, { "epoch": 3.943939208984375e-05, "step": 25847, "training_step_time": 0.12735271453857422 }, { "epoch": 3.944091796875e-05, "model_forward_time": 0.025331735610961914, "step": 25848 }, { "epoch": 3.944091796875e-05, "step": 25848, "training_step_time": 0.12441110610961914 }, { "epoch": 3.944244384765625e-05, "model_forward_time": 0.024831056594848633, "step": 25849 }, { "epoch": 3.944244384765625e-05, "step": 25849, "training_step_time": 0.10965299606323242 }, { "epoch": 3.94439697265625e-05, "grad_norm": 0.07019779831171036, "learning_rate": 5.141137921169792e-06, "loss": 0.0046, "step": 25850 }, { "epoch": 3.94439697265625e-05, "model_forward_time": 0.025171756744384766, "step": 25850 }, { "epoch": 3.94439697265625e-05, "step": 25850, "training_step_time": 0.11248254776000977 }, { "epoch": 3.944549560546875e-05, "model_forward_time": 0.02486562728881836, "step": 25851 }, { "epoch": 3.944549560546875e-05, "step": 25851, "training_step_time": 0.10500907897949219 }, { "epoch": 3.9447021484375e-05, "model_forward_time": 0.02517986297607422, "step": 25852 }, { "epoch": 3.9447021484375e-05, "step": 25852, "training_step_time": 0.10858440399169922 }, { "epoch": 3.944854736328125e-05, "model_forward_time": 0.025420188903808594, "step": 25853 }, { "epoch": 3.944854736328125e-05, "step": 25853, "training_step_time": 0.10614418983459473 }, { "epoch": 3.94500732421875e-05, "model_forward_time": 0.024549484252929688, "step": 25854 }, { "epoch": 3.94500732421875e-05, "step": 25854, "training_step_time": 0.14817190170288086 }, { "epoch": 3.945159912109375e-05, "model_forward_time": 0.024013757705688477, "step": 25855 }, { "epoch": 3.945159912109375e-05, "step": 25855, "training_step_time": 0.1492147445678711 }, { "epoch": 3.9453125e-05, "model_forward_time": 0.024506330490112305, "step": 25856 }, { "epoch": 3.9453125e-05, "step": 25856, "training_step_time": 0.15873360633850098 }, { "epoch": 3.945465087890625e-05, "model_forward_time": 0.02424478530883789, "step": 25857 }, { "epoch": 3.945465087890625e-05, "step": 25857, "training_step_time": 0.14077019691467285 }, { "epoch": 3.94561767578125e-05, "model_forward_time": 0.023600339889526367, "step": 25858 }, { "epoch": 3.94561767578125e-05, "step": 25858, "training_step_time": 0.17223834991455078 }, { "epoch": 3.945770263671875e-05, "model_forward_time": 0.024563074111938477, "step": 25859 }, { "epoch": 3.945770263671875e-05, "step": 25859, "training_step_time": 0.13352560997009277 }, { "epoch": 3.9459228515625e-05, "grad_norm": 0.08645788580179214, "learning_rate": 5.116822208206396e-06, "loss": 0.0033, "step": 25860 }, { "epoch": 3.9459228515625e-05, "model_forward_time": 0.024778127670288086, "step": 25860 }, { "epoch": 3.9459228515625e-05, "step": 25860, "training_step_time": 0.11933231353759766 }, { "epoch": 3.946075439453125e-05, "model_forward_time": 0.025158166885375977, "step": 25861 }, { "epoch": 3.946075439453125e-05, "step": 25861, "training_step_time": 0.12124156951904297 }, { "epoch": 3.94622802734375e-05, "model_forward_time": 0.025393009185791016, "step": 25862 }, { "epoch": 3.94622802734375e-05, "step": 25862, "training_step_time": 0.1193697452545166 }, { "epoch": 3.946380615234375e-05, "model_forward_time": 0.025251388549804688, "step": 25863 }, { "epoch": 3.946380615234375e-05, "step": 25863, "training_step_time": 0.11401009559631348 }, { "epoch": 3.946533203125e-05, "model_forward_time": 0.025269031524658203, "step": 25864 }, { "epoch": 3.946533203125e-05, "step": 25864, "training_step_time": 0.19955134391784668 }, { "epoch": 3.946685791015625e-05, "model_forward_time": 0.02443218231201172, "step": 25865 }, { "epoch": 3.946685791015625e-05, "step": 25865, "training_step_time": 0.10823512077331543 }, { "epoch": 3.94683837890625e-05, "model_forward_time": 0.02444624900817871, "step": 25866 }, { "epoch": 3.94683837890625e-05, "step": 25866, "training_step_time": 0.11129999160766602 }, { "epoch": 3.946990966796875e-05, "model_forward_time": 0.025321006774902344, "step": 25867 }, { "epoch": 3.946990966796875e-05, "step": 25867, "training_step_time": 0.16054272651672363 }, { "epoch": 3.9471435546875e-05, "model_forward_time": 0.025015830993652344, "step": 25868 }, { "epoch": 3.9471435546875e-05, "step": 25868, "training_step_time": 0.10740137100219727 }, { "epoch": 3.947296142578125e-05, "model_forward_time": 0.024646520614624023, "step": 25869 }, { "epoch": 3.947296142578125e-05, "step": 25869, "training_step_time": 0.10657501220703125 }, { "epoch": 3.94744873046875e-05, "grad_norm": 0.06746818125247955, "learning_rate": 5.092561032536225e-06, "loss": 0.0053, "step": 25870 }, { "epoch": 3.94744873046875e-05, "model_forward_time": 0.02529740333557129, "step": 25870 }, { "epoch": 3.94744873046875e-05, "step": 25870, "training_step_time": 0.1047513484954834 }, { "epoch": 3.947601318359375e-05, "model_forward_time": 0.02526378631591797, "step": 25871 }, { "epoch": 3.947601318359375e-05, "step": 25871, "training_step_time": 0.10605978965759277 }, { "epoch": 3.94775390625e-05, "model_forward_time": 0.025127410888671875, "step": 25872 }, { "epoch": 3.94775390625e-05, "step": 25872, "training_step_time": 0.106475830078125 }, { "epoch": 3.947906494140625e-05, "model_forward_time": 0.025197744369506836, "step": 25873 }, { "epoch": 3.947906494140625e-05, "step": 25873, "training_step_time": 0.10813331604003906 }, { "epoch": 3.94805908203125e-05, "model_forward_time": 0.02519702911376953, "step": 25874 }, { "epoch": 3.94805908203125e-05, "step": 25874, "training_step_time": 0.1748976707458496 }, { "epoch": 3.948211669921875e-05, "model_forward_time": 0.024928569793701172, "step": 25875 }, { "epoch": 3.948211669921875e-05, "step": 25875, "training_step_time": 0.14101552963256836 }, { "epoch": 3.9483642578125e-05, "model_forward_time": 0.02435612678527832, "step": 25876 }, { "epoch": 3.9483642578125e-05, "step": 25876, "training_step_time": 0.10667800903320312 }, { "epoch": 3.948516845703125e-05, "model_forward_time": 0.025377750396728516, "step": 25877 }, { "epoch": 3.948516845703125e-05, "step": 25877, "training_step_time": 0.10916304588317871 }, { "epoch": 3.94866943359375e-05, "model_forward_time": 0.025196552276611328, "step": 25878 }, { "epoch": 3.94866943359375e-05, "step": 25878, "training_step_time": 0.11152124404907227 }, { "epoch": 3.948822021484375e-05, "model_forward_time": 0.025081396102905273, "step": 25879 }, { "epoch": 3.948822021484375e-05, "step": 25879, "training_step_time": 0.11188125610351562 }, { "epoch": 3.948974609375e-05, "grad_norm": 0.2724435329437256, "learning_rate": 5.068354423638882e-06, "loss": 0.0144, "step": 25880 }, { "epoch": 3.948974609375e-05, "model_forward_time": 0.0252227783203125, "step": 25880 }, { "epoch": 3.948974609375e-05, "step": 25880, "training_step_time": 0.18145179748535156 }, { "epoch": 3.949127197265625e-05, "model_forward_time": 0.02443838119506836, "step": 25881 }, { "epoch": 3.949127197265625e-05, "step": 25881, "training_step_time": 0.1037139892578125 }, { "epoch": 3.94927978515625e-05, "model_forward_time": 0.02463984489440918, "step": 25882 }, { "epoch": 3.94927978515625e-05, "step": 25882, "training_step_time": 0.10824704170227051 }, { "epoch": 3.949432373046875e-05, "model_forward_time": 0.02535414695739746, "step": 25883 }, { "epoch": 3.949432373046875e-05, "step": 25883, "training_step_time": 0.10746097564697266 }, { "epoch": 3.9495849609375e-05, "model_forward_time": 0.024901628494262695, "step": 25884 }, { "epoch": 3.9495849609375e-05, "step": 25884, "training_step_time": 0.10793447494506836 }, { "epoch": 3.949737548828125e-05, "model_forward_time": 0.025607585906982422, "step": 25885 }, { "epoch": 3.949737548828125e-05, "step": 25885, "training_step_time": 0.10767006874084473 }, { "epoch": 3.94989013671875e-05, "model_forward_time": 0.02497100830078125, "step": 25886 }, { "epoch": 3.94989013671875e-05, "step": 25886, "training_step_time": 0.1054372787475586 }, { "epoch": 3.950042724609375e-05, "model_forward_time": 0.025310754776000977, "step": 25887 }, { "epoch": 3.950042724609375e-05, "step": 25887, "training_step_time": 0.10844683647155762 }, { "epoch": 3.9501953125e-05, "model_forward_time": 0.024905920028686523, "step": 25888 }, { "epoch": 3.9501953125e-05, "step": 25888, "training_step_time": 0.10787725448608398 }, { "epoch": 3.950347900390625e-05, "model_forward_time": 0.025113344192504883, "step": 25889 }, { "epoch": 3.950347900390625e-05, "step": 25889, "training_step_time": 0.10764265060424805 }, { "epoch": 3.95050048828125e-05, "grad_norm": 0.0626010000705719, "learning_rate": 5.044202410927706e-06, "loss": 0.0053, "step": 25890 }, { "epoch": 3.95050048828125e-05, "model_forward_time": 0.025409221649169922, "step": 25890 }, { "epoch": 3.95050048828125e-05, "step": 25890, "training_step_time": 0.15995287895202637 }, { "epoch": 3.950653076171875e-05, "model_forward_time": 0.0246884822845459, "step": 25891 }, { "epoch": 3.950653076171875e-05, "step": 25891, "training_step_time": 0.23072195053100586 }, { "epoch": 3.9508056640625e-05, "model_forward_time": 0.024190664291381836, "step": 25892 }, { "epoch": 3.9508056640625e-05, "step": 25892, "training_step_time": 0.10637116432189941 }, { "epoch": 3.950958251953125e-05, "model_forward_time": 0.02459120750427246, "step": 25893 }, { "epoch": 3.950958251953125e-05, "step": 25893, "training_step_time": 0.12749242782592773 }, { "epoch": 3.95111083984375e-05, "model_forward_time": 0.02506875991821289, "step": 25894 }, { "epoch": 3.95111083984375e-05, "step": 25894, "training_step_time": 0.12562847137451172 }, { "epoch": 3.951263427734375e-05, "model_forward_time": 0.02498769760131836, "step": 25895 }, { "epoch": 3.951263427734375e-05, "step": 25895, "training_step_time": 0.11781740188598633 }, { "epoch": 3.951416015625e-05, "model_forward_time": 0.025946617126464844, "step": 25896 }, { "epoch": 3.951416015625e-05, "step": 25896, "training_step_time": 0.1454763412475586 }, { "epoch": 3.951568603515625e-05, "model_forward_time": 0.02486252784729004, "step": 25897 }, { "epoch": 3.951568603515625e-05, "step": 25897, "training_step_time": 0.10721755027770996 }, { "epoch": 3.95172119140625e-05, "model_forward_time": 0.0254514217376709, "step": 25898 }, { "epoch": 3.95172119140625e-05, "step": 25898, "training_step_time": 0.10650753974914551 }, { "epoch": 3.951873779296875e-05, "model_forward_time": 0.024816513061523438, "step": 25899 }, { "epoch": 3.951873779296875e-05, "step": 25899, "training_step_time": 0.11905241012573242 }, { "epoch": 3.9520263671875e-05, "grad_norm": 0.08930026739835739, "learning_rate": 5.020105023749644e-06, "loss": 0.0028, "step": 25900 }, { "epoch": 3.9520263671875e-05, "model_forward_time": 0.025156259536743164, "step": 25900 }, { "epoch": 3.9520263671875e-05, "step": 25900, "training_step_time": 0.10870695114135742 }, { "epoch": 3.952178955078125e-05, "model_forward_time": 0.02541208267211914, "step": 25901 }, { "epoch": 3.952178955078125e-05, "step": 25901, "training_step_time": 0.11130738258361816 }, { "epoch": 3.95233154296875e-05, "model_forward_time": 0.025548696517944336, "step": 25902 }, { "epoch": 3.95233154296875e-05, "step": 25902, "training_step_time": 0.11432433128356934 }, { "epoch": 3.952484130859375e-05, "model_forward_time": 0.025470733642578125, "step": 25903 }, { "epoch": 3.952484130859375e-05, "step": 25903, "training_step_time": 0.11244487762451172 }, { "epoch": 3.95263671875e-05, "model_forward_time": 0.024974584579467773, "step": 25904 }, { "epoch": 3.95263671875e-05, "step": 25904, "training_step_time": 0.12554359436035156 }, { "epoch": 3.952789306640625e-05, "model_forward_time": 0.02522897720336914, "step": 25905 }, { "epoch": 3.952789306640625e-05, "step": 25905, "training_step_time": 0.12853384017944336 }, { "epoch": 3.95294189453125e-05, "model_forward_time": 0.025153398513793945, "step": 25906 }, { "epoch": 3.95294189453125e-05, "step": 25906, "training_step_time": 0.14028525352478027 }, { "epoch": 3.953094482421875e-05, "model_forward_time": 0.024993181228637695, "step": 25907 }, { "epoch": 3.953094482421875e-05, "step": 25907, "training_step_time": 0.17972278594970703 }, { "epoch": 3.9532470703125e-05, "model_forward_time": 0.024346113204956055, "step": 25908 }, { "epoch": 3.9532470703125e-05, "step": 25908, "training_step_time": 0.1316385269165039 }, { "epoch": 3.953399658203125e-05, "model_forward_time": 0.024237394332885742, "step": 25909 }, { "epoch": 3.953399658203125e-05, "step": 25909, "training_step_time": 0.12460756301879883 }, { "epoch": 3.95355224609375e-05, "grad_norm": 0.0735696479678154, "learning_rate": 4.996062291385317e-06, "loss": 0.0079, "step": 25910 }, { "epoch": 3.95355224609375e-05, "model_forward_time": 0.024611711502075195, "step": 25910 }, { "epoch": 3.95355224609375e-05, "step": 25910, "training_step_time": 0.16543006896972656 }, { "epoch": 3.953704833984375e-05, "model_forward_time": 0.023830652236938477, "step": 25911 }, { "epoch": 3.953704833984375e-05, "step": 25911, "training_step_time": 0.1216738224029541 }, { "epoch": 3.953857421875e-05, "model_forward_time": 0.024100542068481445, "step": 25912 }, { "epoch": 3.953857421875e-05, "step": 25912, "training_step_time": 0.1915435791015625 }, { "epoch": 3.954010009765625e-05, "model_forward_time": 0.023758649826049805, "step": 25913 }, { "epoch": 3.954010009765625e-05, "step": 25913, "training_step_time": 0.10660505294799805 }, { "epoch": 3.95416259765625e-05, "model_forward_time": 0.024452686309814453, "step": 25914 }, { "epoch": 3.95416259765625e-05, "step": 25914, "training_step_time": 0.1056051254272461 }, { "epoch": 3.954315185546875e-05, "model_forward_time": 0.025722026824951172, "step": 25915 }, { "epoch": 3.954315185546875e-05, "step": 25915, "training_step_time": 0.10785388946533203 }, { "epoch": 3.9544677734375e-05, "model_forward_time": 0.0253298282623291, "step": 25916 }, { "epoch": 3.9544677734375e-05, "step": 25916, "training_step_time": 0.1070556640625 }, { "epoch": 3.954620361328125e-05, "model_forward_time": 0.025110483169555664, "step": 25917 }, { "epoch": 3.954620361328125e-05, "step": 25917, "training_step_time": 0.10883021354675293 }, { "epoch": 3.95477294921875e-05, "model_forward_time": 0.025153398513793945, "step": 25918 }, { "epoch": 3.95477294921875e-05, "step": 25918, "training_step_time": 0.10463094711303711 }, { "epoch": 3.954925537109375e-05, "model_forward_time": 0.02500152587890625, "step": 25919 }, { "epoch": 3.954925537109375e-05, "step": 25919, "training_step_time": 0.13227343559265137 }, { "epoch": 3.955078125e-05, "grad_norm": 0.304913729429245, "learning_rate": 4.972074243048897e-06, "loss": 0.005, "step": 25920 }, { "epoch": 3.955078125e-05, "model_forward_time": 0.025394916534423828, "step": 25920 }, { "epoch": 3.955078125e-05, "step": 25920, "training_step_time": 0.13139581680297852 }, { "epoch": 3.955230712890625e-05, "model_forward_time": 0.024790287017822266, "step": 25921 }, { "epoch": 3.955230712890625e-05, "step": 25921, "training_step_time": 0.10678887367248535 }, { "epoch": 3.95538330078125e-05, "model_forward_time": 0.024930477142333984, "step": 25922 }, { "epoch": 3.95538330078125e-05, "step": 25922, "training_step_time": 0.10747551918029785 }, { "epoch": 3.955535888671875e-05, "model_forward_time": 0.025041580200195312, "step": 25923 }, { "epoch": 3.955535888671875e-05, "step": 25923, "training_step_time": 0.12308549880981445 }, { "epoch": 3.9556884765625e-05, "model_forward_time": 0.025037527084350586, "step": 25924 }, { "epoch": 3.9556884765625e-05, "step": 25924, "training_step_time": 0.10683393478393555 }, { "epoch": 3.955841064453125e-05, "model_forward_time": 0.025164365768432617, "step": 25925 }, { "epoch": 3.955841064453125e-05, "step": 25925, "training_step_time": 0.13396239280700684 }, { "epoch": 3.95599365234375e-05, "model_forward_time": 0.02528214454650879, "step": 25926 }, { "epoch": 3.95599365234375e-05, "step": 25926, "training_step_time": 0.12454581260681152 }, { "epoch": 3.956146240234375e-05, "model_forward_time": 0.025037765502929688, "step": 25927 }, { "epoch": 3.956146240234375e-05, "step": 25927, "training_step_time": 0.10522723197937012 }, { "epoch": 3.956298828125e-05, "model_forward_time": 0.024893760681152344, "step": 25928 }, { "epoch": 3.956298828125e-05, "step": 25928, "training_step_time": 0.11438608169555664 }, { "epoch": 3.956451416015625e-05, "model_forward_time": 0.02494525909423828, "step": 25929 }, { "epoch": 3.956451416015625e-05, "step": 25929, "training_step_time": 0.10952448844909668 }, { "epoch": 3.95660400390625e-05, "grad_norm": 0.08029066026210785, "learning_rate": 4.948140907888121e-06, "loss": 0.0034, "step": 25930 }, { "epoch": 3.95660400390625e-05, "model_forward_time": 0.02507638931274414, "step": 25930 }, { "epoch": 3.95660400390625e-05, "step": 25930, "training_step_time": 0.1027231216430664 }, { "epoch": 3.956756591796875e-05, "model_forward_time": 0.025275707244873047, "step": 25931 }, { "epoch": 3.956756591796875e-05, "step": 25931, "training_step_time": 0.10364890098571777 }, { "epoch": 3.9569091796875e-05, "model_forward_time": 0.025071382522583008, "step": 25932 }, { "epoch": 3.9569091796875e-05, "step": 25932, "training_step_time": 0.1037895679473877 }, { "epoch": 3.957061767578125e-05, "model_forward_time": 0.025154590606689453, "step": 25933 }, { "epoch": 3.957061767578125e-05, "step": 25933, "training_step_time": 0.10785722732543945 }, { "epoch": 3.95721435546875e-05, "model_forward_time": 0.025271892547607422, "step": 25934 }, { "epoch": 3.95721435546875e-05, "step": 25934, "training_step_time": 0.1046285629272461 }, { "epoch": 3.957366943359375e-05, "model_forward_time": 0.025661230087280273, "step": 25935 }, { "epoch": 3.957366943359375e-05, "step": 25935, "training_step_time": 0.10770130157470703 }, { "epoch": 3.95751953125e-05, "model_forward_time": 0.025554656982421875, "step": 25936 }, { "epoch": 3.95751953125e-05, "step": 25936, "training_step_time": 0.10528850555419922 }, { "epoch": 3.957672119140625e-05, "model_forward_time": 0.025127410888671875, "step": 25937 }, { "epoch": 3.957672119140625e-05, "step": 25937, "training_step_time": 0.14862775802612305 }, { "epoch": 3.95782470703125e-05, "model_forward_time": 0.024837970733642578, "step": 25938 }, { "epoch": 3.95782470703125e-05, "step": 25938, "training_step_time": 0.10601663589477539 }, { "epoch": 3.957977294921875e-05, "model_forward_time": 0.024684667587280273, "step": 25939 }, { "epoch": 3.957977294921875e-05, "step": 25939, "training_step_time": 0.11926937103271484 }, { "epoch": 3.9581298828125e-05, "grad_norm": 0.0714607685804367, "learning_rate": 4.924262314984262e-06, "loss": 0.0022, "step": 25940 }, { "epoch": 3.9581298828125e-05, "model_forward_time": 0.024963855743408203, "step": 25940 }, { "epoch": 3.9581298828125e-05, "step": 25940, "training_step_time": 0.13007211685180664 }, { "epoch": 3.958282470703125e-05, "model_forward_time": 0.02481245994567871, "step": 25941 }, { "epoch": 3.958282470703125e-05, "step": 25941, "training_step_time": 0.13599586486816406 }, { "epoch": 3.95843505859375e-05, "model_forward_time": 0.02435779571533203, "step": 25942 }, { "epoch": 3.95843505859375e-05, "step": 25942, "training_step_time": 0.12369418144226074 }, { "epoch": 3.958587646484375e-05, "model_forward_time": 0.02498030662536621, "step": 25943 }, { "epoch": 3.958587646484375e-05, "step": 25943, "training_step_time": 0.12888717651367188 }, { "epoch": 3.958740234375e-05, "model_forward_time": 0.02518939971923828, "step": 25944 }, { "epoch": 3.958740234375e-05, "step": 25944, "training_step_time": 0.1187584400177002 }, { "epoch": 3.958892822265625e-05, "model_forward_time": 0.02643132209777832, "step": 25945 }, { "epoch": 3.958892822265625e-05, "step": 25945, "training_step_time": 0.10482215881347656 }, { "epoch": 3.95904541015625e-05, "model_forward_time": 0.024663686752319336, "step": 25946 }, { "epoch": 3.95904541015625e-05, "step": 25946, "training_step_time": 0.10834193229675293 }, { "epoch": 3.959197998046875e-05, "model_forward_time": 0.025209903717041016, "step": 25947 }, { "epoch": 3.959197998046875e-05, "step": 25947, "training_step_time": 0.11103463172912598 }, { "epoch": 3.9593505859375e-05, "model_forward_time": 0.025087833404541016, "step": 25948 }, { "epoch": 3.9593505859375e-05, "step": 25948, "training_step_time": 0.10713934898376465 }, { "epoch": 3.959503173828125e-05, "model_forward_time": 0.026057004928588867, "step": 25949 }, { "epoch": 3.959503173828125e-05, "step": 25949, "training_step_time": 0.1267542839050293 }, { "epoch": 3.95965576171875e-05, "grad_norm": 0.07329155504703522, "learning_rate": 4.900438493352055e-06, "loss": 0.0059, "step": 25950 }, { "epoch": 3.95965576171875e-05, "model_forward_time": 0.026989459991455078, "step": 25950 }, { "epoch": 3.95965576171875e-05, "step": 25950, "training_step_time": 0.13417577743530273 }, { "epoch": 3.959808349609375e-05, "model_forward_time": 0.02520275115966797, "step": 25951 }, { "epoch": 3.959808349609375e-05, "step": 25951, "training_step_time": 0.11925911903381348 }, { "epoch": 3.9599609375e-05, "model_forward_time": 0.025282621383666992, "step": 25952 }, { "epoch": 3.9599609375e-05, "step": 25952, "training_step_time": 0.10979390144348145 }, { "epoch": 3.960113525390625e-05, "model_forward_time": 0.0253143310546875, "step": 25953 }, { "epoch": 3.960113525390625e-05, "step": 25953, "training_step_time": 0.1065518856048584 }, { "epoch": 3.96026611328125e-05, "model_forward_time": 0.02487921714782715, "step": 25954 }, { "epoch": 3.96026611328125e-05, "step": 25954, "training_step_time": 0.13965511322021484 }, { "epoch": 3.960418701171875e-05, "model_forward_time": 0.024860620498657227, "step": 25955 }, { "epoch": 3.960418701171875e-05, "step": 25955, "training_step_time": 0.1117701530456543 }, { "epoch": 3.9605712890625e-05, "model_forward_time": 0.0248110294342041, "step": 25956 }, { "epoch": 3.9605712890625e-05, "step": 25956, "training_step_time": 0.1071329116821289 }, { "epoch": 3.960723876953125e-05, "model_forward_time": 0.024823665618896484, "step": 25957 }, { "epoch": 3.960723876953125e-05, "step": 25957, "training_step_time": 0.11965560913085938 }, { "epoch": 3.96087646484375e-05, "model_forward_time": 0.024296998977661133, "step": 25958 }, { "epoch": 3.96087646484375e-05, "step": 25958, "training_step_time": 0.10968160629272461 }, { "epoch": 3.961029052734375e-05, "model_forward_time": 0.025357484817504883, "step": 25959 }, { "epoch": 3.961029052734375e-05, "step": 25959, "training_step_time": 0.10692405700683594 }, { "epoch": 3.961181640625e-05, "grad_norm": 0.0413772389292717, "learning_rate": 4.8766694719396875e-06, "loss": 0.0062, "step": 25960 }, { "epoch": 3.961181640625e-05, "model_forward_time": 0.024907827377319336, "step": 25960 }, { "epoch": 3.961181640625e-05, "step": 25960, "training_step_time": 0.18484234809875488 }, { "epoch": 3.961334228515625e-05, "model_forward_time": 0.02427530288696289, "step": 25961 }, { "epoch": 3.961334228515625e-05, "step": 25961, "training_step_time": 0.1016690731048584 }, { "epoch": 3.96148681640625e-05, "model_forward_time": 0.024558067321777344, "step": 25962 }, { "epoch": 3.96148681640625e-05, "step": 25962, "training_step_time": 0.10198664665222168 }, { "epoch": 3.961639404296875e-05, "model_forward_time": 0.024968624114990234, "step": 25963 }, { "epoch": 3.961639404296875e-05, "step": 25963, "training_step_time": 0.10433244705200195 }, { "epoch": 3.9617919921875e-05, "model_forward_time": 0.024876832962036133, "step": 25964 }, { "epoch": 3.9617919921875e-05, "step": 25964, "training_step_time": 0.11409640312194824 }, { "epoch": 3.961944580078125e-05, "model_forward_time": 0.024922847747802734, "step": 25965 }, { "epoch": 3.961944580078125e-05, "step": 25965, "training_step_time": 0.11059832572937012 }, { "epoch": 3.96209716796875e-05, "model_forward_time": 0.025029420852661133, "step": 25966 }, { "epoch": 3.96209716796875e-05, "step": 25966, "training_step_time": 0.1095435619354248 }, { "epoch": 3.962249755859375e-05, "model_forward_time": 0.025345802307128906, "step": 25967 }, { "epoch": 3.962249755859375e-05, "step": 25967, "training_step_time": 0.16017556190490723 }, { "epoch": 3.96240234375e-05, "model_forward_time": 0.024622201919555664, "step": 25968 }, { "epoch": 3.96240234375e-05, "step": 25968, "training_step_time": 0.17154526710510254 }, { "epoch": 3.962554931640625e-05, "model_forward_time": 0.0243685245513916, "step": 25969 }, { "epoch": 3.962554931640625e-05, "step": 25969, "training_step_time": 0.11273956298828125 }, { "epoch": 3.96270751953125e-05, "grad_norm": 0.13631302118301392, "learning_rate": 4.852955279628768e-06, "loss": 0.0046, "step": 25970 }, { "epoch": 3.96270751953125e-05, "model_forward_time": 0.024259090423583984, "step": 25970 }, { "epoch": 3.96270751953125e-05, "step": 25970, "training_step_time": 0.109344482421875 }, { "epoch": 3.962860107421875e-05, "model_forward_time": 0.024977445602416992, "step": 25971 }, { "epoch": 3.962860107421875e-05, "step": 25971, "training_step_time": 0.11905670166015625 }, { "epoch": 3.9630126953125e-05, "model_forward_time": 0.024840593338012695, "step": 25972 }, { "epoch": 3.9630126953125e-05, "step": 25972, "training_step_time": 0.11056184768676758 }, { "epoch": 3.963165283203125e-05, "model_forward_time": 0.024866104125976562, "step": 25973 }, { "epoch": 3.963165283203125e-05, "step": 25973, "training_step_time": 0.11102890968322754 }, { "epoch": 3.96331787109375e-05, "model_forward_time": 0.025064706802368164, "step": 25974 }, { "epoch": 3.96331787109375e-05, "step": 25974, "training_step_time": 0.11691665649414062 }, { "epoch": 3.963470458984375e-05, "model_forward_time": 0.024753332138061523, "step": 25975 }, { "epoch": 3.963470458984375e-05, "step": 25975, "training_step_time": 0.10817646980285645 }, { "epoch": 3.963623046875e-05, "model_forward_time": 0.02526068687438965, "step": 25976 }, { "epoch": 3.963623046875e-05, "step": 25976, "training_step_time": 0.10428309440612793 }, { "epoch": 3.963775634765625e-05, "model_forward_time": 0.02501201629638672, "step": 25977 }, { "epoch": 3.963775634765625e-05, "step": 25977, "training_step_time": 0.10531401634216309 }, { "epoch": 3.96392822265625e-05, "model_forward_time": 0.024677753448486328, "step": 25978 }, { "epoch": 3.96392822265625e-05, "step": 25978, "training_step_time": 0.10532999038696289 }, { "epoch": 3.964080810546875e-05, "model_forward_time": 0.02463674545288086, "step": 25979 }, { "epoch": 3.964080810546875e-05, "step": 25979, "training_step_time": 0.10957884788513184 }, { "epoch": 3.9642333984375e-05, "grad_norm": 0.22917073965072632, "learning_rate": 4.829295945234258e-06, "loss": 0.0043, "step": 25980 }, { "epoch": 3.9642333984375e-05, "model_forward_time": 0.02520012855529785, "step": 25980 }, { "epoch": 3.9642333984375e-05, "step": 25980, "training_step_time": 0.10792231559753418 }, { "epoch": 3.964385986328125e-05, "model_forward_time": 0.025702953338623047, "step": 25981 }, { "epoch": 3.964385986328125e-05, "step": 25981, "training_step_time": 0.10661911964416504 }, { "epoch": 3.96453857421875e-05, "model_forward_time": 0.025025606155395508, "step": 25982 }, { "epoch": 3.96453857421875e-05, "step": 25982, "training_step_time": 0.10843038558959961 }, { "epoch": 3.964691162109375e-05, "model_forward_time": 0.025310277938842773, "step": 25983 }, { "epoch": 3.964691162109375e-05, "step": 25983, "training_step_time": 0.11055278778076172 }, { "epoch": 3.96484375e-05, "model_forward_time": 0.024824857711791992, "step": 25984 }, { "epoch": 3.96484375e-05, "step": 25984, "training_step_time": 0.10470247268676758 }, { "epoch": 3.964996337890625e-05, "model_forward_time": 0.025389909744262695, "step": 25985 }, { "epoch": 3.964996337890625e-05, "step": 25985, "training_step_time": 0.1932811737060547 }, { "epoch": 3.96514892578125e-05, "model_forward_time": 0.02440667152404785, "step": 25986 }, { "epoch": 3.96514892578125e-05, "step": 25986, "training_step_time": 0.1116487979888916 }, { "epoch": 3.965301513671875e-05, "model_forward_time": 0.024024486541748047, "step": 25987 }, { "epoch": 3.965301513671875e-05, "step": 25987, "training_step_time": 0.10891580581665039 }, { "epoch": 3.9654541015625e-05, "model_forward_time": 0.025049686431884766, "step": 25988 }, { "epoch": 3.9654541015625e-05, "step": 25988, "training_step_time": 0.12834739685058594 }, { "epoch": 3.965606689453125e-05, "model_forward_time": 0.024780988693237305, "step": 25989 }, { "epoch": 3.965606689453125e-05, "step": 25989, "training_step_time": 0.12465190887451172 }, { "epoch": 3.96575927734375e-05, "grad_norm": 0.06205694004893303, "learning_rate": 4.805691497504505e-06, "loss": 0.0045, "step": 25990 }, { "epoch": 3.96575927734375e-05, "model_forward_time": 0.024813413619995117, "step": 25990 }, { "epoch": 3.96575927734375e-05, "step": 25990, "training_step_time": 0.10761165618896484 }, { "epoch": 3.965911865234375e-05, "model_forward_time": 0.025182247161865234, "step": 25991 }, { "epoch": 3.965911865234375e-05, "step": 25991, "training_step_time": 0.12384939193725586 }, { "epoch": 3.966064453125e-05, "model_forward_time": 0.02520585060119629, "step": 25992 }, { "epoch": 3.966064453125e-05, "step": 25992, "training_step_time": 0.10612058639526367 }, { "epoch": 3.966217041015625e-05, "model_forward_time": 0.0250701904296875, "step": 25993 }, { "epoch": 3.966217041015625e-05, "step": 25993, "training_step_time": 0.1062781810760498 }, { "epoch": 3.96636962890625e-05, "model_forward_time": 0.024768352508544922, "step": 25994 }, { "epoch": 3.96636962890625e-05, "step": 25994, "training_step_time": 0.11225390434265137 }, { "epoch": 3.966522216796875e-05, "model_forward_time": 0.024724721908569336, "step": 25995 }, { "epoch": 3.966522216796875e-05, "step": 25995, "training_step_time": 0.1076667308807373 }, { "epoch": 3.9666748046875e-05, "model_forward_time": 0.025886058807373047, "step": 25996 }, { "epoch": 3.9666748046875e-05, "step": 25996, "training_step_time": 0.11101484298706055 }, { "epoch": 3.966827392578125e-05, "model_forward_time": 0.025547027587890625, "step": 25997 }, { "epoch": 3.966827392578125e-05, "step": 25997, "training_step_time": 0.11372542381286621 }, { "epoch": 3.96697998046875e-05, "model_forward_time": 0.02540874481201172, "step": 25998 }, { "epoch": 3.96697998046875e-05, "step": 25998, "training_step_time": 0.10763955116271973 }, { "epoch": 3.967132568359375e-05, "model_forward_time": 0.025618791580200195, "step": 25999 }, { "epoch": 3.967132568359375e-05, "step": 25999, "training_step_time": 0.12445354461669922 }, { "epoch": 3.96728515625e-05, "grad_norm": 0.12980355322360992, "learning_rate": 4.782141965121128e-06, "loss": 0.0042, "step": 26000 }, { "epoch": 3.96728515625e-05, "model_forward_time": 0.026137351989746094, "step": 26000 }, { "epoch": 3.96728515625e-05, "step": 26000, "training_step_time": 0.10444116592407227 }, { "epoch": 3.967437744140625e-05, "model_forward_time": 0.02349567413330078, "step": 26001 }, { "epoch": 3.967437744140625e-05, "step": 26001, "training_step_time": 0.13078618049621582 }, { "epoch": 3.96759033203125e-05, "model_forward_time": 0.024665117263793945, "step": 26002 }, { "epoch": 3.96759033203125e-05, "step": 26002, "training_step_time": 0.1827259063720703 }, { "epoch": 3.967742919921875e-05, "model_forward_time": 0.02470684051513672, "step": 26003 }, { "epoch": 3.967742919921875e-05, "step": 26003, "training_step_time": 0.11550045013427734 }, { "epoch": 3.9678955078125e-05, "model_forward_time": 0.026877403259277344, "step": 26004 }, { "epoch": 3.9678955078125e-05, "step": 26004, "training_step_time": 0.2109205722808838 }, { "epoch": 3.968048095703125e-05, "model_forward_time": 0.024364471435546875, "step": 26005 }, { "epoch": 3.968048095703125e-05, "step": 26005, "training_step_time": 0.11206769943237305 }, { "epoch": 3.96820068359375e-05, "model_forward_time": 0.025311708450317383, "step": 26006 }, { "epoch": 3.96820068359375e-05, "step": 26006, "training_step_time": 0.10742998123168945 }, { "epoch": 3.968353271484375e-05, "model_forward_time": 0.02502894401550293, "step": 26007 }, { "epoch": 3.968353271484375e-05, "step": 26007, "training_step_time": 0.1581413745880127 }, { "epoch": 3.968505859375e-05, "model_forward_time": 0.02397918701171875, "step": 26008 }, { "epoch": 3.968505859375e-05, "step": 26008, "training_step_time": 0.1049644947052002 }, { "epoch": 3.968658447265625e-05, "model_forward_time": 0.024847745895385742, "step": 26009 }, { "epoch": 3.968658447265625e-05, "step": 26009, "training_step_time": 0.10390734672546387 }, { "epoch": 3.96881103515625e-05, "grad_norm": 0.1348857283592224, "learning_rate": 4.758647376699032e-06, "loss": 0.0082, "step": 26010 }, { "epoch": 3.96881103515625e-05, "model_forward_time": 0.025327682495117188, "step": 26010 }, { "epoch": 3.96881103515625e-05, "step": 26010, "training_step_time": 0.10662007331848145 }, { "epoch": 3.968963623046875e-05, "model_forward_time": 0.025835275650024414, "step": 26011 }, { "epoch": 3.968963623046875e-05, "step": 26011, "training_step_time": 0.10666418075561523 }, { "epoch": 3.9691162109375e-05, "model_forward_time": 0.02539539337158203, "step": 26012 }, { "epoch": 3.9691162109375e-05, "step": 26012, "training_step_time": 0.10662555694580078 }, { "epoch": 3.969268798828125e-05, "model_forward_time": 0.02554488182067871, "step": 26013 }, { "epoch": 3.969268798828125e-05, "step": 26013, "training_step_time": 0.1114494800567627 }, { "epoch": 3.96942138671875e-05, "model_forward_time": 0.025473833084106445, "step": 26014 }, { "epoch": 3.96942138671875e-05, "step": 26014, "training_step_time": 0.12279200553894043 }, { "epoch": 3.969573974609375e-05, "model_forward_time": 0.024950027465820312, "step": 26015 }, { "epoch": 3.969573974609375e-05, "step": 26015, "training_step_time": 0.1349194049835205 }, { "epoch": 3.9697265625e-05, "model_forward_time": 0.024435758590698242, "step": 26016 }, { "epoch": 3.9697265625e-05, "step": 26016, "training_step_time": 0.21700835227966309 }, { "epoch": 3.969879150390625e-05, "model_forward_time": 0.024574756622314453, "step": 26017 }, { "epoch": 3.969879150390625e-05, "step": 26017, "training_step_time": 0.11485719680786133 }, { "epoch": 3.97003173828125e-05, "model_forward_time": 0.024589061737060547, "step": 26018 }, { "epoch": 3.97003173828125e-05, "step": 26018, "training_step_time": 0.11760711669921875 }, { "epoch": 3.970184326171875e-05, "model_forward_time": 0.02534961700439453, "step": 26019 }, { "epoch": 3.970184326171875e-05, "step": 26019, "training_step_time": 0.16478848457336426 }, { "epoch": 3.9703369140625e-05, "grad_norm": 0.0655096098780632, "learning_rate": 4.7352077607863475e-06, "loss": 0.0036, "step": 26020 }, { "epoch": 3.9703369140625e-05, "model_forward_time": 0.02577376365661621, "step": 26020 }, { "epoch": 3.9703369140625e-05, "step": 26020, "training_step_time": 0.13031339645385742 }, { "epoch": 3.970489501953125e-05, "model_forward_time": 0.024147987365722656, "step": 26021 }, { "epoch": 3.970489501953125e-05, "step": 26021, "training_step_time": 0.10988593101501465 }, { "epoch": 3.97064208984375e-05, "model_forward_time": 0.025187015533447266, "step": 26022 }, { "epoch": 3.97064208984375e-05, "step": 26022, "training_step_time": 0.10748648643493652 }, { "epoch": 3.970794677734375e-05, "model_forward_time": 0.024553298950195312, "step": 26023 }, { "epoch": 3.970794677734375e-05, "step": 26023, "training_step_time": 0.10675692558288574 }, { "epoch": 3.970947265625e-05, "model_forward_time": 0.02523493766784668, "step": 26024 }, { "epoch": 3.970947265625e-05, "step": 26024, "training_step_time": 0.10614490509033203 }, { "epoch": 3.971099853515625e-05, "model_forward_time": 0.025247812271118164, "step": 26025 }, { "epoch": 3.971099853515625e-05, "step": 26025, "training_step_time": 0.10645580291748047 }, { "epoch": 3.97125244140625e-05, "model_forward_time": 0.024952173233032227, "step": 26026 }, { "epoch": 3.97125244140625e-05, "step": 26026, "training_step_time": 0.10694742202758789 }, { "epoch": 3.971405029296875e-05, "model_forward_time": 0.02524423599243164, "step": 26027 }, { "epoch": 3.971405029296875e-05, "step": 26027, "training_step_time": 0.10592460632324219 }, { "epoch": 3.9715576171875e-05, "model_forward_time": 0.025447845458984375, "step": 26028 }, { "epoch": 3.9715576171875e-05, "step": 26028, "training_step_time": 0.10930728912353516 }, { "epoch": 3.971710205078125e-05, "model_forward_time": 0.02537393569946289, "step": 26029 }, { "epoch": 3.971710205078125e-05, "step": 26029, "training_step_time": 0.10821032524108887 }, { "epoch": 3.97186279296875e-05, "grad_norm": 0.3153933584690094, "learning_rate": 4.711823145864419e-06, "loss": 0.0058, "step": 26030 }, { "epoch": 3.97186279296875e-05, "model_forward_time": 0.025151491165161133, "step": 26030 }, { "epoch": 3.97186279296875e-05, "step": 26030, "training_step_time": 0.10540318489074707 }, { "epoch": 3.972015380859375e-05, "model_forward_time": 0.02481532096862793, "step": 26031 }, { "epoch": 3.972015380859375e-05, "step": 26031, "training_step_time": 0.10720038414001465 }, { "epoch": 3.97216796875e-05, "model_forward_time": 0.024936437606811523, "step": 26032 }, { "epoch": 3.97216796875e-05, "step": 26032, "training_step_time": 0.10388326644897461 }, { "epoch": 3.972320556640625e-05, "model_forward_time": 0.025258302688598633, "step": 26033 }, { "epoch": 3.972320556640625e-05, "step": 26033, "training_step_time": 0.1055140495300293 }, { "epoch": 3.97247314453125e-05, "model_forward_time": 0.02509784698486328, "step": 26034 }, { "epoch": 3.97247314453125e-05, "step": 26034, "training_step_time": 0.12137413024902344 }, { "epoch": 3.972625732421875e-05, "model_forward_time": 0.025412321090698242, "step": 26035 }, { "epoch": 3.972625732421875e-05, "step": 26035, "training_step_time": 0.11072444915771484 }, { "epoch": 3.9727783203125e-05, "model_forward_time": 0.025491714477539062, "step": 26036 }, { "epoch": 3.9727783203125e-05, "step": 26036, "training_step_time": 0.11000943183898926 }, { "epoch": 3.972930908203125e-05, "model_forward_time": 0.03134632110595703, "step": 26037 }, { "epoch": 3.972930908203125e-05, "step": 26037, "training_step_time": 0.12406206130981445 }, { "epoch": 3.97308349609375e-05, "model_forward_time": 0.02571725845336914, "step": 26038 }, { "epoch": 3.97308349609375e-05, "step": 26038, "training_step_time": 0.13384413719177246 }, { "epoch": 3.973236083984375e-05, "model_forward_time": 0.025203466415405273, "step": 26039 }, { "epoch": 3.973236083984375e-05, "step": 26039, "training_step_time": 0.11167407035827637 }, { "epoch": 3.973388671875e-05, "grad_norm": 0.22465716302394867, "learning_rate": 4.688493560347773e-06, "loss": 0.0074, "step": 26040 }, { "epoch": 3.973388671875e-05, "model_forward_time": 0.025099754333496094, "step": 26040 }, { "epoch": 3.973388671875e-05, "step": 26040, "training_step_time": 0.10657453536987305 }, { "epoch": 3.973541259765625e-05, "model_forward_time": 0.0262906551361084, "step": 26041 }, { "epoch": 3.973541259765625e-05, "step": 26041, "training_step_time": 0.10577845573425293 }, { "epoch": 3.97369384765625e-05, "model_forward_time": 0.02516031265258789, "step": 26042 }, { "epoch": 3.97369384765625e-05, "step": 26042, "training_step_time": 0.10335135459899902 }, { "epoch": 3.973846435546875e-05, "model_forward_time": 0.025517702102661133, "step": 26043 }, { "epoch": 3.973846435546875e-05, "step": 26043, "training_step_time": 0.10529327392578125 }, { "epoch": 3.9739990234375e-05, "model_forward_time": 0.02576899528503418, "step": 26044 }, { "epoch": 3.9739990234375e-05, "step": 26044, "training_step_time": 0.5889902114868164 }, { "epoch": 3.974151611328125e-05, "model_forward_time": 0.02310466766357422, "step": 26045 }, { "epoch": 3.974151611328125e-05, "step": 26045, "training_step_time": 0.1740434169769287 }, { "epoch": 3.97430419921875e-05, "model_forward_time": 0.02442336082458496, "step": 26046 }, { "epoch": 3.97430419921875e-05, "step": 26046, "training_step_time": 0.1050863265991211 }, { "epoch": 3.974456787109375e-05, "model_forward_time": 0.024844884872436523, "step": 26047 }, { "epoch": 3.974456787109375e-05, "step": 26047, "training_step_time": 0.16608238220214844 }, { "epoch": 3.974609375e-05, "model_forward_time": 0.024823665618896484, "step": 26048 }, { "epoch": 3.974609375e-05, "step": 26048, "training_step_time": 0.14832162857055664 }, { "epoch": 3.974761962890625e-05, "model_forward_time": 0.024488210678100586, "step": 26049 }, { "epoch": 3.974761962890625e-05, "step": 26049, "training_step_time": 0.11818242073059082 }, { "epoch": 3.97491455078125e-05, "grad_norm": 0.05569394305348396, "learning_rate": 4.66521903258404e-06, "loss": 0.0059, "step": 26050 }, { "epoch": 3.97491455078125e-05, "model_forward_time": 0.024953126907348633, "step": 26050 }, { "epoch": 3.97491455078125e-05, "step": 26050, "training_step_time": 0.15697765350341797 }, { "epoch": 3.975067138671875e-05, "model_forward_time": 0.024326086044311523, "step": 26051 }, { "epoch": 3.975067138671875e-05, "step": 26051, "training_step_time": 0.10287904739379883 }, { "epoch": 3.9752197265625e-05, "model_forward_time": 0.024932861328125, "step": 26052 }, { "epoch": 3.9752197265625e-05, "step": 26052, "training_step_time": 0.10367321968078613 }, { "epoch": 3.975372314453125e-05, "model_forward_time": 0.025317907333374023, "step": 26053 }, { "epoch": 3.975372314453125e-05, "step": 26053, "training_step_time": 0.10651707649230957 }, { "epoch": 3.97552490234375e-05, "model_forward_time": 0.02503681182861328, "step": 26054 }, { "epoch": 3.97552490234375e-05, "step": 26054, "training_step_time": 0.10993552207946777 }, { "epoch": 3.975677490234375e-05, "model_forward_time": 0.024713516235351562, "step": 26055 }, { "epoch": 3.975677490234375e-05, "step": 26055, "training_step_time": 0.10696077346801758 }, { "epoch": 3.975830078125e-05, "model_forward_time": 0.028592348098754883, "step": 26056 }, { "epoch": 3.975830078125e-05, "step": 26056, "training_step_time": 0.10900521278381348 }, { "epoch": 3.975982666015625e-05, "model_forward_time": 0.0252683162689209, "step": 26057 }, { "epoch": 3.975982666015625e-05, "step": 26057, "training_step_time": 0.1077885627746582 }, { "epoch": 3.97613525390625e-05, "model_forward_time": 0.02539658546447754, "step": 26058 }, { "epoch": 3.97613525390625e-05, "step": 26058, "training_step_time": 0.10639500617980957 }, { "epoch": 3.976287841796875e-05, "model_forward_time": 0.025281190872192383, "step": 26059 }, { "epoch": 3.976287841796875e-05, "step": 26059, "training_step_time": 0.13728713989257812 }, { "epoch": 3.9764404296875e-05, "grad_norm": 0.06518325209617615, "learning_rate": 4.64199959085398e-06, "loss": 0.0031, "step": 26060 }, { "epoch": 3.9764404296875e-05, "model_forward_time": 0.024903297424316406, "step": 26060 }, { "epoch": 3.9764404296875e-05, "step": 26060, "training_step_time": 0.13059306144714355 }, { "epoch": 3.976593017578125e-05, "model_forward_time": 0.02465987205505371, "step": 26061 }, { "epoch": 3.976593017578125e-05, "step": 26061, "training_step_time": 0.10272479057312012 }, { "epoch": 3.97674560546875e-05, "model_forward_time": 0.02546215057373047, "step": 26062 }, { "epoch": 3.97674560546875e-05, "step": 26062, "training_step_time": 0.11530399322509766 }, { "epoch": 3.976898193359375e-05, "model_forward_time": 0.02499675750732422, "step": 26063 }, { "epoch": 3.976898193359375e-05, "step": 26063, "training_step_time": 0.11090779304504395 }, { "epoch": 3.97705078125e-05, "model_forward_time": 0.02827739715576172, "step": 26064 }, { "epoch": 3.97705078125e-05, "step": 26064, "training_step_time": 0.10848402976989746 }, { "epoch": 3.977203369140625e-05, "model_forward_time": 0.025260448455810547, "step": 26065 }, { "epoch": 3.977203369140625e-05, "step": 26065, "training_step_time": 0.1930980682373047 }, { "epoch": 3.97735595703125e-05, "model_forward_time": 0.024288654327392578, "step": 26066 }, { "epoch": 3.97735595703125e-05, "step": 26066, "training_step_time": 0.10209774971008301 }, { "epoch": 3.977508544921875e-05, "model_forward_time": 0.0244748592376709, "step": 26067 }, { "epoch": 3.977508544921875e-05, "step": 26067, "training_step_time": 0.10619449615478516 }, { "epoch": 3.9776611328125e-05, "model_forward_time": 0.02485489845275879, "step": 26068 }, { "epoch": 3.9776611328125e-05, "step": 26068, "training_step_time": 0.10968208312988281 }, { "epoch": 3.977813720703125e-05, "model_forward_time": 0.027309417724609375, "step": 26069 }, { "epoch": 3.977813720703125e-05, "step": 26069, "training_step_time": 0.11208653450012207 }, { "epoch": 3.97796630859375e-05, "grad_norm": 0.1016891598701477, "learning_rate": 4.618835263371396e-06, "loss": 0.0075, "step": 26070 }, { "epoch": 3.97796630859375e-05, "model_forward_time": 0.025282621383666992, "step": 26070 }, { "epoch": 3.97796630859375e-05, "step": 26070, "training_step_time": 0.11693763732910156 }, { "epoch": 3.978118896484375e-05, "model_forward_time": 0.02524876594543457, "step": 26071 }, { "epoch": 3.978118896484375e-05, "step": 26071, "training_step_time": 0.11122441291809082 }, { "epoch": 3.978271484375e-05, "model_forward_time": 0.025646686553955078, "step": 26072 }, { "epoch": 3.978271484375e-05, "step": 26072, "training_step_time": 0.1073143482208252 }, { "epoch": 3.978424072265625e-05, "model_forward_time": 0.02508234977722168, "step": 26073 }, { "epoch": 3.978424072265625e-05, "step": 26073, "training_step_time": 0.11060881614685059 }, { "epoch": 3.97857666015625e-05, "model_forward_time": 0.02503681182861328, "step": 26074 }, { "epoch": 3.97857666015625e-05, "step": 26074, "training_step_time": 0.1115560531616211 }, { "epoch": 3.978729248046875e-05, "model_forward_time": 0.02522110939025879, "step": 26075 }, { "epoch": 3.978729248046875e-05, "step": 26075, "training_step_time": 0.10809612274169922 }, { "epoch": 3.9788818359375e-05, "model_forward_time": 0.025020599365234375, "step": 26076 }, { "epoch": 3.9788818359375e-05, "step": 26076, "training_step_time": 0.10836076736450195 }, { "epoch": 3.979034423828125e-05, "model_forward_time": 0.02520895004272461, "step": 26077 }, { "epoch": 3.979034423828125e-05, "step": 26077, "training_step_time": 0.10910201072692871 }, { "epoch": 3.97918701171875e-05, "model_forward_time": 0.025068998336791992, "step": 26078 }, { "epoch": 3.97918701171875e-05, "step": 26078, "training_step_time": 0.15309977531433105 }, { "epoch": 3.979339599609375e-05, "model_forward_time": 0.02517390251159668, "step": 26079 }, { "epoch": 3.979339599609375e-05, "step": 26079, "training_step_time": 0.1118612289428711 }, { "epoch": 3.9794921875e-05, "grad_norm": 0.2546069920063019, "learning_rate": 4.595726078283136e-06, "loss": 0.0139, "step": 26080 }, { "epoch": 3.9794921875e-05, "model_forward_time": 0.02528071403503418, "step": 26080 }, { "epoch": 3.9794921875e-05, "step": 26080, "training_step_time": 0.1091151237487793 }, { "epoch": 3.979644775390625e-05, "model_forward_time": 0.025032520294189453, "step": 26081 }, { "epoch": 3.979644775390625e-05, "step": 26081, "training_step_time": 0.11984515190124512 }, { "epoch": 3.97979736328125e-05, "model_forward_time": 0.02518296241760254, "step": 26082 }, { "epoch": 3.97979736328125e-05, "step": 26082, "training_step_time": 0.13815522193908691 }, { "epoch": 3.979949951171875e-05, "model_forward_time": 0.02478480339050293, "step": 26083 }, { "epoch": 3.979949951171875e-05, "step": 26083, "training_step_time": 0.11045360565185547 }, { "epoch": 3.9801025390625e-05, "model_forward_time": 0.025148868560791016, "step": 26084 }, { "epoch": 3.9801025390625e-05, "step": 26084, "training_step_time": 0.11093306541442871 }, { "epoch": 3.980255126953125e-05, "model_forward_time": 0.025937318801879883, "step": 26085 }, { "epoch": 3.980255126953125e-05, "step": 26085, "training_step_time": 0.10609292984008789 }, { "epoch": 3.98040771484375e-05, "model_forward_time": 0.025574922561645508, "step": 26086 }, { "epoch": 3.98040771484375e-05, "step": 26086, "training_step_time": 0.10830426216125488 }, { "epoch": 3.980560302734375e-05, "model_forward_time": 0.027197837829589844, "step": 26087 }, { "epoch": 3.980560302734375e-05, "step": 26087, "training_step_time": 0.11148428916931152 }, { "epoch": 3.980712890625e-05, "model_forward_time": 0.025763273239135742, "step": 26088 }, { "epoch": 3.980712890625e-05, "step": 26088, "training_step_time": 0.11086273193359375 }, { "epoch": 3.980865478515625e-05, "model_forward_time": 0.026128292083740234, "step": 26089 }, { "epoch": 3.980865478515625e-05, "step": 26089, "training_step_time": 0.10941433906555176 }, { "epoch": 3.98101806640625e-05, "grad_norm": 0.06303591281175613, "learning_rate": 4.57267206366902e-06, "loss": 0.0079, "step": 26090 }, { "epoch": 3.98101806640625e-05, "model_forward_time": 0.025932788848876953, "step": 26090 }, { "epoch": 3.98101806640625e-05, "step": 26090, "training_step_time": 0.21492481231689453 }, { "epoch": 3.981170654296875e-05, "model_forward_time": 0.0245361328125, "step": 26091 }, { "epoch": 3.981170654296875e-05, "step": 26091, "training_step_time": 0.11951041221618652 }, { "epoch": 3.9813232421875e-05, "model_forward_time": 0.024872779846191406, "step": 26092 }, { "epoch": 3.9813232421875e-05, "step": 26092, "training_step_time": 0.18544244766235352 }, { "epoch": 3.981475830078125e-05, "model_forward_time": 0.0245816707611084, "step": 26093 }, { "epoch": 3.981475830078125e-05, "step": 26093, "training_step_time": 0.20705294609069824 }, { "epoch": 3.98162841796875e-05, "model_forward_time": 0.024883270263671875, "step": 26094 }, { "epoch": 3.98162841796875e-05, "step": 26094, "training_step_time": 0.18047523498535156 }, { "epoch": 3.981781005859375e-05, "model_forward_time": 0.024225234985351562, "step": 26095 }, { "epoch": 3.981781005859375e-05, "step": 26095, "training_step_time": 0.13805270195007324 }, { "epoch": 3.98193359375e-05, "model_forward_time": 0.024847030639648438, "step": 26096 }, { "epoch": 3.98193359375e-05, "step": 26096, "training_step_time": 0.11334061622619629 }, { "epoch": 3.982086181640625e-05, "model_forward_time": 0.02487659454345703, "step": 26097 }, { "epoch": 3.982086181640625e-05, "step": 26097, "training_step_time": 0.10382723808288574 }, { "epoch": 3.98223876953125e-05, "model_forward_time": 0.025098562240600586, "step": 26098 }, { "epoch": 3.98223876953125e-05, "step": 26098, "training_step_time": 0.10719513893127441 }, { "epoch": 3.982391357421875e-05, "model_forward_time": 0.02562999725341797, "step": 26099 }, { "epoch": 3.982391357421875e-05, "step": 26099, "training_step_time": 0.1083986759185791 }, { "epoch": 3.9825439453125e-05, "grad_norm": 0.4417112469673157, "learning_rate": 4.549673247541875e-06, "loss": 0.0074, "step": 26100 }, { "epoch": 3.9825439453125e-05, "model_forward_time": 0.02549600601196289, "step": 26100 }, { "epoch": 3.9825439453125e-05, "step": 26100, "training_step_time": 0.18639373779296875 }, { "epoch": 3.982696533203125e-05, "model_forward_time": 0.0247344970703125, "step": 26101 }, { "epoch": 3.982696533203125e-05, "step": 26101, "training_step_time": 0.2057359218597412 }, { "epoch": 3.98284912109375e-05, "model_forward_time": 0.024443387985229492, "step": 26102 }, { "epoch": 3.98284912109375e-05, "step": 26102, "training_step_time": 0.21073675155639648 }, { "epoch": 3.983001708984375e-05, "model_forward_time": 0.024087905883789062, "step": 26103 }, { "epoch": 3.983001708984375e-05, "step": 26103, "training_step_time": 0.20769286155700684 }, { "epoch": 3.983154296875e-05, "model_forward_time": 0.024395227432250977, "step": 26104 }, { "epoch": 3.983154296875e-05, "step": 26104, "training_step_time": 0.2117927074432373 }, { "epoch": 3.983306884765625e-05, "model_forward_time": 0.024407625198364258, "step": 26105 }, { "epoch": 3.983306884765625e-05, "step": 26105, "training_step_time": 0.18606281280517578 }, { "epoch": 3.98345947265625e-05, "model_forward_time": 0.02480602264404297, "step": 26106 }, { "epoch": 3.98345947265625e-05, "step": 26106, "training_step_time": 0.13823771476745605 }, { "epoch": 3.983612060546875e-05, "model_forward_time": 0.024769306182861328, "step": 26107 }, { "epoch": 3.983612060546875e-05, "step": 26107, "training_step_time": 0.143110990524292 }, { "epoch": 3.9837646484375e-05, "model_forward_time": 0.02431321144104004, "step": 26108 }, { "epoch": 3.9837646484375e-05, "step": 26108, "training_step_time": 0.11896681785583496 }, { "epoch": 3.983917236328125e-05, "model_forward_time": 0.024739503860473633, "step": 26109 }, { "epoch": 3.983917236328125e-05, "step": 26109, "training_step_time": 0.10262560844421387 }, { "epoch": 3.98406982421875e-05, "grad_norm": 0.08231714367866516, "learning_rate": 4.526729657847423e-06, "loss": 0.0027, "step": 26110 }, { "epoch": 3.98406982421875e-05, "model_forward_time": 0.025008678436279297, "step": 26110 }, { "epoch": 3.98406982421875e-05, "step": 26110, "training_step_time": 0.10529971122741699 }, { "epoch": 3.984222412109375e-05, "model_forward_time": 0.025465011596679688, "step": 26111 }, { "epoch": 3.984222412109375e-05, "step": 26111, "training_step_time": 0.10645794868469238 }, { "epoch": 3.984375e-05, "model_forward_time": 0.025362491607666016, "step": 26112 }, { "epoch": 3.984375e-05, "step": 26112, "training_step_time": 0.1071310043334961 }, { "epoch": 3.984527587890625e-05, "model_forward_time": 0.028286218643188477, "step": 26113 }, { "epoch": 3.984527587890625e-05, "step": 26113, "training_step_time": 0.10985136032104492 }, { "epoch": 3.98468017578125e-05, "model_forward_time": 0.02552056312561035, "step": 26114 }, { "epoch": 3.98468017578125e-05, "step": 26114, "training_step_time": 0.1099252700805664 }, { "epoch": 3.984832763671875e-05, "model_forward_time": 0.02526402473449707, "step": 26115 }, { "epoch": 3.984832763671875e-05, "step": 26115, "training_step_time": 0.10544943809509277 }, { "epoch": 3.9849853515625e-05, "model_forward_time": 0.025406837463378906, "step": 26116 }, { "epoch": 3.9849853515625e-05, "step": 26116, "training_step_time": 0.1081991195678711 }, { "epoch": 3.985137939453125e-05, "model_forward_time": 0.025668621063232422, "step": 26117 }, { "epoch": 3.985137939453125e-05, "step": 26117, "training_step_time": 0.11345839500427246 }, { "epoch": 3.98529052734375e-05, "model_forward_time": 0.025363683700561523, "step": 26118 }, { "epoch": 3.98529052734375e-05, "step": 26118, "training_step_time": 0.10754275321960449 }, { "epoch": 3.985443115234375e-05, "model_forward_time": 0.025487184524536133, "step": 26119 }, { "epoch": 3.985443115234375e-05, "step": 26119, "training_step_time": 0.1067957878112793 }, { "epoch": 3.985595703125e-05, "grad_norm": 0.07585153728723526, "learning_rate": 4.5038413224642805e-06, "loss": 0.0049, "step": 26120 }, { "epoch": 3.985595703125e-05, "model_forward_time": 0.025071144104003906, "step": 26120 }, { "epoch": 3.985595703125e-05, "step": 26120, "training_step_time": 0.10821080207824707 }, { "epoch": 3.985748291015625e-05, "model_forward_time": 0.025506019592285156, "step": 26121 }, { "epoch": 3.985748291015625e-05, "step": 26121, "training_step_time": 0.1264328956604004 }, { "epoch": 3.98590087890625e-05, "model_forward_time": 0.025180339813232422, "step": 26122 }, { "epoch": 3.98590087890625e-05, "step": 26122, "training_step_time": 0.11570143699645996 }, { "epoch": 3.986053466796875e-05, "model_forward_time": 0.02397322654724121, "step": 26123 }, { "epoch": 3.986053466796875e-05, "step": 26123, "training_step_time": 0.17651724815368652 }, { "epoch": 3.9862060546875e-05, "model_forward_time": 0.024910688400268555, "step": 26124 }, { "epoch": 3.9862060546875e-05, "step": 26124, "training_step_time": 0.1839592456817627 }, { "epoch": 3.986358642578125e-05, "model_forward_time": 0.024498462677001953, "step": 26125 }, { "epoch": 3.986358642578125e-05, "step": 26125, "training_step_time": 0.11597871780395508 }, { "epoch": 3.98651123046875e-05, "model_forward_time": 0.025295734405517578, "step": 26126 }, { "epoch": 3.98651123046875e-05, "step": 26126, "training_step_time": 0.10580778121948242 }, { "epoch": 3.986663818359375e-05, "model_forward_time": 0.0245358943939209, "step": 26127 }, { "epoch": 3.986663818359375e-05, "step": 26127, "training_step_time": 0.10715460777282715 }, { "epoch": 3.98681640625e-05, "model_forward_time": 0.02546095848083496, "step": 26128 }, { "epoch": 3.98681640625e-05, "step": 26128, "training_step_time": 0.10633111000061035 }, { "epoch": 3.986968994140625e-05, "model_forward_time": 0.025569677352905273, "step": 26129 }, { "epoch": 3.986968994140625e-05, "step": 26129, "training_step_time": 0.1130685806274414 }, { "epoch": 3.98712158203125e-05, "grad_norm": 0.09220074862241745, "learning_rate": 4.48100826920394e-06, "loss": 0.0079, "step": 26130 }, { "epoch": 3.98712158203125e-05, "model_forward_time": 0.02476215362548828, "step": 26130 }, { "epoch": 3.98712158203125e-05, "step": 26130, "training_step_time": 0.11162185668945312 }, { "epoch": 3.987274169921875e-05, "model_forward_time": 0.024988174438476562, "step": 26131 }, { "epoch": 3.987274169921875e-05, "step": 26131, "training_step_time": 0.21276402473449707 }, { "epoch": 3.9874267578125e-05, "model_forward_time": 0.024491310119628906, "step": 26132 }, { "epoch": 3.9874267578125e-05, "step": 26132, "training_step_time": 0.2246253490447998 }, { "epoch": 3.987579345703125e-05, "model_forward_time": 0.024363279342651367, "step": 26133 }, { "epoch": 3.987579345703125e-05, "step": 26133, "training_step_time": 0.12912964820861816 }, { "epoch": 3.98773193359375e-05, "model_forward_time": 0.02394723892211914, "step": 26134 }, { "epoch": 3.98773193359375e-05, "step": 26134, "training_step_time": 0.12833380699157715 }, { "epoch": 3.987884521484375e-05, "model_forward_time": 0.024984359741210938, "step": 26135 }, { "epoch": 3.987884521484375e-05, "step": 26135, "training_step_time": 0.15347051620483398 }, { "epoch": 3.988037109375e-05, "model_forward_time": 0.024308443069458008, "step": 26136 }, { "epoch": 3.988037109375e-05, "step": 26136, "training_step_time": 0.22919178009033203 }, { "epoch": 3.988189697265625e-05, "model_forward_time": 0.02466106414794922, "step": 26137 }, { "epoch": 3.988189697265625e-05, "step": 26137, "training_step_time": 0.1167445182800293 }, { "epoch": 3.98834228515625e-05, "model_forward_time": 0.025573253631591797, "step": 26138 }, { "epoch": 3.98834228515625e-05, "step": 26138, "training_step_time": 0.11499834060668945 }, { "epoch": 3.988494873046875e-05, "model_forward_time": 0.025053977966308594, "step": 26139 }, { "epoch": 3.988494873046875e-05, "step": 26139, "training_step_time": 0.11554551124572754 }, { "epoch": 3.9886474609375e-05, "grad_norm": 0.11878912150859833, "learning_rate": 4.458230525810708e-06, "loss": 0.0039, "step": 26140 }, { "epoch": 3.9886474609375e-05, "model_forward_time": 0.025297880172729492, "step": 26140 }, { "epoch": 3.9886474609375e-05, "step": 26140, "training_step_time": 0.11057496070861816 }, { "epoch": 3.988800048828125e-05, "model_forward_time": 0.026903152465820312, "step": 26141 }, { "epoch": 3.988800048828125e-05, "step": 26141, "training_step_time": 0.11000514030456543 }, { "epoch": 3.98895263671875e-05, "model_forward_time": 0.024990558624267578, "step": 26142 }, { "epoch": 3.98895263671875e-05, "step": 26142, "training_step_time": 0.11150717735290527 }, { "epoch": 3.989105224609375e-05, "model_forward_time": 0.025076866149902344, "step": 26143 }, { "epoch": 3.989105224609375e-05, "step": 26143, "training_step_time": 0.10941052436828613 }, { "epoch": 3.9892578125e-05, "model_forward_time": 0.025425434112548828, "step": 26144 }, { "epoch": 3.9892578125e-05, "step": 26144, "training_step_time": 0.1105337142944336 }, { "epoch": 3.989410400390625e-05, "model_forward_time": 0.024982213973999023, "step": 26145 }, { "epoch": 3.989410400390625e-05, "step": 26145, "training_step_time": 0.10650372505187988 }, { "epoch": 3.98956298828125e-05, "model_forward_time": 0.025403738021850586, "step": 26146 }, { "epoch": 3.98956298828125e-05, "step": 26146, "training_step_time": 0.1954793930053711 }, { "epoch": 3.989715576171875e-05, "model_forward_time": 0.024821996688842773, "step": 26147 }, { "epoch": 3.989715576171875e-05, "step": 26147, "training_step_time": 0.14196300506591797 }, { "epoch": 3.9898681640625e-05, "model_forward_time": 0.02449512481689453, "step": 26148 }, { "epoch": 3.9898681640625e-05, "step": 26148, "training_step_time": 0.11445331573486328 }, { "epoch": 3.990020751953125e-05, "model_forward_time": 0.024861812591552734, "step": 26149 }, { "epoch": 3.990020751953125e-05, "step": 26149, "training_step_time": 0.10686993598937988 }, { "epoch": 3.99017333984375e-05, "grad_norm": 0.09383201599121094, "learning_rate": 4.435508119961701e-06, "loss": 0.0127, "step": 26150 }, { "epoch": 3.99017333984375e-05, "model_forward_time": 0.025095701217651367, "step": 26150 }, { "epoch": 3.99017333984375e-05, "step": 26150, "training_step_time": 0.12297630310058594 }, { "epoch": 3.990325927734375e-05, "model_forward_time": 0.025191068649291992, "step": 26151 }, { "epoch": 3.990325927734375e-05, "step": 26151, "training_step_time": 0.11027121543884277 }, { "epoch": 3.990478515625e-05, "model_forward_time": 0.025113821029663086, "step": 26152 }, { "epoch": 3.990478515625e-05, "step": 26152, "training_step_time": 0.1085059642791748 }, { "epoch": 3.990631103515625e-05, "model_forward_time": 0.025587081909179688, "step": 26153 }, { "epoch": 3.990631103515625e-05, "step": 26153, "training_step_time": 0.10966634750366211 }, { "epoch": 3.99078369140625e-05, "model_forward_time": 0.025266170501708984, "step": 26154 }, { "epoch": 3.99078369140625e-05, "step": 26154, "training_step_time": 0.10693669319152832 }, { "epoch": 3.990936279296875e-05, "model_forward_time": 0.02513265609741211, "step": 26155 }, { "epoch": 3.990936279296875e-05, "step": 26155, "training_step_time": 0.10780739784240723 }, { "epoch": 3.9910888671875e-05, "model_forward_time": 0.025136947631835938, "step": 26156 }, { "epoch": 3.9910888671875e-05, "step": 26156, "training_step_time": 0.11208248138427734 }, { "epoch": 3.991241455078125e-05, "model_forward_time": 0.025364398956298828, "step": 26157 }, { "epoch": 3.991241455078125e-05, "step": 26157, "training_step_time": 0.10563969612121582 }, { "epoch": 3.99139404296875e-05, "model_forward_time": 0.02491283416748047, "step": 26158 }, { "epoch": 3.99139404296875e-05, "step": 26158, "training_step_time": 0.10661864280700684 }, { "epoch": 3.991546630859375e-05, "model_forward_time": 0.025142192840576172, "step": 26159 }, { "epoch": 3.991546630859375e-05, "step": 26159, "training_step_time": 0.10689234733581543 }, { "epoch": 3.99169921875e-05, "grad_norm": 0.06598341464996338, "learning_rate": 4.412841079266777e-06, "loss": 0.0028, "step": 26160 }, { "epoch": 3.99169921875e-05, "model_forward_time": 0.024890899658203125, "step": 26160 }, { "epoch": 3.99169921875e-05, "step": 26160, "training_step_time": 0.1058659553527832 }, { "epoch": 3.991851806640625e-05, "model_forward_time": 0.024783611297607422, "step": 26161 }, { "epoch": 3.991851806640625e-05, "step": 26161, "training_step_time": 0.10630965232849121 }, { "epoch": 3.99200439453125e-05, "model_forward_time": 0.02535724639892578, "step": 26162 }, { "epoch": 3.99200439453125e-05, "step": 26162, "training_step_time": 0.10709619522094727 }, { "epoch": 3.992156982421875e-05, "model_forward_time": 0.025089740753173828, "step": 26163 }, { "epoch": 3.992156982421875e-05, "step": 26163, "training_step_time": 0.10756564140319824 }, { "epoch": 3.9923095703125e-05, "model_forward_time": 0.0255587100982666, "step": 26164 }, { "epoch": 3.9923095703125e-05, "step": 26164, "training_step_time": 0.11348557472229004 }, { "epoch": 3.992462158203125e-05, "model_forward_time": 0.025056123733520508, "step": 26165 }, { "epoch": 3.992462158203125e-05, "step": 26165, "training_step_time": 0.10958075523376465 }, { "epoch": 3.99261474609375e-05, "model_forward_time": 0.02517080307006836, "step": 26166 }, { "epoch": 3.99261474609375e-05, "step": 26166, "training_step_time": 0.11191010475158691 }, { "epoch": 3.992767333984375e-05, "model_forward_time": 0.025331497192382812, "step": 26167 }, { "epoch": 3.992767333984375e-05, "step": 26167, "training_step_time": 0.10962629318237305 }, { "epoch": 3.992919921875e-05, "model_forward_time": 0.02539205551147461, "step": 26168 }, { "epoch": 3.992919921875e-05, "step": 26168, "training_step_time": 0.1092219352722168 }, { "epoch": 3.993072509765625e-05, "model_forward_time": 0.025058507919311523, "step": 26169 }, { "epoch": 3.993072509765625e-05, "step": 26169, "training_step_time": 0.11667251586914062 }, { "epoch": 3.99322509765625e-05, "grad_norm": 0.07526319473981857, "learning_rate": 4.390229431268534e-06, "loss": 0.0053, "step": 26170 }, { "epoch": 3.99322509765625e-05, "model_forward_time": 0.025171518325805664, "step": 26170 }, { "epoch": 3.99322509765625e-05, "step": 26170, "training_step_time": 0.14108991622924805 }, { "epoch": 3.993377685546875e-05, "model_forward_time": 0.025013208389282227, "step": 26171 }, { "epoch": 3.993377685546875e-05, "step": 26171, "training_step_time": 0.11156725883483887 }, { "epoch": 3.9935302734375e-05, "model_forward_time": 0.024898052215576172, "step": 26172 }, { "epoch": 3.9935302734375e-05, "step": 26172, "training_step_time": 0.17752766609191895 }, { "epoch": 3.993682861328125e-05, "model_forward_time": 0.02470254898071289, "step": 26173 }, { "epoch": 3.993682861328125e-05, "step": 26173, "training_step_time": 0.21763992309570312 }, { "epoch": 3.99383544921875e-05, "model_forward_time": 0.02448129653930664, "step": 26174 }, { "epoch": 3.99383544921875e-05, "step": 26174, "training_step_time": 0.20544886589050293 }, { "epoch": 3.993988037109375e-05, "model_forward_time": 0.024476051330566406, "step": 26175 }, { "epoch": 3.993988037109375e-05, "step": 26175, "training_step_time": 0.20569634437561035 }, { "epoch": 3.994140625e-05, "model_forward_time": 0.024744033813476562, "step": 26176 }, { "epoch": 3.994140625e-05, "step": 26176, "training_step_time": 0.17775177955627441 }, { "epoch": 3.994293212890625e-05, "model_forward_time": 0.024734973907470703, "step": 26177 }, { "epoch": 3.994293212890625e-05, "step": 26177, "training_step_time": 0.17873358726501465 }, { "epoch": 3.99444580078125e-05, "model_forward_time": 0.02461409568786621, "step": 26178 }, { "epoch": 3.99444580078125e-05, "step": 26178, "training_step_time": 0.13012242317199707 }, { "epoch": 3.994598388671875e-05, "model_forward_time": 0.02439403533935547, "step": 26179 }, { "epoch": 3.994598388671875e-05, "step": 26179, "training_step_time": 0.14882636070251465 }, { "epoch": 3.9947509765625e-05, "grad_norm": 0.08003715425729752, "learning_rate": 4.367673203442241e-06, "loss": 0.0052, "step": 26180 }, { "epoch": 3.9947509765625e-05, "model_forward_time": 0.02449512481689453, "step": 26180 }, { "epoch": 3.9947509765625e-05, "step": 26180, "training_step_time": 0.21740436553955078 }, { "epoch": 3.994903564453125e-05, "model_forward_time": 0.024868249893188477, "step": 26181 }, { "epoch": 3.994903564453125e-05, "step": 26181, "training_step_time": 0.11270999908447266 }, { "epoch": 3.99505615234375e-05, "model_forward_time": 0.02455306053161621, "step": 26182 }, { "epoch": 3.99505615234375e-05, "step": 26182, "training_step_time": 0.10604977607727051 }, { "epoch": 3.995208740234375e-05, "model_forward_time": 0.025559425354003906, "step": 26183 }, { "epoch": 3.995208740234375e-05, "step": 26183, "training_step_time": 0.11426830291748047 }, { "epoch": 3.995361328125e-05, "model_forward_time": 0.024730920791625977, "step": 26184 }, { "epoch": 3.995361328125e-05, "step": 26184, "training_step_time": 0.10819077491760254 }, { "epoch": 3.995513916015625e-05, "model_forward_time": 0.025238037109375, "step": 26185 }, { "epoch": 3.995513916015625e-05, "step": 26185, "training_step_time": 0.1103212833404541 }, { "epoch": 3.99566650390625e-05, "model_forward_time": 0.02535271644592285, "step": 26186 }, { "epoch": 3.99566650390625e-05, "step": 26186, "training_step_time": 0.10993123054504395 }, { "epoch": 3.995819091796875e-05, "model_forward_time": 0.024819612503051758, "step": 26187 }, { "epoch": 3.995819091796875e-05, "step": 26187, "training_step_time": 0.12958049774169922 }, { "epoch": 3.9959716796875e-05, "model_forward_time": 0.025050878524780273, "step": 26188 }, { "epoch": 3.9959716796875e-05, "step": 26188, "training_step_time": 0.12558507919311523 }, { "epoch": 3.996124267578125e-05, "model_forward_time": 0.025292634963989258, "step": 26189 }, { "epoch": 3.996124267578125e-05, "step": 26189, "training_step_time": 0.12148118019104004 }, { "epoch": 3.99627685546875e-05, "grad_norm": 0.3032190501689911, "learning_rate": 4.3451724231958644e-06, "loss": 0.0053, "step": 26190 }, { "epoch": 3.99627685546875e-05, "model_forward_time": 0.025188684463500977, "step": 26190 }, { "epoch": 3.99627685546875e-05, "step": 26190, "training_step_time": 0.12166810035705566 }, { "epoch": 3.996429443359375e-05, "model_forward_time": 0.02518630027770996, "step": 26191 }, { "epoch": 3.996429443359375e-05, "step": 26191, "training_step_time": 0.223527193069458 }, { "epoch": 3.99658203125e-05, "model_forward_time": 0.024287939071655273, "step": 26192 }, { "epoch": 3.99658203125e-05, "step": 26192, "training_step_time": 0.12400555610656738 }, { "epoch": 3.996734619140625e-05, "model_forward_time": 0.0239715576171875, "step": 26193 }, { "epoch": 3.996734619140625e-05, "step": 26193, "training_step_time": 0.1965327262878418 }, { "epoch": 3.99688720703125e-05, "model_forward_time": 0.024260282516479492, "step": 26194 }, { "epoch": 3.99688720703125e-05, "step": 26194, "training_step_time": 0.11929082870483398 }, { "epoch": 3.997039794921875e-05, "model_forward_time": 0.024402856826782227, "step": 26195 }, { "epoch": 3.997039794921875e-05, "step": 26195, "training_step_time": 0.19179344177246094 }, { "epoch": 3.9971923828125e-05, "model_forward_time": 0.025172710418701172, "step": 26196 }, { "epoch": 3.9971923828125e-05, "step": 26196, "training_step_time": 0.10701131820678711 }, { "epoch": 3.997344970703125e-05, "model_forward_time": 0.024416685104370117, "step": 26197 }, { "epoch": 3.997344970703125e-05, "step": 26197, "training_step_time": 0.11088728904724121 }, { "epoch": 3.99749755859375e-05, "model_forward_time": 0.0250546932220459, "step": 26198 }, { "epoch": 3.99749755859375e-05, "step": 26198, "training_step_time": 0.10824060440063477 }, { "epoch": 3.997650146484375e-05, "model_forward_time": 0.0250396728515625, "step": 26199 }, { "epoch": 3.997650146484375e-05, "step": 26199, "training_step_time": 0.11015963554382324 }, { "epoch": 3.997802734375e-05, "grad_norm": 0.08461698889732361, "learning_rate": 4.322727117869951e-06, "loss": 0.0061, "step": 26200 }, { "epoch": 3.997802734375e-05, "model_forward_time": 0.025383472442626953, "step": 26200 }, { "epoch": 3.997802734375e-05, "step": 26200, "training_step_time": 0.11152458190917969 }, { "epoch": 3.997955322265625e-05, "model_forward_time": 0.025038719177246094, "step": 26201 }, { "epoch": 3.997955322265625e-05, "step": 26201, "training_step_time": 0.10735130310058594 }, { "epoch": 3.99810791015625e-05, "model_forward_time": 0.025051593780517578, "step": 26202 }, { "epoch": 3.99810791015625e-05, "step": 26202, "training_step_time": 0.1056675910949707 }, { "epoch": 3.998260498046875e-05, "model_forward_time": 0.025054454803466797, "step": 26203 }, { "epoch": 3.998260498046875e-05, "step": 26203, "training_step_time": 0.10694456100463867 }, { "epoch": 3.9984130859375e-05, "model_forward_time": 0.025494813919067383, "step": 26204 }, { "epoch": 3.9984130859375e-05, "step": 26204, "training_step_time": 0.11286544799804688 }, { "epoch": 3.998565673828125e-05, "model_forward_time": 0.025062084197998047, "step": 26205 }, { "epoch": 3.998565673828125e-05, "step": 26205, "training_step_time": 0.10840964317321777 }, { "epoch": 3.99871826171875e-05, "model_forward_time": 0.025116920471191406, "step": 26206 }, { "epoch": 3.99871826171875e-05, "step": 26206, "training_step_time": 0.10819077491760254 }, { "epoch": 3.998870849609375e-05, "model_forward_time": 0.02537989616394043, "step": 26207 }, { "epoch": 3.998870849609375e-05, "step": 26207, "training_step_time": 0.10566186904907227 }, { "epoch": 3.9990234375e-05, "model_forward_time": 0.02541804313659668, "step": 26208 }, { "epoch": 3.9990234375e-05, "step": 26208, "training_step_time": 0.18210983276367188 }, { "epoch": 3.999176025390625e-05, "model_forward_time": 0.024603843688964844, "step": 26209 }, { "epoch": 3.999176025390625e-05, "step": 26209, "training_step_time": 0.10646319389343262 }, { "epoch": 3.99932861328125e-05, "grad_norm": 0.0800323560833931, "learning_rate": 4.300337314737685e-06, "loss": 0.0072, "step": 26210 }, { "epoch": 3.99932861328125e-05, "model_forward_time": 0.02482914924621582, "step": 26210 }, { "epoch": 3.99932861328125e-05, "step": 26210, "training_step_time": 0.10445237159729004 }, { "epoch": 3.999481201171875e-05, "model_forward_time": 0.025142431259155273, "step": 26211 }, { "epoch": 3.999481201171875e-05, "step": 26211, "training_step_time": 0.1262831687927246 }, { "epoch": 3.9996337890625e-05, "model_forward_time": 0.02533888816833496, "step": 26212 }, { "epoch": 3.9996337890625e-05, "step": 26212, "training_step_time": 0.13247418403625488 }, { "epoch": 3.999786376953125e-05, "model_forward_time": 0.02544379234313965, "step": 26213 }, { "epoch": 3.999786376953125e-05, "step": 26213, "training_step_time": 0.10804557800292969 }, { "epoch": 3.99993896484375e-05, "model_forward_time": 0.025707721710205078, "step": 26214 }, { "epoch": 3.99993896484375e-05, "step": 26214, "training_step_time": 0.11357831954956055 }, { "epoch": 4.000091552734375e-05, "model_forward_time": 0.025601863861083984, "step": 26215 }, { "epoch": 4.000091552734375e-05, "step": 26215, "training_step_time": 0.19805407524108887 }, { "epoch": 4.000244140625e-05, "model_forward_time": 0.024850845336914062, "step": 26216 }, { "epoch": 4.000244140625e-05, "step": 26216, "training_step_time": 0.11610579490661621 }, { "epoch": 4.000396728515625e-05, "model_forward_time": 0.024520397186279297, "step": 26217 }, { "epoch": 4.000396728515625e-05, "step": 26217, "training_step_time": 0.10756587982177734 }, { "epoch": 4.00054931640625e-05, "model_forward_time": 0.02550959587097168, "step": 26218 }, { "epoch": 4.00054931640625e-05, "step": 26218, "training_step_time": 0.10598874092102051 }, { "epoch": 4.000701904296875e-05, "model_forward_time": 0.02501988410949707, "step": 26219 }, { "epoch": 4.000701904296875e-05, "step": 26219, "training_step_time": 0.11534667015075684 }, { "epoch": 4.0008544921875e-05, "grad_norm": 0.17419543862342834, "learning_rate": 4.27800304100478e-06, "loss": 0.0053, "step": 26220 }, { "epoch": 4.0008544921875e-05, "model_forward_time": 0.02565741539001465, "step": 26220 }, { "epoch": 4.0008544921875e-05, "step": 26220, "training_step_time": 0.20604896545410156 }, { "epoch": 4.001007080078125e-05, "model_forward_time": 0.024756669998168945, "step": 26221 }, { "epoch": 4.001007080078125e-05, "step": 26221, "training_step_time": 0.11130571365356445 }, { "epoch": 4.00115966796875e-05, "model_forward_time": 0.024776697158813477, "step": 26222 }, { "epoch": 4.00115966796875e-05, "step": 26222, "training_step_time": 0.11346602439880371 }, { "epoch": 4.001312255859375e-05, "model_forward_time": 0.025264739990234375, "step": 26223 }, { "epoch": 4.001312255859375e-05, "step": 26223, "training_step_time": 0.12429213523864746 }, { "epoch": 4.00146484375e-05, "model_forward_time": 0.025708436965942383, "step": 26224 }, { "epoch": 4.00146484375e-05, "step": 26224, "training_step_time": 0.15922069549560547 }, { "epoch": 4.001617431640625e-05, "model_forward_time": 0.0243837833404541, "step": 26225 }, { "epoch": 4.001617431640625e-05, "step": 26225, "training_step_time": 0.16666841506958008 }, { "epoch": 4.00177001953125e-05, "model_forward_time": 0.024760007858276367, "step": 26226 }, { "epoch": 4.00177001953125e-05, "step": 26226, "training_step_time": 0.10526013374328613 }, { "epoch": 4.001922607421875e-05, "model_forward_time": 0.02439141273498535, "step": 26227 }, { "epoch": 4.001922607421875e-05, "step": 26227, "training_step_time": 0.13845491409301758 }, { "epoch": 4.0020751953125e-05, "model_forward_time": 0.025290250778198242, "step": 26228 }, { "epoch": 4.0020751953125e-05, "step": 26228, "training_step_time": 0.17240548133850098 }, { "epoch": 4.002227783203125e-05, "model_forward_time": 0.02397012710571289, "step": 26229 }, { "epoch": 4.002227783203125e-05, "step": 26229, "training_step_time": 0.15689969062805176 }, { "epoch": 4.00238037109375e-05, "grad_norm": 0.17113907635211945, "learning_rate": 4.255724323809479e-06, "loss": 0.0049, "step": 26230 }, { "epoch": 4.00238037109375e-05, "model_forward_time": 0.024091482162475586, "step": 26230 }, { "epoch": 4.00238037109375e-05, "step": 26230, "training_step_time": 0.13382649421691895 }, { "epoch": 4.002532958984375e-05, "model_forward_time": 0.024300336837768555, "step": 26231 }, { "epoch": 4.002532958984375e-05, "step": 26231, "training_step_time": 0.13757538795471191 }, { "epoch": 4.002685546875e-05, "model_forward_time": 0.026445865631103516, "step": 26232 }, { "epoch": 4.002685546875e-05, "step": 26232, "training_step_time": 0.12434101104736328 }, { "epoch": 4.002838134765625e-05, "model_forward_time": 0.024916887283325195, "step": 26233 }, { "epoch": 4.002838134765625e-05, "step": 26233, "training_step_time": 0.12484383583068848 }, { "epoch": 4.00299072265625e-05, "model_forward_time": 0.02515125274658203, "step": 26234 }, { "epoch": 4.00299072265625e-05, "step": 26234, "training_step_time": 0.12027263641357422 }, { "epoch": 4.003143310546875e-05, "model_forward_time": 0.025537490844726562, "step": 26235 }, { "epoch": 4.003143310546875e-05, "step": 26235, "training_step_time": 0.1382596492767334 }, { "epoch": 4.0032958984375e-05, "model_forward_time": 0.025041580200195312, "step": 26236 }, { "epoch": 4.0032958984375e-05, "step": 26236, "training_step_time": 0.11221122741699219 }, { "epoch": 4.003448486328125e-05, "model_forward_time": 0.02627420425415039, "step": 26237 }, { "epoch": 4.003448486328125e-05, "step": 26237, "training_step_time": 0.11217617988586426 }, { "epoch": 4.00360107421875e-05, "model_forward_time": 0.025739192962646484, "step": 26238 }, { "epoch": 4.00360107421875e-05, "step": 26238, "training_step_time": 0.17177367210388184 }, { "epoch": 4.003753662109375e-05, "model_forward_time": 0.026119232177734375, "step": 26239 }, { "epoch": 4.003753662109375e-05, "step": 26239, "training_step_time": 0.15797948837280273 }, { "epoch": 4.00390625e-05, "grad_norm": 0.09232489764690399, "learning_rate": 4.233501190222533e-06, "loss": 0.0039, "step": 26240 }, { "epoch": 4.00390625e-05, "model_forward_time": 0.03803396224975586, "step": 26240 }, { "epoch": 4.00390625e-05, "step": 26240, "training_step_time": 0.16290974617004395 }, { "epoch": 4.004058837890625e-05, "model_forward_time": 0.02740168571472168, "step": 26241 }, { "epoch": 4.004058837890625e-05, "step": 26241, "training_step_time": 0.20537710189819336 }, { "epoch": 4.00421142578125e-05, "model_forward_time": 0.027785778045654297, "step": 26242 }, { "epoch": 4.00421142578125e-05, "step": 26242, "training_step_time": 0.2800023555755615 }, { "epoch": 4.004364013671875e-05, "model_forward_time": 0.028873443603515625, "step": 26243 }, { "epoch": 4.004364013671875e-05, "step": 26243, "training_step_time": 0.2522609233856201 }, { "epoch": 4.0045166015625e-05, "model_forward_time": 0.028441667556762695, "step": 26244 }, { "epoch": 4.0045166015625e-05, "step": 26244, "training_step_time": 0.2918663024902344 }, { "epoch": 4.004669189453125e-05, "model_forward_time": 0.03096938133239746, "step": 26245 }, { "epoch": 4.004669189453125e-05, "step": 26245, "training_step_time": 0.30705881118774414 }, { "epoch": 4.00482177734375e-05, "model_forward_time": 0.03237438201904297, "step": 26246 }, { "epoch": 4.00482177734375e-05, "step": 26246, "training_step_time": 0.3271634578704834 }, { "epoch": 4.004974365234375e-05, "model_forward_time": 0.03686237335205078, "step": 26247 }, { "epoch": 4.004974365234375e-05, "step": 26247, "training_step_time": 0.3153567314147949 }, { "epoch": 4.005126953125e-05, "model_forward_time": 0.029470443725585938, "step": 26248 }, { "epoch": 4.005126953125e-05, "step": 26248, "training_step_time": 0.24451017379760742 }, { "epoch": 4.005279541015625e-05, "model_forward_time": 0.031524658203125, "step": 26249 }, { "epoch": 4.005279541015625e-05, "step": 26249, "training_step_time": 0.2888178825378418 }, { "epoch": 4.00543212890625e-05, "grad_norm": 0.11314801126718521, "learning_rate": 4.2113336672471245e-06, "loss": 0.0077, "step": 26250 }, { "epoch": 4.00543212890625e-05, "model_forward_time": 0.030463457107543945, "step": 26250 }, { "epoch": 4.00543212890625e-05, "step": 26250, "training_step_time": 0.2657449245452881 }, { "epoch": 4.005584716796875e-05, "model_forward_time": 0.03507041931152344, "step": 26251 }, { "epoch": 4.005584716796875e-05, "step": 26251, "training_step_time": 0.2915992736816406 }, { "epoch": 4.0057373046875e-05, "model_forward_time": 0.03037714958190918, "step": 26252 }, { "epoch": 4.0057373046875e-05, "step": 26252, "training_step_time": 0.19980287551879883 }, { "epoch": 4.005889892578125e-05, "model_forward_time": 0.03186750411987305, "step": 26253 }, { "epoch": 4.005889892578125e-05, "step": 26253, "training_step_time": 0.2978029251098633 }, { "epoch": 4.00604248046875e-05, "model_forward_time": 0.035239458084106445, "step": 26254 }, { "epoch": 4.00604248046875e-05, "step": 26254, "training_step_time": 0.2939913272857666 }, { "epoch": 4.006195068359375e-05, "model_forward_time": 0.028951644897460938, "step": 26255 }, { "epoch": 4.006195068359375e-05, "step": 26255, "training_step_time": 0.14375066757202148 }, { "epoch": 4.00634765625e-05, "model_forward_time": 0.03395342826843262, "step": 26256 }, { "epoch": 4.00634765625e-05, "step": 26256, "training_step_time": 0.13275599479675293 }, { "epoch": 4.006500244140625e-05, "model_forward_time": 0.03153800964355469, "step": 26257 }, { "epoch": 4.006500244140625e-05, "step": 26257, "training_step_time": 0.1263735294342041 }, { "epoch": 4.00665283203125e-05, "model_forward_time": 0.028304338455200195, "step": 26258 }, { "epoch": 4.00665283203125e-05, "step": 26258, "training_step_time": 0.12805747985839844 }, { "epoch": 4.006805419921875e-05, "model_forward_time": 0.02751898765563965, "step": 26259 }, { "epoch": 4.006805419921875e-05, "step": 26259, "training_step_time": 0.11527013778686523 }, { "epoch": 4.0069580078125e-05, "grad_norm": 0.1763758361339569, "learning_rate": 4.189221781818914e-06, "loss": 0.0084, "step": 26260 }, { "epoch": 4.0069580078125e-05, "model_forward_time": 0.027667760848999023, "step": 26260 }, { "epoch": 4.0069580078125e-05, "step": 26260, "training_step_time": 0.12492895126342773 }, { "epoch": 4.007110595703125e-05, "model_forward_time": 0.02668285369873047, "step": 26261 }, { "epoch": 4.007110595703125e-05, "step": 26261, "training_step_time": 0.1195363998413086 }, { "epoch": 4.00726318359375e-05, "model_forward_time": 0.02624964714050293, "step": 26262 }, { "epoch": 4.00726318359375e-05, "step": 26262, "training_step_time": 0.10950708389282227 }, { "epoch": 4.007415771484375e-05, "model_forward_time": 0.025242328643798828, "step": 26263 }, { "epoch": 4.007415771484375e-05, "step": 26263, "training_step_time": 0.13776874542236328 }, { "epoch": 4.007568359375e-05, "model_forward_time": 0.025519132614135742, "step": 26264 }, { "epoch": 4.007568359375e-05, "step": 26264, "training_step_time": 0.141005277633667 }, { "epoch": 4.007720947265625e-05, "model_forward_time": 0.02430891990661621, "step": 26265 }, { "epoch": 4.007720947265625e-05, "step": 26265, "training_step_time": 0.10855317115783691 }, { "epoch": 4.00787353515625e-05, "model_forward_time": 0.025052309036254883, "step": 26266 }, { "epoch": 4.00787353515625e-05, "step": 26266, "training_step_time": 0.11240291595458984 }, { "epoch": 4.008026123046875e-05, "model_forward_time": 0.025258541107177734, "step": 26267 }, { "epoch": 4.008026123046875e-05, "step": 26267, "training_step_time": 0.10968446731567383 }, { "epoch": 4.0081787109375e-05, "model_forward_time": 0.025557994842529297, "step": 26268 }, { "epoch": 4.0081787109375e-05, "step": 26268, "training_step_time": 0.1083521842956543 }, { "epoch": 4.008331298828125e-05, "model_forward_time": 0.0256807804107666, "step": 26269 }, { "epoch": 4.008331298828125e-05, "step": 26269, "training_step_time": 0.19491934776306152 }, { "epoch": 4.00848388671875e-05, "grad_norm": 0.08767119795084, "learning_rate": 4.167165560805914e-06, "loss": 0.0035, "step": 26270 }, { "epoch": 4.00848388671875e-05, "model_forward_time": 0.02497243881225586, "step": 26270 }, { "epoch": 4.00848388671875e-05, "step": 26270, "training_step_time": 0.11090731620788574 }, { "epoch": 4.008636474609375e-05, "model_forward_time": 0.023956298828125, "step": 26271 }, { "epoch": 4.008636474609375e-05, "step": 26271, "training_step_time": 0.1124274730682373 }, { "epoch": 4.0087890625e-05, "model_forward_time": 0.025119304656982422, "step": 26272 }, { "epoch": 4.0087890625e-05, "step": 26272, "training_step_time": 0.11515450477600098 }, { "epoch": 4.008941650390625e-05, "model_forward_time": 0.025896310806274414, "step": 26273 }, { "epoch": 4.008941650390625e-05, "step": 26273, "training_step_time": 0.11102080345153809 }, { "epoch": 4.00909423828125e-05, "model_forward_time": 0.025412321090698242, "step": 26274 }, { "epoch": 4.00909423828125e-05, "step": 26274, "training_step_time": 0.11011767387390137 }, { "epoch": 4.009246826171875e-05, "model_forward_time": 0.025719881057739258, "step": 26275 }, { "epoch": 4.009246826171875e-05, "step": 26275, "training_step_time": 0.1140134334564209 }, { "epoch": 4.0093994140625e-05, "model_forward_time": 0.02560257911682129, "step": 26276 }, { "epoch": 4.0093994140625e-05, "step": 26276, "training_step_time": 0.11352872848510742 }, { "epoch": 4.009552001953125e-05, "model_forward_time": 0.028265953063964844, "step": 26277 }, { "epoch": 4.009552001953125e-05, "step": 26277, "training_step_time": 0.1090998649597168 }, { "epoch": 4.00970458984375e-05, "model_forward_time": 0.0254209041595459, "step": 26278 }, { "epoch": 4.00970458984375e-05, "step": 26278, "training_step_time": 0.11704158782958984 }, { "epoch": 4.009857177734375e-05, "model_forward_time": 0.02718353271484375, "step": 26279 }, { "epoch": 4.009857177734375e-05, "step": 26279, "training_step_time": 0.11139988899230957 }, { "epoch": 4.010009765625e-05, "grad_norm": 0.09713966399431229, "learning_rate": 4.145165031008508e-06, "loss": 0.0138, "step": 26280 }, { "epoch": 4.010009765625e-05, "model_forward_time": 0.02546834945678711, "step": 26280 }, { "epoch": 4.010009765625e-05, "step": 26280, "training_step_time": 0.11081910133361816 }, { "epoch": 4.010162353515625e-05, "model_forward_time": 0.025545835494995117, "step": 26281 }, { "epoch": 4.010162353515625e-05, "step": 26281, "training_step_time": 0.10771059989929199 }, { "epoch": 4.01031494140625e-05, "model_forward_time": 0.025603294372558594, "step": 26282 }, { "epoch": 4.01031494140625e-05, "step": 26282, "training_step_time": 0.10892319679260254 }, { "epoch": 4.010467529296875e-05, "model_forward_time": 0.026063919067382812, "step": 26283 }, { "epoch": 4.010467529296875e-05, "step": 26283, "training_step_time": 0.11043667793273926 }, { "epoch": 4.0106201171875e-05, "model_forward_time": 0.025173187255859375, "step": 26284 }, { "epoch": 4.0106201171875e-05, "step": 26284, "training_step_time": 0.19211983680725098 }, { "epoch": 4.010772705078125e-05, "model_forward_time": 0.02514362335205078, "step": 26285 }, { "epoch": 4.010772705078125e-05, "step": 26285, "training_step_time": 0.10571622848510742 }, { "epoch": 4.01092529296875e-05, "model_forward_time": 0.024885177612304688, "step": 26286 }, { "epoch": 4.01092529296875e-05, "step": 26286, "training_step_time": 0.11598443984985352 }, { "epoch": 4.011077880859375e-05, "model_forward_time": 0.025402307510375977, "step": 26287 }, { "epoch": 4.011077880859375e-05, "step": 26287, "training_step_time": 0.12391304969787598 }, { "epoch": 4.01123046875e-05, "model_forward_time": 0.024918556213378906, "step": 26288 }, { "epoch": 4.01123046875e-05, "step": 26288, "training_step_time": 0.1025230884552002 }, { "epoch": 4.011383056640625e-05, "model_forward_time": 0.025389432907104492, "step": 26289 }, { "epoch": 4.011383056640625e-05, "step": 26289, "training_step_time": 0.15187287330627441 }, { "epoch": 4.01153564453125e-05, "grad_norm": 0.08062807470560074, "learning_rate": 4.123220219159418e-06, "loss": 0.0121, "step": 26290 }, { "epoch": 4.01153564453125e-05, "model_forward_time": 0.025385618209838867, "step": 26290 }, { "epoch": 4.01153564453125e-05, "step": 26290, "training_step_time": 0.11150217056274414 }, { "epoch": 4.011688232421875e-05, "model_forward_time": 0.025077104568481445, "step": 26291 }, { "epoch": 4.011688232421875e-05, "step": 26291, "training_step_time": 0.11345291137695312 }, { "epoch": 4.0118408203125e-05, "model_forward_time": 0.025686979293823242, "step": 26292 }, { "epoch": 4.0118408203125e-05, "step": 26292, "training_step_time": 0.14574646949768066 }, { "epoch": 4.011993408203125e-05, "model_forward_time": 0.025452136993408203, "step": 26293 }, { "epoch": 4.011993408203125e-05, "step": 26293, "training_step_time": 0.18892145156860352 }, { "epoch": 4.01214599609375e-05, "model_forward_time": 0.024883270263671875, "step": 26294 }, { "epoch": 4.01214599609375e-05, "step": 26294, "training_step_time": 0.15984296798706055 }, { "epoch": 4.012298583984375e-05, "model_forward_time": 0.02430868148803711, "step": 26295 }, { "epoch": 4.012298583984375e-05, "step": 26295, "training_step_time": 0.18121623992919922 }, { "epoch": 4.012451171875e-05, "model_forward_time": 0.02453923225402832, "step": 26296 }, { "epoch": 4.012451171875e-05, "step": 26296, "training_step_time": 0.149245023727417 }, { "epoch": 4.012603759765625e-05, "model_forward_time": 0.02492809295654297, "step": 26297 }, { "epoch": 4.012603759765625e-05, "step": 26297, "training_step_time": 0.16715240478515625 }, { "epoch": 4.01275634765625e-05, "model_forward_time": 0.025471210479736328, "step": 26298 }, { "epoch": 4.01275634765625e-05, "step": 26298, "training_step_time": 0.12182927131652832 }, { "epoch": 4.012908935546875e-05, "model_forward_time": 0.02447342872619629, "step": 26299 }, { "epoch": 4.012908935546875e-05, "step": 26299, "training_step_time": 0.12989473342895508 }, { "epoch": 4.0130615234375e-05, "grad_norm": 0.09044896066188812, "learning_rate": 4.101331151923649e-06, "loss": 0.0091, "step": 26300 }, { "epoch": 4.0130615234375e-05, "model_forward_time": 0.025290727615356445, "step": 26300 }, { "epoch": 4.0130615234375e-05, "step": 26300, "training_step_time": 0.11214041709899902 }, { "epoch": 4.013214111328125e-05, "model_forward_time": 0.025206804275512695, "step": 26301 }, { "epoch": 4.013214111328125e-05, "step": 26301, "training_step_time": 0.10429215431213379 }, { "epoch": 4.01336669921875e-05, "model_forward_time": 0.026093721389770508, "step": 26302 }, { "epoch": 4.01336669921875e-05, "step": 26302, "training_step_time": 0.10824108123779297 }, { "epoch": 4.013519287109375e-05, "model_forward_time": 0.02540755271911621, "step": 26303 }, { "epoch": 4.013519287109375e-05, "step": 26303, "training_step_time": 0.10756969451904297 }, { "epoch": 4.013671875e-05, "model_forward_time": 0.025562763214111328, "step": 26304 }, { "epoch": 4.013671875e-05, "step": 26304, "training_step_time": 0.12741827964782715 }, { "epoch": 4.013824462890625e-05, "model_forward_time": 0.026329755783081055, "step": 26305 }, { "epoch": 4.013824462890625e-05, "step": 26305, "training_step_time": 0.17176508903503418 }, { "epoch": 4.01397705078125e-05, "model_forward_time": 0.024481773376464844, "step": 26306 }, { "epoch": 4.01397705078125e-05, "step": 26306, "training_step_time": 0.16790342330932617 }, { "epoch": 4.014129638671875e-05, "model_forward_time": 0.02565908432006836, "step": 26307 }, { "epoch": 4.014129638671875e-05, "step": 26307, "training_step_time": 0.1396794319152832 }, { "epoch": 4.0142822265625e-05, "model_forward_time": 0.0247647762298584, "step": 26308 }, { "epoch": 4.0142822265625e-05, "step": 26308, "training_step_time": 0.1678149700164795 }, { "epoch": 4.014434814453125e-05, "model_forward_time": 0.026042699813842773, "step": 26309 }, { "epoch": 4.014434814453125e-05, "step": 26309, "training_step_time": 0.14809608459472656 }, { "epoch": 4.01458740234375e-05, "grad_norm": 0.1574499011039734, "learning_rate": 4.079497855898501e-06, "loss": 0.014, "step": 26310 }, { "epoch": 4.01458740234375e-05, "model_forward_time": 0.025024890899658203, "step": 26310 }, { "epoch": 4.01458740234375e-05, "step": 26310, "training_step_time": 0.12175464630126953 }, { "epoch": 4.014739990234375e-05, "model_forward_time": 0.024291038513183594, "step": 26311 }, { "epoch": 4.014739990234375e-05, "step": 26311, "training_step_time": 0.18211960792541504 }, { "epoch": 4.014892578125e-05, "model_forward_time": 0.025204896926879883, "step": 26312 }, { "epoch": 4.014892578125e-05, "step": 26312, "training_step_time": 0.11302924156188965 }, { "epoch": 4.015045166015625e-05, "model_forward_time": 0.024596452713012695, "step": 26313 }, { "epoch": 4.015045166015625e-05, "step": 26313, "training_step_time": 0.11298274993896484 }, { "epoch": 4.01519775390625e-05, "model_forward_time": 0.025521278381347656, "step": 26314 }, { "epoch": 4.01519775390625e-05, "step": 26314, "training_step_time": 0.11246871948242188 }, { "epoch": 4.015350341796875e-05, "model_forward_time": 0.024985074996948242, "step": 26315 }, { "epoch": 4.015350341796875e-05, "step": 26315, "training_step_time": 0.11112833023071289 }, { "epoch": 4.0155029296875e-05, "model_forward_time": 0.025078535079956055, "step": 26316 }, { "epoch": 4.0155029296875e-05, "step": 26316, "training_step_time": 0.1080927848815918 }, { "epoch": 4.015655517578125e-05, "model_forward_time": 0.025440692901611328, "step": 26317 }, { "epoch": 4.015655517578125e-05, "step": 26317, "training_step_time": 0.10802435874938965 }, { "epoch": 4.01580810546875e-05, "model_forward_time": 0.02532958984375, "step": 26318 }, { "epoch": 4.01580810546875e-05, "step": 26318, "training_step_time": 0.11025214195251465 }, { "epoch": 4.015960693359375e-05, "model_forward_time": 0.025005817413330078, "step": 26319 }, { "epoch": 4.015960693359375e-05, "step": 26319, "training_step_time": 0.11236977577209473 }, { "epoch": 4.01611328125e-05, "grad_norm": 0.19923123717308044, "learning_rate": 4.057720357613482e-06, "loss": 0.0043, "step": 26320 }, { "epoch": 4.01611328125e-05, "model_forward_time": 0.025292634963989258, "step": 26320 }, { "epoch": 4.01611328125e-05, "step": 26320, "training_step_time": 0.11114263534545898 }, { "epoch": 4.016265869140625e-05, "model_forward_time": 0.02508997917175293, "step": 26321 }, { "epoch": 4.016265869140625e-05, "step": 26321, "training_step_time": 0.10986208915710449 }, { "epoch": 4.01641845703125e-05, "model_forward_time": 0.025566816329956055, "step": 26322 }, { "epoch": 4.01641845703125e-05, "step": 26322, "training_step_time": 0.10797309875488281 }, { "epoch": 4.016571044921875e-05, "model_forward_time": 0.02617669105529785, "step": 26323 }, { "epoch": 4.016571044921875e-05, "step": 26323, "training_step_time": 0.11313390731811523 }, { "epoch": 4.0167236328125e-05, "model_forward_time": 0.025249481201171875, "step": 26324 }, { "epoch": 4.0167236328125e-05, "step": 26324, "training_step_time": 0.1083984375 }, { "epoch": 4.016876220703125e-05, "model_forward_time": 0.025038719177246094, "step": 26325 }, { "epoch": 4.016876220703125e-05, "step": 26325, "training_step_time": 0.10836148262023926 }, { "epoch": 4.01702880859375e-05, "model_forward_time": 0.02548050880432129, "step": 26326 }, { "epoch": 4.01702880859375e-05, "step": 26326, "training_step_time": 0.10759091377258301 }, { "epoch": 4.017181396484375e-05, "model_forward_time": 0.024790048599243164, "step": 26327 }, { "epoch": 4.017181396484375e-05, "step": 26327, "training_step_time": 0.11237931251525879 }, { "epoch": 4.017333984375e-05, "model_forward_time": 0.025119781494140625, "step": 26328 }, { "epoch": 4.017333984375e-05, "step": 26328, "training_step_time": 0.14493417739868164 }, { "epoch": 4.017486572265625e-05, "model_forward_time": 0.025468111038208008, "step": 26329 }, { "epoch": 4.017486572265625e-05, "step": 26329, "training_step_time": 0.10816597938537598 }, { "epoch": 4.01763916015625e-05, "grad_norm": 0.0620352141559124, "learning_rate": 4.03599868353029e-06, "loss": 0.0066, "step": 26330 }, { "epoch": 4.01763916015625e-05, "model_forward_time": 0.02503514289855957, "step": 26330 }, { "epoch": 4.01763916015625e-05, "step": 26330, "training_step_time": 0.11126065254211426 }, { "epoch": 4.017791748046875e-05, "model_forward_time": 0.025090932846069336, "step": 26331 }, { "epoch": 4.017791748046875e-05, "step": 26331, "training_step_time": 0.11881351470947266 }, { "epoch": 4.0179443359375e-05, "model_forward_time": 0.02516317367553711, "step": 26332 }, { "epoch": 4.0179443359375e-05, "step": 26332, "training_step_time": 0.10435605049133301 }, { "epoch": 4.018096923828125e-05, "model_forward_time": 0.02465367317199707, "step": 26333 }, { "epoch": 4.018096923828125e-05, "step": 26333, "training_step_time": 0.1433720588684082 }, { "epoch": 4.01824951171875e-05, "model_forward_time": 0.02499556541442871, "step": 26334 }, { "epoch": 4.01824951171875e-05, "step": 26334, "training_step_time": 0.14278602600097656 }, { "epoch": 4.018402099609375e-05, "model_forward_time": 0.025307416915893555, "step": 26335 }, { "epoch": 4.018402099609375e-05, "step": 26335, "training_step_time": 0.18710780143737793 }, { "epoch": 4.0185546875e-05, "model_forward_time": 0.02431321144104004, "step": 26336 }, { "epoch": 4.0185546875e-05, "step": 26336, "training_step_time": 0.1337270736694336 }, { "epoch": 4.018707275390625e-05, "model_forward_time": 0.02416229248046875, "step": 26337 }, { "epoch": 4.018707275390625e-05, "step": 26337, "training_step_time": 0.16265416145324707 }, { "epoch": 4.01885986328125e-05, "model_forward_time": 0.024775981903076172, "step": 26338 }, { "epoch": 4.01885986328125e-05, "step": 26338, "training_step_time": 0.15987110137939453 }, { "epoch": 4.019012451171875e-05, "model_forward_time": 0.02425408363342285, "step": 26339 }, { "epoch": 4.019012451171875e-05, "step": 26339, "training_step_time": 0.12137556076049805 }, { "epoch": 4.0191650390625e-05, "grad_norm": 0.25910672545433044, "learning_rate": 4.01433286004283e-06, "loss": 0.0048, "step": 26340 }, { "epoch": 4.0191650390625e-05, "model_forward_time": 0.024231672286987305, "step": 26340 }, { "epoch": 4.0191650390625e-05, "step": 26340, "training_step_time": 0.21265602111816406 }, { "epoch": 4.019317626953125e-05, "model_forward_time": 0.024422645568847656, "step": 26341 }, { "epoch": 4.019317626953125e-05, "step": 26341, "training_step_time": 0.11136794090270996 }, { "epoch": 4.01947021484375e-05, "model_forward_time": 0.024634122848510742, "step": 26342 }, { "epoch": 4.01947021484375e-05, "step": 26342, "training_step_time": 0.17721796035766602 }, { "epoch": 4.019622802734375e-05, "model_forward_time": 0.02380228042602539, "step": 26343 }, { "epoch": 4.019622802734375e-05, "step": 26343, "training_step_time": 0.20404767990112305 }, { "epoch": 4.019775390625e-05, "model_forward_time": 0.024129152297973633, "step": 26344 }, { "epoch": 4.019775390625e-05, "step": 26344, "training_step_time": 0.18899965286254883 }, { "epoch": 4.019927978515625e-05, "model_forward_time": 0.02433466911315918, "step": 26345 }, { "epoch": 4.019927978515625e-05, "step": 26345, "training_step_time": 0.17934775352478027 }, { "epoch": 4.02008056640625e-05, "model_forward_time": 0.024580717086791992, "step": 26346 }, { "epoch": 4.02008056640625e-05, "step": 26346, "training_step_time": 0.1727914810180664 }, { "epoch": 4.020233154296875e-05, "model_forward_time": 0.024190902709960938, "step": 26347 }, { "epoch": 4.020233154296875e-05, "step": 26347, "training_step_time": 0.10953640937805176 }, { "epoch": 4.0203857421875e-05, "model_forward_time": 0.024502992630004883, "step": 26348 }, { "epoch": 4.0203857421875e-05, "step": 26348, "training_step_time": 0.1653285026550293 }, { "epoch": 4.020538330078125e-05, "model_forward_time": 0.024463415145874023, "step": 26349 }, { "epoch": 4.020538330078125e-05, "step": 26349, "training_step_time": 0.1340937614440918 }, { "epoch": 4.02069091796875e-05, "grad_norm": 0.06537821143865585, "learning_rate": 3.9927229134771035e-06, "loss": 0.0036, "step": 26350 }, { "epoch": 4.02069091796875e-05, "model_forward_time": 0.024655818939208984, "step": 26350 }, { "epoch": 4.02069091796875e-05, "step": 26350, "training_step_time": 0.10665559768676758 }, { "epoch": 4.020843505859375e-05, "model_forward_time": 0.025043249130249023, "step": 26351 }, { "epoch": 4.020843505859375e-05, "step": 26351, "training_step_time": 0.10814428329467773 }, { "epoch": 4.02099609375e-05, "model_forward_time": 0.02535414695739746, "step": 26352 }, { "epoch": 4.02099609375e-05, "step": 26352, "training_step_time": 0.10964179039001465 }, { "epoch": 4.021148681640625e-05, "model_forward_time": 0.02823662757873535, "step": 26353 }, { "epoch": 4.021148681640625e-05, "step": 26353, "training_step_time": 0.10802412033081055 }, { "epoch": 4.02130126953125e-05, "model_forward_time": 0.024797439575195312, "step": 26354 }, { "epoch": 4.02130126953125e-05, "step": 26354, "training_step_time": 0.20056724548339844 }, { "epoch": 4.021453857421875e-05, "model_forward_time": 0.024448394775390625, "step": 26355 }, { "epoch": 4.021453857421875e-05, "step": 26355, "training_step_time": 0.10429954528808594 }, { "epoch": 4.0216064453125e-05, "model_forward_time": 0.024451494216918945, "step": 26356 }, { "epoch": 4.0216064453125e-05, "step": 26356, "training_step_time": 0.10266828536987305 }, { "epoch": 4.021759033203125e-05, "model_forward_time": 0.025339841842651367, "step": 26357 }, { "epoch": 4.021759033203125e-05, "step": 26357, "training_step_time": 0.10928606986999512 }, { "epoch": 4.02191162109375e-05, "model_forward_time": 0.025394439697265625, "step": 26358 }, { "epoch": 4.02191162109375e-05, "step": 26358, "training_step_time": 0.10451841354370117 }, { "epoch": 4.022064208984375e-05, "model_forward_time": 0.024864912033081055, "step": 26359 }, { "epoch": 4.022064208984375e-05, "step": 26359, "training_step_time": 0.10608720779418945 }, { "epoch": 4.022216796875e-05, "grad_norm": 0.05001050978899002, "learning_rate": 3.971168870091247e-06, "loss": 0.0076, "step": 26360 }, { "epoch": 4.022216796875e-05, "model_forward_time": 0.024997472763061523, "step": 26360 }, { "epoch": 4.022216796875e-05, "step": 26360, "training_step_time": 0.10564303398132324 }, { "epoch": 4.022369384765625e-05, "model_forward_time": 0.026213407516479492, "step": 26361 }, { "epoch": 4.022369384765625e-05, "step": 26361, "training_step_time": 0.10607624053955078 }, { "epoch": 4.02252197265625e-05, "model_forward_time": 0.02534341812133789, "step": 26362 }, { "epoch": 4.02252197265625e-05, "step": 26362, "training_step_time": 0.1103672981262207 }, { "epoch": 4.022674560546875e-05, "model_forward_time": 0.025134801864624023, "step": 26363 }, { "epoch": 4.022674560546875e-05, "step": 26363, "training_step_time": 0.10824394226074219 }, { "epoch": 4.0228271484375e-05, "model_forward_time": 0.025542020797729492, "step": 26364 }, { "epoch": 4.0228271484375e-05, "step": 26364, "training_step_time": 0.10752582550048828 }, { "epoch": 4.022979736328125e-05, "model_forward_time": 0.026169776916503906, "step": 26365 }, { "epoch": 4.022979736328125e-05, "step": 26365, "training_step_time": 0.10811948776245117 }, { "epoch": 4.02313232421875e-05, "model_forward_time": 0.025082826614379883, "step": 26366 }, { "epoch": 4.02313232421875e-05, "step": 26366, "training_step_time": 0.10401391983032227 }, { "epoch": 4.023284912109375e-05, "model_forward_time": 0.025108814239501953, "step": 26367 }, { "epoch": 4.023284912109375e-05, "step": 26367, "training_step_time": 0.10463786125183105 }, { "epoch": 4.0234375e-05, "model_forward_time": 0.025428056716918945, "step": 26368 }, { "epoch": 4.0234375e-05, "step": 26368, "training_step_time": 0.10335922241210938 }, { "epoch": 4.023590087890625e-05, "model_forward_time": 0.02483844757080078, "step": 26369 }, { "epoch": 4.023590087890625e-05, "step": 26369, "training_step_time": 0.10768008232116699 }, { "epoch": 4.02374267578125e-05, "grad_norm": 0.08122275024652481, "learning_rate": 3.949670756075447e-06, "loss": 0.0033, "step": 26370 }, { "epoch": 4.02374267578125e-05, "model_forward_time": 0.025269746780395508, "step": 26370 }, { "epoch": 4.02374267578125e-05, "step": 26370, "training_step_time": 0.11851239204406738 }, { "epoch": 4.023895263671875e-05, "model_forward_time": 0.025038719177246094, "step": 26371 }, { "epoch": 4.023895263671875e-05, "step": 26371, "training_step_time": 0.13715100288391113 }, { "epoch": 4.0240478515625e-05, "model_forward_time": 0.02512812614440918, "step": 26372 }, { "epoch": 4.0240478515625e-05, "step": 26372, "training_step_time": 0.12050580978393555 }, { "epoch": 4.024200439453125e-05, "model_forward_time": 0.025053024291992188, "step": 26373 }, { "epoch": 4.024200439453125e-05, "step": 26373, "training_step_time": 0.2238011360168457 }, { "epoch": 4.02435302734375e-05, "model_forward_time": 0.024322509765625, "step": 26374 }, { "epoch": 4.02435302734375e-05, "step": 26374, "training_step_time": 0.1360483169555664 }, { "epoch": 4.024505615234375e-05, "model_forward_time": 0.02450728416442871, "step": 26375 }, { "epoch": 4.024505615234375e-05, "step": 26375, "training_step_time": 0.11437273025512695 }, { "epoch": 4.024658203125e-05, "model_forward_time": 0.02482008934020996, "step": 26376 }, { "epoch": 4.024658203125e-05, "step": 26376, "training_step_time": 0.18973517417907715 }, { "epoch": 4.024810791015625e-05, "model_forward_time": 0.024373769760131836, "step": 26377 }, { "epoch": 4.024810791015625e-05, "step": 26377, "training_step_time": 0.1875293254852295 }, { "epoch": 4.02496337890625e-05, "model_forward_time": 0.024494647979736328, "step": 26378 }, { "epoch": 4.02496337890625e-05, "step": 26378, "training_step_time": 0.17230749130249023 }, { "epoch": 4.025115966796875e-05, "model_forward_time": 0.024723529815673828, "step": 26379 }, { "epoch": 4.025115966796875e-05, "step": 26379, "training_step_time": 0.1215677261352539 }, { "epoch": 4.0252685546875e-05, "grad_norm": 0.10025465488433838, "learning_rate": 3.928228597551947e-06, "loss": 0.0044, "step": 26380 }, { "epoch": 4.0252685546875e-05, "model_forward_time": 0.024968862533569336, "step": 26380 }, { "epoch": 4.0252685546875e-05, "step": 26380, "training_step_time": 0.1457688808441162 }, { "epoch": 4.025421142578125e-05, "model_forward_time": 0.02488112449645996, "step": 26381 }, { "epoch": 4.025421142578125e-05, "step": 26381, "training_step_time": 0.12195348739624023 }, { "epoch": 4.02557373046875e-05, "model_forward_time": 0.024956226348876953, "step": 26382 }, { "epoch": 4.02557373046875e-05, "step": 26382, "training_step_time": 0.21177315711975098 }, { "epoch": 4.025726318359375e-05, "model_forward_time": 0.023922443389892578, "step": 26383 }, { "epoch": 4.025726318359375e-05, "step": 26383, "training_step_time": 0.18485093116760254 }, { "epoch": 4.02587890625e-05, "model_forward_time": 0.024397850036621094, "step": 26384 }, { "epoch": 4.02587890625e-05, "step": 26384, "training_step_time": 0.12829279899597168 }, { "epoch": 4.026031494140625e-05, "model_forward_time": 0.02417445182800293, "step": 26385 }, { "epoch": 4.026031494140625e-05, "step": 26385, "training_step_time": 0.1228783130645752 }, { "epoch": 4.02618408203125e-05, "model_forward_time": 0.025017261505126953, "step": 26386 }, { "epoch": 4.02618408203125e-05, "step": 26386, "training_step_time": 0.10435962677001953 }, { "epoch": 4.026336669921875e-05, "model_forward_time": 0.025168180465698242, "step": 26387 }, { "epoch": 4.026336669921875e-05, "step": 26387, "training_step_time": 0.10535383224487305 }, { "epoch": 4.0264892578125e-05, "model_forward_time": 0.025782108306884766, "step": 26388 }, { "epoch": 4.0264892578125e-05, "step": 26388, "training_step_time": 0.1072545051574707 }, { "epoch": 4.026641845703125e-05, "model_forward_time": 0.026050090789794922, "step": 26389 }, { "epoch": 4.026641845703125e-05, "step": 26389, "training_step_time": 0.10728645324707031 }, { "epoch": 4.02679443359375e-05, "grad_norm": 0.08983786404132843, "learning_rate": 3.90684242057498e-06, "loss": 0.0084, "step": 26390 }, { "epoch": 4.02679443359375e-05, "model_forward_time": 0.024805068969726562, "step": 26390 }, { "epoch": 4.02679443359375e-05, "step": 26390, "training_step_time": 0.10869193077087402 }, { "epoch": 4.026947021484375e-05, "model_forward_time": 0.02477717399597168, "step": 26391 }, { "epoch": 4.026947021484375e-05, "step": 26391, "training_step_time": 0.1058351993560791 }, { "epoch": 4.027099609375e-05, "model_forward_time": 0.025067567825317383, "step": 26392 }, { "epoch": 4.027099609375e-05, "step": 26392, "training_step_time": 0.11029458045959473 }, { "epoch": 4.027252197265625e-05, "model_forward_time": 0.02496480941772461, "step": 26393 }, { "epoch": 4.027252197265625e-05, "step": 26393, "training_step_time": 0.11024212837219238 }, { "epoch": 4.02740478515625e-05, "model_forward_time": 0.024397850036621094, "step": 26394 }, { "epoch": 4.02740478515625e-05, "step": 26394, "training_step_time": 0.1381516456604004 }, { "epoch": 4.027557373046875e-05, "model_forward_time": 0.02532196044921875, "step": 26395 }, { "epoch": 4.027557373046875e-05, "step": 26395, "training_step_time": 0.13750410079956055 }, { "epoch": 4.0277099609375e-05, "model_forward_time": 0.02460765838623047, "step": 26396 }, { "epoch": 4.0277099609375e-05, "step": 26396, "training_step_time": 0.11692500114440918 }, { "epoch": 4.027862548828125e-05, "model_forward_time": 0.02514791488647461, "step": 26397 }, { "epoch": 4.027862548828125e-05, "step": 26397, "training_step_time": 0.12416553497314453 }, { "epoch": 4.02801513671875e-05, "model_forward_time": 0.024776220321655273, "step": 26398 }, { "epoch": 4.02801513671875e-05, "step": 26398, "training_step_time": 0.10987281799316406 }, { "epoch": 4.028167724609375e-05, "model_forward_time": 0.024753808975219727, "step": 26399 }, { "epoch": 4.028167724609375e-05, "step": 26399, "training_step_time": 0.10681009292602539 }, { "epoch": 4.0283203125e-05, "grad_norm": 0.1022719219326973, "learning_rate": 3.885512251130763e-06, "loss": 0.0051, "step": 26400 }, { "epoch": 4.0283203125e-05, "model_forward_time": 0.024988412857055664, "step": 26400 }, { "epoch": 4.0283203125e-05, "step": 26400, "training_step_time": 0.1995549201965332 }, { "epoch": 4.028472900390625e-05, "model_forward_time": 0.02407240867614746, "step": 26401 }, { "epoch": 4.028472900390625e-05, "step": 26401, "training_step_time": 0.1286015510559082 }, { "epoch": 4.02862548828125e-05, "model_forward_time": 0.023145198822021484, "step": 26402 }, { "epoch": 4.02862548828125e-05, "step": 26402, "training_step_time": 0.1265878677368164 }, { "epoch": 4.028778076171875e-05, "model_forward_time": 0.02324390411376953, "step": 26403 }, { "epoch": 4.028778076171875e-05, "step": 26403, "training_step_time": 0.12350606918334961 }, { "epoch": 4.0289306640625e-05, "model_forward_time": 0.02460479736328125, "step": 26404 }, { "epoch": 4.0289306640625e-05, "step": 26404, "training_step_time": 0.11295199394226074 }, { "epoch": 4.029083251953125e-05, "model_forward_time": 0.02408003807067871, "step": 26405 }, { "epoch": 4.029083251953125e-05, "step": 26405, "training_step_time": 0.115081787109375 }, { "epoch": 4.02923583984375e-05, "model_forward_time": 0.024715423583984375, "step": 26406 }, { "epoch": 4.02923583984375e-05, "step": 26406, "training_step_time": 0.11436986923217773 }, { "epoch": 4.029388427734375e-05, "model_forward_time": 0.024031639099121094, "step": 26407 }, { "epoch": 4.029388427734375e-05, "step": 26407, "training_step_time": 0.6010837554931641 }, { "epoch": 4.029541015625e-05, "model_forward_time": 0.022745609283447266, "step": 26408 }, { "epoch": 4.029541015625e-05, "step": 26408, "training_step_time": 0.10480260848999023 }, { "epoch": 4.029693603515625e-05, "model_forward_time": 0.024315357208251953, "step": 26409 }, { "epoch": 4.029693603515625e-05, "step": 26409, "training_step_time": 0.1067349910736084 }, { "epoch": 4.02984619140625e-05, "grad_norm": 0.07938051968812943, "learning_rate": 3.864238115137481e-06, "loss": 0.0034, "step": 26410 }, { "epoch": 4.02984619140625e-05, "model_forward_time": 0.025067806243896484, "step": 26410 }, { "epoch": 4.02984619140625e-05, "step": 26410, "training_step_time": 0.11238455772399902 }, { "epoch": 4.029998779296875e-05, "model_forward_time": 0.02537989616394043, "step": 26411 }, { "epoch": 4.029998779296875e-05, "step": 26411, "training_step_time": 0.17967605590820312 }, { "epoch": 4.0301513671875e-05, "model_forward_time": 0.024684906005859375, "step": 26412 }, { "epoch": 4.0301513671875e-05, "step": 26412, "training_step_time": 0.10774850845336914 }, { "epoch": 4.030303955078125e-05, "model_forward_time": 0.024586200714111328, "step": 26413 }, { "epoch": 4.030303955078125e-05, "step": 26413, "training_step_time": 0.11280250549316406 }, { "epoch": 4.03045654296875e-05, "model_forward_time": 0.025382280349731445, "step": 26414 }, { "epoch": 4.03045654296875e-05, "step": 26414, "training_step_time": 0.13556909561157227 }, { "epoch": 4.030609130859375e-05, "model_forward_time": 0.028500795364379883, "step": 26415 }, { "epoch": 4.030609130859375e-05, "step": 26415, "training_step_time": 0.10759377479553223 }, { "epoch": 4.03076171875e-05, "model_forward_time": 0.02465653419494629, "step": 26416 }, { "epoch": 4.03076171875e-05, "step": 26416, "training_step_time": 0.1723332405090332 }, { "epoch": 4.030914306640625e-05, "model_forward_time": 0.024422883987426758, "step": 26417 }, { "epoch": 4.030914306640625e-05, "step": 26417, "training_step_time": 0.145768404006958 }, { "epoch": 4.03106689453125e-05, "model_forward_time": 0.025044679641723633, "step": 26418 }, { "epoch": 4.03106689453125e-05, "step": 26418, "training_step_time": 0.10959315299987793 }, { "epoch": 4.031219482421875e-05, "model_forward_time": 0.024940013885498047, "step": 26419 }, { "epoch": 4.031219482421875e-05, "step": 26419, "training_step_time": 0.10508084297180176 }, { "epoch": 4.0313720703125e-05, "grad_norm": 0.08243121206760406, "learning_rate": 3.843020038445211e-06, "loss": 0.0028, "step": 26420 }, { "epoch": 4.0313720703125e-05, "model_forward_time": 0.0259702205657959, "step": 26420 }, { "epoch": 4.0313720703125e-05, "step": 26420, "training_step_time": 0.11414027214050293 }, { "epoch": 4.031524658203125e-05, "model_forward_time": 0.026123046875, "step": 26421 }, { "epoch": 4.031524658203125e-05, "step": 26421, "training_step_time": 0.18726468086242676 }, { "epoch": 4.03167724609375e-05, "model_forward_time": 0.02807331085205078, "step": 26422 }, { "epoch": 4.03167724609375e-05, "step": 26422, "training_step_time": 0.11030387878417969 }, { "epoch": 4.031829833984375e-05, "model_forward_time": 0.024532079696655273, "step": 26423 }, { "epoch": 4.031829833984375e-05, "step": 26423, "training_step_time": 0.10832548141479492 }, { "epoch": 4.031982421875e-05, "model_forward_time": 0.025362253189086914, "step": 26424 }, { "epoch": 4.031982421875e-05, "step": 26424, "training_step_time": 0.15369796752929688 }, { "epoch": 4.032135009765625e-05, "model_forward_time": 0.024271726608276367, "step": 26425 }, { "epoch": 4.032135009765625e-05, "step": 26425, "training_step_time": 0.10685300827026367 }, { "epoch": 4.03228759765625e-05, "model_forward_time": 0.02485799789428711, "step": 26426 }, { "epoch": 4.03228759765625e-05, "step": 26426, "training_step_time": 0.1215517520904541 }, { "epoch": 4.032440185546875e-05, "model_forward_time": 0.02566695213317871, "step": 26427 }, { "epoch": 4.032440185546875e-05, "step": 26427, "training_step_time": 0.11046886444091797 }, { "epoch": 4.0325927734375e-05, "model_forward_time": 0.026460886001586914, "step": 26428 }, { "epoch": 4.0325927734375e-05, "step": 26428, "training_step_time": 0.11052560806274414 }, { "epoch": 4.032745361328125e-05, "model_forward_time": 0.0252227783203125, "step": 26429 }, { "epoch": 4.032745361328125e-05, "step": 26429, "training_step_time": 0.10758137702941895 }, { "epoch": 4.03289794921875e-05, "grad_norm": 0.06858857721090317, "learning_rate": 3.821858046835913e-06, "loss": 0.0059, "step": 26430 }, { "epoch": 4.03289794921875e-05, "model_forward_time": 0.02806711196899414, "step": 26430 }, { "epoch": 4.03289794921875e-05, "step": 26430, "training_step_time": 0.10823345184326172 }, { "epoch": 4.033050537109375e-05, "model_forward_time": 0.025559186935424805, "step": 26431 }, { "epoch": 4.033050537109375e-05, "step": 26431, "training_step_time": 0.11278486251831055 }, { "epoch": 4.033203125e-05, "model_forward_time": 0.025243520736694336, "step": 26432 }, { "epoch": 4.033203125e-05, "step": 26432, "training_step_time": 0.10969781875610352 }, { "epoch": 4.033355712890625e-05, "model_forward_time": 0.025149106979370117, "step": 26433 }, { "epoch": 4.033355712890625e-05, "step": 26433, "training_step_time": 0.10625052452087402 }, { "epoch": 4.03350830078125e-05, "model_forward_time": 0.02505350112915039, "step": 26434 }, { "epoch": 4.03350830078125e-05, "step": 26434, "training_step_time": 0.10821533203125 }, { "epoch": 4.033660888671875e-05, "model_forward_time": 0.025459766387939453, "step": 26435 }, { "epoch": 4.033660888671875e-05, "step": 26435, "training_step_time": 0.12248015403747559 }, { "epoch": 4.0338134765625e-05, "model_forward_time": 0.025197744369506836, "step": 26436 }, { "epoch": 4.0338134765625e-05, "step": 26436, "training_step_time": 0.16705107688903809 }, { "epoch": 4.033966064453125e-05, "model_forward_time": 0.024364709854125977, "step": 26437 }, { "epoch": 4.033966064453125e-05, "step": 26437, "training_step_time": 0.13602328300476074 }, { "epoch": 4.03411865234375e-05, "model_forward_time": 0.024669885635375977, "step": 26438 }, { "epoch": 4.03411865234375e-05, "step": 26438, "training_step_time": 0.10366678237915039 }, { "epoch": 4.034271240234375e-05, "model_forward_time": 0.02506709098815918, "step": 26439 }, { "epoch": 4.034271240234375e-05, "step": 26439, "training_step_time": 0.11863088607788086 }, { "epoch": 4.034423828125e-05, "grad_norm": 0.07096494734287262, "learning_rate": 3.8007521660234023e-06, "loss": 0.006, "step": 26440 }, { "epoch": 4.034423828125e-05, "model_forward_time": 0.025040626525878906, "step": 26440 }, { "epoch": 4.034423828125e-05, "step": 26440, "training_step_time": 0.10867071151733398 }, { "epoch": 4.034576416015625e-05, "model_forward_time": 0.025470495223999023, "step": 26441 }, { "epoch": 4.034576416015625e-05, "step": 26441, "training_step_time": 0.10976910591125488 }, { "epoch": 4.03472900390625e-05, "model_forward_time": 0.025163650512695312, "step": 26442 }, { "epoch": 4.03472900390625e-05, "step": 26442, "training_step_time": 0.19699859619140625 }, { "epoch": 4.034881591796875e-05, "model_forward_time": 0.02436351776123047, "step": 26443 }, { "epoch": 4.034881591796875e-05, "step": 26443, "training_step_time": 0.10846352577209473 }, { "epoch": 4.0350341796875e-05, "model_forward_time": 0.024471044540405273, "step": 26444 }, { "epoch": 4.0350341796875e-05, "step": 26444, "training_step_time": 0.1071784496307373 }, { "epoch": 4.035186767578125e-05, "model_forward_time": 0.025505781173706055, "step": 26445 }, { "epoch": 4.035186767578125e-05, "step": 26445, "training_step_time": 0.10872721672058105 }, { "epoch": 4.03533935546875e-05, "model_forward_time": 0.025043964385986328, "step": 26446 }, { "epoch": 4.03533935546875e-05, "step": 26446, "training_step_time": 0.1049811840057373 }, { "epoch": 4.035491943359375e-05, "model_forward_time": 0.02552008628845215, "step": 26447 }, { "epoch": 4.035491943359375e-05, "step": 26447, "training_step_time": 0.11053943634033203 }, { "epoch": 4.03564453125e-05, "model_forward_time": 0.025560617446899414, "step": 26448 }, { "epoch": 4.03564453125e-05, "step": 26448, "training_step_time": 0.12161636352539062 }, { "epoch": 4.035797119140625e-05, "model_forward_time": 0.025191545486450195, "step": 26449 }, { "epoch": 4.035797119140625e-05, "step": 26449, "training_step_time": 0.1221151351928711 }, { "epoch": 4.03594970703125e-05, "grad_norm": 0.08621404320001602, "learning_rate": 3.7797024216533138e-06, "loss": 0.0081, "step": 26450 }, { "epoch": 4.03594970703125e-05, "model_forward_time": 0.02521681785583496, "step": 26450 }, { "epoch": 4.03594970703125e-05, "step": 26450, "training_step_time": 0.12389802932739258 }, { "epoch": 4.036102294921875e-05, "model_forward_time": 0.025003433227539062, "step": 26451 }, { "epoch": 4.036102294921875e-05, "step": 26451, "training_step_time": 0.11805891990661621 }, { "epoch": 4.0362548828125e-05, "model_forward_time": 0.025041580200195312, "step": 26452 }, { "epoch": 4.0362548828125e-05, "step": 26452, "training_step_time": 0.12157082557678223 }, { "epoch": 4.036407470703125e-05, "model_forward_time": 0.025294065475463867, "step": 26453 }, { "epoch": 4.036407470703125e-05, "step": 26453, "training_step_time": 0.12359094619750977 }, { "epoch": 4.03656005859375e-05, "model_forward_time": 0.02542257308959961, "step": 26454 }, { "epoch": 4.03656005859375e-05, "step": 26454, "training_step_time": 0.11923432350158691 }, { "epoch": 4.036712646484375e-05, "model_forward_time": 0.025609731674194336, "step": 26455 }, { "epoch": 4.036712646484375e-05, "step": 26455, "training_step_time": 0.11363005638122559 }, { "epoch": 4.036865234375e-05, "model_forward_time": 0.025257587432861328, "step": 26456 }, { "epoch": 4.036865234375e-05, "step": 26456, "training_step_time": 0.11240792274475098 }, { "epoch": 4.037017822265625e-05, "model_forward_time": 0.025002002716064453, "step": 26457 }, { "epoch": 4.037017822265625e-05, "step": 26457, "training_step_time": 0.1110074520111084 }, { "epoch": 4.03717041015625e-05, "model_forward_time": 0.025066375732421875, "step": 26458 }, { "epoch": 4.03717041015625e-05, "step": 26458, "training_step_time": 0.11243224143981934 }, { "epoch": 4.037322998046875e-05, "model_forward_time": 0.02495265007019043, "step": 26459 }, { "epoch": 4.037322998046875e-05, "step": 26459, "training_step_time": 0.11172652244567871 }, { "epoch": 4.0374755859375e-05, "grad_norm": 0.07990297675132751, "learning_rate": 3.75870883930306e-06, "loss": 0.0067, "step": 26460 }, { "epoch": 4.0374755859375e-05, "model_forward_time": 0.025435447692871094, "step": 26460 }, { "epoch": 4.0374755859375e-05, "step": 26460, "training_step_time": 0.11028671264648438 }, { "epoch": 4.037628173828125e-05, "model_forward_time": 0.024938344955444336, "step": 26461 }, { "epoch": 4.037628173828125e-05, "step": 26461, "training_step_time": 0.10533738136291504 }, { "epoch": 4.03778076171875e-05, "model_forward_time": 0.024785518646240234, "step": 26462 }, { "epoch": 4.03778076171875e-05, "step": 26462, "training_step_time": 0.17598772048950195 }, { "epoch": 4.037933349609375e-05, "model_forward_time": 0.024675369262695312, "step": 26463 }, { "epoch": 4.037933349609375e-05, "step": 26463, "training_step_time": 0.1821300983428955 }, { "epoch": 4.0380859375e-05, "model_forward_time": 0.024676084518432617, "step": 26464 }, { "epoch": 4.0380859375e-05, "step": 26464, "training_step_time": 0.11289048194885254 }, { "epoch": 4.038238525390625e-05, "model_forward_time": 0.02447509765625, "step": 26465 }, { "epoch": 4.038238525390625e-05, "step": 26465, "training_step_time": 0.21402430534362793 }, { "epoch": 4.03839111328125e-05, "model_forward_time": 0.024451732635498047, "step": 26466 }, { "epoch": 4.03839111328125e-05, "step": 26466, "training_step_time": 0.160994291305542 }, { "epoch": 4.038543701171875e-05, "model_forward_time": 0.024286270141601562, "step": 26467 }, { "epoch": 4.038543701171875e-05, "step": 26467, "training_step_time": 0.1130833625793457 }, { "epoch": 4.0386962890625e-05, "model_forward_time": 0.02477288246154785, "step": 26468 }, { "epoch": 4.0386962890625e-05, "step": 26468, "training_step_time": 0.1181955337524414 }, { "epoch": 4.038848876953125e-05, "model_forward_time": 0.02525806427001953, "step": 26469 }, { "epoch": 4.038848876953125e-05, "step": 26469, "training_step_time": 0.12119412422180176 }, { "epoch": 4.03900146484375e-05, "grad_norm": 0.33543387055397034, "learning_rate": 3.7377714444818468e-06, "loss": 0.0048, "step": 26470 }, { "epoch": 4.03900146484375e-05, "model_forward_time": 0.026371002197265625, "step": 26470 }, { "epoch": 4.03900146484375e-05, "step": 26470, "training_step_time": 0.156721830368042 }, { "epoch": 4.039154052734375e-05, "model_forward_time": 0.024292469024658203, "step": 26471 }, { "epoch": 4.039154052734375e-05, "step": 26471, "training_step_time": 0.16886067390441895 }, { "epoch": 4.039306640625e-05, "model_forward_time": 0.02477550506591797, "step": 26472 }, { "epoch": 4.039306640625e-05, "step": 26472, "training_step_time": 0.11124873161315918 }, { "epoch": 4.039459228515625e-05, "model_forward_time": 0.023722171783447266, "step": 26473 }, { "epoch": 4.039459228515625e-05, "step": 26473, "training_step_time": 0.12565827369689941 }, { "epoch": 4.03961181640625e-05, "model_forward_time": 0.024488449096679688, "step": 26474 }, { "epoch": 4.03961181640625e-05, "step": 26474, "training_step_time": 0.12562036514282227 }, { "epoch": 4.039764404296875e-05, "model_forward_time": 0.024092674255371094, "step": 26475 }, { "epoch": 4.039764404296875e-05, "step": 26475, "training_step_time": 0.12891721725463867 }, { "epoch": 4.0399169921875e-05, "model_forward_time": 0.024013042449951172, "step": 26476 }, { "epoch": 4.0399169921875e-05, "step": 26476, "training_step_time": 0.12344646453857422 }, { "epoch": 4.040069580078125e-05, "model_forward_time": 0.023740291595458984, "step": 26477 }, { "epoch": 4.040069580078125e-05, "step": 26477, "training_step_time": 0.11871886253356934 }, { "epoch": 4.04022216796875e-05, "model_forward_time": 0.02512645721435547, "step": 26478 }, { "epoch": 4.04022216796875e-05, "step": 26478, "training_step_time": 0.12000632286071777 }, { "epoch": 4.040374755859375e-05, "model_forward_time": 0.025534629821777344, "step": 26479 }, { "epoch": 4.040374755859375e-05, "step": 26479, "training_step_time": 0.1130666732788086 }, { "epoch": 4.04052734375e-05, "grad_norm": 0.07303806394338608, "learning_rate": 3.7168902626305622e-06, "loss": 0.0033, "step": 26480 }, { "epoch": 4.04052734375e-05, "model_forward_time": 0.02535390853881836, "step": 26480 }, { "epoch": 4.04052734375e-05, "step": 26480, "training_step_time": 0.1931018829345703 }, { "epoch": 4.040679931640625e-05, "model_forward_time": 0.02527928352355957, "step": 26481 }, { "epoch": 4.040679931640625e-05, "step": 26481, "training_step_time": 0.13720417022705078 }, { "epoch": 4.04083251953125e-05, "model_forward_time": 0.02448129653930664, "step": 26482 }, { "epoch": 4.04083251953125e-05, "step": 26482, "training_step_time": 0.11026358604431152 }, { "epoch": 4.040985107421875e-05, "model_forward_time": 0.025110483169555664, "step": 26483 }, { "epoch": 4.040985107421875e-05, "step": 26483, "training_step_time": 0.10820269584655762 }, { "epoch": 4.0411376953125e-05, "model_forward_time": 0.025191068649291992, "step": 26484 }, { "epoch": 4.0411376953125e-05, "step": 26484, "training_step_time": 0.10896587371826172 }, { "epoch": 4.041290283203125e-05, "model_forward_time": 0.025431394577026367, "step": 26485 }, { "epoch": 4.041290283203125e-05, "step": 26485, "training_step_time": 0.10830354690551758 }, { "epoch": 4.04144287109375e-05, "model_forward_time": 0.02522444725036621, "step": 26486 }, { "epoch": 4.04144287109375e-05, "step": 26486, "training_step_time": 0.19467496871948242 }, { "epoch": 4.041595458984375e-05, "model_forward_time": 0.024538755416870117, "step": 26487 }, { "epoch": 4.041595458984375e-05, "step": 26487, "training_step_time": 0.10838747024536133 }, { "epoch": 4.041748046875e-05, "model_forward_time": 0.024657487869262695, "step": 26488 }, { "epoch": 4.041748046875e-05, "step": 26488, "training_step_time": 0.10874557495117188 }, { "epoch": 4.041900634765625e-05, "model_forward_time": 0.02512359619140625, "step": 26489 }, { "epoch": 4.041900634765625e-05, "step": 26489, "training_step_time": 0.10472989082336426 }, { "epoch": 4.04205322265625e-05, "grad_norm": 0.05251162871718407, "learning_rate": 3.696065319121833e-06, "loss": 0.0048, "step": 26490 }, { "epoch": 4.04205322265625e-05, "model_forward_time": 0.025049209594726562, "step": 26490 }, { "epoch": 4.04205322265625e-05, "step": 26490, "training_step_time": 0.10488724708557129 }, { "epoch": 4.042205810546875e-05, "model_forward_time": 0.025122404098510742, "step": 26491 }, { "epoch": 4.042205810546875e-05, "step": 26491, "training_step_time": 0.10743975639343262 }, { "epoch": 4.0423583984375e-05, "model_forward_time": 0.02494072914123535, "step": 26492 }, { "epoch": 4.0423583984375e-05, "step": 26492, "training_step_time": 0.10522890090942383 }, { "epoch": 4.042510986328125e-05, "model_forward_time": 0.02551436424255371, "step": 26493 }, { "epoch": 4.042510986328125e-05, "step": 26493, "training_step_time": 0.10524749755859375 }, { "epoch": 4.04266357421875e-05, "model_forward_time": 0.024808168411254883, "step": 26494 }, { "epoch": 4.04266357421875e-05, "step": 26494, "training_step_time": 0.10324645042419434 }, { "epoch": 4.042816162109375e-05, "model_forward_time": 0.02505207061767578, "step": 26495 }, { "epoch": 4.042816162109375e-05, "step": 26495, "training_step_time": 0.1071326732635498 }, { "epoch": 4.04296875e-05, "model_forward_time": 0.024913787841796875, "step": 26496 }, { "epoch": 4.04296875e-05, "step": 26496, "training_step_time": 0.10564470291137695 }, { "epoch": 4.043121337890625e-05, "model_forward_time": 0.025305986404418945, "step": 26497 }, { "epoch": 4.043121337890625e-05, "step": 26497, "training_step_time": 0.10999155044555664 }, { "epoch": 4.04327392578125e-05, "model_forward_time": 0.025078773498535156, "step": 26498 }, { "epoch": 4.04327392578125e-05, "step": 26498, "training_step_time": 0.10939908027648926 }, { "epoch": 4.043426513671875e-05, "model_forward_time": 0.02516913414001465, "step": 26499 }, { "epoch": 4.043426513671875e-05, "step": 26499, "training_step_time": 0.10502409934997559 }, { "epoch": 4.0435791015625e-05, "grad_norm": 0.1022261381149292, "learning_rate": 3.675296639259912e-06, "loss": 0.0056, "step": 26500 }, { "epoch": 4.0435791015625e-05, "model_forward_time": 0.025397300720214844, "step": 26500 }, { "epoch": 4.0435791015625e-05, "step": 26500, "training_step_time": 0.10549211502075195 }, { "epoch": 4.043731689453125e-05, "model_forward_time": 0.02512192726135254, "step": 26501 }, { "epoch": 4.043731689453125e-05, "step": 26501, "training_step_time": 0.105438232421875 }, { "epoch": 4.04388427734375e-05, "model_forward_time": 0.02480316162109375, "step": 26502 }, { "epoch": 4.04388427734375e-05, "step": 26502, "training_step_time": 0.10332369804382324 }, { "epoch": 4.044036865234375e-05, "model_forward_time": 0.02492213249206543, "step": 26503 }, { "epoch": 4.044036865234375e-05, "step": 26503, "training_step_time": 0.18656635284423828 }, { "epoch": 4.044189453125e-05, "model_forward_time": 0.02458357810974121, "step": 26504 }, { "epoch": 4.044189453125e-05, "step": 26504, "training_step_time": 0.11342453956604004 }, { "epoch": 4.044342041015625e-05, "model_forward_time": 0.024757862091064453, "step": 26505 }, { "epoch": 4.044342041015625e-05, "step": 26505, "training_step_time": 0.10750746726989746 }, { "epoch": 4.04449462890625e-05, "model_forward_time": 0.02530074119567871, "step": 26506 }, { "epoch": 4.04449462890625e-05, "step": 26506, "training_step_time": 0.10714459419250488 }, { "epoch": 4.044647216796875e-05, "model_forward_time": 0.024649620056152344, "step": 26507 }, { "epoch": 4.044647216796875e-05, "step": 26507, "training_step_time": 0.19402313232421875 }, { "epoch": 4.0447998046875e-05, "model_forward_time": 0.024662017822265625, "step": 26508 }, { "epoch": 4.0447998046875e-05, "step": 26508, "training_step_time": 0.14632773399353027 }, { "epoch": 4.044952392578125e-05, "model_forward_time": 0.024420976638793945, "step": 26509 }, { "epoch": 4.044952392578125e-05, "step": 26509, "training_step_time": 0.1154334545135498 }, { "epoch": 4.04510498046875e-05, "grad_norm": 0.057058922946453094, "learning_rate": 3.654584248280707e-06, "loss": 0.0122, "step": 26510 }, { "epoch": 4.04510498046875e-05, "model_forward_time": 0.024847745895385742, "step": 26510 }, { "epoch": 4.04510498046875e-05, "step": 26510, "training_step_time": 0.12876415252685547 }, { "epoch": 4.045257568359375e-05, "model_forward_time": 0.02538323402404785, "step": 26511 }, { "epoch": 4.045257568359375e-05, "step": 26511, "training_step_time": 0.11016511917114258 }, { "epoch": 4.04541015625e-05, "model_forward_time": 0.025345563888549805, "step": 26512 }, { "epoch": 4.04541015625e-05, "step": 26512, "training_step_time": 0.15119099617004395 }, { "epoch": 4.045562744140625e-05, "model_forward_time": 0.024898529052734375, "step": 26513 }, { "epoch": 4.045562744140625e-05, "step": 26513, "training_step_time": 0.12807059288024902 }, { "epoch": 4.04571533203125e-05, "model_forward_time": 0.024813413619995117, "step": 26514 }, { "epoch": 4.04571533203125e-05, "step": 26514, "training_step_time": 0.2071993350982666 }, { "epoch": 4.045867919921875e-05, "model_forward_time": 0.024770259857177734, "step": 26515 }, { "epoch": 4.045867919921875e-05, "step": 26515, "training_step_time": 0.1227271556854248 }, { "epoch": 4.0460205078125e-05, "model_forward_time": 0.02407050132751465, "step": 26516 }, { "epoch": 4.0460205078125e-05, "step": 26516, "training_step_time": 0.16160154342651367 }, { "epoch": 4.046173095703125e-05, "model_forward_time": 0.024768590927124023, "step": 26517 }, { "epoch": 4.046173095703125e-05, "step": 26517, "training_step_time": 0.13242316246032715 }, { "epoch": 4.04632568359375e-05, "model_forward_time": 0.02495551109313965, "step": 26518 }, { "epoch": 4.04632568359375e-05, "step": 26518, "training_step_time": 0.11339974403381348 }, { "epoch": 4.046478271484375e-05, "model_forward_time": 0.02517080307006836, "step": 26519 }, { "epoch": 4.046478271484375e-05, "step": 26519, "training_step_time": 0.11597990989685059 }, { "epoch": 4.046630859375e-05, "grad_norm": 0.04885173216462135, "learning_rate": 3.6339281713517303e-06, "loss": 0.0059, "step": 26520 }, { "epoch": 4.046630859375e-05, "model_forward_time": 0.02517080307006836, "step": 26520 }, { "epoch": 4.046630859375e-05, "step": 26520, "training_step_time": 0.10831117630004883 }, { "epoch": 4.046783447265625e-05, "model_forward_time": 0.026674270629882812, "step": 26521 }, { "epoch": 4.046783447265625e-05, "step": 26521, "training_step_time": 0.1100010871887207 }, { "epoch": 4.04693603515625e-05, "model_forward_time": 0.025594472885131836, "step": 26522 }, { "epoch": 4.04693603515625e-05, "step": 26522, "training_step_time": 0.10642051696777344 }, { "epoch": 4.047088623046875e-05, "model_forward_time": 0.025460243225097656, "step": 26523 }, { "epoch": 4.047088623046875e-05, "step": 26523, "training_step_time": 0.10879063606262207 }, { "epoch": 4.0472412109375e-05, "model_forward_time": 0.024884939193725586, "step": 26524 }, { "epoch": 4.0472412109375e-05, "step": 26524, "training_step_time": 0.10812640190124512 }, { "epoch": 4.047393798828125e-05, "model_forward_time": 0.02557826042175293, "step": 26525 }, { "epoch": 4.047393798828125e-05, "step": 26525, "training_step_time": 0.10730242729187012 }, { "epoch": 4.04754638671875e-05, "model_forward_time": 0.0276339054107666, "step": 26526 }, { "epoch": 4.04754638671875e-05, "step": 26526, "training_step_time": 0.15433645248413086 }, { "epoch": 4.047698974609375e-05, "model_forward_time": 0.024886608123779297, "step": 26527 }, { "epoch": 4.047698974609375e-05, "step": 26527, "training_step_time": 0.1389927864074707 }, { "epoch": 4.0478515625e-05, "model_forward_time": 0.0249631404876709, "step": 26528 }, { "epoch": 4.0478515625e-05, "step": 26528, "training_step_time": 0.10765862464904785 }, { "epoch": 4.048004150390625e-05, "model_forward_time": 0.025698423385620117, "step": 26529 }, { "epoch": 4.048004150390625e-05, "step": 26529, "training_step_time": 0.11196327209472656 }, { "epoch": 4.04815673828125e-05, "grad_norm": 0.08183445781469345, "learning_rate": 3.6133284335720605e-06, "loss": 0.0061, "step": 26530 }, { "epoch": 4.04815673828125e-05, "model_forward_time": 0.025316476821899414, "step": 26530 }, { "epoch": 4.04815673828125e-05, "step": 26530, "training_step_time": 0.11513996124267578 }, { "epoch": 4.048309326171875e-05, "model_forward_time": 0.025295019149780273, "step": 26531 }, { "epoch": 4.048309326171875e-05, "step": 26531, "training_step_time": 0.18512296676635742 }, { "epoch": 4.0484619140625e-05, "model_forward_time": 0.023864269256591797, "step": 26532 }, { "epoch": 4.0484619140625e-05, "step": 26532, "training_step_time": 0.10978341102600098 }, { "epoch": 4.048614501953125e-05, "model_forward_time": 0.02470874786376953, "step": 26533 }, { "epoch": 4.048614501953125e-05, "step": 26533, "training_step_time": 0.12314629554748535 }, { "epoch": 4.04876708984375e-05, "model_forward_time": 0.02522110939025879, "step": 26534 }, { "epoch": 4.04876708984375e-05, "step": 26534, "training_step_time": 0.10663628578186035 }, { "epoch": 4.048919677734375e-05, "model_forward_time": 0.025143146514892578, "step": 26535 }, { "epoch": 4.048919677734375e-05, "step": 26535, "training_step_time": 0.10529589653015137 }, { "epoch": 4.049072265625e-05, "model_forward_time": 0.025355815887451172, "step": 26536 }, { "epoch": 4.049072265625e-05, "step": 26536, "training_step_time": 0.10533380508422852 }, { "epoch": 4.049224853515625e-05, "model_forward_time": 0.02504563331604004, "step": 26537 }, { "epoch": 4.049224853515625e-05, "step": 26537, "training_step_time": 0.10556244850158691 }, { "epoch": 4.04937744140625e-05, "model_forward_time": 0.02545642852783203, "step": 26538 }, { "epoch": 4.04937744140625e-05, "step": 26538, "training_step_time": 0.10567760467529297 }, { "epoch": 4.049530029296875e-05, "model_forward_time": 0.02527308464050293, "step": 26539 }, { "epoch": 4.049530029296875e-05, "step": 26539, "training_step_time": 0.10600471496582031 }, { "epoch": 4.0496826171875e-05, "grad_norm": 0.09746471792459488, "learning_rate": 3.59278505997232e-06, "loss": 0.0049, "step": 26540 }, { "epoch": 4.0496826171875e-05, "model_forward_time": 0.025417566299438477, "step": 26540 }, { "epoch": 4.0496826171875e-05, "step": 26540, "training_step_time": 0.10756278038024902 }, { "epoch": 4.049835205078125e-05, "model_forward_time": 0.025043487548828125, "step": 26541 }, { "epoch": 4.049835205078125e-05, "step": 26541, "training_step_time": 0.10797357559204102 }, { "epoch": 4.04998779296875e-05, "model_forward_time": 0.02576589584350586, "step": 26542 }, { "epoch": 4.04998779296875e-05, "step": 26542, "training_step_time": 0.10625720024108887 }, { "epoch": 4.050140380859375e-05, "model_forward_time": 0.025609254837036133, "step": 26543 }, { "epoch": 4.050140380859375e-05, "step": 26543, "training_step_time": 0.1117391586303711 }, { "epoch": 4.05029296875e-05, "model_forward_time": 0.025373458862304688, "step": 26544 }, { "epoch": 4.05029296875e-05, "step": 26544, "training_step_time": 0.10754847526550293 }, { "epoch": 4.050445556640625e-05, "model_forward_time": 0.02575850486755371, "step": 26545 }, { "epoch": 4.050445556640625e-05, "step": 26545, "training_step_time": 0.10830569267272949 }, { "epoch": 4.05059814453125e-05, "model_forward_time": 0.025150299072265625, "step": 26546 }, { "epoch": 4.05059814453125e-05, "step": 26546, "training_step_time": 0.10763216018676758 }, { "epoch": 4.050750732421875e-05, "model_forward_time": 0.02489161491394043, "step": 26547 }, { "epoch": 4.050750732421875e-05, "step": 26547, "training_step_time": 0.105926513671875 }, { "epoch": 4.0509033203125e-05, "model_forward_time": 0.025455713272094727, "step": 26548 }, { "epoch": 4.0509033203125e-05, "step": 26548, "training_step_time": 0.10813164710998535 }, { "epoch": 4.051055908203125e-05, "model_forward_time": 0.0253446102142334, "step": 26549 }, { "epoch": 4.051055908203125e-05, "step": 26549, "training_step_time": 0.17074990272521973 }, { "epoch": 4.05120849609375e-05, "grad_norm": 0.0958266481757164, "learning_rate": 3.5722980755146517e-06, "loss": 0.0039, "step": 26550 }, { "epoch": 4.05120849609375e-05, "model_forward_time": 0.02494359016418457, "step": 26550 }, { "epoch": 4.05120849609375e-05, "step": 26550, "training_step_time": 0.10689473152160645 }, { "epoch": 4.051361083984375e-05, "model_forward_time": 0.02472686767578125, "step": 26551 }, { "epoch": 4.051361083984375e-05, "step": 26551, "training_step_time": 0.11568999290466309 }, { "epoch": 4.051513671875e-05, "model_forward_time": 0.025069475173950195, "step": 26552 }, { "epoch": 4.051513671875e-05, "step": 26552, "training_step_time": 0.10408234596252441 }, { "epoch": 4.051666259765625e-05, "model_forward_time": 0.024181604385375977, "step": 26553 }, { "epoch": 4.051666259765625e-05, "step": 26553, "training_step_time": 0.16146063804626465 }, { "epoch": 4.05181884765625e-05, "model_forward_time": 0.02450728416442871, "step": 26554 }, { "epoch": 4.05181884765625e-05, "step": 26554, "training_step_time": 0.15445470809936523 }, { "epoch": 4.051971435546875e-05, "model_forward_time": 0.024485111236572266, "step": 26555 }, { "epoch": 4.051971435546875e-05, "step": 26555, "training_step_time": 0.10858750343322754 }, { "epoch": 4.0521240234375e-05, "model_forward_time": 0.025087594985961914, "step": 26556 }, { "epoch": 4.0521240234375e-05, "step": 26556, "training_step_time": 0.13478565216064453 }, { "epoch": 4.052276611328125e-05, "model_forward_time": 0.0253903865814209, "step": 26557 }, { "epoch": 4.052276611328125e-05, "step": 26557, "training_step_time": 0.1949324607849121 }, { "epoch": 4.05242919921875e-05, "model_forward_time": 0.024905681610107422, "step": 26558 }, { "epoch": 4.05242919921875e-05, "step": 26558, "training_step_time": 0.11893033981323242 }, { "epoch": 4.052581787109375e-05, "model_forward_time": 0.024436235427856445, "step": 26559 }, { "epoch": 4.052581787109375e-05, "step": 26559, "training_step_time": 0.21183371543884277 }, { "epoch": 4.052734375e-05, "grad_norm": 0.1997993439435959, "learning_rate": 3.5518675050926544e-06, "loss": 0.006, "step": 26560 }, { "epoch": 4.052734375e-05, "model_forward_time": 0.02456831932067871, "step": 26560 }, { "epoch": 4.052734375e-05, "step": 26560, "training_step_time": 0.10870885848999023 }, { "epoch": 4.052886962890625e-05, "model_forward_time": 0.02470111846923828, "step": 26561 }, { "epoch": 4.052886962890625e-05, "step": 26561, "training_step_time": 0.10820937156677246 }, { "epoch": 4.05303955078125e-05, "model_forward_time": 0.025211572647094727, "step": 26562 }, { "epoch": 4.05303955078125e-05, "step": 26562, "training_step_time": 0.19577383995056152 }, { "epoch": 4.053192138671875e-05, "model_forward_time": 0.024295330047607422, "step": 26563 }, { "epoch": 4.053192138671875e-05, "step": 26563, "training_step_time": 0.10260367393493652 }, { "epoch": 4.0533447265625e-05, "model_forward_time": 0.02475595474243164, "step": 26564 }, { "epoch": 4.0533447265625e-05, "step": 26564, "training_step_time": 0.10417723655700684 }, { "epoch": 4.053497314453125e-05, "model_forward_time": 0.025842905044555664, "step": 26565 }, { "epoch": 4.053497314453125e-05, "step": 26565, "training_step_time": 0.10833430290222168 }, { "epoch": 4.05364990234375e-05, "model_forward_time": 0.0262451171875, "step": 26566 }, { "epoch": 4.05364990234375e-05, "step": 26566, "training_step_time": 0.11400413513183594 }, { "epoch": 4.053802490234375e-05, "model_forward_time": 0.025316715240478516, "step": 26567 }, { "epoch": 4.053802490234375e-05, "step": 26567, "training_step_time": 0.11634063720703125 }, { "epoch": 4.053955078125e-05, "model_forward_time": 0.027282238006591797, "step": 26568 }, { "epoch": 4.053955078125e-05, "step": 26568, "training_step_time": 0.11847662925720215 }, { "epoch": 4.054107666015625e-05, "model_forward_time": 0.025174379348754883, "step": 26569 }, { "epoch": 4.054107666015625e-05, "step": 26569, "training_step_time": 0.11097383499145508 }, { "epoch": 4.05426025390625e-05, "grad_norm": 0.0691823661327362, "learning_rate": 3.531493373531419e-06, "loss": 0.0029, "step": 26570 }, { "epoch": 4.05426025390625e-05, "model_forward_time": 0.024933815002441406, "step": 26570 }, { "epoch": 4.05426025390625e-05, "step": 26570, "training_step_time": 0.1118321418762207 }, { "epoch": 4.054412841796875e-05, "model_forward_time": 0.025927305221557617, "step": 26571 }, { "epoch": 4.054412841796875e-05, "step": 26571, "training_step_time": 0.11410021781921387 }, { "epoch": 4.0545654296875e-05, "model_forward_time": 0.025362730026245117, "step": 26572 }, { "epoch": 4.0545654296875e-05, "step": 26572, "training_step_time": 0.11338162422180176 }, { "epoch": 4.054718017578125e-05, "model_forward_time": 0.026100873947143555, "step": 26573 }, { "epoch": 4.054718017578125e-05, "step": 26573, "training_step_time": 0.11757349967956543 }, { "epoch": 4.05487060546875e-05, "model_forward_time": 0.025223255157470703, "step": 26574 }, { "epoch": 4.05487060546875e-05, "step": 26574, "training_step_time": 0.11042094230651855 }, { "epoch": 4.055023193359375e-05, "model_forward_time": 0.025331735610961914, "step": 26575 }, { "epoch": 4.055023193359375e-05, "step": 26575, "training_step_time": 0.10987567901611328 }, { "epoch": 4.05517578125e-05, "model_forward_time": 0.0251772403717041, "step": 26576 }, { "epoch": 4.05517578125e-05, "step": 26576, "training_step_time": 0.10582566261291504 }, { "epoch": 4.055328369140625e-05, "model_forward_time": 0.02513265609741211, "step": 26577 }, { "epoch": 4.055328369140625e-05, "step": 26577, "training_step_time": 0.1081385612487793 }, { "epoch": 4.05548095703125e-05, "model_forward_time": 0.025162220001220703, "step": 26578 }, { "epoch": 4.05548095703125e-05, "step": 26578, "training_step_time": 0.19886493682861328 }, { "epoch": 4.055633544921875e-05, "model_forward_time": 0.024438858032226562, "step": 26579 }, { "epoch": 4.055633544921875e-05, "step": 26579, "training_step_time": 0.10661792755126953 }, { "epoch": 4.0557861328125e-05, "grad_norm": 0.10050533711910248, "learning_rate": 3.511175705587433e-06, "loss": 0.0029, "step": 26580 }, { "epoch": 4.0557861328125e-05, "model_forward_time": 0.024654865264892578, "step": 26580 }, { "epoch": 4.0557861328125e-05, "step": 26580, "training_step_time": 0.10755205154418945 }, { "epoch": 4.055938720703125e-05, "model_forward_time": 0.02519965171813965, "step": 26581 }, { "epoch": 4.055938720703125e-05, "step": 26581, "training_step_time": 0.10943269729614258 }, { "epoch": 4.05609130859375e-05, "model_forward_time": 0.025081396102905273, "step": 26582 }, { "epoch": 4.05609130859375e-05, "step": 26582, "training_step_time": 0.10694456100463867 }, { "epoch": 4.056243896484375e-05, "model_forward_time": 0.025464773178100586, "step": 26583 }, { "epoch": 4.056243896484375e-05, "step": 26583, "training_step_time": 0.10962510108947754 }, { "epoch": 4.056396484375e-05, "model_forward_time": 0.025272130966186523, "step": 26584 }, { "epoch": 4.056396484375e-05, "step": 26584, "training_step_time": 0.10564017295837402 }, { "epoch": 4.056549072265625e-05, "model_forward_time": 0.025301456451416016, "step": 26585 }, { "epoch": 4.056549072265625e-05, "step": 26585, "training_step_time": 0.10627508163452148 }, { "epoch": 4.05670166015625e-05, "model_forward_time": 0.02558279037475586, "step": 26586 }, { "epoch": 4.05670166015625e-05, "step": 26586, "training_step_time": 0.10618305206298828 }, { "epoch": 4.056854248046875e-05, "model_forward_time": 0.025369644165039062, "step": 26587 }, { "epoch": 4.056854248046875e-05, "step": 26587, "training_step_time": 0.10348916053771973 }, { "epoch": 4.0570068359375e-05, "model_forward_time": 0.024962425231933594, "step": 26588 }, { "epoch": 4.0570068359375e-05, "step": 26588, "training_step_time": 0.11166763305664062 }, { "epoch": 4.057159423828125e-05, "model_forward_time": 0.02548956871032715, "step": 26589 }, { "epoch": 4.057159423828125e-05, "step": 26589, "training_step_time": 0.10599136352539062 }, { "epoch": 4.05731201171875e-05, "grad_norm": 0.34594714641571045, "learning_rate": 3.4909145259485744e-06, "loss": 0.0051, "step": 26590 }, { "epoch": 4.05731201171875e-05, "model_forward_time": 0.025170087814331055, "step": 26590 }, { "epoch": 4.05731201171875e-05, "step": 26590, "training_step_time": 0.10874271392822266 }, { "epoch": 4.057464599609375e-05, "model_forward_time": 0.025215864181518555, "step": 26591 }, { "epoch": 4.057464599609375e-05, "step": 26591, "training_step_time": 0.11495256423950195 }, { "epoch": 4.0576171875e-05, "model_forward_time": 0.025197505950927734, "step": 26592 }, { "epoch": 4.0576171875e-05, "step": 26592, "training_step_time": 0.18700385093688965 }, { "epoch": 4.057769775390625e-05, "model_forward_time": 0.02479720115661621, "step": 26593 }, { "epoch": 4.057769775390625e-05, "step": 26593, "training_step_time": 0.21073341369628906 }, { "epoch": 4.05792236328125e-05, "model_forward_time": 0.024804115295410156, "step": 26594 }, { "epoch": 4.05792236328125e-05, "step": 26594, "training_step_time": 0.20987486839294434 }, { "epoch": 4.058074951171875e-05, "model_forward_time": 0.024443387985229492, "step": 26595 }, { "epoch": 4.058074951171875e-05, "step": 26595, "training_step_time": 0.20466256141662598 }, { "epoch": 4.0582275390625e-05, "model_forward_time": 0.025411128997802734, "step": 26596 }, { "epoch": 4.0582275390625e-05, "step": 26596, "training_step_time": 0.20511174201965332 }, { "epoch": 4.058380126953125e-05, "model_forward_time": 0.025126218795776367, "step": 26597 }, { "epoch": 4.058380126953125e-05, "step": 26597, "training_step_time": 0.2215898036956787 }, { "epoch": 4.05853271484375e-05, "model_forward_time": 0.024886369705200195, "step": 26598 }, { "epoch": 4.05853271484375e-05, "step": 26598, "training_step_time": 0.16161108016967773 }, { "epoch": 4.058685302734375e-05, "model_forward_time": 0.025318384170532227, "step": 26599 }, { "epoch": 4.058685302734375e-05, "step": 26599, "training_step_time": 0.16342496871948242 }, { "epoch": 4.058837890625e-05, "grad_norm": 0.10978017747402191, "learning_rate": 3.470709859234084e-06, "loss": 0.0066, "step": 26600 }, { "epoch": 4.058837890625e-05, "model_forward_time": 0.024515628814697266, "step": 26600 }, { "epoch": 4.058837890625e-05, "step": 26600, "training_step_time": 0.1556835174560547 }, { "epoch": 4.058990478515625e-05, "model_forward_time": 0.023957252502441406, "step": 26601 }, { "epoch": 4.058990478515625e-05, "step": 26601, "training_step_time": 0.15072917938232422 }, { "epoch": 4.05914306640625e-05, "model_forward_time": 0.023457050323486328, "step": 26602 }, { "epoch": 4.05914306640625e-05, "step": 26602, "training_step_time": 0.15877771377563477 }, { "epoch": 4.059295654296875e-05, "model_forward_time": 0.02448248863220215, "step": 26603 }, { "epoch": 4.059295654296875e-05, "step": 26603, "training_step_time": 0.12450098991394043 }, { "epoch": 4.0594482421875e-05, "model_forward_time": 0.024402379989624023, "step": 26604 }, { "epoch": 4.0594482421875e-05, "step": 26604, "training_step_time": 0.16680693626403809 }, { "epoch": 4.059600830078125e-05, "model_forward_time": 0.0240628719329834, "step": 26605 }, { "epoch": 4.059600830078125e-05, "step": 26605, "training_step_time": 0.10515880584716797 }, { "epoch": 4.05975341796875e-05, "model_forward_time": 0.02437424659729004, "step": 26606 }, { "epoch": 4.05975341796875e-05, "step": 26606, "training_step_time": 0.10426831245422363 }, { "epoch": 4.059906005859375e-05, "model_forward_time": 0.024747371673583984, "step": 26607 }, { "epoch": 4.059906005859375e-05, "step": 26607, "training_step_time": 0.10286664962768555 }, { "epoch": 4.06005859375e-05, "model_forward_time": 0.025056123733520508, "step": 26608 }, { "epoch": 4.06005859375e-05, "step": 26608, "training_step_time": 0.10657477378845215 }, { "epoch": 4.060211181640625e-05, "model_forward_time": 0.02533125877380371, "step": 26609 }, { "epoch": 4.060211181640625e-05, "step": 26609, "training_step_time": 0.10516476631164551 }, { "epoch": 4.06036376953125e-05, "grad_norm": 0.05121985822916031, "learning_rate": 3.4505617299945336e-06, "loss": 0.0039, "step": 26610 }, { "epoch": 4.06036376953125e-05, "model_forward_time": 0.025144100189208984, "step": 26610 }, { "epoch": 4.06036376953125e-05, "step": 26610, "training_step_time": 0.18141984939575195 }, { "epoch": 4.060516357421875e-05, "model_forward_time": 0.024670839309692383, "step": 26611 }, { "epoch": 4.060516357421875e-05, "step": 26611, "training_step_time": 0.18514800071716309 }, { "epoch": 4.0606689453125e-05, "model_forward_time": 0.025215864181518555, "step": 26612 }, { "epoch": 4.0606689453125e-05, "step": 26612, "training_step_time": 0.18893837928771973 }, { "epoch": 4.060821533203125e-05, "model_forward_time": 0.024189472198486328, "step": 26613 }, { "epoch": 4.060821533203125e-05, "step": 26613, "training_step_time": 0.18543744087219238 }, { "epoch": 4.06097412109375e-05, "model_forward_time": 0.0242002010345459, "step": 26614 }, { "epoch": 4.06097412109375e-05, "step": 26614, "training_step_time": 0.17374396324157715 }, { "epoch": 4.061126708984375e-05, "model_forward_time": 0.025160789489746094, "step": 26615 }, { "epoch": 4.061126708984375e-05, "step": 26615, "training_step_time": 0.17132878303527832 }, { "epoch": 4.061279296875e-05, "model_forward_time": 0.024704694747924805, "step": 26616 }, { "epoch": 4.061279296875e-05, "step": 26616, "training_step_time": 0.1777970790863037 }, { "epoch": 4.061431884765625e-05, "model_forward_time": 0.02519369125366211, "step": 26617 }, { "epoch": 4.061431884765625e-05, "step": 26617, "training_step_time": 0.11552858352661133 }, { "epoch": 4.06158447265625e-05, "model_forward_time": 0.02439570426940918, "step": 26618 }, { "epoch": 4.06158447265625e-05, "step": 26618, "training_step_time": 0.1066274642944336 }, { "epoch": 4.061737060546875e-05, "model_forward_time": 0.025094985961914062, "step": 26619 }, { "epoch": 4.061737060546875e-05, "step": 26619, "training_step_time": 0.10495281219482422 }, { "epoch": 4.0618896484375e-05, "grad_norm": 0.1307193785905838, "learning_rate": 3.430470162711813e-06, "loss": 0.0073, "step": 26620 }, { "epoch": 4.0618896484375e-05, "model_forward_time": 0.02320241928100586, "step": 26620 }, { "epoch": 4.0618896484375e-05, "step": 26620, "training_step_time": 0.10384321212768555 }, { "epoch": 4.062042236328125e-05, "model_forward_time": 0.024834632873535156, "step": 26621 }, { "epoch": 4.062042236328125e-05, "step": 26621, "training_step_time": 0.12114906311035156 }, { "epoch": 4.06219482421875e-05, "model_forward_time": 0.025135278701782227, "step": 26622 }, { "epoch": 4.06219482421875e-05, "step": 26622, "training_step_time": 0.13491582870483398 }, { "epoch": 4.062347412109375e-05, "model_forward_time": 0.024900436401367188, "step": 26623 }, { "epoch": 4.062347412109375e-05, "step": 26623, "training_step_time": 0.13203811645507812 }, { "epoch": 4.0625e-05, "model_forward_time": 0.02464437484741211, "step": 26624 }, { "epoch": 4.0625e-05, "step": 26624, "training_step_time": 0.12476205825805664 }, { "epoch": 4.062652587890625e-05, "model_forward_time": 0.024642467498779297, "step": 26625 }, { "epoch": 4.062652587890625e-05, "step": 26625, "training_step_time": 0.12337183952331543 }, { "epoch": 4.06280517578125e-05, "model_forward_time": 0.025154829025268555, "step": 26626 }, { "epoch": 4.06280517578125e-05, "step": 26626, "training_step_time": 0.1196751594543457 }, { "epoch": 4.062957763671875e-05, "model_forward_time": 0.02513718605041504, "step": 26627 }, { "epoch": 4.062957763671875e-05, "step": 26627, "training_step_time": 0.11545872688293457 }, { "epoch": 4.0631103515625e-05, "model_forward_time": 0.025327205657958984, "step": 26628 }, { "epoch": 4.0631103515625e-05, "step": 26628, "training_step_time": 0.11024999618530273 }, { "epoch": 4.063262939453125e-05, "model_forward_time": 0.024974346160888672, "step": 26629 }, { "epoch": 4.063262939453125e-05, "step": 26629, "training_step_time": 0.10829448699951172 }, { "epoch": 4.06341552734375e-05, "grad_norm": 0.13951286673545837, "learning_rate": 3.41043518179906e-06, "loss": 0.0049, "step": 26630 }, { "epoch": 4.06341552734375e-05, "model_forward_time": 0.024405956268310547, "step": 26630 }, { "epoch": 4.06341552734375e-05, "step": 26630, "training_step_time": 0.11149811744689941 }, { "epoch": 4.063568115234375e-05, "model_forward_time": 0.024730443954467773, "step": 26631 }, { "epoch": 4.063568115234375e-05, "step": 26631, "training_step_time": 0.10738492012023926 }, { "epoch": 4.063720703125e-05, "model_forward_time": 0.025090932846069336, "step": 26632 }, { "epoch": 4.063720703125e-05, "step": 26632, "training_step_time": 0.10735487937927246 }, { "epoch": 4.063873291015625e-05, "model_forward_time": 0.025223970413208008, "step": 26633 }, { "epoch": 4.063873291015625e-05, "step": 26633, "training_step_time": 0.10879302024841309 }, { "epoch": 4.06402587890625e-05, "model_forward_time": 0.025603055953979492, "step": 26634 }, { "epoch": 4.06402587890625e-05, "step": 26634, "training_step_time": 0.19719862937927246 }, { "epoch": 4.064178466796875e-05, "model_forward_time": 0.024847030639648438, "step": 26635 }, { "epoch": 4.064178466796875e-05, "step": 26635, "training_step_time": 0.10843777656555176 }, { "epoch": 4.0643310546875e-05, "model_forward_time": 0.02484440803527832, "step": 26636 }, { "epoch": 4.0643310546875e-05, "step": 26636, "training_step_time": 0.11187100410461426 }, { "epoch": 4.064483642578125e-05, "model_forward_time": 0.025930166244506836, "step": 26637 }, { "epoch": 4.064483642578125e-05, "step": 26637, "training_step_time": 0.10788345336914062 }, { "epoch": 4.06463623046875e-05, "model_forward_time": 0.0254364013671875, "step": 26638 }, { "epoch": 4.06463623046875e-05, "step": 26638, "training_step_time": 0.14639043807983398 }, { "epoch": 4.064788818359375e-05, "model_forward_time": 0.025045156478881836, "step": 26639 }, { "epoch": 4.064788818359375e-05, "step": 26639, "training_step_time": 0.16502904891967773 }, { "epoch": 4.06494140625e-05, "grad_norm": 0.07353484630584717, "learning_rate": 3.390456811600673e-06, "loss": 0.0034, "step": 26640 }, { "epoch": 4.06494140625e-05, "model_forward_time": 0.027915239334106445, "step": 26640 }, { "epoch": 4.06494140625e-05, "step": 26640, "training_step_time": 0.13318920135498047 }, { "epoch": 4.065093994140625e-05, "model_forward_time": 0.025032997131347656, "step": 26641 }, { "epoch": 4.065093994140625e-05, "step": 26641, "training_step_time": 0.10764670372009277 }, { "epoch": 4.06524658203125e-05, "model_forward_time": 0.025531291961669922, "step": 26642 }, { "epoch": 4.06524658203125e-05, "step": 26642, "training_step_time": 0.19179439544677734 }, { "epoch": 4.065399169921875e-05, "model_forward_time": 0.02549004554748535, "step": 26643 }, { "epoch": 4.065399169921875e-05, "step": 26643, "training_step_time": 0.10766959190368652 }, { "epoch": 4.0655517578125e-05, "model_forward_time": 0.024964094161987305, "step": 26644 }, { "epoch": 4.0655517578125e-05, "step": 26644, "training_step_time": 0.10794544219970703 }, { "epoch": 4.065704345703125e-05, "model_forward_time": 0.024914264678955078, "step": 26645 }, { "epoch": 4.065704345703125e-05, "step": 26645, "training_step_time": 0.10828471183776855 }, { "epoch": 4.06585693359375e-05, "model_forward_time": 0.025846004486083984, "step": 26646 }, { "epoch": 4.06585693359375e-05, "step": 26646, "training_step_time": 0.17346644401550293 }, { "epoch": 4.066009521484375e-05, "model_forward_time": 0.02689337730407715, "step": 26647 }, { "epoch": 4.066009521484375e-05, "step": 26647, "training_step_time": 0.14132261276245117 }, { "epoch": 4.066162109375e-05, "model_forward_time": 0.024379968643188477, "step": 26648 }, { "epoch": 4.066162109375e-05, "step": 26648, "training_step_time": 0.11193156242370605 }, { "epoch": 4.066314697265625e-05, "model_forward_time": 0.025130510330200195, "step": 26649 }, { "epoch": 4.066314697265625e-05, "step": 26649, "training_step_time": 0.10616374015808105 }, { "epoch": 4.06646728515625e-05, "grad_norm": 0.0812462866306305, "learning_rate": 3.3705350763922562e-06, "loss": 0.0021, "step": 26650 }, { "epoch": 4.06646728515625e-05, "model_forward_time": 0.025546789169311523, "step": 26650 }, { "epoch": 4.06646728515625e-05, "step": 26650, "training_step_time": 0.10674285888671875 }, { "epoch": 4.066619873046875e-05, "model_forward_time": 0.025378942489624023, "step": 26651 }, { "epoch": 4.066619873046875e-05, "step": 26651, "training_step_time": 0.10825586318969727 }, { "epoch": 4.0667724609375e-05, "model_forward_time": 0.02554154396057129, "step": 26652 }, { "epoch": 4.0667724609375e-05, "step": 26652, "training_step_time": 0.10586810111999512 }, { "epoch": 4.066925048828125e-05, "model_forward_time": 0.02534031867980957, "step": 26653 }, { "epoch": 4.066925048828125e-05, "step": 26653, "training_step_time": 0.10802030563354492 }, { "epoch": 4.06707763671875e-05, "model_forward_time": 0.025092601776123047, "step": 26654 }, { "epoch": 4.06707763671875e-05, "step": 26654, "training_step_time": 0.10811805725097656 }, { "epoch": 4.067230224609375e-05, "model_forward_time": 0.025314807891845703, "step": 26655 }, { "epoch": 4.067230224609375e-05, "step": 26655, "training_step_time": 0.11012387275695801 }, { "epoch": 4.0673828125e-05, "model_forward_time": 0.02529430389404297, "step": 26656 }, { "epoch": 4.0673828125e-05, "step": 26656, "training_step_time": 0.10884690284729004 }, { "epoch": 4.067535400390625e-05, "model_forward_time": 0.02531886100769043, "step": 26657 }, { "epoch": 4.067535400390625e-05, "step": 26657, "training_step_time": 0.10961246490478516 }, { "epoch": 4.06768798828125e-05, "model_forward_time": 0.025769948959350586, "step": 26658 }, { "epoch": 4.06768798828125e-05, "step": 26658, "training_step_time": 0.15196943283081055 }, { "epoch": 4.067840576171875e-05, "model_forward_time": 0.024996042251586914, "step": 26659 }, { "epoch": 4.067840576171875e-05, "step": 26659, "training_step_time": 0.11664271354675293 }, { "epoch": 4.0679931640625e-05, "grad_norm": 0.08683722466230392, "learning_rate": 3.35067000038059e-06, "loss": 0.0084, "step": 26660 }, { "epoch": 4.0679931640625e-05, "model_forward_time": 0.024899721145629883, "step": 26660 }, { "epoch": 4.0679931640625e-05, "step": 26660, "training_step_time": 0.10618901252746582 }, { "epoch": 4.068145751953125e-05, "model_forward_time": 0.025148630142211914, "step": 26661 }, { "epoch": 4.068145751953125e-05, "step": 26661, "training_step_time": 0.10790419578552246 }, { "epoch": 4.06829833984375e-05, "model_forward_time": 0.025118350982666016, "step": 26662 }, { "epoch": 4.06829833984375e-05, "step": 26662, "training_step_time": 0.11268496513366699 }, { "epoch": 4.068450927734375e-05, "model_forward_time": 0.026336193084716797, "step": 26663 }, { "epoch": 4.068450927734375e-05, "step": 26663, "training_step_time": 0.11171507835388184 }, { "epoch": 4.068603515625e-05, "model_forward_time": 0.025574922561645508, "step": 26664 }, { "epoch": 4.068603515625e-05, "step": 26664, "training_step_time": 0.19835853576660156 }, { "epoch": 4.068756103515625e-05, "model_forward_time": 0.02395796775817871, "step": 26665 }, { "epoch": 4.068756103515625e-05, "step": 26665, "training_step_time": 0.10691976547241211 }, { "epoch": 4.06890869140625e-05, "model_forward_time": 0.024701595306396484, "step": 26666 }, { "epoch": 4.06890869140625e-05, "step": 26666, "training_step_time": 0.10498547554016113 }, { "epoch": 4.069061279296875e-05, "model_forward_time": 0.02523493766784668, "step": 26667 }, { "epoch": 4.069061279296875e-05, "step": 26667, "training_step_time": 0.10854887962341309 }, { "epoch": 4.0692138671875e-05, "model_forward_time": 0.02513575553894043, "step": 26668 }, { "epoch": 4.0692138671875e-05, "step": 26668, "training_step_time": 0.10857033729553223 }, { "epoch": 4.069366455078125e-05, "model_forward_time": 0.025070905685424805, "step": 26669 }, { "epoch": 4.069366455078125e-05, "step": 26669, "training_step_time": 0.10947585105895996 }, { "epoch": 4.06951904296875e-05, "grad_norm": 0.11693254113197327, "learning_rate": 3.3308616077036115e-06, "loss": 0.0033, "step": 26670 }, { "epoch": 4.06951904296875e-05, "model_forward_time": 0.02499222755432129, "step": 26670 }, { "epoch": 4.06951904296875e-05, "step": 26670, "training_step_time": 0.1063694953918457 }, { "epoch": 4.069671630859375e-05, "model_forward_time": 0.028609037399291992, "step": 26671 }, { "epoch": 4.069671630859375e-05, "step": 26671, "training_step_time": 0.10840559005737305 }, { "epoch": 4.06982421875e-05, "model_forward_time": 0.02524876594543457, "step": 26672 }, { "epoch": 4.06982421875e-05, "step": 26672, "training_step_time": 0.10749602317810059 }, { "epoch": 4.069976806640625e-05, "model_forward_time": 0.025388240814208984, "step": 26673 }, { "epoch": 4.069976806640625e-05, "step": 26673, "training_step_time": 0.10989093780517578 }, { "epoch": 4.07012939453125e-05, "model_forward_time": 0.024699926376342773, "step": 26674 }, { "epoch": 4.07012939453125e-05, "step": 26674, "training_step_time": 0.10926246643066406 }, { "epoch": 4.070281982421875e-05, "model_forward_time": 0.025566816329956055, "step": 26675 }, { "epoch": 4.070281982421875e-05, "step": 26675, "training_step_time": 0.11078023910522461 }, { "epoch": 4.0704345703125e-05, "model_forward_time": 0.025050878524780273, "step": 26676 }, { "epoch": 4.0704345703125e-05, "step": 26676, "training_step_time": 0.11076092720031738 }, { "epoch": 4.070587158203125e-05, "model_forward_time": 0.025349855422973633, "step": 26677 }, { "epoch": 4.070587158203125e-05, "step": 26677, "training_step_time": 0.10786747932434082 }, { "epoch": 4.07073974609375e-05, "model_forward_time": 0.02698349952697754, "step": 26678 }, { "epoch": 4.07073974609375e-05, "step": 26678, "training_step_time": 0.1774294376373291 }, { "epoch": 4.070892333984375e-05, "model_forward_time": 0.024695396423339844, "step": 26679 }, { "epoch": 4.070892333984375e-05, "step": 26679, "training_step_time": 0.20139288902282715 }, { "epoch": 4.071044921875e-05, "grad_norm": 0.14898306131362915, "learning_rate": 3.3111099224304e-06, "loss": 0.0061, "step": 26680 }, { "epoch": 4.071044921875e-05, "model_forward_time": 0.024250030517578125, "step": 26680 }, { "epoch": 4.071044921875e-05, "step": 26680, "training_step_time": 0.18573236465454102 }, { "epoch": 4.071197509765625e-05, "model_forward_time": 0.0242156982421875, "step": 26681 }, { "epoch": 4.071197509765625e-05, "step": 26681, "training_step_time": 0.16794943809509277 }, { "epoch": 4.07135009765625e-05, "model_forward_time": 0.024793386459350586, "step": 26682 }, { "epoch": 4.07135009765625e-05, "step": 26682, "training_step_time": 0.16138625144958496 }, { "epoch": 4.071502685546875e-05, "model_forward_time": 0.02394556999206543, "step": 26683 }, { "epoch": 4.071502685546875e-05, "step": 26683, "training_step_time": 0.1855635643005371 }, { "epoch": 4.0716552734375e-05, "model_forward_time": 0.025079011917114258, "step": 26684 }, { "epoch": 4.0716552734375e-05, "step": 26684, "training_step_time": 0.1862490177154541 }, { "epoch": 4.071807861328125e-05, "model_forward_time": 0.02399921417236328, "step": 26685 }, { "epoch": 4.071807861328125e-05, "step": 26685, "training_step_time": 0.13516879081726074 }, { "epoch": 4.07196044921875e-05, "model_forward_time": 0.02427983283996582, "step": 26686 }, { "epoch": 4.07196044921875e-05, "step": 26686, "training_step_time": 0.17879915237426758 }, { "epoch": 4.072113037109375e-05, "model_forward_time": 0.02459239959716797, "step": 26687 }, { "epoch": 4.072113037109375e-05, "step": 26687, "training_step_time": 0.14362812042236328 }, { "epoch": 4.072265625e-05, "model_forward_time": 0.02516913414001465, "step": 26688 }, { "epoch": 4.072265625e-05, "step": 26688, "training_step_time": 0.13523173332214355 }, { "epoch": 4.072418212890625e-05, "model_forward_time": 0.024919986724853516, "step": 26689 }, { "epoch": 4.072418212890625e-05, "step": 26689, "training_step_time": 0.19314074516296387 }, { "epoch": 4.07257080078125e-05, "grad_norm": 0.10780028998851776, "learning_rate": 3.2914149685611073e-06, "loss": 0.0052, "step": 26690 }, { "epoch": 4.07257080078125e-05, "model_forward_time": 0.026250839233398438, "step": 26690 }, { "epoch": 4.07257080078125e-05, "step": 26690, "training_step_time": 0.17038750648498535 }, { "epoch": 4.072723388671875e-05, "model_forward_time": 0.02421712875366211, "step": 26691 }, { "epoch": 4.072723388671875e-05, "step": 26691, "training_step_time": 0.10178661346435547 }, { "epoch": 4.0728759765625e-05, "model_forward_time": 0.024585723876953125, "step": 26692 }, { "epoch": 4.0728759765625e-05, "step": 26692, "training_step_time": 0.10254025459289551 }, { "epoch": 4.073028564453125e-05, "model_forward_time": 0.025119781494140625, "step": 26693 }, { "epoch": 4.073028564453125e-05, "step": 26693, "training_step_time": 0.10536932945251465 }, { "epoch": 4.07318115234375e-05, "model_forward_time": 0.02521491050720215, "step": 26694 }, { "epoch": 4.07318115234375e-05, "step": 26694, "training_step_time": 0.1072688102722168 }, { "epoch": 4.073333740234375e-05, "model_forward_time": 0.025226593017578125, "step": 26695 }, { "epoch": 4.073333740234375e-05, "step": 26695, "training_step_time": 0.10624980926513672 }, { "epoch": 4.073486328125e-05, "model_forward_time": 0.02539682388305664, "step": 26696 }, { "epoch": 4.073486328125e-05, "step": 26696, "training_step_time": 0.10838603973388672 }, { "epoch": 4.073638916015625e-05, "model_forward_time": 0.0249021053314209, "step": 26697 }, { "epoch": 4.073638916015625e-05, "step": 26697, "training_step_time": 0.11003398895263672 }, { "epoch": 4.07379150390625e-05, "model_forward_time": 0.025248050689697266, "step": 26698 }, { "epoch": 4.07379150390625e-05, "step": 26698, "training_step_time": 0.10663175582885742 }, { "epoch": 4.073944091796875e-05, "model_forward_time": 0.02561020851135254, "step": 26699 }, { "epoch": 4.073944091796875e-05, "step": 26699, "training_step_time": 0.10784435272216797 }, { "epoch": 4.0740966796875e-05, "grad_norm": 0.24510358273983002, "learning_rate": 3.271776770026963e-06, "loss": 0.0108, "step": 26700 }, { "epoch": 4.0740966796875e-05, "model_forward_time": 0.025061368942260742, "step": 26700 }, { "epoch": 4.0740966796875e-05, "step": 26700, "training_step_time": 0.10606741905212402 }, { "epoch": 4.074249267578125e-05, "model_forward_time": 0.025350332260131836, "step": 26701 }, { "epoch": 4.074249267578125e-05, "step": 26701, "training_step_time": 0.10470747947692871 }, { "epoch": 4.07440185546875e-05, "model_forward_time": 0.02533864974975586, "step": 26702 }, { "epoch": 4.07440185546875e-05, "step": 26702, "training_step_time": 0.11341714859008789 }, { "epoch": 4.074554443359375e-05, "model_forward_time": 0.025208711624145508, "step": 26703 }, { "epoch": 4.074554443359375e-05, "step": 26703, "training_step_time": 0.1367814540863037 }, { "epoch": 4.07470703125e-05, "model_forward_time": 0.025407075881958008, "step": 26704 }, { "epoch": 4.07470703125e-05, "step": 26704, "training_step_time": 0.11318778991699219 }, { "epoch": 4.074859619140625e-05, "model_forward_time": 0.02510857582092285, "step": 26705 }, { "epoch": 4.074859619140625e-05, "step": 26705, "training_step_time": 0.10726761817932129 }, { "epoch": 4.07501220703125e-05, "model_forward_time": 0.026613235473632812, "step": 26706 }, { "epoch": 4.07501220703125e-05, "step": 26706, "training_step_time": 0.11190485954284668 }, { "epoch": 4.075164794921875e-05, "model_forward_time": 0.02526402473449707, "step": 26707 }, { "epoch": 4.075164794921875e-05, "step": 26707, "training_step_time": 0.11483931541442871 }, { "epoch": 4.0753173828125e-05, "model_forward_time": 0.025832414627075195, "step": 26708 }, { "epoch": 4.0753173828125e-05, "step": 26708, "training_step_time": 0.1880347728729248 }, { "epoch": 4.075469970703125e-05, "model_forward_time": 0.024509668350219727, "step": 26709 }, { "epoch": 4.075469970703125e-05, "step": 26709, "training_step_time": 0.10419464111328125 }, { "epoch": 4.07562255859375e-05, "grad_norm": 0.22987960278987885, "learning_rate": 3.2521953506902237e-06, "loss": 0.0057, "step": 26710 }, { "epoch": 4.07562255859375e-05, "model_forward_time": 0.02458977699279785, "step": 26710 }, { "epoch": 4.07562255859375e-05, "step": 26710, "training_step_time": 0.10155248641967773 }, { "epoch": 4.075775146484375e-05, "model_forward_time": 0.02584075927734375, "step": 26711 }, { "epoch": 4.075775146484375e-05, "step": 26711, "training_step_time": 0.10361647605895996 }, { "epoch": 4.075927734375e-05, "model_forward_time": 0.025406599044799805, "step": 26712 }, { "epoch": 4.075927734375e-05, "step": 26712, "training_step_time": 0.10613656044006348 }, { "epoch": 4.076080322265625e-05, "model_forward_time": 0.025420188903808594, "step": 26713 }, { "epoch": 4.076080322265625e-05, "step": 26713, "training_step_time": 0.10767412185668945 }, { "epoch": 4.07623291015625e-05, "model_forward_time": 0.025574922561645508, "step": 26714 }, { "epoch": 4.07623291015625e-05, "step": 26714, "training_step_time": 0.180009126663208 }, { "epoch": 4.076385498046875e-05, "model_forward_time": 0.024447202682495117, "step": 26715 }, { "epoch": 4.076385498046875e-05, "step": 26715, "training_step_time": 0.19122052192687988 }, { "epoch": 4.0765380859375e-05, "model_forward_time": 0.024560928344726562, "step": 26716 }, { "epoch": 4.0765380859375e-05, "step": 26716, "training_step_time": 0.19205188751220703 }, { "epoch": 4.076690673828125e-05, "model_forward_time": 0.024631977081298828, "step": 26717 }, { "epoch": 4.076690673828125e-05, "step": 26717, "training_step_time": 0.19199252128601074 }, { "epoch": 4.07684326171875e-05, "model_forward_time": 0.024318695068359375, "step": 26718 }, { "epoch": 4.07684326171875e-05, "step": 26718, "training_step_time": 0.1804361343383789 }, { "epoch": 4.076995849609375e-05, "model_forward_time": 0.027651071548461914, "step": 26719 }, { "epoch": 4.076995849609375e-05, "step": 26719, "training_step_time": 0.15955328941345215 }, { "epoch": 4.0771484375e-05, "grad_norm": 0.08157902956008911, "learning_rate": 3.2326707343441566e-06, "loss": 0.0024, "step": 26720 }, { "epoch": 4.0771484375e-05, "model_forward_time": 0.024436235427856445, "step": 26720 }, { "epoch": 4.0771484375e-05, "step": 26720, "training_step_time": 0.14250731468200684 }, { "epoch": 4.077301025390625e-05, "model_forward_time": 0.024405479431152344, "step": 26721 }, { "epoch": 4.077301025390625e-05, "step": 26721, "training_step_time": 0.14423108100891113 }, { "epoch": 4.07745361328125e-05, "model_forward_time": 0.02418994903564453, "step": 26722 }, { "epoch": 4.07745361328125e-05, "step": 26722, "training_step_time": 0.16389107704162598 }, { "epoch": 4.077606201171875e-05, "model_forward_time": 0.025127172470092773, "step": 26723 }, { "epoch": 4.077606201171875e-05, "step": 26723, "training_step_time": 0.11426854133605957 }, { "epoch": 4.0777587890625e-05, "model_forward_time": 0.02463507652282715, "step": 26724 }, { "epoch": 4.0777587890625e-05, "step": 26724, "training_step_time": 0.141495943069458 }, { "epoch": 4.077911376953125e-05, "model_forward_time": 0.026463031768798828, "step": 26725 }, { "epoch": 4.077911376953125e-05, "step": 26725, "training_step_time": 0.15899109840393066 }, { "epoch": 4.07806396484375e-05, "model_forward_time": 0.024477720260620117, "step": 26726 }, { "epoch": 4.07806396484375e-05, "step": 26726, "training_step_time": 0.1990516185760498 }, { "epoch": 4.078216552734375e-05, "model_forward_time": 0.02467632293701172, "step": 26727 }, { "epoch": 4.078216552734375e-05, "step": 26727, "training_step_time": 0.15700173377990723 }, { "epoch": 4.078369140625e-05, "model_forward_time": 0.024187088012695312, "step": 26728 }, { "epoch": 4.078369140625e-05, "step": 26728, "training_step_time": 0.14815807342529297 }, { "epoch": 4.078521728515625e-05, "model_forward_time": 0.02472543716430664, "step": 26729 }, { "epoch": 4.078521728515625e-05, "step": 26729, "training_step_time": 0.13523554801940918 }, { "epoch": 4.07867431640625e-05, "grad_norm": 0.05186415836215019, "learning_rate": 3.213202944713023e-06, "loss": 0.0028, "step": 26730 }, { "epoch": 4.07867431640625e-05, "model_forward_time": 0.024548768997192383, "step": 26730 }, { "epoch": 4.07867431640625e-05, "step": 26730, "training_step_time": 0.12346673011779785 }, { "epoch": 4.078826904296875e-05, "model_forward_time": 0.025125503540039062, "step": 26731 }, { "epoch": 4.078826904296875e-05, "step": 26731, "training_step_time": 0.10602045059204102 }, { "epoch": 4.0789794921875e-05, "model_forward_time": 0.025434494018554688, "step": 26732 }, { "epoch": 4.0789794921875e-05, "step": 26732, "training_step_time": 0.10860824584960938 }, { "epoch": 4.079132080078125e-05, "model_forward_time": 0.024926185607910156, "step": 26733 }, { "epoch": 4.079132080078125e-05, "step": 26733, "training_step_time": 0.21651959419250488 }, { "epoch": 4.07928466796875e-05, "model_forward_time": 0.023987531661987305, "step": 26734 }, { "epoch": 4.07928466796875e-05, "step": 26734, "training_step_time": 0.19706368446350098 }, { "epoch": 4.079437255859375e-05, "model_forward_time": 0.02391672134399414, "step": 26735 }, { "epoch": 4.079437255859375e-05, "step": 26735, "training_step_time": 0.1784071922302246 }, { "epoch": 4.07958984375e-05, "model_forward_time": 0.024206876754760742, "step": 26736 }, { "epoch": 4.07958984375e-05, "step": 26736, "training_step_time": 0.17437005043029785 }, { "epoch": 4.079742431640625e-05, "model_forward_time": 0.025784730911254883, "step": 26737 }, { "epoch": 4.079742431640625e-05, "step": 26737, "training_step_time": 0.15831613540649414 }, { "epoch": 4.07989501953125e-05, "model_forward_time": 0.023953676223754883, "step": 26738 }, { "epoch": 4.07989501953125e-05, "step": 26738, "training_step_time": 0.14695072174072266 }, { "epoch": 4.080047607421875e-05, "model_forward_time": 0.023875951766967773, "step": 26739 }, { "epoch": 4.080047607421875e-05, "step": 26739, "training_step_time": 0.131239652633667 }, { "epoch": 4.0802001953125e-05, "grad_norm": 0.07744824141263962, "learning_rate": 3.193792005452018e-06, "loss": 0.0041, "step": 26740 }, { "epoch": 4.0802001953125e-05, "model_forward_time": 0.02471017837524414, "step": 26740 }, { "epoch": 4.0802001953125e-05, "step": 26740, "training_step_time": 0.12274861335754395 }, { "epoch": 4.080352783203125e-05, "model_forward_time": 0.02469348907470703, "step": 26741 }, { "epoch": 4.080352783203125e-05, "step": 26741, "training_step_time": 0.13964629173278809 }, { "epoch": 4.08050537109375e-05, "model_forward_time": 0.024678707122802734, "step": 26742 }, { "epoch": 4.08050537109375e-05, "step": 26742, "training_step_time": 0.13474655151367188 }, { "epoch": 4.080657958984375e-05, "model_forward_time": 0.02449202537536621, "step": 26743 }, { "epoch": 4.080657958984375e-05, "step": 26743, "training_step_time": 0.11082839965820312 }, { "epoch": 4.080810546875e-05, "model_forward_time": 0.025324583053588867, "step": 26744 }, { "epoch": 4.080810546875e-05, "step": 26744, "training_step_time": 0.10487627983093262 }, { "epoch": 4.080963134765625e-05, "model_forward_time": 0.02512812614440918, "step": 26745 }, { "epoch": 4.080963134765625e-05, "step": 26745, "training_step_time": 0.11162781715393066 }, { "epoch": 4.08111572265625e-05, "model_forward_time": 0.025099515914916992, "step": 26746 }, { "epoch": 4.08111572265625e-05, "step": 26746, "training_step_time": 0.10545039176940918 }, { "epoch": 4.081268310546875e-05, "model_forward_time": 0.025190353393554688, "step": 26747 }, { "epoch": 4.081268310546875e-05, "step": 26747, "training_step_time": 0.19527888298034668 }, { "epoch": 4.0814208984375e-05, "model_forward_time": 0.024199962615966797, "step": 26748 }, { "epoch": 4.0814208984375e-05, "step": 26748, "training_step_time": 0.10188746452331543 }, { "epoch": 4.081573486328125e-05, "model_forward_time": 0.024327754974365234, "step": 26749 }, { "epoch": 4.081573486328125e-05, "step": 26749, "training_step_time": 0.10188412666320801 }, { "epoch": 4.08172607421875e-05, "grad_norm": 0.14027544856071472, "learning_rate": 3.1744379401472677e-06, "loss": 0.0084, "step": 26750 }, { "epoch": 4.08172607421875e-05, "model_forward_time": 0.025066137313842773, "step": 26750 }, { "epoch": 4.08172607421875e-05, "step": 26750, "training_step_time": 0.10620760917663574 }, { "epoch": 4.081878662109375e-05, "model_forward_time": 0.025079727172851562, "step": 26751 }, { "epoch": 4.081878662109375e-05, "step": 26751, "training_step_time": 0.11285281181335449 }, { "epoch": 4.08203125e-05, "model_forward_time": 0.025304079055786133, "step": 26752 }, { "epoch": 4.08203125e-05, "step": 26752, "training_step_time": 0.10619235038757324 }, { "epoch": 4.082183837890625e-05, "model_forward_time": 0.024847984313964844, "step": 26753 }, { "epoch": 4.082183837890625e-05, "step": 26753, "training_step_time": 0.10709619522094727 }, { "epoch": 4.08233642578125e-05, "model_forward_time": 0.02524399757385254, "step": 26754 }, { "epoch": 4.08233642578125e-05, "step": 26754, "training_step_time": 0.1036221981048584 }, { "epoch": 4.082489013671875e-05, "model_forward_time": 0.025450468063354492, "step": 26755 }, { "epoch": 4.082489013671875e-05, "step": 26755, "training_step_time": 0.1078188419342041 }, { "epoch": 4.0826416015625e-05, "model_forward_time": 0.025645732879638672, "step": 26756 }, { "epoch": 4.0826416015625e-05, "step": 26756, "training_step_time": 0.10690975189208984 }, { "epoch": 4.082794189453125e-05, "model_forward_time": 0.02541065216064453, "step": 26757 }, { "epoch": 4.082794189453125e-05, "step": 26757, "training_step_time": 0.17798852920532227 }, { "epoch": 4.08294677734375e-05, "model_forward_time": 0.02449488639831543, "step": 26758 }, { "epoch": 4.08294677734375e-05, "step": 26758, "training_step_time": 0.18977570533752441 }, { "epoch": 4.083099365234375e-05, "model_forward_time": 0.026149272918701172, "step": 26759 }, { "epoch": 4.083099365234375e-05, "step": 26759, "training_step_time": 0.18576788902282715 }, { "epoch": 4.083251953125e-05, "grad_norm": 0.10226710885763168, "learning_rate": 3.155140772315773e-06, "loss": 0.0043, "step": 26760 }, { "epoch": 4.083251953125e-05, "model_forward_time": 0.024295806884765625, "step": 26760 }, { "epoch": 4.083251953125e-05, "step": 26760, "training_step_time": 0.17508864402770996 }, { "epoch": 4.083404541015625e-05, "model_forward_time": 0.024616003036499023, "step": 26761 }, { "epoch": 4.083404541015625e-05, "step": 26761, "training_step_time": 0.17728924751281738 }, { "epoch": 4.08355712890625e-05, "model_forward_time": 0.02429938316345215, "step": 26762 }, { "epoch": 4.08355712890625e-05, "step": 26762, "training_step_time": 0.1758437156677246 }, { "epoch": 4.083709716796875e-05, "model_forward_time": 0.02487659454345703, "step": 26763 }, { "epoch": 4.083709716796875e-05, "step": 26763, "training_step_time": 0.10165286064147949 }, { "epoch": 4.0838623046875e-05, "model_forward_time": 0.027752161026000977, "step": 26764 }, { "epoch": 4.0838623046875e-05, "step": 26764, "training_step_time": 0.14945292472839355 }, { "epoch": 4.084014892578125e-05, "model_forward_time": 0.024996519088745117, "step": 26765 }, { "epoch": 4.084014892578125e-05, "step": 26765, "training_step_time": 0.19479680061340332 }, { "epoch": 4.08416748046875e-05, "model_forward_time": 0.02440166473388672, "step": 26766 }, { "epoch": 4.08416748046875e-05, "step": 26766, "training_step_time": 0.206467866897583 }, { "epoch": 4.084320068359375e-05, "model_forward_time": 0.02463555335998535, "step": 26767 }, { "epoch": 4.084320068359375e-05, "step": 26767, "training_step_time": 0.1618201732635498 }, { "epoch": 4.08447265625e-05, "model_forward_time": 0.024322032928466797, "step": 26768 }, { "epoch": 4.08447265625e-05, "step": 26768, "training_step_time": 0.158919095993042 }, { "epoch": 4.084625244140625e-05, "model_forward_time": 0.024488449096679688, "step": 26769 }, { "epoch": 4.084625244140625e-05, "step": 26769, "training_step_time": 0.11172962188720703 }, { "epoch": 4.08477783203125e-05, "grad_norm": 0.09542731940746307, "learning_rate": 3.1359005254054273e-06, "loss": 0.0027, "step": 26770 }, { "epoch": 4.08477783203125e-05, "model_forward_time": 0.024994373321533203, "step": 26770 }, { "epoch": 4.08477783203125e-05, "step": 26770, "training_step_time": 0.17457294464111328 }, { "epoch": 4.084930419921875e-05, "model_forward_time": 0.02418994903564453, "step": 26771 }, { "epoch": 4.084930419921875e-05, "step": 26771, "training_step_time": 0.13950490951538086 }, { "epoch": 4.0850830078125e-05, "model_forward_time": 0.024289369583129883, "step": 26772 }, { "epoch": 4.0850830078125e-05, "step": 26772, "training_step_time": 0.11224365234375 }, { "epoch": 4.085235595703125e-05, "model_forward_time": 0.025055408477783203, "step": 26773 }, { "epoch": 4.085235595703125e-05, "step": 26773, "training_step_time": 0.10435986518859863 }, { "epoch": 4.08538818359375e-05, "model_forward_time": 0.02481532096862793, "step": 26774 }, { "epoch": 4.08538818359375e-05, "step": 26774, "training_step_time": 0.10432195663452148 }, { "epoch": 4.085540771484375e-05, "model_forward_time": 0.02518939971923828, "step": 26775 }, { "epoch": 4.085540771484375e-05, "step": 26775, "training_step_time": 0.10755133628845215 }, { "epoch": 4.085693359375e-05, "model_forward_time": 0.02528667449951172, "step": 26776 }, { "epoch": 4.085693359375e-05, "step": 26776, "training_step_time": 0.10510921478271484 }, { "epoch": 4.085845947265625e-05, "model_forward_time": 0.025311708450317383, "step": 26777 }, { "epoch": 4.085845947265625e-05, "step": 26777, "training_step_time": 0.10868144035339355 }, { "epoch": 4.08599853515625e-05, "model_forward_time": 0.025835514068603516, "step": 26778 }, { "epoch": 4.08599853515625e-05, "step": 26778, "training_step_time": 0.10531020164489746 }, { "epoch": 4.086151123046875e-05, "model_forward_time": 0.025369882583618164, "step": 26779 }, { "epoch": 4.086151123046875e-05, "step": 26779, "training_step_time": 0.1085515022277832 }, { "epoch": 4.0863037109375e-05, "grad_norm": 0.09529156982898712, "learning_rate": 3.1167172227949347e-06, "loss": 0.0045, "step": 26780 }, { "epoch": 4.0863037109375e-05, "model_forward_time": 0.025146007537841797, "step": 26780 }, { "epoch": 4.0863037109375e-05, "step": 26780, "training_step_time": 0.10555911064147949 }, { "epoch": 4.086456298828125e-05, "model_forward_time": 0.02534937858581543, "step": 26781 }, { "epoch": 4.086456298828125e-05, "step": 26781, "training_step_time": 0.10546731948852539 }, { "epoch": 4.08660888671875e-05, "model_forward_time": 0.025066614151000977, "step": 26782 }, { "epoch": 4.08660888671875e-05, "step": 26782, "training_step_time": 0.10415339469909668 }, { "epoch": 4.086761474609375e-05, "model_forward_time": 0.025035619735717773, "step": 26783 }, { "epoch": 4.086761474609375e-05, "step": 26783, "training_step_time": 0.10475659370422363 }, { "epoch": 4.0869140625e-05, "model_forward_time": 0.025317668914794922, "step": 26784 }, { "epoch": 4.0869140625e-05, "step": 26784, "training_step_time": 0.15226459503173828 }, { "epoch": 4.087066650390625e-05, "model_forward_time": 0.025641441345214844, "step": 26785 }, { "epoch": 4.087066650390625e-05, "step": 26785, "training_step_time": 0.11969327926635742 }, { "epoch": 4.08721923828125e-05, "model_forward_time": 0.02508997917175293, "step": 26786 }, { "epoch": 4.08721923828125e-05, "step": 26786, "training_step_time": 0.17935442924499512 }, { "epoch": 4.087371826171875e-05, "model_forward_time": 0.02457404136657715, "step": 26787 }, { "epoch": 4.087371826171875e-05, "step": 26787, "training_step_time": 0.17623567581176758 }, { "epoch": 4.0875244140625e-05, "model_forward_time": 0.02387523651123047, "step": 26788 }, { "epoch": 4.0875244140625e-05, "step": 26788, "training_step_time": 0.16893410682678223 }, { "epoch": 4.087677001953125e-05, "model_forward_time": 0.024147748947143555, "step": 26789 }, { "epoch": 4.087677001953125e-05, "step": 26789, "training_step_time": 0.11099481582641602 }, { "epoch": 4.08782958984375e-05, "grad_norm": 0.1236596554517746, "learning_rate": 3.0975908877938277e-06, "loss": 0.0068, "step": 26790 }, { "epoch": 4.08782958984375e-05, "model_forward_time": 0.024781465530395508, "step": 26790 }, { "epoch": 4.08782958984375e-05, "step": 26790, "training_step_time": 0.10897636413574219 }, { "epoch": 4.087982177734375e-05, "model_forward_time": 0.025019168853759766, "step": 26791 }, { "epoch": 4.087982177734375e-05, "step": 26791, "training_step_time": 0.11070489883422852 }, { "epoch": 4.088134765625e-05, "model_forward_time": 0.0256502628326416, "step": 26792 }, { "epoch": 4.088134765625e-05, "step": 26792, "training_step_time": 0.11133837699890137 }, { "epoch": 4.088287353515625e-05, "model_forward_time": 0.02516770362854004, "step": 26793 }, { "epoch": 4.088287353515625e-05, "step": 26793, "training_step_time": 0.10945343971252441 }, { "epoch": 4.08843994140625e-05, "model_forward_time": 0.025018692016601562, "step": 26794 }, { "epoch": 4.08843994140625e-05, "step": 26794, "training_step_time": 0.1097710132598877 }, { "epoch": 4.088592529296875e-05, "model_forward_time": 0.02643418312072754, "step": 26795 }, { "epoch": 4.088592529296875e-05, "step": 26795, "training_step_time": 0.1076805591583252 }, { "epoch": 4.0887451171875e-05, "model_forward_time": 0.025021076202392578, "step": 26796 }, { "epoch": 4.0887451171875e-05, "step": 26796, "training_step_time": 0.10661625862121582 }, { "epoch": 4.088897705078125e-05, "model_forward_time": 0.024930477142333984, "step": 26797 }, { "epoch": 4.088897705078125e-05, "step": 26797, "training_step_time": 0.10617876052856445 }, { "epoch": 4.08905029296875e-05, "model_forward_time": 0.025120973587036133, "step": 26798 }, { "epoch": 4.08905029296875e-05, "step": 26798, "training_step_time": 0.1139533519744873 }, { "epoch": 4.089202880859375e-05, "model_forward_time": 0.025290727615356445, "step": 26799 }, { "epoch": 4.089202880859375e-05, "step": 26799, "training_step_time": 0.10721564292907715 }, { "epoch": 4.08935546875e-05, "grad_norm": 0.07082542032003403, "learning_rate": 3.078521543642399e-06, "loss": 0.004, "step": 26800 }, { "epoch": 4.08935546875e-05, "model_forward_time": 0.02545785903930664, "step": 26800 }, { "epoch": 4.08935546875e-05, "step": 26800, "training_step_time": 0.10879826545715332 }, { "epoch": 4.089508056640625e-05, "model_forward_time": 0.025609493255615234, "step": 26801 }, { "epoch": 4.089508056640625e-05, "step": 26801, "training_step_time": 0.10764265060424805 }, { "epoch": 4.08966064453125e-05, "model_forward_time": 0.025213003158569336, "step": 26802 }, { "epoch": 4.08966064453125e-05, "step": 26802, "training_step_time": 0.10943818092346191 }, { "epoch": 4.089813232421875e-05, "model_forward_time": 0.025181293487548828, "step": 26803 }, { "epoch": 4.089813232421875e-05, "step": 26803, "training_step_time": 0.10697007179260254 }, { "epoch": 4.0899658203125e-05, "model_forward_time": 0.025313138961791992, "step": 26804 }, { "epoch": 4.0899658203125e-05, "step": 26804, "training_step_time": 0.10487627983093262 }, { "epoch": 4.090118408203125e-05, "model_forward_time": 0.025311946868896484, "step": 26805 }, { "epoch": 4.090118408203125e-05, "step": 26805, "training_step_time": 0.10634446144104004 }, { "epoch": 4.09027099609375e-05, "model_forward_time": 0.02514195442199707, "step": 26806 }, { "epoch": 4.09027099609375e-05, "step": 26806, "training_step_time": 0.10591602325439453 }, { "epoch": 4.090423583984375e-05, "model_forward_time": 0.024975299835205078, "step": 26807 }, { "epoch": 4.090423583984375e-05, "step": 26807, "training_step_time": 0.1493215560913086 }, { "epoch": 4.090576171875e-05, "model_forward_time": 0.02936553955078125, "step": 26808 }, { "epoch": 4.090576171875e-05, "step": 26808, "training_step_time": 0.10948824882507324 }, { "epoch": 4.090728759765625e-05, "model_forward_time": 0.02452254295349121, "step": 26809 }, { "epoch": 4.090728759765625e-05, "step": 26809, "training_step_time": 0.14825439453125 }, { "epoch": 4.09088134765625e-05, "grad_norm": 0.10529822111129761, "learning_rate": 3.059509213511702e-06, "loss": 0.0035, "step": 26810 }, { "epoch": 4.09088134765625e-05, "model_forward_time": 0.024562597274780273, "step": 26810 }, { "epoch": 4.09088134765625e-05, "step": 26810, "training_step_time": 0.150299072265625 }, { "epoch": 4.091033935546875e-05, "model_forward_time": 0.024527311325073242, "step": 26811 }, { "epoch": 4.091033935546875e-05, "step": 26811, "training_step_time": 0.18941593170166016 }, { "epoch": 4.0911865234375e-05, "model_forward_time": 0.024666547775268555, "step": 26812 }, { "epoch": 4.0911865234375e-05, "step": 26812, "training_step_time": 0.15288901329040527 }, { "epoch": 4.091339111328125e-05, "model_forward_time": 0.02435779571533203, "step": 26813 }, { "epoch": 4.091339111328125e-05, "step": 26813, "training_step_time": 0.1166837215423584 }, { "epoch": 4.09149169921875e-05, "model_forward_time": 0.024791479110717773, "step": 26814 }, { "epoch": 4.09149169921875e-05, "step": 26814, "training_step_time": 0.1302340030670166 }, { "epoch": 4.091644287109375e-05, "model_forward_time": 0.025376081466674805, "step": 26815 }, { "epoch": 4.091644287109375e-05, "step": 26815, "training_step_time": 0.10962843894958496 }, { "epoch": 4.091796875e-05, "model_forward_time": 0.02527165412902832, "step": 26816 }, { "epoch": 4.091796875e-05, "step": 26816, "training_step_time": 0.1553654670715332 }, { "epoch": 4.091949462890625e-05, "model_forward_time": 0.0246429443359375, "step": 26817 }, { "epoch": 4.091949462890625e-05, "step": 26817, "training_step_time": 0.14779233932495117 }, { "epoch": 4.09210205078125e-05, "model_forward_time": 0.024532318115234375, "step": 26818 }, { "epoch": 4.09210205078125e-05, "step": 26818, "training_step_time": 0.1265120506286621 }, { "epoch": 4.092254638671875e-05, "model_forward_time": 0.023479461669921875, "step": 26819 }, { "epoch": 4.092254638671875e-05, "step": 26819, "training_step_time": 0.12608742713928223 }, { "epoch": 4.0924072265625e-05, "grad_norm": 0.12813250720500946, "learning_rate": 3.040553920503503e-06, "loss": 0.0037, "step": 26820 }, { "epoch": 4.0924072265625e-05, "model_forward_time": 0.02372288703918457, "step": 26820 }, { "epoch": 4.0924072265625e-05, "step": 26820, "training_step_time": 0.12379789352416992 }, { "epoch": 4.092559814453125e-05, "model_forward_time": 0.023946046829223633, "step": 26821 }, { "epoch": 4.092559814453125e-05, "step": 26821, "training_step_time": 0.13062000274658203 }, { "epoch": 4.09271240234375e-05, "model_forward_time": 0.024103641510009766, "step": 26822 }, { "epoch": 4.09271240234375e-05, "step": 26822, "training_step_time": 0.12062263488769531 }, { "epoch": 4.092864990234375e-05, "model_forward_time": 0.023974180221557617, "step": 26823 }, { "epoch": 4.092864990234375e-05, "step": 26823, "training_step_time": 0.11594200134277344 }, { "epoch": 4.093017578125e-05, "model_forward_time": 0.024115800857543945, "step": 26824 }, { "epoch": 4.093017578125e-05, "step": 26824, "training_step_time": 0.11386752128601074 }, { "epoch": 4.093170166015625e-05, "model_forward_time": 0.02384805679321289, "step": 26825 }, { "epoch": 4.093170166015625e-05, "step": 26825, "training_step_time": 0.11443924903869629 }, { "epoch": 4.09332275390625e-05, "model_forward_time": 0.023669958114624023, "step": 26826 }, { "epoch": 4.09332275390625e-05, "step": 26826, "training_step_time": 0.11028933525085449 }, { "epoch": 4.093475341796875e-05, "model_forward_time": 0.024934053421020508, "step": 26827 }, { "epoch": 4.093475341796875e-05, "step": 26827, "training_step_time": 0.1070094108581543 }, { "epoch": 4.0936279296875e-05, "model_forward_time": 0.025259733200073242, "step": 26828 }, { "epoch": 4.0936279296875e-05, "step": 26828, "training_step_time": 0.13768768310546875 }, { "epoch": 4.093780517578125e-05, "model_forward_time": 0.025411367416381836, "step": 26829 }, { "epoch": 4.093780517578125e-05, "step": 26829, "training_step_time": 0.1508162021636963 }, { "epoch": 4.09393310546875e-05, "grad_norm": 0.0825091302394867, "learning_rate": 3.021655687650282e-06, "loss": 0.003, "step": 26830 }, { "epoch": 4.09393310546875e-05, "model_forward_time": 0.02431035041809082, "step": 26830 }, { "epoch": 4.09393310546875e-05, "step": 26830, "training_step_time": 0.14912962913513184 }, { "epoch": 4.094085693359375e-05, "model_forward_time": 0.024685382843017578, "step": 26831 }, { "epoch": 4.094085693359375e-05, "step": 26831, "training_step_time": 0.17029905319213867 }, { "epoch": 4.09423828125e-05, "model_forward_time": 0.0244143009185791, "step": 26832 }, { "epoch": 4.09423828125e-05, "step": 26832, "training_step_time": 0.10464024543762207 }, { "epoch": 4.094390869140625e-05, "model_forward_time": 0.024786949157714844, "step": 26833 }, { "epoch": 4.094390869140625e-05, "step": 26833, "training_step_time": 0.1954343318939209 }, { "epoch": 4.09454345703125e-05, "model_forward_time": 0.02444767951965332, "step": 26834 }, { "epoch": 4.09454345703125e-05, "step": 26834, "training_step_time": 0.103271484375 }, { "epoch": 4.094696044921875e-05, "model_forward_time": 0.02415156364440918, "step": 26835 }, { "epoch": 4.094696044921875e-05, "step": 26835, "training_step_time": 0.10453104972839355 }, { "epoch": 4.0948486328125e-05, "model_forward_time": 0.025792598724365234, "step": 26836 }, { "epoch": 4.0948486328125e-05, "step": 26836, "training_step_time": 0.10685515403747559 }, { "epoch": 4.095001220703125e-05, "model_forward_time": 0.025383710861206055, "step": 26837 }, { "epoch": 4.095001220703125e-05, "step": 26837, "training_step_time": 0.10424304008483887 }, { "epoch": 4.09515380859375e-05, "model_forward_time": 0.025351524353027344, "step": 26838 }, { "epoch": 4.09515380859375e-05, "step": 26838, "training_step_time": 0.10774588584899902 }, { "epoch": 4.095306396484375e-05, "model_forward_time": 0.024941205978393555, "step": 26839 }, { "epoch": 4.095306396484375e-05, "step": 26839, "training_step_time": 0.10336971282958984 }, { "epoch": 4.095458984375e-05, "grad_norm": 0.24770134687423706, "learning_rate": 3.0028145379151716e-06, "loss": 0.0088, "step": 26840 }, { "epoch": 4.095458984375e-05, "model_forward_time": 0.02497553825378418, "step": 26840 }, { "epoch": 4.095458984375e-05, "step": 26840, "training_step_time": 0.10447144508361816 }, { "epoch": 4.095611572265625e-05, "model_forward_time": 0.02541065216064453, "step": 26841 }, { "epoch": 4.095611572265625e-05, "step": 26841, "training_step_time": 0.10454964637756348 }, { "epoch": 4.09576416015625e-05, "model_forward_time": 0.02534174919128418, "step": 26842 }, { "epoch": 4.09576416015625e-05, "step": 26842, "training_step_time": 0.10695767402648926 }, { "epoch": 4.095916748046875e-05, "model_forward_time": 0.02536487579345703, "step": 26843 }, { "epoch": 4.095916748046875e-05, "step": 26843, "training_step_time": 0.1092684268951416 }, { "epoch": 4.0960693359375e-05, "model_forward_time": 0.026121854782104492, "step": 26844 }, { "epoch": 4.0960693359375e-05, "step": 26844, "training_step_time": 0.10672616958618164 }, { "epoch": 4.096221923828125e-05, "model_forward_time": 0.02857661247253418, "step": 26845 }, { "epoch": 4.096221923828125e-05, "step": 26845, "training_step_time": 0.10812854766845703 }, { "epoch": 4.09637451171875e-05, "model_forward_time": 0.02514338493347168, "step": 26846 }, { "epoch": 4.09637451171875e-05, "step": 26846, "training_step_time": 0.10379528999328613 }, { "epoch": 4.096527099609375e-05, "model_forward_time": 0.02532196044921875, "step": 26847 }, { "epoch": 4.096527099609375e-05, "step": 26847, "training_step_time": 0.1054384708404541 }, { "epoch": 4.0966796875e-05, "model_forward_time": 0.025258779525756836, "step": 26848 }, { "epoch": 4.0966796875e-05, "step": 26848, "training_step_time": 0.10576868057250977 }, { "epoch": 4.096832275390625e-05, "model_forward_time": 0.024927139282226562, "step": 26849 }, { "epoch": 4.096832275390625e-05, "step": 26849, "training_step_time": 0.10403323173522949 }, { "epoch": 4.09698486328125e-05, "grad_norm": 0.301150918006897, "learning_rate": 2.9840304941919415e-06, "loss": 0.0056, "step": 26850 }, { "epoch": 4.09698486328125e-05, "model_forward_time": 0.024908781051635742, "step": 26850 }, { "epoch": 4.09698486328125e-05, "step": 26850, "training_step_time": 0.10323572158813477 }, { "epoch": 4.097137451171875e-05, "model_forward_time": 0.02469468116760254, "step": 26851 }, { "epoch": 4.097137451171875e-05, "step": 26851, "training_step_time": 0.1034543514251709 }, { "epoch": 4.0972900390625e-05, "model_forward_time": 0.0252072811126709, "step": 26852 }, { "epoch": 4.0972900390625e-05, "step": 26852, "training_step_time": 0.14432215690612793 }, { "epoch": 4.097442626953125e-05, "model_forward_time": 0.02497243881225586, "step": 26853 }, { "epoch": 4.097442626953125e-05, "step": 26853, "training_step_time": 0.10369205474853516 }, { "epoch": 4.09759521484375e-05, "model_forward_time": 0.024309158325195312, "step": 26854 }, { "epoch": 4.09759521484375e-05, "step": 26854, "training_step_time": 0.18585491180419922 }, { "epoch": 4.097747802734375e-05, "model_forward_time": 0.02435755729675293, "step": 26855 }, { "epoch": 4.097747802734375e-05, "step": 26855, "training_step_time": 0.13795256614685059 }, { "epoch": 4.097900390625e-05, "model_forward_time": 0.02453303337097168, "step": 26856 }, { "epoch": 4.097900390625e-05, "step": 26856, "training_step_time": 0.11211752891540527 }, { "epoch": 4.098052978515625e-05, "model_forward_time": 0.025313377380371094, "step": 26857 }, { "epoch": 4.098052978515625e-05, "step": 26857, "training_step_time": 0.22170019149780273 }, { "epoch": 4.09820556640625e-05, "model_forward_time": 0.0244598388671875, "step": 26858 }, { "epoch": 4.09820556640625e-05, "step": 26858, "training_step_time": 0.11779308319091797 }, { "epoch": 4.098358154296875e-05, "model_forward_time": 0.024547100067138672, "step": 26859 }, { "epoch": 4.098358154296875e-05, "step": 26859, "training_step_time": 0.12204194068908691 }, { "epoch": 4.0985107421875e-05, "grad_norm": 0.07965141534805298, "learning_rate": 2.965303579304973e-06, "loss": 0.0042, "step": 26860 }, { "epoch": 4.0985107421875e-05, "model_forward_time": 0.024420976638793945, "step": 26860 }, { "epoch": 4.0985107421875e-05, "step": 26860, "training_step_time": 0.16338467597961426 }, { "epoch": 4.098663330078125e-05, "model_forward_time": 0.024096012115478516, "step": 26861 }, { "epoch": 4.098663330078125e-05, "step": 26861, "training_step_time": 0.2120952606201172 }, { "epoch": 4.09881591796875e-05, "model_forward_time": 0.024346590042114258, "step": 26862 }, { "epoch": 4.09881591796875e-05, "step": 26862, "training_step_time": 0.11568140983581543 }, { "epoch": 4.098968505859375e-05, "model_forward_time": 0.02457737922668457, "step": 26863 }, { "epoch": 4.098968505859375e-05, "step": 26863, "training_step_time": 0.10689544677734375 }, { "epoch": 4.09912109375e-05, "model_forward_time": 0.02512669563293457, "step": 26864 }, { "epoch": 4.09912109375e-05, "step": 26864, "training_step_time": 0.11184382438659668 }, { "epoch": 4.099273681640625e-05, "model_forward_time": 0.024976253509521484, "step": 26865 }, { "epoch": 4.099273681640625e-05, "step": 26865, "training_step_time": 0.10662627220153809 }, { "epoch": 4.09942626953125e-05, "model_forward_time": 0.025468826293945312, "step": 26866 }, { "epoch": 4.09942626953125e-05, "step": 26866, "training_step_time": 0.10573887825012207 }, { "epoch": 4.099578857421875e-05, "model_forward_time": 0.024996042251586914, "step": 26867 }, { "epoch": 4.099578857421875e-05, "step": 26867, "training_step_time": 0.10895824432373047 }, { "epoch": 4.0997314453125e-05, "model_forward_time": 0.025645017623901367, "step": 26868 }, { "epoch": 4.0997314453125e-05, "step": 26868, "training_step_time": 0.10598373413085938 }, { "epoch": 4.099884033203125e-05, "model_forward_time": 0.025200366973876953, "step": 26869 }, { "epoch": 4.099884033203125e-05, "step": 26869, "training_step_time": 0.10590648651123047 }, { "epoch": 4.10003662109375e-05, "grad_norm": 0.1878916472196579, "learning_rate": 2.946633816009242e-06, "loss": 0.0036, "step": 26870 }, { "epoch": 4.10003662109375e-05, "model_forward_time": 0.02509593963623047, "step": 26870 }, { "epoch": 4.10003662109375e-05, "step": 26870, "training_step_time": 0.1047048568725586 }, { "epoch": 4.100189208984375e-05, "model_forward_time": 0.0250699520111084, "step": 26871 }, { "epoch": 4.100189208984375e-05, "step": 26871, "training_step_time": 0.10566830635070801 }, { "epoch": 4.100341796875e-05, "model_forward_time": 0.025211334228515625, "step": 26872 }, { "epoch": 4.100341796875e-05, "step": 26872, "training_step_time": 0.10526847839355469 }, { "epoch": 4.100494384765625e-05, "model_forward_time": 0.024981021881103516, "step": 26873 }, { "epoch": 4.100494384765625e-05, "step": 26873, "training_step_time": 0.1922311782836914 }, { "epoch": 4.10064697265625e-05, "model_forward_time": 0.024867534637451172, "step": 26874 }, { "epoch": 4.10064697265625e-05, "step": 26874, "training_step_time": 0.11589479446411133 }, { "epoch": 4.100799560546875e-05, "model_forward_time": 0.024444103240966797, "step": 26875 }, { "epoch": 4.100799560546875e-05, "step": 26875, "training_step_time": 0.10891413688659668 }, { "epoch": 4.1009521484375e-05, "model_forward_time": 0.025258541107177734, "step": 26876 }, { "epoch": 4.1009521484375e-05, "step": 26876, "training_step_time": 0.1143946647644043 }, { "epoch": 4.101104736328125e-05, "model_forward_time": 0.025269746780395508, "step": 26877 }, { "epoch": 4.101104736328125e-05, "step": 26877, "training_step_time": 0.1162424087524414 }, { "epoch": 4.10125732421875e-05, "model_forward_time": 0.026995182037353516, "step": 26878 }, { "epoch": 4.10125732421875e-05, "step": 26878, "training_step_time": 0.1124117374420166 }, { "epoch": 4.101409912109375e-05, "model_forward_time": 0.024773597717285156, "step": 26879 }, { "epoch": 4.101409912109375e-05, "step": 26879, "training_step_time": 0.18625998497009277 }, { "epoch": 4.1015625e-05, "grad_norm": 0.2622108459472656, "learning_rate": 2.928021226990263e-06, "loss": 0.0053, "step": 26880 }, { "epoch": 4.1015625e-05, "model_forward_time": 0.024438142776489258, "step": 26880 }, { "epoch": 4.1015625e-05, "step": 26880, "training_step_time": 0.10392928123474121 }, { "epoch": 4.101715087890625e-05, "model_forward_time": 0.02428913116455078, "step": 26881 }, { "epoch": 4.101715087890625e-05, "step": 26881, "training_step_time": 0.10577392578125 }, { "epoch": 4.10186767578125e-05, "model_forward_time": 0.024899005889892578, "step": 26882 }, { "epoch": 4.10186767578125e-05, "step": 26882, "training_step_time": 0.1053929328918457 }, { "epoch": 4.102020263671875e-05, "model_forward_time": 0.02494668960571289, "step": 26883 }, { "epoch": 4.102020263671875e-05, "step": 26883, "training_step_time": 0.1086430549621582 }, { "epoch": 4.1021728515625e-05, "model_forward_time": 0.025395870208740234, "step": 26884 }, { "epoch": 4.1021728515625e-05, "step": 26884, "training_step_time": 0.10634088516235352 }, { "epoch": 4.102325439453125e-05, "model_forward_time": 0.025565624237060547, "step": 26885 }, { "epoch": 4.102325439453125e-05, "step": 26885, "training_step_time": 0.10760998725891113 }, { "epoch": 4.10247802734375e-05, "model_forward_time": 0.025504589080810547, "step": 26886 }, { "epoch": 4.10247802734375e-05, "step": 26886, "training_step_time": 0.10422182083129883 }, { "epoch": 4.102630615234375e-05, "model_forward_time": 0.02504587173461914, "step": 26887 }, { "epoch": 4.102630615234375e-05, "step": 26887, "training_step_time": 0.6076052188873291 }, { "epoch": 4.102783203125e-05, "model_forward_time": 0.023173809051513672, "step": 26888 }, { "epoch": 4.102783203125e-05, "step": 26888, "training_step_time": 0.09851264953613281 }, { "epoch": 4.102935791015625e-05, "model_forward_time": 0.024643898010253906, "step": 26889 }, { "epoch": 4.102935791015625e-05, "step": 26889, "training_step_time": 0.10322785377502441 }, { "epoch": 4.10308837890625e-05, "grad_norm": 0.08564291894435883, "learning_rate": 2.9094658348640945e-06, "loss": 0.005, "step": 26890 }, { "epoch": 4.10308837890625e-05, "model_forward_time": 0.025710105895996094, "step": 26890 }, { "epoch": 4.10308837890625e-05, "step": 26890, "training_step_time": 0.10444426536560059 }, { "epoch": 4.103240966796875e-05, "model_forward_time": 0.02441883087158203, "step": 26891 }, { "epoch": 4.103240966796875e-05, "step": 26891, "training_step_time": 0.11056947708129883 }, { "epoch": 4.1033935546875e-05, "model_forward_time": 0.025146007537841797, "step": 26892 }, { "epoch": 4.1033935546875e-05, "step": 26892, "training_step_time": 0.1063385009765625 }, { "epoch": 4.103546142578125e-05, "model_forward_time": 0.02554774284362793, "step": 26893 }, { "epoch": 4.103546142578125e-05, "step": 26893, "training_step_time": 0.10592961311340332 }, { "epoch": 4.10369873046875e-05, "model_forward_time": 0.026050567626953125, "step": 26894 }, { "epoch": 4.10369873046875e-05, "step": 26894, "training_step_time": 0.18458032608032227 }, { "epoch": 4.103851318359375e-05, "model_forward_time": 0.025455236434936523, "step": 26895 }, { "epoch": 4.103851318359375e-05, "step": 26895, "training_step_time": 0.11171698570251465 }, { "epoch": 4.10400390625e-05, "model_forward_time": 0.024263381958007812, "step": 26896 }, { "epoch": 4.10400390625e-05, "step": 26896, "training_step_time": 0.20047402381896973 }, { "epoch": 4.104156494140625e-05, "model_forward_time": 0.024469375610351562, "step": 26897 }, { "epoch": 4.104156494140625e-05, "step": 26897, "training_step_time": 0.14942646026611328 }, { "epoch": 4.10430908203125e-05, "model_forward_time": 0.02429485321044922, "step": 26898 }, { "epoch": 4.10430908203125e-05, "step": 26898, "training_step_time": 0.2046360969543457 }, { "epoch": 4.104461669921875e-05, "model_forward_time": 0.02417445182800293, "step": 26899 }, { "epoch": 4.104461669921875e-05, "step": 26899, "training_step_time": 0.19893884658813477 }, { "epoch": 4.1046142578125e-05, "grad_norm": 0.12696179747581482, "learning_rate": 2.890967662177285e-06, "loss": 0.0037, "step": 26900 }, { "epoch": 4.1046142578125e-05, "model_forward_time": 0.024502992630004883, "step": 26900 }, { "epoch": 4.1046142578125e-05, "step": 26900, "training_step_time": 0.15789294242858887 }, { "epoch": 4.104766845703125e-05, "model_forward_time": 0.024645090103149414, "step": 26901 }, { "epoch": 4.104766845703125e-05, "step": 26901, "training_step_time": 0.15793752670288086 }, { "epoch": 4.10491943359375e-05, "model_forward_time": 0.024790287017822266, "step": 26902 }, { "epoch": 4.10491943359375e-05, "step": 26902, "training_step_time": 0.2149806022644043 }, { "epoch": 4.105072021484375e-05, "model_forward_time": 0.024631977081298828, "step": 26903 }, { "epoch": 4.105072021484375e-05, "step": 26903, "training_step_time": 0.11391448974609375 }, { "epoch": 4.105224609375e-05, "model_forward_time": 0.024395227432250977, "step": 26904 }, { "epoch": 4.105224609375e-05, "step": 26904, "training_step_time": 0.105194091796875 }, { "epoch": 4.105377197265625e-05, "model_forward_time": 0.02494502067565918, "step": 26905 }, { "epoch": 4.105377197265625e-05, "step": 26905, "training_step_time": 0.1076509952545166 }, { "epoch": 4.10552978515625e-05, "model_forward_time": 0.02440953254699707, "step": 26906 }, { "epoch": 4.10552978515625e-05, "step": 26906, "training_step_time": 0.1060950756072998 }, { "epoch": 4.105682373046875e-05, "model_forward_time": 0.023839712142944336, "step": 26907 }, { "epoch": 4.105682373046875e-05, "step": 26907, "training_step_time": 0.10770535469055176 }, { "epoch": 4.1058349609375e-05, "model_forward_time": 0.025078773498535156, "step": 26908 }, { "epoch": 4.1058349609375e-05, "step": 26908, "training_step_time": 0.10463571548461914 }, { "epoch": 4.105987548828125e-05, "model_forward_time": 0.024875402450561523, "step": 26909 }, { "epoch": 4.105987548828125e-05, "step": 26909, "training_step_time": 0.1064152717590332 }, { "epoch": 4.10614013671875e-05, "grad_norm": 0.039222050458192825, "learning_rate": 2.8725267314068495e-06, "loss": 0.0072, "step": 26910 }, { "epoch": 4.10614013671875e-05, "model_forward_time": 0.025716781616210938, "step": 26910 }, { "epoch": 4.10614013671875e-05, "step": 26910, "training_step_time": 0.10561347007751465 }, { "epoch": 4.106292724609375e-05, "model_forward_time": 0.02525949478149414, "step": 26911 }, { "epoch": 4.106292724609375e-05, "step": 26911, "training_step_time": 0.10338473320007324 }, { "epoch": 4.1064453125e-05, "model_forward_time": 0.02485823631286621, "step": 26912 }, { "epoch": 4.1064453125e-05, "step": 26912, "training_step_time": 0.10731053352355957 }, { "epoch": 4.106597900390625e-05, "model_forward_time": 0.0251772403717041, "step": 26913 }, { "epoch": 4.106597900390625e-05, "step": 26913, "training_step_time": 0.10478639602661133 }, { "epoch": 4.10675048828125e-05, "model_forward_time": 0.02533698081970215, "step": 26914 }, { "epoch": 4.10675048828125e-05, "step": 26914, "training_step_time": 0.19304704666137695 }, { "epoch": 4.106903076171875e-05, "model_forward_time": 0.02449202537536621, "step": 26915 }, { "epoch": 4.106903076171875e-05, "step": 26915, "training_step_time": 0.14485764503479004 }, { "epoch": 4.1070556640625e-05, "model_forward_time": 0.024268388748168945, "step": 26916 }, { "epoch": 4.1070556640625e-05, "step": 26916, "training_step_time": 0.10529422760009766 }, { "epoch": 4.107208251953125e-05, "model_forward_time": 0.0252840518951416, "step": 26917 }, { "epoch": 4.107208251953125e-05, "step": 26917, "training_step_time": 0.10695004463195801 }, { "epoch": 4.10736083984375e-05, "model_forward_time": 0.025008440017700195, "step": 26918 }, { "epoch": 4.10736083984375e-05, "step": 26918, "training_step_time": 0.1177053451538086 }, { "epoch": 4.107513427734375e-05, "model_forward_time": 0.025182723999023438, "step": 26919 }, { "epoch": 4.107513427734375e-05, "step": 26919, "training_step_time": 0.10768604278564453 }, { "epoch": 4.107666015625e-05, "grad_norm": 0.31678056716918945, "learning_rate": 2.854143064960274e-06, "loss": 0.0069, "step": 26920 }, { "epoch": 4.107666015625e-05, "model_forward_time": 0.025304794311523438, "step": 26920 }, { "epoch": 4.107666015625e-05, "step": 26920, "training_step_time": 0.18750691413879395 }, { "epoch": 4.107818603515625e-05, "model_forward_time": 0.02489495277404785, "step": 26921 }, { "epoch": 4.107818603515625e-05, "step": 26921, "training_step_time": 0.10476827621459961 }, { "epoch": 4.10797119140625e-05, "model_forward_time": 0.024906635284423828, "step": 26922 }, { "epoch": 4.10797119140625e-05, "step": 26922, "training_step_time": 0.10395693778991699 }, { "epoch": 4.108123779296875e-05, "model_forward_time": 0.02497720718383789, "step": 26923 }, { "epoch": 4.108123779296875e-05, "step": 26923, "training_step_time": 0.10654187202453613 }, { "epoch": 4.1082763671875e-05, "model_forward_time": 0.025064945220947266, "step": 26924 }, { "epoch": 4.1082763671875e-05, "step": 26924, "training_step_time": 0.10451483726501465 }, { "epoch": 4.108428955078125e-05, "model_forward_time": 0.02535533905029297, "step": 26925 }, { "epoch": 4.108428955078125e-05, "step": 26925, "training_step_time": 0.10527586936950684 }, { "epoch": 4.10858154296875e-05, "model_forward_time": 0.02514171600341797, "step": 26926 }, { "epoch": 4.10858154296875e-05, "step": 26926, "training_step_time": 0.10570073127746582 }, { "epoch": 4.108734130859375e-05, "model_forward_time": 0.025611162185668945, "step": 26927 }, { "epoch": 4.108734130859375e-05, "step": 26927, "training_step_time": 0.10526347160339355 }, { "epoch": 4.10888671875e-05, "model_forward_time": 0.025429487228393555, "step": 26928 }, { "epoch": 4.10888671875e-05, "step": 26928, "training_step_time": 0.10625648498535156 }, { "epoch": 4.109039306640625e-05, "model_forward_time": 0.025321006774902344, "step": 26929 }, { "epoch": 4.109039306640625e-05, "step": 26929, "training_step_time": 0.10836553573608398 }, { "epoch": 4.10919189453125e-05, "grad_norm": 0.11299989372491837, "learning_rate": 2.8358166851754297e-06, "loss": 0.0039, "step": 26930 }, { "epoch": 4.10919189453125e-05, "model_forward_time": 0.02521347999572754, "step": 26930 }, { "epoch": 4.10919189453125e-05, "step": 26930, "training_step_time": 0.10728716850280762 }, { "epoch": 4.109344482421875e-05, "model_forward_time": 0.025056123733520508, "step": 26931 }, { "epoch": 4.109344482421875e-05, "step": 26931, "training_step_time": 0.10994529724121094 }, { "epoch": 4.1094970703125e-05, "model_forward_time": 0.02533578872680664, "step": 26932 }, { "epoch": 4.1094970703125e-05, "step": 26932, "training_step_time": 0.10656118392944336 }, { "epoch": 4.109649658203125e-05, "model_forward_time": 0.025205373764038086, "step": 26933 }, { "epoch": 4.109649658203125e-05, "step": 26933, "training_step_time": 0.10460543632507324 }, { "epoch": 4.10980224609375e-05, "model_forward_time": 0.025433063507080078, "step": 26934 }, { "epoch": 4.10980224609375e-05, "step": 26934, "training_step_time": 0.1058189868927002 }, { "epoch": 4.109954833984375e-05, "model_forward_time": 0.02487468719482422, "step": 26935 }, { "epoch": 4.109954833984375e-05, "step": 26935, "training_step_time": 0.10622310638427734 }, { "epoch": 4.110107421875e-05, "model_forward_time": 0.025209665298461914, "step": 26936 }, { "epoch": 4.110107421875e-05, "step": 26936, "training_step_time": 0.10853171348571777 }, { "epoch": 4.110260009765625e-05, "model_forward_time": 0.025269031524658203, "step": 26937 }, { "epoch": 4.110260009765625e-05, "step": 26937, "training_step_time": 0.10754728317260742 }, { "epoch": 4.11041259765625e-05, "model_forward_time": 0.024928569793701172, "step": 26938 }, { "epoch": 4.11041259765625e-05, "step": 26938, "training_step_time": 0.10909342765808105 }, { "epoch": 4.110565185546875e-05, "model_forward_time": 0.025156259536743164, "step": 26939 }, { "epoch": 4.110565185546875e-05, "step": 26939, "training_step_time": 0.197983980178833 }, { "epoch": 4.1107177734375e-05, "grad_norm": 0.32797518372535706, "learning_rate": 2.817547614320615e-06, "loss": 0.0135, "step": 26940 }, { "epoch": 4.1107177734375e-05, "model_forward_time": 0.02484917640686035, "step": 26940 }, { "epoch": 4.1107177734375e-05, "step": 26940, "training_step_time": 0.10427689552307129 }, { "epoch": 4.110870361328125e-05, "model_forward_time": 0.024303674697875977, "step": 26941 }, { "epoch": 4.110870361328125e-05, "step": 26941, "training_step_time": 0.2096545696258545 }, { "epoch": 4.11102294921875e-05, "model_forward_time": 0.024344205856323242, "step": 26942 }, { "epoch": 4.11102294921875e-05, "step": 26942, "training_step_time": 0.12908577919006348 }, { "epoch": 4.111175537109375e-05, "model_forward_time": 0.024187564849853516, "step": 26943 }, { "epoch": 4.111175537109375e-05, "step": 26943, "training_step_time": 0.20830965042114258 }, { "epoch": 4.111328125e-05, "model_forward_time": 0.027543067932128906, "step": 26944 }, { "epoch": 4.111328125e-05, "step": 26944, "training_step_time": 0.18845725059509277 }, { "epoch": 4.111480712890625e-05, "model_forward_time": 0.024318695068359375, "step": 26945 }, { "epoch": 4.111480712890625e-05, "step": 26945, "training_step_time": 0.1826949119567871 }, { "epoch": 4.11163330078125e-05, "model_forward_time": 0.024022579193115234, "step": 26946 }, { "epoch": 4.11163330078125e-05, "step": 26946, "training_step_time": 0.15043330192565918 }, { "epoch": 4.111785888671875e-05, "model_forward_time": 0.024137020111083984, "step": 26947 }, { "epoch": 4.111785888671875e-05, "step": 26947, "training_step_time": 0.1702404022216797 }, { "epoch": 4.1119384765625e-05, "model_forward_time": 0.024047374725341797, "step": 26948 }, { "epoch": 4.1119384765625e-05, "step": 26948, "training_step_time": 0.16971158981323242 }, { "epoch": 4.112091064453125e-05, "model_forward_time": 0.024654626846313477, "step": 26949 }, { "epoch": 4.112091064453125e-05, "step": 26949, "training_step_time": 0.10151839256286621 }, { "epoch": 4.11224365234375e-05, "grad_norm": 0.06291752308607101, "learning_rate": 2.7993358745944608e-06, "loss": 0.0071, "step": 26950 }, { "epoch": 4.11224365234375e-05, "model_forward_time": 0.024881362915039062, "step": 26950 }, { "epoch": 4.11224365234375e-05, "step": 26950, "training_step_time": 0.10083627700805664 }, { "epoch": 4.112396240234375e-05, "model_forward_time": 0.025052309036254883, "step": 26951 }, { "epoch": 4.112396240234375e-05, "step": 26951, "training_step_time": 0.10433268547058105 }, { "epoch": 4.112548828125e-05, "model_forward_time": 0.025424718856811523, "step": 26952 }, { "epoch": 4.112548828125e-05, "step": 26952, "training_step_time": 0.10338091850280762 }, { "epoch": 4.112701416015625e-05, "model_forward_time": 0.02649664878845215, "step": 26953 }, { "epoch": 4.112701416015625e-05, "step": 26953, "training_step_time": 0.10530352592468262 }, { "epoch": 4.11285400390625e-05, "model_forward_time": 0.02490830421447754, "step": 26954 }, { "epoch": 4.11285400390625e-05, "step": 26954, "training_step_time": 0.10238313674926758 }, { "epoch": 4.113006591796875e-05, "model_forward_time": 0.024704456329345703, "step": 26955 }, { "epoch": 4.113006591796875e-05, "step": 26955, "training_step_time": 0.10378575325012207 }, { "epoch": 4.1131591796875e-05, "model_forward_time": 0.02493453025817871, "step": 26956 }, { "epoch": 4.1131591796875e-05, "step": 26956, "training_step_time": 0.10528993606567383 }, { "epoch": 4.113311767578125e-05, "model_forward_time": 0.0252382755279541, "step": 26957 }, { "epoch": 4.113311767578125e-05, "step": 26957, "training_step_time": 0.10595273971557617 }, { "epoch": 4.11346435546875e-05, "model_forward_time": 0.02518177032470703, "step": 26958 }, { "epoch": 4.11346435546875e-05, "step": 26958, "training_step_time": 0.10848474502563477 }, { "epoch": 4.113616943359375e-05, "model_forward_time": 0.025376081466674805, "step": 26959 }, { "epoch": 4.113616943359375e-05, "step": 26959, "training_step_time": 0.10443806648254395 }, { "epoch": 4.11376953125e-05, "grad_norm": 0.04742514714598656, "learning_rate": 2.7811814881259503e-06, "loss": 0.0037, "step": 26960 }, { "epoch": 4.11376953125e-05, "model_forward_time": 0.024628639221191406, "step": 26960 }, { "epoch": 4.11376953125e-05, "step": 26960, "training_step_time": 0.13226556777954102 }, { "epoch": 4.113922119140625e-05, "model_forward_time": 0.024543046951293945, "step": 26961 }, { "epoch": 4.113922119140625e-05, "step": 26961, "training_step_time": 0.14272332191467285 }, { "epoch": 4.11407470703125e-05, "model_forward_time": 0.024466276168823242, "step": 26962 }, { "epoch": 4.11407470703125e-05, "step": 26962, "training_step_time": 0.1117103099822998 }, { "epoch": 4.114227294921875e-05, "model_forward_time": 0.02486729621887207, "step": 26963 }, { "epoch": 4.114227294921875e-05, "step": 26963, "training_step_time": 0.10391831398010254 }, { "epoch": 4.1143798828125e-05, "model_forward_time": 0.024837255477905273, "step": 26964 }, { "epoch": 4.1143798828125e-05, "step": 26964, "training_step_time": 0.1153707504272461 }, { "epoch": 4.114532470703125e-05, "model_forward_time": 0.024841785430908203, "step": 26965 }, { "epoch": 4.114532470703125e-05, "step": 26965, "training_step_time": 0.18411588668823242 }, { "epoch": 4.11468505859375e-05, "model_forward_time": 0.025398969650268555, "step": 26966 }, { "epoch": 4.11468505859375e-05, "step": 26966, "training_step_time": 0.20819902420043945 }, { "epoch": 4.114837646484375e-05, "model_forward_time": 0.02320408821105957, "step": 26967 }, { "epoch": 4.114837646484375e-05, "step": 26967, "training_step_time": 0.2066655158996582 }, { "epoch": 4.114990234375e-05, "model_forward_time": 0.02342510223388672, "step": 26968 }, { "epoch": 4.114990234375e-05, "step": 26968, "training_step_time": 0.19957256317138672 }, { "epoch": 4.115142822265625e-05, "model_forward_time": 0.023195266723632812, "step": 26969 }, { "epoch": 4.115142822265625e-05, "step": 26969, "training_step_time": 0.1895458698272705 }, { "epoch": 4.11529541015625e-05, "grad_norm": 0.047411367297172546, "learning_rate": 2.7630844769743757e-06, "loss": 0.0075, "step": 26970 }, { "epoch": 4.11529541015625e-05, "model_forward_time": 0.02340531349182129, "step": 26970 }, { "epoch": 4.11529541015625e-05, "step": 26970, "training_step_time": 0.18276381492614746 }, { "epoch": 4.115447998046875e-05, "model_forward_time": 0.024413108825683594, "step": 26971 }, { "epoch": 4.115447998046875e-05, "step": 26971, "training_step_time": 0.1675243377685547 }, { "epoch": 4.1156005859375e-05, "model_forward_time": 0.023451805114746094, "step": 26972 }, { "epoch": 4.1156005859375e-05, "step": 26972, "training_step_time": 0.14309310913085938 }, { "epoch": 4.115753173828125e-05, "model_forward_time": 0.024585723876953125, "step": 26973 }, { "epoch": 4.115753173828125e-05, "step": 26973, "training_step_time": 0.14286398887634277 }, { "epoch": 4.11590576171875e-05, "model_forward_time": 0.024394989013671875, "step": 26974 }, { "epoch": 4.11590576171875e-05, "step": 26974, "training_step_time": 0.13617897033691406 }, { "epoch": 4.116058349609375e-05, "model_forward_time": 0.024253129959106445, "step": 26975 }, { "epoch": 4.116058349609375e-05, "step": 26975, "training_step_time": 0.1284773349761963 }, { "epoch": 4.1162109375e-05, "model_forward_time": 0.02426624298095703, "step": 26976 }, { "epoch": 4.1162109375e-05, "step": 26976, "training_step_time": 0.10826921463012695 }, { "epoch": 4.116363525390625e-05, "model_forward_time": 0.02511119842529297, "step": 26977 }, { "epoch": 4.116363525390625e-05, "step": 26977, "training_step_time": 0.11243176460266113 }, { "epoch": 4.11651611328125e-05, "model_forward_time": 0.02518939971923828, "step": 26978 }, { "epoch": 4.11651611328125e-05, "step": 26978, "training_step_time": 0.10252261161804199 }, { "epoch": 4.116668701171875e-05, "model_forward_time": 0.024952411651611328, "step": 26979 }, { "epoch": 4.116668701171875e-05, "step": 26979, "training_step_time": 0.10800957679748535 }, { "epoch": 4.1168212890625e-05, "grad_norm": 0.042207036167383194, "learning_rate": 2.7450448631293036e-06, "loss": 0.0021, "step": 26980 }, { "epoch": 4.1168212890625e-05, "model_forward_time": 0.025092601776123047, "step": 26980 }, { "epoch": 4.1168212890625e-05, "step": 26980, "training_step_time": 0.13275837898254395 }, { "epoch": 4.116973876953125e-05, "model_forward_time": 0.025404930114746094, "step": 26981 }, { "epoch": 4.116973876953125e-05, "step": 26981, "training_step_time": 0.1043252944946289 }, { "epoch": 4.11712646484375e-05, "model_forward_time": 0.024759292602539062, "step": 26982 }, { "epoch": 4.11712646484375e-05, "step": 26982, "training_step_time": 0.14568853378295898 }, { "epoch": 4.117279052734375e-05, "model_forward_time": 0.024811983108520508, "step": 26983 }, { "epoch": 4.117279052734375e-05, "step": 26983, "training_step_time": 0.16151905059814453 }, { "epoch": 4.117431640625e-05, "model_forward_time": 0.02485489845275879, "step": 26984 }, { "epoch": 4.117431640625e-05, "step": 26984, "training_step_time": 0.11294341087341309 }, { "epoch": 4.117584228515625e-05, "model_forward_time": 0.027725934982299805, "step": 26985 }, { "epoch": 4.117584228515625e-05, "step": 26985, "training_step_time": 0.18981504440307617 }, { "epoch": 4.11773681640625e-05, "model_forward_time": 0.024152278900146484, "step": 26986 }, { "epoch": 4.11773681640625e-05, "step": 26986, "training_step_time": 0.13902640342712402 }, { "epoch": 4.117889404296875e-05, "model_forward_time": 0.02409815788269043, "step": 26987 }, { "epoch": 4.117889404296875e-05, "step": 26987, "training_step_time": 0.10868501663208008 }, { "epoch": 4.1180419921875e-05, "model_forward_time": 0.0255281925201416, "step": 26988 }, { "epoch": 4.1180419921875e-05, "step": 26988, "training_step_time": 0.15943145751953125 }, { "epoch": 4.118194580078125e-05, "model_forward_time": 0.024147510528564453, "step": 26989 }, { "epoch": 4.118194580078125e-05, "step": 26989, "training_step_time": 0.21471047401428223 }, { "epoch": 4.11834716796875e-05, "grad_norm": 0.07503164559602737, "learning_rate": 2.7270626685105828e-06, "loss": 0.005, "step": 26990 }, { "epoch": 4.11834716796875e-05, "model_forward_time": 0.02443552017211914, "step": 26990 }, { "epoch": 4.11834716796875e-05, "step": 26990, "training_step_time": 0.10893750190734863 }, { "epoch": 4.118499755859375e-05, "model_forward_time": 0.024914026260375977, "step": 26991 }, { "epoch": 4.118499755859375e-05, "step": 26991, "training_step_time": 0.10958528518676758 }, { "epoch": 4.11865234375e-05, "model_forward_time": 0.024821758270263672, "step": 26992 }, { "epoch": 4.11865234375e-05, "step": 26992, "training_step_time": 0.11252021789550781 }, { "epoch": 4.118804931640625e-05, "model_forward_time": 0.025542497634887695, "step": 26993 }, { "epoch": 4.118804931640625e-05, "step": 26993, "training_step_time": 0.11174678802490234 }, { "epoch": 4.11895751953125e-05, "model_forward_time": 0.024907350540161133, "step": 26994 }, { "epoch": 4.11895751953125e-05, "step": 26994, "training_step_time": 0.10571861267089844 }, { "epoch": 4.119110107421875e-05, "model_forward_time": 0.025124788284301758, "step": 26995 }, { "epoch": 4.119110107421875e-05, "step": 26995, "training_step_time": 0.1074991226196289 }, { "epoch": 4.1192626953125e-05, "model_forward_time": 0.02523207664489746, "step": 26996 }, { "epoch": 4.1192626953125e-05, "step": 26996, "training_step_time": 0.10913276672363281 }, { "epoch": 4.119415283203125e-05, "model_forward_time": 0.025465965270996094, "step": 26997 }, { "epoch": 4.119415283203125e-05, "step": 26997, "training_step_time": 0.1304309368133545 }, { "epoch": 4.11956787109375e-05, "model_forward_time": 0.0254361629486084, "step": 26998 }, { "epoch": 4.11956787109375e-05, "step": 26998, "training_step_time": 0.12624001502990723 }, { "epoch": 4.119720458984375e-05, "model_forward_time": 0.024965763092041016, "step": 26999 }, { "epoch": 4.119720458984375e-05, "step": 26999, "training_step_time": 0.12414026260375977 }, { "epoch": 4.119873046875e-05, "grad_norm": 0.09099958837032318, "learning_rate": 2.7091379149682685e-06, "loss": 0.0028, "step": 27000 }, { "epoch": 4.119873046875e-05, "model_forward_time": 0.023960351943969727, "step": 27000 }, { "epoch": 4.119873046875e-05, "step": 27000, "training_step_time": 0.09961485862731934 }, { "epoch": 4.120025634765625e-05, "model_forward_time": 0.027181148529052734, "step": 27001 }, { "epoch": 4.120025634765625e-05, "step": 27001, "training_step_time": 0.10092806816101074 }, { "epoch": 4.12017822265625e-05, "model_forward_time": 0.024754762649536133, "step": 27002 }, { "epoch": 4.12017822265625e-05, "step": 27002, "training_step_time": 0.16902947425842285 }, { "epoch": 4.120330810546875e-05, "model_forward_time": 0.025109529495239258, "step": 27003 }, { "epoch": 4.120330810546875e-05, "step": 27003, "training_step_time": 0.1345357894897461 }, { "epoch": 4.1204833984375e-05, "model_forward_time": 0.024495601654052734, "step": 27004 }, { "epoch": 4.1204833984375e-05, "step": 27004, "training_step_time": 0.10534286499023438 }, { "epoch": 4.120635986328125e-05, "model_forward_time": 0.025167226791381836, "step": 27005 }, { "epoch": 4.120635986328125e-05, "step": 27005, "training_step_time": 0.11098265647888184 }, { "epoch": 4.12078857421875e-05, "model_forward_time": 0.025261878967285156, "step": 27006 }, { "epoch": 4.12078857421875e-05, "step": 27006, "training_step_time": 0.11241674423217773 }, { "epoch": 4.120941162109375e-05, "model_forward_time": 0.02484750747680664, "step": 27007 }, { "epoch": 4.120941162109375e-05, "step": 27007, "training_step_time": 0.11015057563781738 }, { "epoch": 4.12109375e-05, "model_forward_time": 0.025192975997924805, "step": 27008 }, { "epoch": 4.12109375e-05, "step": 27008, "training_step_time": 0.19356870651245117 }, { "epoch": 4.121246337890625e-05, "model_forward_time": 0.02445363998413086, "step": 27009 }, { "epoch": 4.121246337890625e-05, "step": 27009, "training_step_time": 0.11181640625 }, { "epoch": 4.12139892578125e-05, "grad_norm": 0.06130315735936165, "learning_rate": 2.691270624282621e-06, "loss": 0.0021, "step": 27010 }, { "epoch": 4.12139892578125e-05, "model_forward_time": 0.023091554641723633, "step": 27010 }, { "epoch": 4.12139892578125e-05, "step": 27010, "training_step_time": 0.10469198226928711 }, { "epoch": 4.121551513671875e-05, "model_forward_time": 0.02430129051208496, "step": 27011 }, { "epoch": 4.121551513671875e-05, "step": 27011, "training_step_time": 0.11219048500061035 }, { "epoch": 4.1217041015625e-05, "model_forward_time": 0.025029659271240234, "step": 27012 }, { "epoch": 4.1217041015625e-05, "step": 27012, "training_step_time": 0.10821890830993652 }, { "epoch": 4.121856689453125e-05, "model_forward_time": 0.024878978729248047, "step": 27013 }, { "epoch": 4.121856689453125e-05, "step": 27013, "training_step_time": 0.10521078109741211 }, { "epoch": 4.12200927734375e-05, "model_forward_time": 0.025193452835083008, "step": 27014 }, { "epoch": 4.12200927734375e-05, "step": 27014, "training_step_time": 0.10845804214477539 }, { "epoch": 4.122161865234375e-05, "model_forward_time": 0.02523493766784668, "step": 27015 }, { "epoch": 4.122161865234375e-05, "step": 27015, "training_step_time": 0.1036832332611084 }, { "epoch": 4.122314453125e-05, "model_forward_time": 0.025578022003173828, "step": 27016 }, { "epoch": 4.122314453125e-05, "step": 27016, "training_step_time": 0.10643219947814941 }, { "epoch": 4.122467041015625e-05, "model_forward_time": 0.025878429412841797, "step": 27017 }, { "epoch": 4.122467041015625e-05, "step": 27017, "training_step_time": 0.11125588417053223 }, { "epoch": 4.12261962890625e-05, "model_forward_time": 0.025020122528076172, "step": 27018 }, { "epoch": 4.12261962890625e-05, "step": 27018, "training_step_time": 0.11412763595581055 }, { "epoch": 4.122772216796875e-05, "model_forward_time": 0.0253450870513916, "step": 27019 }, { "epoch": 4.122772216796875e-05, "step": 27019, "training_step_time": 0.11370062828063965 }, { "epoch": 4.1229248046875e-05, "grad_norm": 0.08990398049354553, "learning_rate": 2.6734608181640917e-06, "loss": 0.0044, "step": 27020 }, { "epoch": 4.1229248046875e-05, "model_forward_time": 0.024729013442993164, "step": 27020 }, { "epoch": 4.1229248046875e-05, "step": 27020, "training_step_time": 0.11831951141357422 }, { "epoch": 4.123077392578125e-05, "model_forward_time": 0.024185895919799805, "step": 27021 }, { "epoch": 4.123077392578125e-05, "step": 27021, "training_step_time": 0.11263680458068848 }, { "epoch": 4.12322998046875e-05, "model_forward_time": 0.025237560272216797, "step": 27022 }, { "epoch": 4.12322998046875e-05, "step": 27022, "training_step_time": 0.11316514015197754 }, { "epoch": 4.123382568359375e-05, "model_forward_time": 0.025025367736816406, "step": 27023 }, { "epoch": 4.123382568359375e-05, "step": 27023, "training_step_time": 0.1130366325378418 }, { "epoch": 4.12353515625e-05, "model_forward_time": 0.02508997917175293, "step": 27024 }, { "epoch": 4.12353515625e-05, "step": 27024, "training_step_time": 0.11358356475830078 }, { "epoch": 4.123687744140625e-05, "model_forward_time": 0.024950504302978516, "step": 27025 }, { "epoch": 4.123687744140625e-05, "step": 27025, "training_step_time": 0.10752201080322266 }, { "epoch": 4.12384033203125e-05, "model_forward_time": 0.02515888214111328, "step": 27026 }, { "epoch": 4.12384033203125e-05, "step": 27026, "training_step_time": 0.10950589179992676 }, { "epoch": 4.123992919921875e-05, "model_forward_time": 0.025304079055786133, "step": 27027 }, { "epoch": 4.123992919921875e-05, "step": 27027, "training_step_time": 0.1083211898803711 }, { "epoch": 4.1241455078125e-05, "model_forward_time": 0.02550959587097168, "step": 27028 }, { "epoch": 4.1241455078125e-05, "step": 27028, "training_step_time": 0.1993846893310547 }, { "epoch": 4.124298095703125e-05, "model_forward_time": 0.02439570426940918, "step": 27029 }, { "epoch": 4.124298095703125e-05, "step": 27029, "training_step_time": 0.10496091842651367 }, { "epoch": 4.12445068359375e-05, "grad_norm": 0.04527433216571808, "learning_rate": 2.6557085182532582e-06, "loss": 0.0022, "step": 27030 }, { "epoch": 4.12445068359375e-05, "model_forward_time": 0.02434563636779785, "step": 27030 }, { "epoch": 4.12445068359375e-05, "step": 27030, "training_step_time": 0.2103407382965088 }, { "epoch": 4.124603271484375e-05, "model_forward_time": 0.024880170822143555, "step": 27031 }, { "epoch": 4.124603271484375e-05, "step": 27031, "training_step_time": 0.15329790115356445 }, { "epoch": 4.124755859375e-05, "model_forward_time": 0.02429986000061035, "step": 27032 }, { "epoch": 4.124755859375e-05, "step": 27032, "training_step_time": 0.192047119140625 }, { "epoch": 4.124908447265625e-05, "model_forward_time": 0.02413153648376465, "step": 27033 }, { "epoch": 4.124908447265625e-05, "step": 27033, "training_step_time": 0.19038677215576172 }, { "epoch": 4.12506103515625e-05, "model_forward_time": 0.02465510368347168, "step": 27034 }, { "epoch": 4.12506103515625e-05, "step": 27034, "training_step_time": 0.14209556579589844 }, { "epoch": 4.125213623046875e-05, "model_forward_time": 0.025075435638427734, "step": 27035 }, { "epoch": 4.125213623046875e-05, "step": 27035, "training_step_time": 0.21680068969726562 }, { "epoch": 4.1253662109375e-05, "model_forward_time": 0.024895429611206055, "step": 27036 }, { "epoch": 4.1253662109375e-05, "step": 27036, "training_step_time": 0.11236238479614258 }, { "epoch": 4.125518798828125e-05, "model_forward_time": 0.02442455291748047, "step": 27037 }, { "epoch": 4.125518798828125e-05, "step": 27037, "training_step_time": 0.10294747352600098 }, { "epoch": 4.12567138671875e-05, "model_forward_time": 0.02545332908630371, "step": 27038 }, { "epoch": 4.12567138671875e-05, "step": 27038, "training_step_time": 0.10767889022827148 }, { "epoch": 4.125823974609375e-05, "model_forward_time": 0.025390625, "step": 27039 }, { "epoch": 4.125823974609375e-05, "step": 27039, "training_step_time": 0.10913991928100586 }, { "epoch": 4.1259765625e-05, "grad_norm": 0.03140696883201599, "learning_rate": 2.63801374612086e-06, "loss": 0.0104, "step": 27040 }, { "epoch": 4.1259765625e-05, "model_forward_time": 0.023951292037963867, "step": 27040 }, { "epoch": 4.1259765625e-05, "step": 27040, "training_step_time": 0.10563325881958008 }, { "epoch": 4.126129150390625e-05, "model_forward_time": 0.024246931076049805, "step": 27041 }, { "epoch": 4.126129150390625e-05, "step": 27041, "training_step_time": 0.10909843444824219 }, { "epoch": 4.12628173828125e-05, "model_forward_time": 0.02557539939880371, "step": 27042 }, { "epoch": 4.12628173828125e-05, "step": 27042, "training_step_time": 0.1052253246307373 }, { "epoch": 4.126434326171875e-05, "model_forward_time": 0.025291919708251953, "step": 27043 }, { "epoch": 4.126434326171875e-05, "step": 27043, "training_step_time": 0.10856151580810547 }, { "epoch": 4.1265869140625e-05, "model_forward_time": 0.02512812614440918, "step": 27044 }, { "epoch": 4.1265869140625e-05, "step": 27044, "training_step_time": 0.1062319278717041 }, { "epoch": 4.126739501953125e-05, "model_forward_time": 0.025133371353149414, "step": 27045 }, { "epoch": 4.126739501953125e-05, "step": 27045, "training_step_time": 0.10752463340759277 }, { "epoch": 4.12689208984375e-05, "model_forward_time": 0.02543187141418457, "step": 27046 }, { "epoch": 4.12689208984375e-05, "step": 27046, "training_step_time": 0.10828781127929688 }, { "epoch": 4.127044677734375e-05, "model_forward_time": 0.024854421615600586, "step": 27047 }, { "epoch": 4.127044677734375e-05, "step": 27047, "training_step_time": 0.11273741722106934 }, { "epoch": 4.127197265625e-05, "model_forward_time": 0.026129722595214844, "step": 27048 }, { "epoch": 4.127197265625e-05, "step": 27048, "training_step_time": 0.10776948928833008 }, { "epoch": 4.127349853515625e-05, "model_forward_time": 0.025411367416381836, "step": 27049 }, { "epoch": 4.127349853515625e-05, "step": 27049, "training_step_time": 0.12827563285827637 }, { "epoch": 4.12750244140625e-05, "grad_norm": 0.21848095953464508, "learning_rate": 2.620376523267698e-06, "loss": 0.0028, "step": 27050 }, { "epoch": 4.12750244140625e-05, "model_forward_time": 0.02498793601989746, "step": 27050 }, { "epoch": 4.12750244140625e-05, "step": 27050, "training_step_time": 0.11032748222351074 }, { "epoch": 4.127655029296875e-05, "model_forward_time": 0.025449275970458984, "step": 27051 }, { "epoch": 4.127655029296875e-05, "step": 27051, "training_step_time": 0.10655903816223145 }, { "epoch": 4.1278076171875e-05, "model_forward_time": 0.02609395980834961, "step": 27052 }, { "epoch": 4.1278076171875e-05, "step": 27052, "training_step_time": 0.11070036888122559 }, { "epoch": 4.127960205078125e-05, "model_forward_time": 0.025506019592285156, "step": 27053 }, { "epoch": 4.127960205078125e-05, "step": 27053, "training_step_time": 0.11590790748596191 }, { "epoch": 4.12811279296875e-05, "model_forward_time": 0.02569437026977539, "step": 27054 }, { "epoch": 4.12811279296875e-05, "step": 27054, "training_step_time": 0.2042102813720703 }, { "epoch": 4.128265380859375e-05, "model_forward_time": 0.02479243278503418, "step": 27055 }, { "epoch": 4.128265380859375e-05, "step": 27055, "training_step_time": 0.11034941673278809 }, { "epoch": 4.12841796875e-05, "model_forward_time": 0.023580312728881836, "step": 27056 }, { "epoch": 4.12841796875e-05, "step": 27056, "training_step_time": 0.10724830627441406 }, { "epoch": 4.128570556640625e-05, "model_forward_time": 0.025020360946655273, "step": 27057 }, { "epoch": 4.128570556640625e-05, "step": 27057, "training_step_time": 0.10899829864501953 }, { "epoch": 4.12872314453125e-05, "model_forward_time": 0.025716066360473633, "step": 27058 }, { "epoch": 4.12872314453125e-05, "step": 27058, "training_step_time": 0.11025333404541016 }, { "epoch": 4.128875732421875e-05, "model_forward_time": 0.025380849838256836, "step": 27059 }, { "epoch": 4.128875732421875e-05, "step": 27059, "training_step_time": 0.1088571548461914 }, { "epoch": 4.1290283203125e-05, "grad_norm": 0.04789227619767189, "learning_rate": 2.602796871124663e-06, "loss": 0.0025, "step": 27060 }, { "epoch": 4.1290283203125e-05, "model_forward_time": 0.02533721923828125, "step": 27060 }, { "epoch": 4.1290283203125e-05, "step": 27060, "training_step_time": 0.11033177375793457 }, { "epoch": 4.129180908203125e-05, "model_forward_time": 0.025603771209716797, "step": 27061 }, { "epoch": 4.129180908203125e-05, "step": 27061, "training_step_time": 0.10684037208557129 }, { "epoch": 4.12933349609375e-05, "model_forward_time": 0.02563190460205078, "step": 27062 }, { "epoch": 4.12933349609375e-05, "step": 27062, "training_step_time": 0.1090090274810791 }, { "epoch": 4.129486083984375e-05, "model_forward_time": 0.02566671371459961, "step": 27063 }, { "epoch": 4.129486083984375e-05, "step": 27063, "training_step_time": 0.1087045669555664 }, { "epoch": 4.129638671875e-05, "model_forward_time": 0.02546858787536621, "step": 27064 }, { "epoch": 4.129638671875e-05, "step": 27064, "training_step_time": 0.10641646385192871 }, { "epoch": 4.129791259765625e-05, "model_forward_time": 0.02562999725341797, "step": 27065 }, { "epoch": 4.129791259765625e-05, "step": 27065, "training_step_time": 0.10577392578125 }, { "epoch": 4.12994384765625e-05, "model_forward_time": 0.025210857391357422, "step": 27066 }, { "epoch": 4.12994384765625e-05, "step": 27066, "training_step_time": 0.10653424263000488 }, { "epoch": 4.130096435546875e-05, "model_forward_time": 0.024494647979736328, "step": 27067 }, { "epoch": 4.130096435546875e-05, "step": 27067, "training_step_time": 0.10486197471618652 }, { "epoch": 4.1302490234375e-05, "model_forward_time": 0.025325298309326172, "step": 27068 }, { "epoch": 4.1302490234375e-05, "step": 27068, "training_step_time": 0.10727667808532715 }, { "epoch": 4.130401611328125e-05, "model_forward_time": 0.02538895606994629, "step": 27069 }, { "epoch": 4.130401611328125e-05, "step": 27069, "training_step_time": 0.11631155014038086 }, { "epoch": 4.13055419921875e-05, "grad_norm": 0.05423854663968086, "learning_rate": 2.5852748110527014e-06, "loss": 0.0037, "step": 27070 }, { "epoch": 4.13055419921875e-05, "model_forward_time": 0.025330543518066406, "step": 27070 }, { "epoch": 4.13055419921875e-05, "step": 27070, "training_step_time": 0.10634231567382812 }, { "epoch": 4.130706787109375e-05, "model_forward_time": 0.02494502067565918, "step": 27071 }, { "epoch": 4.130706787109375e-05, "step": 27071, "training_step_time": 0.10900282859802246 }, { "epoch": 4.130859375e-05, "model_forward_time": 0.025794267654418945, "step": 27072 }, { "epoch": 4.130859375e-05, "step": 27072, "training_step_time": 0.10683178901672363 }, { "epoch": 4.131011962890625e-05, "model_forward_time": 0.025348901748657227, "step": 27073 }, { "epoch": 4.131011962890625e-05, "step": 27073, "training_step_time": 0.10675883293151855 }, { "epoch": 4.13116455078125e-05, "model_forward_time": 0.025124788284301758, "step": 27074 }, { "epoch": 4.13116455078125e-05, "step": 27074, "training_step_time": 0.10666322708129883 }, { "epoch": 4.131317138671875e-05, "model_forward_time": 0.02539372444152832, "step": 27075 }, { "epoch": 4.131317138671875e-05, "step": 27075, "training_step_time": 0.10860610008239746 }, { "epoch": 4.1314697265625e-05, "model_forward_time": 0.024425506591796875, "step": 27076 }, { "epoch": 4.1314697265625e-05, "step": 27076, "training_step_time": 0.14626669883728027 }, { "epoch": 4.131622314453125e-05, "model_forward_time": 0.025066137313842773, "step": 27077 }, { "epoch": 4.131622314453125e-05, "step": 27077, "training_step_time": 0.16222572326660156 }, { "epoch": 4.13177490234375e-05, "model_forward_time": 0.02506422996520996, "step": 27078 }, { "epoch": 4.13177490234375e-05, "step": 27078, "training_step_time": 0.18921208381652832 }, { "epoch": 4.131927490234375e-05, "model_forward_time": 0.024865150451660156, "step": 27079 }, { "epoch": 4.131927490234375e-05, "step": 27079, "training_step_time": 0.14633464813232422 }, { "epoch": 4.132080078125e-05, "grad_norm": 0.10550269484519958, "learning_rate": 2.567810364342765e-06, "loss": 0.0053, "step": 27080 }, { "epoch": 4.132080078125e-05, "model_forward_time": 0.024815797805786133, "step": 27080 }, { "epoch": 4.132080078125e-05, "step": 27080, "training_step_time": 0.10638809204101562 }, { "epoch": 4.132232666015625e-05, "model_forward_time": 0.024890661239624023, "step": 27081 }, { "epoch": 4.132232666015625e-05, "step": 27081, "training_step_time": 0.16589999198913574 }, { "epoch": 4.13238525390625e-05, "model_forward_time": 0.02510523796081543, "step": 27082 }, { "epoch": 4.13238525390625e-05, "step": 27082, "training_step_time": 0.21301770210266113 }, { "epoch": 4.132537841796875e-05, "model_forward_time": 0.024335861206054688, "step": 27083 }, { "epoch": 4.132537841796875e-05, "step": 27083, "training_step_time": 0.10935139656066895 }, { "epoch": 4.1326904296875e-05, "model_forward_time": 0.02464127540588379, "step": 27084 }, { "epoch": 4.1326904296875e-05, "step": 27084, "training_step_time": 0.1063542366027832 }, { "epoch": 4.132843017578125e-05, "model_forward_time": 0.028451919555664062, "step": 27085 }, { "epoch": 4.132843017578125e-05, "step": 27085, "training_step_time": 0.10713887214660645 }, { "epoch": 4.13299560546875e-05, "model_forward_time": 0.025197505950927734, "step": 27086 }, { "epoch": 4.13299560546875e-05, "step": 27086, "training_step_time": 0.10317111015319824 }, { "epoch": 4.133148193359375e-05, "model_forward_time": 0.024071455001831055, "step": 27087 }, { "epoch": 4.133148193359375e-05, "step": 27087, "training_step_time": 0.10592985153198242 }, { "epoch": 4.13330078125e-05, "model_forward_time": 0.024669647216796875, "step": 27088 }, { "epoch": 4.13330078125e-05, "step": 27088, "training_step_time": 0.10637211799621582 }, { "epoch": 4.133453369140625e-05, "model_forward_time": 0.025848865509033203, "step": 27089 }, { "epoch": 4.133453369140625e-05, "step": 27089, "training_step_time": 0.10802316665649414 }, { "epoch": 4.13360595703125e-05, "grad_norm": 0.0726233571767807, "learning_rate": 2.5504035522157854e-06, "loss": 0.0037, "step": 27090 }, { "epoch": 4.13360595703125e-05, "model_forward_time": 0.025147199630737305, "step": 27090 }, { "epoch": 4.13360595703125e-05, "step": 27090, "training_step_time": 0.10675644874572754 }, { "epoch": 4.133758544921875e-05, "model_forward_time": 0.02559065818786621, "step": 27091 }, { "epoch": 4.133758544921875e-05, "step": 27091, "training_step_time": 0.1077423095703125 }, { "epoch": 4.1339111328125e-05, "model_forward_time": 0.025440454483032227, "step": 27092 }, { "epoch": 4.1339111328125e-05, "step": 27092, "training_step_time": 0.11034727096557617 }, { "epoch": 4.134063720703125e-05, "model_forward_time": 0.025664806365966797, "step": 27093 }, { "epoch": 4.134063720703125e-05, "step": 27093, "training_step_time": 0.1103217601776123 }, { "epoch": 4.13421630859375e-05, "model_forward_time": 0.025201082229614258, "step": 27094 }, { "epoch": 4.13421630859375e-05, "step": 27094, "training_step_time": 0.20853710174560547 }, { "epoch": 4.134368896484375e-05, "model_forward_time": 0.024445533752441406, "step": 27095 }, { "epoch": 4.134368896484375e-05, "step": 27095, "training_step_time": 0.1099843978881836 }, { "epoch": 4.134521484375e-05, "model_forward_time": 0.0247652530670166, "step": 27096 }, { "epoch": 4.134521484375e-05, "step": 27096, "training_step_time": 0.10718774795532227 }, { "epoch": 4.134674072265625e-05, "model_forward_time": 0.025397062301635742, "step": 27097 }, { "epoch": 4.134674072265625e-05, "step": 27097, "training_step_time": 0.10552573204040527 }, { "epoch": 4.13482666015625e-05, "model_forward_time": 0.0255434513092041, "step": 27098 }, { "epoch": 4.13482666015625e-05, "step": 27098, "training_step_time": 0.13417363166809082 }, { "epoch": 4.134979248046875e-05, "model_forward_time": 0.02527141571044922, "step": 27099 }, { "epoch": 4.134979248046875e-05, "step": 27099, "training_step_time": 0.10861468315124512 }, { "epoch": 4.1351318359375e-05, "grad_norm": 0.08323527127504349, "learning_rate": 2.533054395822704e-06, "loss": 0.0026, "step": 27100 }, { "epoch": 4.1351318359375e-05, "model_forward_time": 0.02525043487548828, "step": 27100 }, { "epoch": 4.1351318359375e-05, "step": 27100, "training_step_time": 0.19332408905029297 }, { "epoch": 4.135284423828125e-05, "model_forward_time": 0.024570465087890625, "step": 27101 }, { "epoch": 4.135284423828125e-05, "step": 27101, "training_step_time": 0.1037447452545166 }, { "epoch": 4.13543701171875e-05, "model_forward_time": 0.024534940719604492, "step": 27102 }, { "epoch": 4.13543701171875e-05, "step": 27102, "training_step_time": 0.10181570053100586 }, { "epoch": 4.135589599609375e-05, "model_forward_time": 0.025430679321289062, "step": 27103 }, { "epoch": 4.135589599609375e-05, "step": 27103, "training_step_time": 0.10852432250976562 }, { "epoch": 4.1357421875e-05, "model_forward_time": 0.025399208068847656, "step": 27104 }, { "epoch": 4.1357421875e-05, "step": 27104, "training_step_time": 0.10819149017333984 }, { "epoch": 4.135894775390625e-05, "model_forward_time": 0.024935007095336914, "step": 27105 }, { "epoch": 4.135894775390625e-05, "step": 27105, "training_step_time": 0.10955429077148438 }, { "epoch": 4.13604736328125e-05, "model_forward_time": 0.02556753158569336, "step": 27106 }, { "epoch": 4.13604736328125e-05, "step": 27106, "training_step_time": 0.11037516593933105 }, { "epoch": 4.136199951171875e-05, "model_forward_time": 0.02503371238708496, "step": 27107 }, { "epoch": 4.136199951171875e-05, "step": 27107, "training_step_time": 0.10656452178955078 }, { "epoch": 4.1363525390625e-05, "model_forward_time": 0.025246143341064453, "step": 27108 }, { "epoch": 4.1363525390625e-05, "step": 27108, "training_step_time": 0.10650897026062012 }, { "epoch": 4.136505126953125e-05, "model_forward_time": 0.025477886199951172, "step": 27109 }, { "epoch": 4.136505126953125e-05, "step": 27109, "training_step_time": 0.1107480525970459 }, { "epoch": 4.13665771484375e-05, "grad_norm": 0.46716248989105225, "learning_rate": 2.515762916244374e-06, "loss": 0.0055, "step": 27110 }, { "epoch": 4.13665771484375e-05, "model_forward_time": 0.02565932273864746, "step": 27110 }, { "epoch": 4.13665771484375e-05, "step": 27110, "training_step_time": 0.10685276985168457 }, { "epoch": 4.136810302734375e-05, "model_forward_time": 0.025774717330932617, "step": 27111 }, { "epoch": 4.136810302734375e-05, "step": 27111, "training_step_time": 0.10646486282348633 }, { "epoch": 4.136962890625e-05, "model_forward_time": 0.02550363540649414, "step": 27112 }, { "epoch": 4.136962890625e-05, "step": 27112, "training_step_time": 0.10633063316345215 }, { "epoch": 4.137115478515625e-05, "model_forward_time": 0.02534937858581543, "step": 27113 }, { "epoch": 4.137115478515625e-05, "step": 27113, "training_step_time": 0.11271476745605469 }, { "epoch": 4.13726806640625e-05, "model_forward_time": 0.02518630027770996, "step": 27114 }, { "epoch": 4.13726806640625e-05, "step": 27114, "training_step_time": 0.10434269905090332 }, { "epoch": 4.137420654296875e-05, "model_forward_time": 0.02531719207763672, "step": 27115 }, { "epoch": 4.137420654296875e-05, "step": 27115, "training_step_time": 0.10865044593811035 }, { "epoch": 4.1375732421875e-05, "model_forward_time": 0.025368213653564453, "step": 27116 }, { "epoch": 4.1375732421875e-05, "step": 27116, "training_step_time": 0.10389161109924316 }, { "epoch": 4.137725830078125e-05, "model_forward_time": 0.024411916732788086, "step": 27117 }, { "epoch": 4.137725830078125e-05, "step": 27117, "training_step_time": 0.10581660270690918 }, { "epoch": 4.13787841796875e-05, "model_forward_time": 0.02437734603881836, "step": 27118 }, { "epoch": 4.13787841796875e-05, "step": 27118, "training_step_time": 0.10658526420593262 }, { "epoch": 4.138031005859375e-05, "model_forward_time": 0.02540874481201172, "step": 27119 }, { "epoch": 4.138031005859375e-05, "step": 27119, "training_step_time": 0.11419677734375 }, { "epoch": 4.13818359375e-05, "grad_norm": 0.1938505321741104, "learning_rate": 2.4985291344915674e-06, "loss": 0.0045, "step": 27120 }, { "epoch": 4.13818359375e-05, "model_forward_time": 0.02473926544189453, "step": 27120 }, { "epoch": 4.13818359375e-05, "step": 27120, "training_step_time": 0.12047886848449707 }, { "epoch": 4.138336181640625e-05, "model_forward_time": 0.02515578269958496, "step": 27121 }, { "epoch": 4.138336181640625e-05, "step": 27121, "training_step_time": 0.10773468017578125 }, { "epoch": 4.13848876953125e-05, "model_forward_time": 0.02457284927368164, "step": 27122 }, { "epoch": 4.13848876953125e-05, "step": 27122, "training_step_time": 0.14998173713684082 }, { "epoch": 4.138641357421875e-05, "model_forward_time": 0.025141239166259766, "step": 27123 }, { "epoch": 4.138641357421875e-05, "step": 27123, "training_step_time": 0.16455912590026855 }, { "epoch": 4.1387939453125e-05, "model_forward_time": 0.024574995040893555, "step": 27124 }, { "epoch": 4.1387939453125e-05, "step": 27124, "training_step_time": 0.1673569679260254 }, { "epoch": 4.138946533203125e-05, "model_forward_time": 0.024762868881225586, "step": 27125 }, { "epoch": 4.138946533203125e-05, "step": 27125, "training_step_time": 0.15564870834350586 }, { "epoch": 4.13909912109375e-05, "model_forward_time": 0.024801015853881836, "step": 27126 }, { "epoch": 4.13909912109375e-05, "step": 27126, "training_step_time": 0.11758065223693848 }, { "epoch": 4.139251708984375e-05, "model_forward_time": 0.024770021438598633, "step": 27127 }, { "epoch": 4.139251708984375e-05, "step": 27127, "training_step_time": 0.14072299003601074 }, { "epoch": 4.139404296875e-05, "model_forward_time": 0.024611711502075195, "step": 27128 }, { "epoch": 4.139404296875e-05, "step": 27128, "training_step_time": 0.15895557403564453 }, { "epoch": 4.139556884765625e-05, "model_forward_time": 0.024137258529663086, "step": 27129 }, { "epoch": 4.139556884765625e-05, "step": 27129, "training_step_time": 0.1314702033996582 }, { "epoch": 4.13970947265625e-05, "grad_norm": 0.09073584526777267, "learning_rate": 2.481353071504966e-06, "loss": 0.0024, "step": 27130 }, { "epoch": 4.13970947265625e-05, "model_forward_time": 0.0239260196685791, "step": 27130 }, { "epoch": 4.13970947265625e-05, "step": 27130, "training_step_time": 0.10956525802612305 }, { "epoch": 4.139862060546875e-05, "model_forward_time": 0.025166749954223633, "step": 27131 }, { "epoch": 4.139862060546875e-05, "step": 27131, "training_step_time": 0.11165142059326172 }, { "epoch": 4.1400146484375e-05, "model_forward_time": 0.025182008743286133, "step": 27132 }, { "epoch": 4.1400146484375e-05, "step": 27132, "training_step_time": 0.1120445728302002 }, { "epoch": 4.140167236328125e-05, "model_forward_time": 0.024922847747802734, "step": 27133 }, { "epoch": 4.140167236328125e-05, "step": 27133, "training_step_time": 0.11076855659484863 }, { "epoch": 4.14031982421875e-05, "model_forward_time": 0.02542710304260254, "step": 27134 }, { "epoch": 4.14031982421875e-05, "step": 27134, "training_step_time": 0.11251330375671387 }, { "epoch": 4.140472412109375e-05, "model_forward_time": 0.02524423599243164, "step": 27135 }, { "epoch": 4.140472412109375e-05, "step": 27135, "training_step_time": 0.11557984352111816 }, { "epoch": 4.140625e-05, "model_forward_time": 0.025435209274291992, "step": 27136 }, { "epoch": 4.140625e-05, "step": 27136, "training_step_time": 0.12040162086486816 }, { "epoch": 4.140777587890625e-05, "model_forward_time": 0.025095224380493164, "step": 27137 }, { "epoch": 4.140777587890625e-05, "step": 27137, "training_step_time": 0.11200666427612305 }, { "epoch": 4.14093017578125e-05, "model_forward_time": 0.02540445327758789, "step": 27138 }, { "epoch": 4.14093017578125e-05, "step": 27138, "training_step_time": 0.11178112030029297 }, { "epoch": 4.141082763671875e-05, "model_forward_time": 0.025470495223999023, "step": 27139 }, { "epoch": 4.141082763671875e-05, "step": 27139, "training_step_time": 0.11330199241638184 }, { "epoch": 4.1412353515625e-05, "grad_norm": 0.11247889697551727, "learning_rate": 2.4642347481550865e-06, "loss": 0.0084, "step": 27140 }, { "epoch": 4.1412353515625e-05, "model_forward_time": 0.025043487548828125, "step": 27140 }, { "epoch": 4.1412353515625e-05, "step": 27140, "training_step_time": 0.1107022762298584 }, { "epoch": 4.141387939453125e-05, "model_forward_time": 0.0256500244140625, "step": 27141 }, { "epoch": 4.141387939453125e-05, "step": 27141, "training_step_time": 0.1199500560760498 }, { "epoch": 4.14154052734375e-05, "model_forward_time": 0.02564835548400879, "step": 27142 }, { "epoch": 4.14154052734375e-05, "step": 27142, "training_step_time": 0.13262557983398438 }, { "epoch": 4.141693115234375e-05, "model_forward_time": 0.025178909301757812, "step": 27143 }, { "epoch": 4.141693115234375e-05, "step": 27143, "training_step_time": 0.10856127738952637 }, { "epoch": 4.141845703125e-05, "model_forward_time": 0.02529168128967285, "step": 27144 }, { "epoch": 4.141845703125e-05, "step": 27144, "training_step_time": 0.12717461585998535 }, { "epoch": 4.141998291015625e-05, "model_forward_time": 0.02571582794189453, "step": 27145 }, { "epoch": 4.141998291015625e-05, "step": 27145, "training_step_time": 0.11343050003051758 }, { "epoch": 4.14215087890625e-05, "model_forward_time": 0.025376081466674805, "step": 27146 }, { "epoch": 4.14215087890625e-05, "step": 27146, "training_step_time": 0.11295914649963379 }, { "epoch": 4.142303466796875e-05, "model_forward_time": 0.026096582412719727, "step": 27147 }, { "epoch": 4.142303466796875e-05, "step": 27147, "training_step_time": 0.1576695442199707 }, { "epoch": 4.1424560546875e-05, "model_forward_time": 0.02475738525390625, "step": 27148 }, { "epoch": 4.1424560546875e-05, "step": 27148, "training_step_time": 0.10870170593261719 }, { "epoch": 4.142608642578125e-05, "model_forward_time": 0.02508068084716797, "step": 27149 }, { "epoch": 4.142608642578125e-05, "step": 27149, "training_step_time": 0.10722684860229492 }, { "epoch": 4.14276123046875e-05, "grad_norm": 0.09447456896305084, "learning_rate": 2.4471741852423237e-06, "loss": 0.0036, "step": 27150 }, { "epoch": 4.14276123046875e-05, "model_forward_time": 0.025452613830566406, "step": 27150 }, { "epoch": 4.14276123046875e-05, "step": 27150, "training_step_time": 0.10950160026550293 }, { "epoch": 4.142913818359375e-05, "model_forward_time": 0.02527308464050293, "step": 27151 }, { "epoch": 4.142913818359375e-05, "step": 27151, "training_step_time": 0.11713886260986328 }, { "epoch": 4.14306640625e-05, "model_forward_time": 0.025470495223999023, "step": 27152 }, { "epoch": 4.14306640625e-05, "step": 27152, "training_step_time": 0.11479997634887695 }, { "epoch": 4.143218994140625e-05, "model_forward_time": 0.025516986846923828, "step": 27153 }, { "epoch": 4.143218994140625e-05, "step": 27153, "training_step_time": 0.10962080955505371 }, { "epoch": 4.14337158203125e-05, "model_forward_time": 0.025424480438232422, "step": 27154 }, { "epoch": 4.14337158203125e-05, "step": 27154, "training_step_time": 0.11248993873596191 }, { "epoch": 4.143524169921875e-05, "model_forward_time": 0.025280237197875977, "step": 27155 }, { "epoch": 4.143524169921875e-05, "step": 27155, "training_step_time": 0.10719728469848633 }, { "epoch": 4.1436767578125e-05, "model_forward_time": 0.025478363037109375, "step": 27156 }, { "epoch": 4.1436767578125e-05, "step": 27156, "training_step_time": 0.1108705997467041 }, { "epoch": 4.143829345703125e-05, "model_forward_time": 0.025449275970458984, "step": 27157 }, { "epoch": 4.143829345703125e-05, "step": 27157, "training_step_time": 0.10879993438720703 }, { "epoch": 4.14398193359375e-05, "model_forward_time": 0.025259971618652344, "step": 27158 }, { "epoch": 4.14398193359375e-05, "step": 27158, "training_step_time": 0.11244678497314453 }, { "epoch": 4.144134521484375e-05, "model_forward_time": 0.025121450424194336, "step": 27159 }, { "epoch": 4.144134521484375e-05, "step": 27159, "training_step_time": 0.10891532897949219 }, { "epoch": 4.144287109375e-05, "grad_norm": 0.040682245045900345, "learning_rate": 2.430171403496867e-06, "loss": 0.0035, "step": 27160 }, { "epoch": 4.144287109375e-05, "model_forward_time": 0.02549004554748535, "step": 27160 }, { "epoch": 4.144287109375e-05, "step": 27160, "training_step_time": 0.11022377014160156 }, { "epoch": 4.144439697265625e-05, "model_forward_time": 0.02590012550354004, "step": 27161 }, { "epoch": 4.144439697265625e-05, "step": 27161, "training_step_time": 0.10942292213439941 }, { "epoch": 4.14459228515625e-05, "model_forward_time": 0.025229215621948242, "step": 27162 }, { "epoch": 4.14459228515625e-05, "step": 27162, "training_step_time": 0.10913228988647461 }, { "epoch": 4.144744873046875e-05, "model_forward_time": 0.025436878204345703, "step": 27163 }, { "epoch": 4.144744873046875e-05, "step": 27163, "training_step_time": 0.1092681884765625 }, { "epoch": 4.1448974609375e-05, "model_forward_time": 0.025141477584838867, "step": 27164 }, { "epoch": 4.1448974609375e-05, "step": 27164, "training_step_time": 0.10697197914123535 }, { "epoch": 4.145050048828125e-05, "model_forward_time": 0.025154590606689453, "step": 27165 }, { "epoch": 4.145050048828125e-05, "step": 27165, "training_step_time": 0.11335539817810059 }, { "epoch": 4.14520263671875e-05, "model_forward_time": 0.025440454483032227, "step": 27166 }, { "epoch": 4.14520263671875e-05, "step": 27166, "training_step_time": 0.11067032814025879 }, { "epoch": 4.145355224609375e-05, "model_forward_time": 0.025561809539794922, "step": 27167 }, { "epoch": 4.145355224609375e-05, "step": 27167, "training_step_time": 0.10559821128845215 }, { "epoch": 4.1455078125e-05, "model_forward_time": 0.026412487030029297, "step": 27168 }, { "epoch": 4.1455078125e-05, "step": 27168, "training_step_time": 0.10989737510681152 }, { "epoch": 4.145660400390625e-05, "model_forward_time": 0.025352954864501953, "step": 27169 }, { "epoch": 4.145660400390625e-05, "step": 27169, "training_step_time": 0.18042898178100586 }, { "epoch": 4.14581298828125e-05, "grad_norm": 0.06102828308939934, "learning_rate": 2.413226423578696e-06, "loss": 0.0066, "step": 27170 }, { "epoch": 4.14581298828125e-05, "model_forward_time": 0.024439096450805664, "step": 27170 }, { "epoch": 4.14581298828125e-05, "step": 27170, "training_step_time": 0.13613390922546387 }, { "epoch": 4.145965576171875e-05, "model_forward_time": 0.024977684020996094, "step": 27171 }, { "epoch": 4.145965576171875e-05, "step": 27171, "training_step_time": 0.17462849617004395 }, { "epoch": 4.1461181640625e-05, "model_forward_time": 0.024787187576293945, "step": 27172 }, { "epoch": 4.1461181640625e-05, "step": 27172, "training_step_time": 0.21291637420654297 }, { "epoch": 4.146270751953125e-05, "model_forward_time": 0.024611473083496094, "step": 27173 }, { "epoch": 4.146270751953125e-05, "step": 27173, "training_step_time": 0.15426921844482422 }, { "epoch": 4.14642333984375e-05, "model_forward_time": 0.024663209915161133, "step": 27174 }, { "epoch": 4.14642333984375e-05, "step": 27174, "training_step_time": 0.14537358283996582 }, { "epoch": 4.146575927734375e-05, "model_forward_time": 0.024399518966674805, "step": 27175 }, { "epoch": 4.146575927734375e-05, "step": 27175, "training_step_time": 0.2080399990081787 }, { "epoch": 4.146728515625e-05, "model_forward_time": 0.024671554565429688, "step": 27176 }, { "epoch": 4.146728515625e-05, "step": 27176, "training_step_time": 0.13054990768432617 }, { "epoch": 4.146881103515625e-05, "model_forward_time": 0.02411341667175293, "step": 27177 }, { "epoch": 4.146881103515625e-05, "step": 27177, "training_step_time": 0.11742734909057617 }, { "epoch": 4.14703369140625e-05, "model_forward_time": 0.0253298282623291, "step": 27178 }, { "epoch": 4.14703369140625e-05, "step": 27178, "training_step_time": 0.11133432388305664 }, { "epoch": 4.147186279296875e-05, "model_forward_time": 0.02551555633544922, "step": 27179 }, { "epoch": 4.147186279296875e-05, "step": 27179, "training_step_time": 0.11390447616577148 }, { "epoch": 4.1473388671875e-05, "grad_norm": 0.3329187333583832, "learning_rate": 2.3963392660775575e-06, "loss": 0.0086, "step": 27180 }, { "epoch": 4.1473388671875e-05, "model_forward_time": 0.024847984313964844, "step": 27180 }, { "epoch": 4.1473388671875e-05, "step": 27180, "training_step_time": 0.10757017135620117 }, { "epoch": 4.147491455078125e-05, "model_forward_time": 0.02534008026123047, "step": 27181 }, { "epoch": 4.147491455078125e-05, "step": 27181, "training_step_time": 0.11074352264404297 }, { "epoch": 4.14764404296875e-05, "model_forward_time": 0.02534937858581543, "step": 27182 }, { "epoch": 4.14764404296875e-05, "step": 27182, "training_step_time": 0.10532093048095703 }, { "epoch": 4.147796630859375e-05, "model_forward_time": 0.025316715240478516, "step": 27183 }, { "epoch": 4.147796630859375e-05, "step": 27183, "training_step_time": 0.10759496688842773 }, { "epoch": 4.14794921875e-05, "model_forward_time": 0.025157690048217773, "step": 27184 }, { "epoch": 4.14794921875e-05, "step": 27184, "training_step_time": 0.10499048233032227 }, { "epoch": 4.148101806640625e-05, "model_forward_time": 0.025734663009643555, "step": 27185 }, { "epoch": 4.148101806640625e-05, "step": 27185, "training_step_time": 0.10912466049194336 }, { "epoch": 4.14825439453125e-05, "model_forward_time": 0.027640342712402344, "step": 27186 }, { "epoch": 4.14825439453125e-05, "step": 27186, "training_step_time": 0.10944414138793945 }, { "epoch": 4.148406982421875e-05, "model_forward_time": 0.025097370147705078, "step": 27187 }, { "epoch": 4.148406982421875e-05, "step": 27187, "training_step_time": 0.13417506217956543 }, { "epoch": 4.1485595703125e-05, "model_forward_time": 0.02521038055419922, "step": 27188 }, { "epoch": 4.1485595703125e-05, "step": 27188, "training_step_time": 0.11906552314758301 }, { "epoch": 4.148712158203125e-05, "model_forward_time": 0.025262832641601562, "step": 27189 }, { "epoch": 4.148712158203125e-05, "step": 27189, "training_step_time": 0.11391377449035645 }, { "epoch": 4.14886474609375e-05, "grad_norm": 0.11542549729347229, "learning_rate": 2.379509951512937e-06, "loss": 0.0057, "step": 27190 }, { "epoch": 4.14886474609375e-05, "model_forward_time": 0.02521228790283203, "step": 27190 }, { "epoch": 4.14886474609375e-05, "step": 27190, "training_step_time": 0.10749006271362305 }, { "epoch": 4.149017333984375e-05, "model_forward_time": 0.025402545928955078, "step": 27191 }, { "epoch": 4.149017333984375e-05, "step": 27191, "training_step_time": 0.11640119552612305 }, { "epoch": 4.149169921875e-05, "model_forward_time": 0.025147438049316406, "step": 27192 }, { "epoch": 4.149169921875e-05, "step": 27192, "training_step_time": 0.10838079452514648 }, { "epoch": 4.149322509765625e-05, "model_forward_time": 0.025116443634033203, "step": 27193 }, { "epoch": 4.149322509765625e-05, "step": 27193, "training_step_time": 0.13714265823364258 }, { "epoch": 4.14947509765625e-05, "model_forward_time": 0.025503158569335938, "step": 27194 }, { "epoch": 4.14947509765625e-05, "step": 27194, "training_step_time": 0.10992598533630371 }, { "epoch": 4.149627685546875e-05, "model_forward_time": 0.02547144889831543, "step": 27195 }, { "epoch": 4.149627685546875e-05, "step": 27195, "training_step_time": 0.1053018569946289 }, { "epoch": 4.1497802734375e-05, "model_forward_time": 0.0257718563079834, "step": 27196 }, { "epoch": 4.1497802734375e-05, "step": 27196, "training_step_time": 0.10913300514221191 }, { "epoch": 4.149932861328125e-05, "model_forward_time": 0.025367021560668945, "step": 27197 }, { "epoch": 4.149932861328125e-05, "step": 27197, "training_step_time": 0.10893106460571289 }, { "epoch": 4.15008544921875e-05, "model_forward_time": 0.025524139404296875, "step": 27198 }, { "epoch": 4.15008544921875e-05, "step": 27198, "training_step_time": 0.1048281192779541 }, { "epoch": 4.150238037109375e-05, "model_forward_time": 0.02491593360900879, "step": 27199 }, { "epoch": 4.150238037109375e-05, "step": 27199, "training_step_time": 0.10783505439758301 }, { "epoch": 4.150390625e-05, "grad_norm": 0.19436855614185333, "learning_rate": 2.362738500334055e-06, "loss": 0.0118, "step": 27200 }, { "epoch": 4.150390625e-05, "model_forward_time": 0.024727344512939453, "step": 27200 }, { "epoch": 4.150390625e-05, "step": 27200, "training_step_time": 0.10413098335266113 }, { "epoch": 4.150543212890625e-05, "model_forward_time": 0.025806427001953125, "step": 27201 }, { "epoch": 4.150543212890625e-05, "step": 27201, "training_step_time": 0.10806012153625488 }, { "epoch": 4.15069580078125e-05, "model_forward_time": 0.025191783905029297, "step": 27202 }, { "epoch": 4.15069580078125e-05, "step": 27202, "training_step_time": 0.10746145248413086 }, { "epoch": 4.150848388671875e-05, "model_forward_time": 0.025487661361694336, "step": 27203 }, { "epoch": 4.150848388671875e-05, "step": 27203, "training_step_time": 0.1034233570098877 }, { "epoch": 4.1510009765625e-05, "model_forward_time": 0.0242464542388916, "step": 27204 }, { "epoch": 4.1510009765625e-05, "step": 27204, "training_step_time": 0.11036014556884766 }, { "epoch": 4.151153564453125e-05, "model_forward_time": 0.02443408966064453, "step": 27205 }, { "epoch": 4.151153564453125e-05, "step": 27205, "training_step_time": 0.10556626319885254 }, { "epoch": 4.15130615234375e-05, "model_forward_time": 0.024636268615722656, "step": 27206 }, { "epoch": 4.15130615234375e-05, "step": 27206, "training_step_time": 0.13117241859436035 }, { "epoch": 4.151458740234375e-05, "model_forward_time": 0.02502274513244629, "step": 27207 }, { "epoch": 4.151458740234375e-05, "step": 27207, "training_step_time": 0.15232491493225098 }, { "epoch": 4.151611328125e-05, "model_forward_time": 0.024282455444335938, "step": 27208 }, { "epoch": 4.151611328125e-05, "step": 27208, "training_step_time": 0.12797212600708008 }, { "epoch": 4.151763916015625e-05, "model_forward_time": 0.024542808532714844, "step": 27209 }, { "epoch": 4.151763916015625e-05, "step": 27209, "training_step_time": 0.12417173385620117 }, { "epoch": 4.15191650390625e-05, "grad_norm": 0.05312748998403549, "learning_rate": 2.3460249329197824e-06, "loss": 0.0032, "step": 27210 }, { "epoch": 4.15191650390625e-05, "model_forward_time": 0.02474665641784668, "step": 27210 }, { "epoch": 4.15191650390625e-05, "step": 27210, "training_step_time": 0.1182100772857666 }, { "epoch": 4.152069091796875e-05, "model_forward_time": 0.025084495544433594, "step": 27211 }, { "epoch": 4.152069091796875e-05, "step": 27211, "training_step_time": 0.11852192878723145 }, { "epoch": 4.1522216796875e-05, "model_forward_time": 0.0252072811126709, "step": 27212 }, { "epoch": 4.1522216796875e-05, "step": 27212, "training_step_time": 0.1143496036529541 }, { "epoch": 4.152374267578125e-05, "model_forward_time": 0.025771379470825195, "step": 27213 }, { "epoch": 4.152374267578125e-05, "step": 27213, "training_step_time": 0.11190104484558105 }, { "epoch": 4.15252685546875e-05, "model_forward_time": 0.024744510650634766, "step": 27214 }, { "epoch": 4.15252685546875e-05, "step": 27214, "training_step_time": 0.1457371711730957 }, { "epoch": 4.152679443359375e-05, "model_forward_time": 0.02526402473449707, "step": 27215 }, { "epoch": 4.152679443359375e-05, "step": 27215, "training_step_time": 0.15436482429504395 }, { "epoch": 4.15283203125e-05, "model_forward_time": 0.02505207061767578, "step": 27216 }, { "epoch": 4.15283203125e-05, "step": 27216, "training_step_time": 0.21744155883789062 }, { "epoch": 4.152984619140625e-05, "model_forward_time": 0.0249631404876709, "step": 27217 }, { "epoch": 4.152984619140625e-05, "step": 27217, "training_step_time": 0.19009661674499512 }, { "epoch": 4.15313720703125e-05, "model_forward_time": 0.0244290828704834, "step": 27218 }, { "epoch": 4.15313720703125e-05, "step": 27218, "training_step_time": 0.14074063301086426 }, { "epoch": 4.153289794921875e-05, "model_forward_time": 0.024813413619995117, "step": 27219 }, { "epoch": 4.153289794921875e-05, "step": 27219, "training_step_time": 0.14382076263427734 }, { "epoch": 4.1534423828125e-05, "grad_norm": 0.08608946949243546, "learning_rate": 2.3293692695787017e-06, "loss": 0.003, "step": 27220 }, { "epoch": 4.1534423828125e-05, "model_forward_time": 0.024718284606933594, "step": 27220 }, { "epoch": 4.1534423828125e-05, "step": 27220, "training_step_time": 0.21212434768676758 }, { "epoch": 4.153594970703125e-05, "model_forward_time": 0.024696826934814453, "step": 27221 }, { "epoch": 4.153594970703125e-05, "step": 27221, "training_step_time": 0.11261200904846191 }, { "epoch": 4.15374755859375e-05, "model_forward_time": 0.024791240692138672, "step": 27222 }, { "epoch": 4.15374755859375e-05, "step": 27222, "training_step_time": 0.10515213012695312 }, { "epoch": 4.153900146484375e-05, "model_forward_time": 0.02500438690185547, "step": 27223 }, { "epoch": 4.153900146484375e-05, "step": 27223, "training_step_time": 0.10395288467407227 }, { "epoch": 4.154052734375e-05, "model_forward_time": 0.025331974029541016, "step": 27224 }, { "epoch": 4.154052734375e-05, "step": 27224, "training_step_time": 0.10754251480102539 }, { "epoch": 4.154205322265625e-05, "model_forward_time": 0.025434017181396484, "step": 27225 }, { "epoch": 4.154205322265625e-05, "step": 27225, "training_step_time": 0.10514974594116211 }, { "epoch": 4.15435791015625e-05, "model_forward_time": 0.02901768684387207, "step": 27226 }, { "epoch": 4.15435791015625e-05, "step": 27226, "training_step_time": 0.10860252380371094 }, { "epoch": 4.154510498046875e-05, "model_forward_time": 0.025676965713500977, "step": 27227 }, { "epoch": 4.154510498046875e-05, "step": 27227, "training_step_time": 0.10698628425598145 }, { "epoch": 4.1546630859375e-05, "model_forward_time": 0.02530837059020996, "step": 27228 }, { "epoch": 4.1546630859375e-05, "step": 27228, "training_step_time": 0.10467171669006348 }, { "epoch": 4.154815673828125e-05, "model_forward_time": 0.024173259735107422, "step": 27229 }, { "epoch": 4.154815673828125e-05, "step": 27229, "training_step_time": 0.10990262031555176 }, { "epoch": 4.15496826171875e-05, "grad_norm": 0.05302784964442253, "learning_rate": 2.3127715305490073e-06, "loss": 0.0043, "step": 27230 }, { "epoch": 4.15496826171875e-05, "model_forward_time": 0.02562117576599121, "step": 27230 }, { "epoch": 4.15496826171875e-05, "step": 27230, "training_step_time": 0.10649919509887695 }, { "epoch": 4.155120849609375e-05, "model_forward_time": 0.025318622589111328, "step": 27231 }, { "epoch": 4.155120849609375e-05, "step": 27231, "training_step_time": 0.1703331470489502 }, { "epoch": 4.1552734375e-05, "model_forward_time": 0.024808645248413086, "step": 27232 }, { "epoch": 4.1552734375e-05, "step": 27232, "training_step_time": 0.17896413803100586 }, { "epoch": 4.155426025390625e-05, "model_forward_time": 0.02453899383544922, "step": 27233 }, { "epoch": 4.155426025390625e-05, "step": 27233, "training_step_time": 0.18662500381469727 }, { "epoch": 4.15557861328125e-05, "model_forward_time": 0.02443218231201172, "step": 27234 }, { "epoch": 4.15557861328125e-05, "step": 27234, "training_step_time": 0.16045784950256348 }, { "epoch": 4.155731201171875e-05, "model_forward_time": 0.02357935905456543, "step": 27235 }, { "epoch": 4.155731201171875e-05, "step": 27235, "training_step_time": 0.1817488670349121 }, { "epoch": 4.1558837890625e-05, "model_forward_time": 0.02414107322692871, "step": 27236 }, { "epoch": 4.1558837890625e-05, "step": 27236, "training_step_time": 0.10860633850097656 }, { "epoch": 4.156036376953125e-05, "model_forward_time": 0.024489641189575195, "step": 27237 }, { "epoch": 4.156036376953125e-05, "step": 27237, "training_step_time": 0.11304378509521484 }, { "epoch": 4.15618896484375e-05, "model_forward_time": 0.02534627914428711, "step": 27238 }, { "epoch": 4.15618896484375e-05, "step": 27238, "training_step_time": 0.10930371284484863 }, { "epoch": 4.156341552734375e-05, "model_forward_time": 0.02501201629638672, "step": 27239 }, { "epoch": 4.156341552734375e-05, "step": 27239, "training_step_time": 0.10605883598327637 }, { "epoch": 4.156494140625e-05, "grad_norm": 0.042341746389865875, "learning_rate": 2.296231735998511e-06, "loss": 0.0028, "step": 27240 }, { "epoch": 4.156494140625e-05, "model_forward_time": 0.025308847427368164, "step": 27240 }, { "epoch": 4.156494140625e-05, "step": 27240, "training_step_time": 0.10765624046325684 }, { "epoch": 4.156646728515625e-05, "model_forward_time": 0.02490544319152832, "step": 27241 }, { "epoch": 4.156646728515625e-05, "step": 27241, "training_step_time": 0.10683536529541016 }, { "epoch": 4.15679931640625e-05, "model_forward_time": 0.025323867797851562, "step": 27242 }, { "epoch": 4.15679931640625e-05, "step": 27242, "training_step_time": 0.11064696311950684 }, { "epoch": 4.156951904296875e-05, "model_forward_time": 0.02522444725036621, "step": 27243 }, { "epoch": 4.156951904296875e-05, "step": 27243, "training_step_time": 0.10591602325439453 }, { "epoch": 4.1571044921875e-05, "model_forward_time": 0.02507162094116211, "step": 27244 }, { "epoch": 4.1571044921875e-05, "step": 27244, "training_step_time": 0.10682129859924316 }, { "epoch": 4.157257080078125e-05, "model_forward_time": 0.025344133377075195, "step": 27245 }, { "epoch": 4.157257080078125e-05, "step": 27245, "training_step_time": 0.10580635070800781 }, { "epoch": 4.15740966796875e-05, "model_forward_time": 0.02527761459350586, "step": 27246 }, { "epoch": 4.15740966796875e-05, "step": 27246, "training_step_time": 0.10463190078735352 }, { "epoch": 4.157562255859375e-05, "model_forward_time": 0.024954795837402344, "step": 27247 }, { "epoch": 4.157562255859375e-05, "step": 27247, "training_step_time": 0.10550522804260254 }, { "epoch": 4.15771484375e-05, "model_forward_time": 0.025226831436157227, "step": 27248 }, { "epoch": 4.15771484375e-05, "step": 27248, "training_step_time": 0.10754179954528809 }, { "epoch": 4.157867431640625e-05, "model_forward_time": 0.025364398956298828, "step": 27249 }, { "epoch": 4.157867431640625e-05, "step": 27249, "training_step_time": 0.10554623603820801 }, { "epoch": 4.15802001953125e-05, "grad_norm": 0.19960153102874756, "learning_rate": 2.2797499060246253e-06, "loss": 0.0147, "step": 27250 }, { "epoch": 4.15802001953125e-05, "model_forward_time": 0.024859189987182617, "step": 27250 }, { "epoch": 4.15802001953125e-05, "step": 27250, "training_step_time": 0.1105506420135498 }, { "epoch": 4.158172607421875e-05, "model_forward_time": 0.024476289749145508, "step": 27251 }, { "epoch": 4.158172607421875e-05, "step": 27251, "training_step_time": 0.1045844554901123 }, { "epoch": 4.1583251953125e-05, "model_forward_time": 0.02500009536743164, "step": 27252 }, { "epoch": 4.1583251953125e-05, "step": 27252, "training_step_time": 0.10751843452453613 }, { "epoch": 4.158477783203125e-05, "model_forward_time": 0.025313615798950195, "step": 27253 }, { "epoch": 4.158477783203125e-05, "step": 27253, "training_step_time": 0.1093604564666748 }, { "epoch": 4.15863037109375e-05, "model_forward_time": 0.02519989013671875, "step": 27254 }, { "epoch": 4.15863037109375e-05, "step": 27254, "training_step_time": 0.10562944412231445 }, { "epoch": 4.158782958984375e-05, "model_forward_time": 0.025484800338745117, "step": 27255 }, { "epoch": 4.158782958984375e-05, "step": 27255, "training_step_time": 0.10617184638977051 }, { "epoch": 4.158935546875e-05, "model_forward_time": 0.02481842041015625, "step": 27256 }, { "epoch": 4.158935546875e-05, "step": 27256, "training_step_time": 0.10778450965881348 }, { "epoch": 4.159088134765625e-05, "model_forward_time": 0.02503180503845215, "step": 27257 }, { "epoch": 4.159088134765625e-05, "step": 27257, "training_step_time": 0.11006975173950195 }, { "epoch": 4.15924072265625e-05, "model_forward_time": 0.026149988174438477, "step": 27258 }, { "epoch": 4.15924072265625e-05, "step": 27258, "training_step_time": 0.10747838020324707 }, { "epoch": 4.159393310546875e-05, "model_forward_time": 0.025532245635986328, "step": 27259 }, { "epoch": 4.159393310546875e-05, "step": 27259, "training_step_time": 0.14352846145629883 }, { "epoch": 4.1595458984375e-05, "grad_norm": 0.09094958007335663, "learning_rate": 2.263326060654336e-06, "loss": 0.0025, "step": 27260 }, { "epoch": 4.1595458984375e-05, "model_forward_time": 0.024706125259399414, "step": 27260 }, { "epoch": 4.1595458984375e-05, "step": 27260, "training_step_time": 0.15816569328308105 }, { "epoch": 4.159698486328125e-05, "model_forward_time": 0.024568796157836914, "step": 27261 }, { "epoch": 4.159698486328125e-05, "step": 27261, "training_step_time": 0.15113282203674316 }, { "epoch": 4.15985107421875e-05, "model_forward_time": 0.024058818817138672, "step": 27262 }, { "epoch": 4.15985107421875e-05, "step": 27262, "training_step_time": 0.1407921314239502 }, { "epoch": 4.160003662109375e-05, "model_forward_time": 0.02455878257751465, "step": 27263 }, { "epoch": 4.160003662109375e-05, "step": 27263, "training_step_time": 0.15768003463745117 }, { "epoch": 4.16015625e-05, "model_forward_time": 0.0253140926361084, "step": 27264 }, { "epoch": 4.16015625e-05, "step": 27264, "training_step_time": 0.15874528884887695 }, { "epoch": 4.160308837890625e-05, "model_forward_time": 0.024596452713012695, "step": 27265 }, { "epoch": 4.160308837890625e-05, "step": 27265, "training_step_time": 0.16185450553894043 }, { "epoch": 4.16046142578125e-05, "model_forward_time": 0.02475285530090332, "step": 27266 }, { "epoch": 4.16046142578125e-05, "step": 27266, "training_step_time": 0.11216068267822266 }, { "epoch": 4.160614013671875e-05, "model_forward_time": 0.02438640594482422, "step": 27267 }, { "epoch": 4.160614013671875e-05, "step": 27267, "training_step_time": 0.10574841499328613 }, { "epoch": 4.1607666015625e-05, "model_forward_time": 0.025110483169555664, "step": 27268 }, { "epoch": 4.1607666015625e-05, "step": 27268, "training_step_time": 0.10468578338623047 }, { "epoch": 4.160919189453125e-05, "model_forward_time": 0.025249958038330078, "step": 27269 }, { "epoch": 4.160919189453125e-05, "step": 27269, "training_step_time": 0.10563182830810547 }, { "epoch": 4.16107177734375e-05, "grad_norm": 0.05373050272464752, "learning_rate": 2.2469602198441573e-06, "loss": 0.0017, "step": 27270 }, { "epoch": 4.16107177734375e-05, "model_forward_time": 0.025408506393432617, "step": 27270 }, { "epoch": 4.16107177734375e-05, "step": 27270, "training_step_time": 0.10768294334411621 }, { "epoch": 4.161224365234375e-05, "model_forward_time": 0.02541208267211914, "step": 27271 }, { "epoch": 4.161224365234375e-05, "step": 27271, "training_step_time": 0.10850858688354492 }, { "epoch": 4.161376953125e-05, "model_forward_time": 0.025004863739013672, "step": 27272 }, { "epoch": 4.161376953125e-05, "step": 27272, "training_step_time": 0.1045832633972168 }, { "epoch": 4.161529541015625e-05, "model_forward_time": 0.02532649040222168, "step": 27273 }, { "epoch": 4.161529541015625e-05, "step": 27273, "training_step_time": 0.13447165489196777 }, { "epoch": 4.16168212890625e-05, "model_forward_time": 0.02423405647277832, "step": 27274 }, { "epoch": 4.16168212890625e-05, "step": 27274, "training_step_time": 0.17017817497253418 }, { "epoch": 4.161834716796875e-05, "model_forward_time": 0.023329734802246094, "step": 27275 }, { "epoch": 4.161834716796875e-05, "step": 27275, "training_step_time": 0.16400837898254395 }, { "epoch": 4.1619873046875e-05, "model_forward_time": 0.02439594268798828, "step": 27276 }, { "epoch": 4.1619873046875e-05, "step": 27276, "training_step_time": 0.1668076515197754 }, { "epoch": 4.162139892578125e-05, "model_forward_time": 0.024407148361206055, "step": 27277 }, { "epoch": 4.162139892578125e-05, "step": 27277, "training_step_time": 0.16244220733642578 }, { "epoch": 4.16229248046875e-05, "model_forward_time": 0.024383544921875, "step": 27278 }, { "epoch": 4.16229248046875e-05, "step": 27278, "training_step_time": 0.16250157356262207 }, { "epoch": 4.162445068359375e-05, "model_forward_time": 0.023990869522094727, "step": 27279 }, { "epoch": 4.162445068359375e-05, "step": 27279, "training_step_time": 0.1788322925567627 }, { "epoch": 4.16259765625e-05, "grad_norm": 0.13063377141952515, "learning_rate": 2.230652403480127e-06, "loss": 0.0037, "step": 27280 }, { "epoch": 4.16259765625e-05, "model_forward_time": 0.024212121963500977, "step": 27280 }, { "epoch": 4.16259765625e-05, "step": 27280, "training_step_time": 0.1295943260192871 }, { "epoch": 4.162750244140625e-05, "model_forward_time": 0.02401900291442871, "step": 27281 }, { "epoch": 4.162750244140625e-05, "step": 27281, "training_step_time": 0.1951618194580078 }, { "epoch": 4.16290283203125e-05, "model_forward_time": 0.024231433868408203, "step": 27282 }, { "epoch": 4.16290283203125e-05, "step": 27282, "training_step_time": 0.11963868141174316 }, { "epoch": 4.163055419921875e-05, "model_forward_time": 0.024352312088012695, "step": 27283 }, { "epoch": 4.163055419921875e-05, "step": 27283, "training_step_time": 0.116668701171875 }, { "epoch": 4.1632080078125e-05, "model_forward_time": 0.02626776695251465, "step": 27284 }, { "epoch": 4.1632080078125e-05, "step": 27284, "training_step_time": 0.11092948913574219 }, { "epoch": 4.163360595703125e-05, "model_forward_time": 0.02512645721435547, "step": 27285 }, { "epoch": 4.163360595703125e-05, "step": 27285, "training_step_time": 0.10993194580078125 }, { "epoch": 4.16351318359375e-05, "model_forward_time": 0.025506973266601562, "step": 27286 }, { "epoch": 4.16351318359375e-05, "step": 27286, "training_step_time": 0.11314678192138672 }, { "epoch": 4.163665771484375e-05, "model_forward_time": 0.02520132064819336, "step": 27287 }, { "epoch": 4.163665771484375e-05, "step": 27287, "training_step_time": 0.10960865020751953 }, { "epoch": 4.163818359375e-05, "model_forward_time": 0.024880170822143555, "step": 27288 }, { "epoch": 4.163818359375e-05, "step": 27288, "training_step_time": 0.11181998252868652 }, { "epoch": 4.163970947265625e-05, "model_forward_time": 0.02557969093322754, "step": 27289 }, { "epoch": 4.163970947265625e-05, "step": 27289, "training_step_time": 0.10891604423522949 }, { "epoch": 4.16412353515625e-05, "grad_norm": 0.12735968828201294, "learning_rate": 2.214402631377782e-06, "loss": 0.0024, "step": 27290 }, { "epoch": 4.16412353515625e-05, "model_forward_time": 0.028454065322875977, "step": 27290 }, { "epoch": 4.16412353515625e-05, "step": 27290, "training_step_time": 0.11230134963989258 }, { "epoch": 4.164276123046875e-05, "model_forward_time": 0.025388240814208984, "step": 27291 }, { "epoch": 4.164276123046875e-05, "step": 27291, "training_step_time": 0.10927891731262207 }, { "epoch": 4.1644287109375e-05, "model_forward_time": 0.026097774505615234, "step": 27292 }, { "epoch": 4.1644287109375e-05, "step": 27292, "training_step_time": 0.11127090454101562 }, { "epoch": 4.164581298828125e-05, "model_forward_time": 0.025423765182495117, "step": 27293 }, { "epoch": 4.164581298828125e-05, "step": 27293, "training_step_time": 0.11099672317504883 }, { "epoch": 4.16473388671875e-05, "model_forward_time": 0.025485515594482422, "step": 27294 }, { "epoch": 4.16473388671875e-05, "step": 27294, "training_step_time": 0.1084287166595459 }, { "epoch": 4.164886474609375e-05, "model_forward_time": 0.025081634521484375, "step": 27295 }, { "epoch": 4.164886474609375e-05, "step": 27295, "training_step_time": 0.10566353797912598 }, { "epoch": 4.1650390625e-05, "model_forward_time": 0.025013208389282227, "step": 27296 }, { "epoch": 4.1650390625e-05, "step": 27296, "training_step_time": 0.10587263107299805 }, { "epoch": 4.165191650390625e-05, "model_forward_time": 0.025463581085205078, "step": 27297 }, { "epoch": 4.165191650390625e-05, "step": 27297, "training_step_time": 0.10672402381896973 }, { "epoch": 4.16534423828125e-05, "model_forward_time": 0.02516484260559082, "step": 27298 }, { "epoch": 4.16534423828125e-05, "step": 27298, "training_step_time": 0.10550856590270996 }, { "epoch": 4.165496826171875e-05, "model_forward_time": 0.025278329849243164, "step": 27299 }, { "epoch": 4.165496826171875e-05, "step": 27299, "training_step_time": 0.10809063911437988 }, { "epoch": 4.1656494140625e-05, "grad_norm": 0.05588368698954582, "learning_rate": 2.1982109232821178e-06, "loss": 0.0022, "step": 27300 }, { "epoch": 4.1656494140625e-05, "model_forward_time": 0.025710344314575195, "step": 27300 }, { "epoch": 4.1656494140625e-05, "step": 27300, "training_step_time": 0.10536336898803711 }, { "epoch": 4.165802001953125e-05, "model_forward_time": 0.027010679244995117, "step": 27301 }, { "epoch": 4.165802001953125e-05, "step": 27301, "training_step_time": 0.10754013061523438 }, { "epoch": 4.16595458984375e-05, "model_forward_time": 0.02555370330810547, "step": 27302 }, { "epoch": 4.16595458984375e-05, "step": 27302, "training_step_time": 0.10669493675231934 }, { "epoch": 4.166107177734375e-05, "model_forward_time": 0.025969266891479492, "step": 27303 }, { "epoch": 4.166107177734375e-05, "step": 27303, "training_step_time": 0.10797119140625 }, { "epoch": 4.166259765625e-05, "model_forward_time": 0.02422642707824707, "step": 27304 }, { "epoch": 4.166259765625e-05, "step": 27304, "training_step_time": 0.14526081085205078 }, { "epoch": 4.166412353515625e-05, "model_forward_time": 0.024912118911743164, "step": 27305 }, { "epoch": 4.166412353515625e-05, "step": 27305, "training_step_time": 0.16699457168579102 }, { "epoch": 4.16656494140625e-05, "model_forward_time": 0.02438950538635254, "step": 27306 }, { "epoch": 4.16656494140625e-05, "step": 27306, "training_step_time": 0.20656299591064453 }, { "epoch": 4.166717529296875e-05, "model_forward_time": 0.024440526962280273, "step": 27307 }, { "epoch": 4.166717529296875e-05, "step": 27307, "training_step_time": 0.15294170379638672 }, { "epoch": 4.1668701171875e-05, "model_forward_time": 0.024743080139160156, "step": 27308 }, { "epoch": 4.1668701171875e-05, "step": 27308, "training_step_time": 0.18179035186767578 }, { "epoch": 4.167022705078125e-05, "model_forward_time": 0.024389982223510742, "step": 27309 }, { "epoch": 4.167022705078125e-05, "step": 27309, "training_step_time": 0.1519324779510498 }, { "epoch": 4.16717529296875e-05, "grad_norm": 0.16500595211982727, "learning_rate": 2.1820772988676076e-06, "loss": 0.0054, "step": 27310 }, { "epoch": 4.16717529296875e-05, "model_forward_time": 0.024807214736938477, "step": 27310 }, { "epoch": 4.16717529296875e-05, "step": 27310, "training_step_time": 0.1231386661529541 }, { "epoch": 4.167327880859375e-05, "model_forward_time": 0.027429580688476562, "step": 27311 }, { "epoch": 4.167327880859375e-05, "step": 27311, "training_step_time": 0.10866975784301758 }, { "epoch": 4.16748046875e-05, "model_forward_time": 0.025394201278686523, "step": 27312 }, { "epoch": 4.16748046875e-05, "step": 27312, "training_step_time": 0.10531187057495117 }, { "epoch": 4.167633056640625e-05, "model_forward_time": 0.025202035903930664, "step": 27313 }, { "epoch": 4.167633056640625e-05, "step": 27313, "training_step_time": 0.10666823387145996 }, { "epoch": 4.16778564453125e-05, "model_forward_time": 0.02541041374206543, "step": 27314 }, { "epoch": 4.16778564453125e-05, "step": 27314, "training_step_time": 0.10874700546264648 }, { "epoch": 4.167938232421875e-05, "model_forward_time": 0.025158405303955078, "step": 27315 }, { "epoch": 4.167938232421875e-05, "step": 27315, "training_step_time": 0.10769200325012207 }, { "epoch": 4.1680908203125e-05, "model_forward_time": 0.026424407958984375, "step": 27316 }, { "epoch": 4.1680908203125e-05, "step": 27316, "training_step_time": 0.10939788818359375 }, { "epoch": 4.168243408203125e-05, "model_forward_time": 0.024980545043945312, "step": 27317 }, { "epoch": 4.168243408203125e-05, "step": 27317, "training_step_time": 0.10655760765075684 }, { "epoch": 4.16839599609375e-05, "model_forward_time": 0.02508687973022461, "step": 27318 }, { "epoch": 4.16839599609375e-05, "step": 27318, "training_step_time": 0.10556173324584961 }, { "epoch": 4.168548583984375e-05, "model_forward_time": 0.025537490844726562, "step": 27319 }, { "epoch": 4.168548583984375e-05, "step": 27319, "training_step_time": 0.10593318939208984 }, { "epoch": 4.168701171875e-05, "grad_norm": 0.06887489557266235, "learning_rate": 2.1660017777381135e-06, "loss": 0.0041, "step": 27320 }, { "epoch": 4.168701171875e-05, "model_forward_time": 0.025054454803466797, "step": 27320 }, { "epoch": 4.168701171875e-05, "step": 27320, "training_step_time": 0.10872149467468262 }, { "epoch": 4.168853759765625e-05, "model_forward_time": 0.024283409118652344, "step": 27321 }, { "epoch": 4.168853759765625e-05, "step": 27321, "training_step_time": 0.10677909851074219 }, { "epoch": 4.16900634765625e-05, "model_forward_time": 0.02422189712524414, "step": 27322 }, { "epoch": 4.16900634765625e-05, "step": 27322, "training_step_time": 0.10958290100097656 }, { "epoch": 4.169158935546875e-05, "model_forward_time": 0.02456951141357422, "step": 27323 }, { "epoch": 4.169158935546875e-05, "step": 27323, "training_step_time": 0.16224956512451172 }, { "epoch": 4.1693115234375e-05, "model_forward_time": 0.024704456329345703, "step": 27324 }, { "epoch": 4.1693115234375e-05, "step": 27324, "training_step_time": 0.15429282188415527 }, { "epoch": 4.169464111328125e-05, "model_forward_time": 0.024566173553466797, "step": 27325 }, { "epoch": 4.169464111328125e-05, "step": 27325, "training_step_time": 0.11595964431762695 }, { "epoch": 4.16961669921875e-05, "model_forward_time": 0.024733304977416992, "step": 27326 }, { "epoch": 4.16961669921875e-05, "step": 27326, "training_step_time": 0.1186983585357666 }, { "epoch": 4.169769287109375e-05, "model_forward_time": 0.025291919708251953, "step": 27327 }, { "epoch": 4.169769287109375e-05, "step": 27327, "training_step_time": 0.11393404006958008 }, { "epoch": 4.169921875e-05, "model_forward_time": 0.02526998519897461, "step": 27328 }, { "epoch": 4.169921875e-05, "step": 27328, "training_step_time": 0.11274313926696777 }, { "epoch": 4.170074462890625e-05, "model_forward_time": 0.024403095245361328, "step": 27329 }, { "epoch": 4.170074462890625e-05, "step": 27329, "training_step_time": 0.18658804893493652 }, { "epoch": 4.17022705078125e-05, "grad_norm": 0.07587850093841553, "learning_rate": 2.149984379426906e-06, "loss": 0.0019, "step": 27330 }, { "epoch": 4.17022705078125e-05, "model_forward_time": 0.024632692337036133, "step": 27330 }, { "epoch": 4.17022705078125e-05, "step": 27330, "training_step_time": 0.10962820053100586 }, { "epoch": 4.170379638671875e-05, "model_forward_time": 0.024538278579711914, "step": 27331 }, { "epoch": 4.170379638671875e-05, "step": 27331, "training_step_time": 0.11240458488464355 }, { "epoch": 4.1705322265625e-05, "model_forward_time": 0.025141000747680664, "step": 27332 }, { "epoch": 4.1705322265625e-05, "step": 27332, "training_step_time": 0.10818266868591309 }, { "epoch": 4.170684814453125e-05, "model_forward_time": 0.025443077087402344, "step": 27333 }, { "epoch": 4.170684814453125e-05, "step": 27333, "training_step_time": 0.10554313659667969 }, { "epoch": 4.17083740234375e-05, "model_forward_time": 0.025351524353027344, "step": 27334 }, { "epoch": 4.17083740234375e-05, "step": 27334, "training_step_time": 0.10863184928894043 }, { "epoch": 4.170989990234375e-05, "model_forward_time": 0.02562403678894043, "step": 27335 }, { "epoch": 4.170989990234375e-05, "step": 27335, "training_step_time": 0.11043453216552734 }, { "epoch": 4.171142578125e-05, "model_forward_time": 0.025097131729125977, "step": 27336 }, { "epoch": 4.171142578125e-05, "step": 27336, "training_step_time": 0.10755085945129395 }, { "epoch": 4.171295166015625e-05, "model_forward_time": 0.025231361389160156, "step": 27337 }, { "epoch": 4.171295166015625e-05, "step": 27337, "training_step_time": 0.11263108253479004 }, { "epoch": 4.17144775390625e-05, "model_forward_time": 0.025311708450317383, "step": 27338 }, { "epoch": 4.17144775390625e-05, "step": 27338, "training_step_time": 0.10808801651000977 }, { "epoch": 4.171600341796875e-05, "model_forward_time": 0.0250396728515625, "step": 27339 }, { "epoch": 4.171600341796875e-05, "step": 27339, "training_step_time": 0.1073613166809082 }, { "epoch": 4.1717529296875e-05, "grad_norm": 0.05957550182938576, "learning_rate": 2.134025123396638e-06, "loss": 0.0037, "step": 27340 }, { "epoch": 4.1717529296875e-05, "model_forward_time": 0.025179147720336914, "step": 27340 }, { "epoch": 4.1717529296875e-05, "step": 27340, "training_step_time": 0.10913562774658203 }, { "epoch": 4.171905517578125e-05, "model_forward_time": 0.02502608299255371, "step": 27341 }, { "epoch": 4.171905517578125e-05, "step": 27341, "training_step_time": 0.10604643821716309 }, { "epoch": 4.17205810546875e-05, "model_forward_time": 0.025432348251342773, "step": 27342 }, { "epoch": 4.17205810546875e-05, "step": 27342, "training_step_time": 0.1089167594909668 }, { "epoch": 4.172210693359375e-05, "model_forward_time": 0.02519512176513672, "step": 27343 }, { "epoch": 4.172210693359375e-05, "step": 27343, "training_step_time": 0.10561847686767578 }, { "epoch": 4.17236328125e-05, "model_forward_time": 0.025223731994628906, "step": 27344 }, { "epoch": 4.17236328125e-05, "step": 27344, "training_step_time": 0.10542917251586914 }, { "epoch": 4.172515869140625e-05, "model_forward_time": 0.025231599807739258, "step": 27345 }, { "epoch": 4.172515869140625e-05, "step": 27345, "training_step_time": 0.10626411437988281 }, { "epoch": 4.17266845703125e-05, "model_forward_time": 0.025161266326904297, "step": 27346 }, { "epoch": 4.17266845703125e-05, "step": 27346, "training_step_time": 0.10964298248291016 }, { "epoch": 4.172821044921875e-05, "model_forward_time": 0.025211572647094727, "step": 27347 }, { "epoch": 4.172821044921875e-05, "step": 27347, "training_step_time": 0.1046903133392334 }, { "epoch": 4.1729736328125e-05, "model_forward_time": 0.02510547637939453, "step": 27348 }, { "epoch": 4.1729736328125e-05, "step": 27348, "training_step_time": 0.10929656028747559 }, { "epoch": 4.173126220703125e-05, "model_forward_time": 0.025201797485351562, "step": 27349 }, { "epoch": 4.173126220703125e-05, "step": 27349, "training_step_time": 0.10480785369873047 }, { "epoch": 4.17327880859375e-05, "grad_norm": 0.05198904499411583, "learning_rate": 2.118124029039309e-06, "loss": 0.0075, "step": 27350 }, { "epoch": 4.17327880859375e-05, "model_forward_time": 0.025602340698242188, "step": 27350 }, { "epoch": 4.17327880859375e-05, "step": 27350, "training_step_time": 0.15978789329528809 }, { "epoch": 4.173431396484375e-05, "model_forward_time": 0.024624347686767578, "step": 27351 }, { "epoch": 4.173431396484375e-05, "step": 27351, "training_step_time": 0.16864562034606934 }, { "epoch": 4.173583984375e-05, "model_forward_time": 0.024357080459594727, "step": 27352 }, { "epoch": 4.173583984375e-05, "step": 27352, "training_step_time": 0.18687701225280762 }, { "epoch": 4.173736572265625e-05, "model_forward_time": 0.024384737014770508, "step": 27353 }, { "epoch": 4.173736572265625e-05, "step": 27353, "training_step_time": 0.16687655448913574 }, { "epoch": 4.17388916015625e-05, "model_forward_time": 0.024014949798583984, "step": 27354 }, { "epoch": 4.17388916015625e-05, "step": 27354, "training_step_time": 0.19188261032104492 }, { "epoch": 4.174041748046875e-05, "model_forward_time": 0.024992942810058594, "step": 27355 }, { "epoch": 4.174041748046875e-05, "step": 27355, "training_step_time": 0.13675403594970703 }, { "epoch": 4.1741943359375e-05, "model_forward_time": 0.024609088897705078, "step": 27356 }, { "epoch": 4.1741943359375e-05, "step": 27356, "training_step_time": 0.21551728248596191 }, { "epoch": 4.174346923828125e-05, "model_forward_time": 0.024300813674926758, "step": 27357 }, { "epoch": 4.174346923828125e-05, "step": 27357, "training_step_time": 0.11113977432250977 }, { "epoch": 4.17449951171875e-05, "model_forward_time": 0.023906230926513672, "step": 27358 }, { "epoch": 4.17449951171875e-05, "step": 27358, "training_step_time": 0.10847210884094238 }, { "epoch": 4.174652099609375e-05, "model_forward_time": 0.025197505950927734, "step": 27359 }, { "epoch": 4.174652099609375e-05, "step": 27359, "training_step_time": 0.10991692543029785 }, { "epoch": 4.1748046875e-05, "grad_norm": 0.07137750834226608, "learning_rate": 2.102281115676258e-06, "loss": 0.0069, "step": 27360 }, { "epoch": 4.1748046875e-05, "model_forward_time": 0.024916648864746094, "step": 27360 }, { "epoch": 4.1748046875e-05, "step": 27360, "training_step_time": 0.10718750953674316 }, { "epoch": 4.174957275390625e-05, "model_forward_time": 0.025011539459228516, "step": 27361 }, { "epoch": 4.174957275390625e-05, "step": 27361, "training_step_time": 0.11072492599487305 }, { "epoch": 4.17510986328125e-05, "model_forward_time": 0.025212764739990234, "step": 27362 }, { "epoch": 4.17510986328125e-05, "step": 27362, "training_step_time": 0.10950970649719238 }, { "epoch": 4.175262451171875e-05, "model_forward_time": 0.02538776397705078, "step": 27363 }, { "epoch": 4.175262451171875e-05, "step": 27363, "training_step_time": 0.10811376571655273 }, { "epoch": 4.1754150390625e-05, "model_forward_time": 0.025346994400024414, "step": 27364 }, { "epoch": 4.1754150390625e-05, "step": 27364, "training_step_time": 0.1065821647644043 }, { "epoch": 4.175567626953125e-05, "model_forward_time": 0.02498650550842285, "step": 27365 }, { "epoch": 4.175567626953125e-05, "step": 27365, "training_step_time": 0.10657715797424316 }, { "epoch": 4.17572021484375e-05, "model_forward_time": 0.025121688842773438, "step": 27366 }, { "epoch": 4.17572021484375e-05, "step": 27366, "training_step_time": 0.10641741752624512 }, { "epoch": 4.175872802734375e-05, "model_forward_time": 0.025173425674438477, "step": 27367 }, { "epoch": 4.175872802734375e-05, "step": 27367, "training_step_time": 0.10604643821716309 }, { "epoch": 4.176025390625e-05, "model_forward_time": 0.025542497634887695, "step": 27368 }, { "epoch": 4.176025390625e-05, "step": 27368, "training_step_time": 0.10823392868041992 }, { "epoch": 4.176177978515625e-05, "model_forward_time": 0.02549266815185547, "step": 27369 }, { "epoch": 4.176177978515625e-05, "step": 27369, "training_step_time": 0.1940605640411377 }, { "epoch": 4.17633056640625e-05, "grad_norm": 0.05839123576879501, "learning_rate": 2.0864964025581135e-06, "loss": 0.0041, "step": 27370 }, { "epoch": 4.17633056640625e-05, "model_forward_time": 0.02423882484436035, "step": 27370 }, { "epoch": 4.17633056640625e-05, "step": 27370, "training_step_time": 0.1367037296295166 }, { "epoch": 4.176483154296875e-05, "model_forward_time": 0.02413344383239746, "step": 27371 }, { "epoch": 4.176483154296875e-05, "step": 27371, "training_step_time": 0.10798144340515137 }, { "epoch": 4.1766357421875e-05, "model_forward_time": 0.025147676467895508, "step": 27372 }, { "epoch": 4.1766357421875e-05, "step": 27372, "training_step_time": 0.12361860275268555 }, { "epoch": 4.176788330078125e-05, "model_forward_time": 0.02529311180114746, "step": 27373 }, { "epoch": 4.176788330078125e-05, "step": 27373, "training_step_time": 0.1168520450592041 }, { "epoch": 4.17694091796875e-05, "model_forward_time": 0.02512192726135254, "step": 27374 }, { "epoch": 4.17694091796875e-05, "step": 27374, "training_step_time": 0.10658550262451172 }, { "epoch": 4.177093505859375e-05, "model_forward_time": 0.02530956268310547, "step": 27375 }, { "epoch": 4.177093505859375e-05, "step": 27375, "training_step_time": 0.18764686584472656 }, { "epoch": 4.17724609375e-05, "model_forward_time": 0.02440500259399414, "step": 27376 }, { "epoch": 4.17724609375e-05, "step": 27376, "training_step_time": 0.10719728469848633 }, { "epoch": 4.177398681640625e-05, "model_forward_time": 0.024349451065063477, "step": 27377 }, { "epoch": 4.177398681640625e-05, "step": 27377, "training_step_time": 0.10791373252868652 }, { "epoch": 4.17755126953125e-05, "model_forward_time": 0.025031566619873047, "step": 27378 }, { "epoch": 4.17755126953125e-05, "step": 27378, "training_step_time": 0.12014293670654297 }, { "epoch": 4.177703857421875e-05, "model_forward_time": 0.02491593360900879, "step": 27379 }, { "epoch": 4.177703857421875e-05, "step": 27379, "training_step_time": 0.10888314247131348 }, { "epoch": 4.1778564453125e-05, "grad_norm": 0.06713982671499252, "learning_rate": 2.0707699088647836e-06, "loss": 0.004, "step": 27380 }, { "epoch": 4.1778564453125e-05, "model_forward_time": 0.025205135345458984, "step": 27380 }, { "epoch": 4.1778564453125e-05, "step": 27380, "training_step_time": 0.10607433319091797 }, { "epoch": 4.178009033203125e-05, "model_forward_time": 0.025559425354003906, "step": 27381 }, { "epoch": 4.178009033203125e-05, "step": 27381, "training_step_time": 0.10652899742126465 }, { "epoch": 4.17816162109375e-05, "model_forward_time": 0.02518486976623535, "step": 27382 }, { "epoch": 4.17816162109375e-05, "step": 27382, "training_step_time": 0.10806679725646973 }, { "epoch": 4.178314208984375e-05, "model_forward_time": 0.025487422943115234, "step": 27383 }, { "epoch": 4.178314208984375e-05, "step": 27383, "training_step_time": 0.10729289054870605 }, { "epoch": 4.178466796875e-05, "model_forward_time": 0.025025129318237305, "step": 27384 }, { "epoch": 4.178466796875e-05, "step": 27384, "training_step_time": 0.10611486434936523 }, { "epoch": 4.178619384765625e-05, "model_forward_time": 0.025339365005493164, "step": 27385 }, { "epoch": 4.178619384765625e-05, "step": 27385, "training_step_time": 0.10610365867614746 }, { "epoch": 4.17877197265625e-05, "model_forward_time": 0.02490401268005371, "step": 27386 }, { "epoch": 4.17877197265625e-05, "step": 27386, "training_step_time": 0.10580229759216309 }, { "epoch": 4.178924560546875e-05, "model_forward_time": 0.02511429786682129, "step": 27387 }, { "epoch": 4.178924560546875e-05, "step": 27387, "training_step_time": 0.1052405834197998 }, { "epoch": 4.1790771484375e-05, "model_forward_time": 0.02518939971923828, "step": 27388 }, { "epoch": 4.1790771484375e-05, "step": 27388, "training_step_time": 0.11149716377258301 }, { "epoch": 4.179229736328125e-05, "model_forward_time": 0.02562713623046875, "step": 27389 }, { "epoch": 4.179229736328125e-05, "step": 27389, "training_step_time": 0.1062936782836914 }, { "epoch": 4.17938232421875e-05, "grad_norm": 0.09553220868110657, "learning_rate": 2.0551016537054493e-06, "loss": 0.0036, "step": 27390 }, { "epoch": 4.17938232421875e-05, "model_forward_time": 0.025326013565063477, "step": 27390 }, { "epoch": 4.17938232421875e-05, "step": 27390, "training_step_time": 0.10575199127197266 }, { "epoch": 4.179534912109375e-05, "model_forward_time": 0.025343656539916992, "step": 27391 }, { "epoch": 4.179534912109375e-05, "step": 27391, "training_step_time": 0.10982632637023926 }, { "epoch": 4.1796875e-05, "model_forward_time": 0.02521800994873047, "step": 27392 }, { "epoch": 4.1796875e-05, "step": 27392, "training_step_time": 0.109375 }, { "epoch": 4.179840087890625e-05, "model_forward_time": 0.025253772735595703, "step": 27393 }, { "epoch": 4.179840087890625e-05, "step": 27393, "training_step_time": 0.10704207420349121 }, { "epoch": 4.17999267578125e-05, "model_forward_time": 0.025048255920410156, "step": 27394 }, { "epoch": 4.17999267578125e-05, "step": 27394, "training_step_time": 0.1058192253112793 }, { "epoch": 4.180145263671875e-05, "model_forward_time": 0.025005578994750977, "step": 27395 }, { "epoch": 4.180145263671875e-05, "step": 27395, "training_step_time": 0.10320258140563965 }, { "epoch": 4.1802978515625e-05, "model_forward_time": 0.024826526641845703, "step": 27396 }, { "epoch": 4.1802978515625e-05, "step": 27396, "training_step_time": 0.16946101188659668 }, { "epoch": 4.180450439453125e-05, "model_forward_time": 0.024760723114013672, "step": 27397 }, { "epoch": 4.180450439453125e-05, "step": 27397, "training_step_time": 0.18193459510803223 }, { "epoch": 4.18060302734375e-05, "model_forward_time": 0.02702498435974121, "step": 27398 }, { "epoch": 4.18060302734375e-05, "step": 27398, "training_step_time": 0.19617080688476562 }, { "epoch": 4.180755615234375e-05, "model_forward_time": 0.025606870651245117, "step": 27399 }, { "epoch": 4.180755615234375e-05, "step": 27399, "training_step_time": 0.17314505577087402 }, { "epoch": 4.180908203125e-05, "grad_norm": 0.05021870881319046, "learning_rate": 2.0394916561185083e-06, "loss": 0.0022, "step": 27400 }, { "epoch": 4.180908203125e-05, "model_forward_time": 0.025249719619750977, "step": 27400 }, { "epoch": 4.180908203125e-05, "step": 27400, "training_step_time": 0.17282986640930176 }, { "epoch": 4.181060791015625e-05, "model_forward_time": 0.024559736251831055, "step": 27401 }, { "epoch": 4.181060791015625e-05, "step": 27401, "training_step_time": 0.11244368553161621 }, { "epoch": 4.18121337890625e-05, "model_forward_time": 0.024523019790649414, "step": 27402 }, { "epoch": 4.18121337890625e-05, "step": 27402, "training_step_time": 0.10839724540710449 }, { "epoch": 4.181365966796875e-05, "model_forward_time": 0.025531768798828125, "step": 27403 }, { "epoch": 4.181365966796875e-05, "step": 27403, "training_step_time": 0.11739587783813477 }, { "epoch": 4.1815185546875e-05, "model_forward_time": 0.025006532669067383, "step": 27404 }, { "epoch": 4.1815185546875e-05, "step": 27404, "training_step_time": 0.11202168464660645 }, { "epoch": 4.181671142578125e-05, "model_forward_time": 0.02541518211364746, "step": 27405 }, { "epoch": 4.181671142578125e-05, "step": 27405, "training_step_time": 0.10808777809143066 }, { "epoch": 4.18182373046875e-05, "model_forward_time": 0.024933338165283203, "step": 27406 }, { "epoch": 4.18182373046875e-05, "step": 27406, "training_step_time": 0.11101841926574707 }, { "epoch": 4.181976318359375e-05, "model_forward_time": 0.025141000747680664, "step": 27407 }, { "epoch": 4.181976318359375e-05, "step": 27407, "training_step_time": 0.10969662666320801 }, { "epoch": 4.18212890625e-05, "model_forward_time": 0.025233745574951172, "step": 27408 }, { "epoch": 4.18212890625e-05, "step": 27408, "training_step_time": 0.10629940032958984 }, { "epoch": 4.182281494140625e-05, "model_forward_time": 0.02537059783935547, "step": 27409 }, { "epoch": 4.182281494140625e-05, "step": 27409, "training_step_time": 0.10664510726928711 }, { "epoch": 4.18243408203125e-05, "grad_norm": 0.03802133724093437, "learning_rate": 2.0239399350715895e-06, "loss": 0.0065, "step": 27410 }, { "epoch": 4.18243408203125e-05, "model_forward_time": 0.025231361389160156, "step": 27410 }, { "epoch": 4.18243408203125e-05, "step": 27410, "training_step_time": 0.10942363739013672 }, { "epoch": 4.182586669921875e-05, "model_forward_time": 0.023288249969482422, "step": 27411 }, { "epoch": 4.182586669921875e-05, "step": 27411, "training_step_time": 0.1096792221069336 }, { "epoch": 4.1827392578125e-05, "model_forward_time": 0.024753093719482422, "step": 27412 }, { "epoch": 4.1827392578125e-05, "step": 27412, "training_step_time": 0.11046385765075684 }, { "epoch": 4.182891845703125e-05, "model_forward_time": 0.025518417358398438, "step": 27413 }, { "epoch": 4.182891845703125e-05, "step": 27413, "training_step_time": 0.10704827308654785 }, { "epoch": 4.18304443359375e-05, "model_forward_time": 0.02505803108215332, "step": 27414 }, { "epoch": 4.18304443359375e-05, "step": 27414, "training_step_time": 0.1084287166595459 }, { "epoch": 4.183197021484375e-05, "model_forward_time": 0.025177955627441406, "step": 27415 }, { "epoch": 4.183197021484375e-05, "step": 27415, "training_step_time": 0.1071314811706543 }, { "epoch": 4.183349609375e-05, "model_forward_time": 0.02544236183166504, "step": 27416 }, { "epoch": 4.183349609375e-05, "step": 27416, "training_step_time": 0.10797572135925293 }, { "epoch": 4.183502197265625e-05, "model_forward_time": 0.025148391723632812, "step": 27417 }, { "epoch": 4.183502197265625e-05, "step": 27417, "training_step_time": 0.13752055168151855 }, { "epoch": 4.18365478515625e-05, "model_forward_time": 0.025104999542236328, "step": 27418 }, { "epoch": 4.18365478515625e-05, "step": 27418, "training_step_time": 0.10954475402832031 }, { "epoch": 4.183807373046875e-05, "model_forward_time": 0.02528071403503418, "step": 27419 }, { "epoch": 4.183807373046875e-05, "step": 27419, "training_step_time": 0.10897469520568848 }, { "epoch": 4.1839599609375e-05, "grad_norm": 0.07062240689992905, "learning_rate": 2.008446509461498e-06, "loss": 0.0035, "step": 27420 }, { "epoch": 4.1839599609375e-05, "model_forward_time": 0.02506232261657715, "step": 27420 }, { "epoch": 4.1839599609375e-05, "step": 27420, "training_step_time": 0.10736250877380371 }, { "epoch": 4.184112548828125e-05, "model_forward_time": 0.02533888816833496, "step": 27421 }, { "epoch": 4.184112548828125e-05, "step": 27421, "training_step_time": 0.15326237678527832 }, { "epoch": 4.18426513671875e-05, "model_forward_time": 0.025276660919189453, "step": 27422 }, { "epoch": 4.18426513671875e-05, "step": 27422, "training_step_time": 0.15224552154541016 }, { "epoch": 4.184417724609375e-05, "model_forward_time": 0.024387836456298828, "step": 27423 }, { "epoch": 4.184417724609375e-05, "step": 27423, "training_step_time": 0.1059575080871582 }, { "epoch": 4.1845703125e-05, "model_forward_time": 0.024991989135742188, "step": 27424 }, { "epoch": 4.1845703125e-05, "step": 27424, "training_step_time": 0.10260868072509766 }, { "epoch": 4.184722900390625e-05, "model_forward_time": 0.025104045867919922, "step": 27425 }, { "epoch": 4.184722900390625e-05, "step": 27425, "training_step_time": 0.10508489608764648 }, { "epoch": 4.18487548828125e-05, "model_forward_time": 0.025986433029174805, "step": 27426 }, { "epoch": 4.18487548828125e-05, "step": 27426, "training_step_time": 0.10611343383789062 }, { "epoch": 4.185028076171875e-05, "model_forward_time": 0.024981975555419922, "step": 27427 }, { "epoch": 4.185028076171875e-05, "step": 27427, "training_step_time": 0.11332058906555176 }, { "epoch": 4.1851806640625e-05, "model_forward_time": 0.025382518768310547, "step": 27428 }, { "epoch": 4.1851806640625e-05, "step": 27428, "training_step_time": 0.11293601989746094 }, { "epoch": 4.185333251953125e-05, "model_forward_time": 0.0257418155670166, "step": 27429 }, { "epoch": 4.185333251953125e-05, "step": 27429, "training_step_time": 0.11393260955810547 }, { "epoch": 4.18548583984375e-05, "grad_norm": 0.16383881866931915, "learning_rate": 1.9930113981142028e-06, "loss": 0.0054, "step": 27430 }, { "epoch": 4.18548583984375e-05, "model_forward_time": 0.025405168533325195, "step": 27430 }, { "epoch": 4.18548583984375e-05, "step": 27430, "training_step_time": 0.11467337608337402 }, { "epoch": 4.185638427734375e-05, "model_forward_time": 0.024990558624267578, "step": 27431 }, { "epoch": 4.185638427734375e-05, "step": 27431, "training_step_time": 0.1144256591796875 }, { "epoch": 4.185791015625e-05, "model_forward_time": 0.0258023738861084, "step": 27432 }, { "epoch": 4.185791015625e-05, "step": 27432, "training_step_time": 0.11575436592102051 }, { "epoch": 4.185943603515625e-05, "model_forward_time": 0.02470874786376953, "step": 27433 }, { "epoch": 4.185943603515625e-05, "step": 27433, "training_step_time": 0.11305570602416992 }, { "epoch": 4.18609619140625e-05, "model_forward_time": 0.025424718856811523, "step": 27434 }, { "epoch": 4.18609619140625e-05, "step": 27434, "training_step_time": 0.11548686027526855 }, { "epoch": 4.186248779296875e-05, "model_forward_time": 0.02555561065673828, "step": 27435 }, { "epoch": 4.186248779296875e-05, "step": 27435, "training_step_time": 0.11454463005065918 }, { "epoch": 4.1864013671875e-05, "model_forward_time": 0.027447223663330078, "step": 27436 }, { "epoch": 4.1864013671875e-05, "step": 27436, "training_step_time": 0.1119835376739502 }, { "epoch": 4.186553955078125e-05, "model_forward_time": 0.025762319564819336, "step": 27437 }, { "epoch": 4.186553955078125e-05, "step": 27437, "training_step_time": 0.10747933387756348 }, { "epoch": 4.18670654296875e-05, "model_forward_time": 0.02572035789489746, "step": 27438 }, { "epoch": 4.18670654296875e-05, "step": 27438, "training_step_time": 0.11485743522644043 }, { "epoch": 4.186859130859375e-05, "model_forward_time": 0.025011539459228516, "step": 27439 }, { "epoch": 4.186859130859375e-05, "step": 27439, "training_step_time": 0.1107323169708252 }, { "epoch": 4.18701171875e-05, "grad_norm": 0.3348250389099121, "learning_rate": 1.9776346197848296e-06, "loss": 0.0097, "step": 27440 }, { "epoch": 4.18701171875e-05, "model_forward_time": 0.02513599395751953, "step": 27440 }, { "epoch": 4.18701171875e-05, "step": 27440, "training_step_time": 0.10911083221435547 }, { "epoch": 4.187164306640625e-05, "model_forward_time": 0.025310754776000977, "step": 27441 }, { "epoch": 4.187164306640625e-05, "step": 27441, "training_step_time": 0.10758543014526367 }, { "epoch": 4.18731689453125e-05, "model_forward_time": 0.026711702346801758, "step": 27442 }, { "epoch": 4.18731689453125e-05, "step": 27442, "training_step_time": 0.10889315605163574 }, { "epoch": 4.187469482421875e-05, "model_forward_time": 0.024873733520507812, "step": 27443 }, { "epoch": 4.187469482421875e-05, "step": 27443, "training_step_time": 0.10815167427062988 }, { "epoch": 4.1876220703125e-05, "model_forward_time": 0.025715112686157227, "step": 27444 }, { "epoch": 4.1876220703125e-05, "step": 27444, "training_step_time": 0.13560724258422852 }, { "epoch": 4.187774658203125e-05, "model_forward_time": 0.025656938552856445, "step": 27445 }, { "epoch": 4.187774658203125e-05, "step": 27445, "training_step_time": 0.1906285285949707 }, { "epoch": 4.18792724609375e-05, "model_forward_time": 0.024431943893432617, "step": 27446 }, { "epoch": 4.18792724609375e-05, "step": 27446, "training_step_time": 0.16294407844543457 }, { "epoch": 4.188079833984375e-05, "model_forward_time": 0.02448582649230957, "step": 27447 }, { "epoch": 4.188079833984375e-05, "step": 27447, "training_step_time": 0.17828917503356934 }, { "epoch": 4.188232421875e-05, "model_forward_time": 0.025101423263549805, "step": 27448 }, { "epoch": 4.188232421875e-05, "step": 27448, "training_step_time": 0.18698430061340332 }, { "epoch": 4.188385009765625e-05, "model_forward_time": 0.02508711814880371, "step": 27449 }, { "epoch": 4.188385009765625e-05, "step": 27449, "training_step_time": 0.11213231086730957 }, { "epoch": 4.18853759765625e-05, "grad_norm": 0.057641200721263885, "learning_rate": 1.962316193157593e-06, "loss": 0.0047, "step": 27450 }, { "epoch": 4.18853759765625e-05, "model_forward_time": 0.02467203140258789, "step": 27450 }, { "epoch": 4.18853759765625e-05, "step": 27450, "training_step_time": 0.11140108108520508 }, { "epoch": 4.188690185546875e-05, "model_forward_time": 0.027614831924438477, "step": 27451 }, { "epoch": 4.188690185546875e-05, "step": 27451, "training_step_time": 0.1136178970336914 }, { "epoch": 4.1888427734375e-05, "model_forward_time": 0.025501012802124023, "step": 27452 }, { "epoch": 4.1888427734375e-05, "step": 27452, "training_step_time": 0.10609841346740723 }, { "epoch": 4.188995361328125e-05, "model_forward_time": 0.02535104751586914, "step": 27453 }, { "epoch": 4.188995361328125e-05, "step": 27453, "training_step_time": 0.10734415054321289 }, { "epoch": 4.18914794921875e-05, "model_forward_time": 0.025449037551879883, "step": 27454 }, { "epoch": 4.18914794921875e-05, "step": 27454, "training_step_time": 0.10927295684814453 }, { "epoch": 4.189300537109375e-05, "model_forward_time": 0.02522587776184082, "step": 27455 }, { "epoch": 4.189300537109375e-05, "step": 27455, "training_step_time": 0.10616087913513184 }, { "epoch": 4.189453125e-05, "model_forward_time": 0.026230335235595703, "step": 27456 }, { "epoch": 4.189453125e-05, "step": 27456, "training_step_time": 0.10807371139526367 }, { "epoch": 4.189605712890625e-05, "model_forward_time": 0.025315523147583008, "step": 27457 }, { "epoch": 4.189605712890625e-05, "step": 27457, "training_step_time": 0.10823178291320801 }, { "epoch": 4.18975830078125e-05, "model_forward_time": 0.02517104148864746, "step": 27458 }, { "epoch": 4.18975830078125e-05, "step": 27458, "training_step_time": 0.1043243408203125 }, { "epoch": 4.189910888671875e-05, "model_forward_time": 0.025259733200073242, "step": 27459 }, { "epoch": 4.189910888671875e-05, "step": 27459, "training_step_time": 0.10832905769348145 }, { "epoch": 4.1900634765625e-05, "grad_norm": 0.04290970042347908, "learning_rate": 1.9470561368458485e-06, "loss": 0.0041, "step": 27460 }, { "epoch": 4.1900634765625e-05, "model_forward_time": 0.02535104751586914, "step": 27460 }, { "epoch": 4.1900634765625e-05, "step": 27460, "training_step_time": 0.10336017608642578 }, { "epoch": 4.190216064453125e-05, "model_forward_time": 0.025298357009887695, "step": 27461 }, { "epoch": 4.190216064453125e-05, "step": 27461, "training_step_time": 0.11078977584838867 }, { "epoch": 4.19036865234375e-05, "model_forward_time": 0.025175809860229492, "step": 27462 }, { "epoch": 4.19036865234375e-05, "step": 27462, "training_step_time": 0.10450291633605957 }, { "epoch": 4.190521240234375e-05, "model_forward_time": 0.02521967887878418, "step": 27463 }, { "epoch": 4.190521240234375e-05, "step": 27463, "training_step_time": 0.16716885566711426 }, { "epoch": 4.190673828125e-05, "model_forward_time": 0.024560928344726562, "step": 27464 }, { "epoch": 4.190673828125e-05, "step": 27464, "training_step_time": 0.14967584609985352 }, { "epoch": 4.190826416015625e-05, "model_forward_time": 0.024634122848510742, "step": 27465 }, { "epoch": 4.190826416015625e-05, "step": 27465, "training_step_time": 0.10407304763793945 }, { "epoch": 4.19097900390625e-05, "model_forward_time": 0.025407075881958008, "step": 27466 }, { "epoch": 4.19097900390625e-05, "step": 27466, "training_step_time": 0.10511398315429688 }, { "epoch": 4.191131591796875e-05, "model_forward_time": 0.02519512176513672, "step": 27467 }, { "epoch": 4.191131591796875e-05, "step": 27467, "training_step_time": 0.10986018180847168 }, { "epoch": 4.1912841796875e-05, "model_forward_time": 0.025296926498413086, "step": 27468 }, { "epoch": 4.1912841796875e-05, "step": 27468, "training_step_time": 0.10811519622802734 }, { "epoch": 4.191436767578125e-05, "model_forward_time": 0.02529120445251465, "step": 27469 }, { "epoch": 4.191436767578125e-05, "step": 27469, "training_step_time": 0.20360732078552246 }, { "epoch": 4.19158935546875e-05, "grad_norm": 0.22710780799388885, "learning_rate": 1.9318544693919916e-06, "loss": 0.0119, "step": 27470 }, { "epoch": 4.19158935546875e-05, "model_forward_time": 0.024785995483398438, "step": 27470 }, { "epoch": 4.19158935546875e-05, "step": 27470, "training_step_time": 0.10249567031860352 }, { "epoch": 4.191741943359375e-05, "model_forward_time": 0.025137662887573242, "step": 27471 }, { "epoch": 4.191741943359375e-05, "step": 27471, "training_step_time": 0.10365486145019531 }, { "epoch": 4.19189453125e-05, "model_forward_time": 0.025381088256835938, "step": 27472 }, { "epoch": 4.19189453125e-05, "step": 27472, "training_step_time": 0.10603547096252441 }, { "epoch": 4.192047119140625e-05, "model_forward_time": 0.025968313217163086, "step": 27473 }, { "epoch": 4.192047119140625e-05, "step": 27473, "training_step_time": 0.11260771751403809 }, { "epoch": 4.19219970703125e-05, "model_forward_time": 0.02595353126525879, "step": 27474 }, { "epoch": 4.19219970703125e-05, "step": 27474, "training_step_time": 0.12001347541809082 }, { "epoch": 4.192352294921875e-05, "model_forward_time": 0.025643348693847656, "step": 27475 }, { "epoch": 4.192352294921875e-05, "step": 27475, "training_step_time": 0.11197257041931152 }, { "epoch": 4.1925048828125e-05, "model_forward_time": 0.025481224060058594, "step": 27476 }, { "epoch": 4.1925048828125e-05, "step": 27476, "training_step_time": 0.11362981796264648 }, { "epoch": 4.192657470703125e-05, "model_forward_time": 0.025565147399902344, "step": 27477 }, { "epoch": 4.192657470703125e-05, "step": 27477, "training_step_time": 0.11202287673950195 }, { "epoch": 4.19281005859375e-05, "model_forward_time": 0.025464773178100586, "step": 27478 }, { "epoch": 4.19281005859375e-05, "step": 27478, "training_step_time": 0.11535215377807617 }, { "epoch": 4.192962646484375e-05, "model_forward_time": 0.02550029754638672, "step": 27479 }, { "epoch": 4.192962646484375e-05, "step": 27479, "training_step_time": 0.1105949878692627 }, { "epoch": 4.193115234375e-05, "grad_norm": 0.31672078371047974, "learning_rate": 1.91671120926748e-06, "loss": 0.0068, "step": 27480 }, { "epoch": 4.193115234375e-05, "model_forward_time": 0.025203227996826172, "step": 27480 }, { "epoch": 4.193115234375e-05, "step": 27480, "training_step_time": 0.11097550392150879 }, { "epoch": 4.193267822265625e-05, "model_forward_time": 0.025226116180419922, "step": 27481 }, { "epoch": 4.193267822265625e-05, "step": 27481, "training_step_time": 0.12115907669067383 }, { "epoch": 4.19342041015625e-05, "model_forward_time": 0.025313377380371094, "step": 27482 }, { "epoch": 4.19342041015625e-05, "step": 27482, "training_step_time": 0.11470460891723633 }, { "epoch": 4.193572998046875e-05, "model_forward_time": 0.02600836753845215, "step": 27483 }, { "epoch": 4.193572998046875e-05, "step": 27483, "training_step_time": 0.10695433616638184 }, { "epoch": 4.1937255859375e-05, "model_forward_time": 0.025343656539916992, "step": 27484 }, { "epoch": 4.1937255859375e-05, "step": 27484, "training_step_time": 0.10799527168273926 }, { "epoch": 4.193878173828125e-05, "model_forward_time": 0.025595426559448242, "step": 27485 }, { "epoch": 4.193878173828125e-05, "step": 27485, "training_step_time": 0.10973119735717773 }, { "epoch": 4.19403076171875e-05, "model_forward_time": 0.025278091430664062, "step": 27486 }, { "epoch": 4.19403076171875e-05, "step": 27486, "training_step_time": 0.11096644401550293 }, { "epoch": 4.194183349609375e-05, "model_forward_time": 0.026182174682617188, "step": 27487 }, { "epoch": 4.194183349609375e-05, "step": 27487, "training_step_time": 0.10775089263916016 }, { "epoch": 4.1943359375e-05, "model_forward_time": 0.025049448013305664, "step": 27488 }, { "epoch": 4.1943359375e-05, "step": 27488, "training_step_time": 0.10559749603271484 }, { "epoch": 4.194488525390625e-05, "model_forward_time": 0.025813817977905273, "step": 27489 }, { "epoch": 4.194488525390625e-05, "step": 27489, "training_step_time": 0.10961151123046875 }, { "epoch": 4.19464111328125e-05, "grad_norm": 0.1764764040708542, "learning_rate": 1.9016263748728114e-06, "loss": 0.0052, "step": 27490 }, { "epoch": 4.19464111328125e-05, "model_forward_time": 0.025191545486450195, "step": 27490 }, { "epoch": 4.19464111328125e-05, "step": 27490, "training_step_time": 0.15883755683898926 }, { "epoch": 4.194793701171875e-05, "model_forward_time": 0.02490973472595215, "step": 27491 }, { "epoch": 4.194793701171875e-05, "step": 27491, "training_step_time": 0.1653451919555664 }, { "epoch": 4.1949462890625e-05, "model_forward_time": 0.024480342864990234, "step": 27492 }, { "epoch": 4.1949462890625e-05, "step": 27492, "training_step_time": 0.224379301071167 }, { "epoch": 4.195098876953125e-05, "model_forward_time": 0.024535655975341797, "step": 27493 }, { "epoch": 4.195098876953125e-05, "step": 27493, "training_step_time": 0.21618080139160156 }, { "epoch": 4.19525146484375e-05, "model_forward_time": 0.024358034133911133, "step": 27494 }, { "epoch": 4.19525146484375e-05, "step": 27494, "training_step_time": 0.14006328582763672 }, { "epoch": 4.195404052734375e-05, "model_forward_time": 0.024471282958984375, "step": 27495 }, { "epoch": 4.195404052734375e-05, "step": 27495, "training_step_time": 0.1847212314605713 }, { "epoch": 4.195556640625e-05, "model_forward_time": 0.02492213249206543, "step": 27496 }, { "epoch": 4.195556640625e-05, "step": 27496, "training_step_time": 0.10326552391052246 }, { "epoch": 4.195709228515625e-05, "model_forward_time": 0.024483680725097656, "step": 27497 }, { "epoch": 4.195709228515625e-05, "step": 27497, "training_step_time": 0.10661172866821289 }, { "epoch": 4.19586181640625e-05, "model_forward_time": 0.025578737258911133, "step": 27498 }, { "epoch": 4.19586181640625e-05, "step": 27498, "training_step_time": 0.1071021556854248 }, { "epoch": 4.196014404296875e-05, "model_forward_time": 0.025483369827270508, "step": 27499 }, { "epoch": 4.196014404296875e-05, "step": 27499, "training_step_time": 0.10799717903137207 }, { "epoch": 4.1961669921875e-05, "grad_norm": 0.31481289863586426, "learning_rate": 1.8865999845374793e-06, "loss": 0.0124, "step": 27500 }, { "epoch": 4.1961669921875e-05, "model_forward_time": 0.025333404541015625, "step": 27500 }, { "epoch": 4.1961669921875e-05, "step": 27500, "training_step_time": 0.10700631141662598 }, { "epoch": 4.196319580078125e-05, "model_forward_time": 0.025399208068847656, "step": 27501 }, { "epoch": 4.196319580078125e-05, "step": 27501, "training_step_time": 0.10657262802124023 }, { "epoch": 4.19647216796875e-05, "model_forward_time": 0.025539159774780273, "step": 27502 }, { "epoch": 4.19647216796875e-05, "step": 27502, "training_step_time": 0.10805869102478027 }, { "epoch": 4.196624755859375e-05, "model_forward_time": 0.0250852108001709, "step": 27503 }, { "epoch": 4.196624755859375e-05, "step": 27503, "training_step_time": 0.11168670654296875 }, { "epoch": 4.19677734375e-05, "model_forward_time": 0.02507758140563965, "step": 27504 }, { "epoch": 4.19677734375e-05, "step": 27504, "training_step_time": 0.11144471168518066 }, { "epoch": 4.196929931640625e-05, "model_forward_time": 0.025075674057006836, "step": 27505 }, { "epoch": 4.196929931640625e-05, "step": 27505, "training_step_time": 0.11420679092407227 }, { "epoch": 4.19708251953125e-05, "model_forward_time": 0.025195598602294922, "step": 27506 }, { "epoch": 4.19708251953125e-05, "step": 27506, "training_step_time": 0.15414690971374512 }, { "epoch": 4.197235107421875e-05, "model_forward_time": 0.02484416961669922, "step": 27507 }, { "epoch": 4.197235107421875e-05, "step": 27507, "training_step_time": 0.15661048889160156 }, { "epoch": 4.1973876953125e-05, "model_forward_time": 0.024560213088989258, "step": 27508 }, { "epoch": 4.1973876953125e-05, "step": 27508, "training_step_time": 0.1725625991821289 }, { "epoch": 4.197540283203125e-05, "model_forward_time": 0.024504899978637695, "step": 27509 }, { "epoch": 4.197540283203125e-05, "step": 27509, "training_step_time": 0.14648652076721191 }, { "epoch": 4.19769287109375e-05, "grad_norm": 0.310109406709671, "learning_rate": 1.8716320565199618e-06, "loss": 0.008, "step": 27510 }, { "epoch": 4.19769287109375e-05, "model_forward_time": 0.024860382080078125, "step": 27510 }, { "epoch": 4.19769287109375e-05, "step": 27510, "training_step_time": 0.20289945602416992 }, { "epoch": 4.197845458984375e-05, "model_forward_time": 0.024822235107421875, "step": 27511 }, { "epoch": 4.197845458984375e-05, "step": 27511, "training_step_time": 0.12235832214355469 }, { "epoch": 4.197998046875e-05, "model_forward_time": 0.02441883087158203, "step": 27512 }, { "epoch": 4.197998046875e-05, "step": 27512, "training_step_time": 0.18161416053771973 }, { "epoch": 4.198150634765625e-05, "model_forward_time": 0.02444601058959961, "step": 27513 }, { "epoch": 4.198150634765625e-05, "step": 27513, "training_step_time": 0.11646628379821777 }, { "epoch": 4.19830322265625e-05, "model_forward_time": 0.025057315826416016, "step": 27514 }, { "epoch": 4.19830322265625e-05, "step": 27514, "training_step_time": 0.1098940372467041 }, { "epoch": 4.198455810546875e-05, "model_forward_time": 0.025715351104736328, "step": 27515 }, { "epoch": 4.198455810546875e-05, "step": 27515, "training_step_time": 0.11123085021972656 }, { "epoch": 4.1986083984375e-05, "model_forward_time": 0.025283336639404297, "step": 27516 }, { "epoch": 4.1986083984375e-05, "step": 27516, "training_step_time": 0.11224913597106934 }, { "epoch": 4.198760986328125e-05, "model_forward_time": 0.02502131462097168, "step": 27517 }, { "epoch": 4.198760986328125e-05, "step": 27517, "training_step_time": 0.10814046859741211 }, { "epoch": 4.19891357421875e-05, "model_forward_time": 0.025496482849121094, "step": 27518 }, { "epoch": 4.19891357421875e-05, "step": 27518, "training_step_time": 0.10894203186035156 }, { "epoch": 4.199066162109375e-05, "model_forward_time": 0.025363922119140625, "step": 27519 }, { "epoch": 4.199066162109375e-05, "step": 27519, "training_step_time": 0.10723090171813965 }, { "epoch": 4.19921875e-05, "grad_norm": 0.09316647052764893, "learning_rate": 1.856722609007705e-06, "loss": 0.0094, "step": 27520 }, { "epoch": 4.19921875e-05, "model_forward_time": 0.02691817283630371, "step": 27520 }, { "epoch": 4.19921875e-05, "step": 27520, "training_step_time": 0.1132967472076416 }, { "epoch": 4.199371337890625e-05, "model_forward_time": 0.026094675064086914, "step": 27521 }, { "epoch": 4.199371337890625e-05, "step": 27521, "training_step_time": 0.10701322555541992 }, { "epoch": 4.19952392578125e-05, "model_forward_time": 0.025216102600097656, "step": 27522 }, { "epoch": 4.19952392578125e-05, "step": 27522, "training_step_time": 0.10623741149902344 }, { "epoch": 4.199676513671875e-05, "model_forward_time": 0.02497076988220215, "step": 27523 }, { "epoch": 4.199676513671875e-05, "step": 27523, "training_step_time": 0.10799932479858398 }, { "epoch": 4.1998291015625e-05, "model_forward_time": 0.025278091430664062, "step": 27524 }, { "epoch": 4.1998291015625e-05, "step": 27524, "training_step_time": 0.10667729377746582 }, { "epoch": 4.199981689453125e-05, "model_forward_time": 0.02546858787536621, "step": 27525 }, { "epoch": 4.199981689453125e-05, "step": 27525, "training_step_time": 0.10734081268310547 }, { "epoch": 4.20013427734375e-05, "model_forward_time": 0.025707483291625977, "step": 27526 }, { "epoch": 4.20013427734375e-05, "step": 27526, "training_step_time": 0.10908102989196777 }, { "epoch": 4.200286865234375e-05, "model_forward_time": 0.025506973266601562, "step": 27527 }, { "epoch": 4.200286865234375e-05, "step": 27527, "training_step_time": 0.10759925842285156 }, { "epoch": 4.200439453125e-05, "model_forward_time": 0.025616168975830078, "step": 27528 }, { "epoch": 4.200439453125e-05, "step": 27528, "training_step_time": 0.10637927055358887 }, { "epoch": 4.200592041015625e-05, "model_forward_time": 0.02531290054321289, "step": 27529 }, { "epoch": 4.200592041015625e-05, "step": 27529, "training_step_time": 0.10735034942626953 }, { "epoch": 4.20074462890625e-05, "grad_norm": 0.0915100947022438, "learning_rate": 1.841871660117095e-06, "loss": 0.0072, "step": 27530 }, { "epoch": 4.20074462890625e-05, "model_forward_time": 0.025414705276489258, "step": 27530 }, { "epoch": 4.20074462890625e-05, "step": 27530, "training_step_time": 0.11011242866516113 }, { "epoch": 4.200897216796875e-05, "model_forward_time": 0.025435447692871094, "step": 27531 }, { "epoch": 4.200897216796875e-05, "step": 27531, "training_step_time": 0.10951375961303711 }, { "epoch": 4.2010498046875e-05, "model_forward_time": 0.025510787963867188, "step": 27532 }, { "epoch": 4.2010498046875e-05, "step": 27532, "training_step_time": 0.10940361022949219 }, { "epoch": 4.201202392578125e-05, "model_forward_time": 0.025747060775756836, "step": 27533 }, { "epoch": 4.201202392578125e-05, "step": 27533, "training_step_time": 0.10470867156982422 }, { "epoch": 4.20135498046875e-05, "model_forward_time": 0.024981260299682617, "step": 27534 }, { "epoch": 4.20135498046875e-05, "step": 27534, "training_step_time": 0.1567833423614502 }, { "epoch": 4.201507568359375e-05, "model_forward_time": 0.02532672882080078, "step": 27535 }, { "epoch": 4.201507568359375e-05, "step": 27535, "training_step_time": 0.17128252983093262 }, { "epoch": 4.20166015625e-05, "model_forward_time": 0.025480985641479492, "step": 27536 }, { "epoch": 4.20166015625e-05, "step": 27536, "training_step_time": 0.19112920761108398 }, { "epoch": 4.201812744140625e-05, "model_forward_time": 0.024856090545654297, "step": 27537 }, { "epoch": 4.201812744140625e-05, "step": 27537, "training_step_time": 0.17227768898010254 }, { "epoch": 4.20196533203125e-05, "model_forward_time": 0.024065256118774414, "step": 27538 }, { "epoch": 4.20196533203125e-05, "step": 27538, "training_step_time": 0.20054841041564941 }, { "epoch": 4.202117919921875e-05, "model_forward_time": 0.02421402931213379, "step": 27539 }, { "epoch": 4.202117919921875e-05, "step": 27539, "training_step_time": 0.13978862762451172 }, { "epoch": 4.2022705078125e-05, "grad_norm": 0.07453785836696625, "learning_rate": 1.8270792278934302e-06, "loss": 0.0041, "step": 27540 }, { "epoch": 4.2022705078125e-05, "model_forward_time": 0.02553701400756836, "step": 27540 }, { "epoch": 4.2022705078125e-05, "step": 27540, "training_step_time": 0.20325040817260742 }, { "epoch": 4.202423095703125e-05, "model_forward_time": 0.024491548538208008, "step": 27541 }, { "epoch": 4.202423095703125e-05, "step": 27541, "training_step_time": 0.1246337890625 }, { "epoch": 4.20257568359375e-05, "model_forward_time": 0.024100542068481445, "step": 27542 }, { "epoch": 4.20257568359375e-05, "step": 27542, "training_step_time": 0.10514497756958008 }, { "epoch": 4.202728271484375e-05, "model_forward_time": 0.025437116622924805, "step": 27543 }, { "epoch": 4.202728271484375e-05, "step": 27543, "training_step_time": 0.10455775260925293 }, { "epoch": 4.202880859375e-05, "model_forward_time": 0.02534770965576172, "step": 27544 }, { "epoch": 4.202880859375e-05, "step": 27544, "training_step_time": 0.1079859733581543 }, { "epoch": 4.203033447265625e-05, "model_forward_time": 0.025345563888549805, "step": 27545 }, { "epoch": 4.203033447265625e-05, "step": 27545, "training_step_time": 0.11057829856872559 }, { "epoch": 4.20318603515625e-05, "model_forward_time": 0.02565622329711914, "step": 27546 }, { "epoch": 4.20318603515625e-05, "step": 27546, "training_step_time": 0.10788464546203613 }, { "epoch": 4.203338623046875e-05, "model_forward_time": 0.025149822235107422, "step": 27547 }, { "epoch": 4.203338623046875e-05, "step": 27547, "training_step_time": 0.10787343978881836 }, { "epoch": 4.2034912109375e-05, "model_forward_time": 0.025337696075439453, "step": 27548 }, { "epoch": 4.2034912109375e-05, "step": 27548, "training_step_time": 0.1053626537322998 }, { "epoch": 4.203643798828125e-05, "model_forward_time": 0.025126218795776367, "step": 27549 }, { "epoch": 4.203643798828125e-05, "step": 27549, "training_step_time": 0.1347651481628418 }, { "epoch": 4.20379638671875e-05, "grad_norm": 0.10368197411298752, "learning_rate": 1.812345330310916e-06, "loss": 0.0084, "step": 27550 }, { "epoch": 4.20379638671875e-05, "model_forward_time": 0.025801658630371094, "step": 27550 }, { "epoch": 4.20379638671875e-05, "step": 27550, "training_step_time": 0.16543054580688477 }, { "epoch": 4.203948974609375e-05, "model_forward_time": 0.023992300033569336, "step": 27551 }, { "epoch": 4.203948974609375e-05, "step": 27551, "training_step_time": 0.1585555076599121 }, { "epoch": 4.2041015625e-05, "model_forward_time": 0.024158239364624023, "step": 27552 }, { "epoch": 4.2041015625e-05, "step": 27552, "training_step_time": 0.1614081859588623 }, { "epoch": 4.204254150390625e-05, "model_forward_time": 0.024393796920776367, "step": 27553 }, { "epoch": 4.204254150390625e-05, "step": 27553, "training_step_time": 0.1886730194091797 }, { "epoch": 4.20440673828125e-05, "model_forward_time": 0.025038719177246094, "step": 27554 }, { "epoch": 4.20440673828125e-05, "step": 27554, "training_step_time": 0.13743019104003906 }, { "epoch": 4.204559326171875e-05, "model_forward_time": 0.024355411529541016, "step": 27555 }, { "epoch": 4.204559326171875e-05, "step": 27555, "training_step_time": 0.18871212005615234 }, { "epoch": 4.2047119140625e-05, "model_forward_time": 0.024212360382080078, "step": 27556 }, { "epoch": 4.2047119140625e-05, "step": 27556, "training_step_time": 0.11986827850341797 }, { "epoch": 4.204864501953125e-05, "model_forward_time": 0.024112224578857422, "step": 27557 }, { "epoch": 4.204864501953125e-05, "step": 27557, "training_step_time": 0.11790919303894043 }, { "epoch": 4.20501708984375e-05, "model_forward_time": 0.0249936580657959, "step": 27558 }, { "epoch": 4.20501708984375e-05, "step": 27558, "training_step_time": 0.11476397514343262 }, { "epoch": 4.205169677734375e-05, "model_forward_time": 0.025356531143188477, "step": 27559 }, { "epoch": 4.205169677734375e-05, "step": 27559, "training_step_time": 0.11282587051391602 }, { "epoch": 4.205322265625e-05, "grad_norm": 0.07172807306051254, "learning_rate": 1.7976699852726153e-06, "loss": 0.0026, "step": 27560 }, { "epoch": 4.205322265625e-05, "model_forward_time": 0.0253908634185791, "step": 27560 }, { "epoch": 4.205322265625e-05, "step": 27560, "training_step_time": 0.11072373390197754 }, { "epoch": 4.205474853515625e-05, "model_forward_time": 0.025568246841430664, "step": 27561 }, { "epoch": 4.205474853515625e-05, "step": 27561, "training_step_time": 0.11153697967529297 }, { "epoch": 4.20562744140625e-05, "model_forward_time": 0.024016618728637695, "step": 27562 }, { "epoch": 4.20562744140625e-05, "step": 27562, "training_step_time": 0.10944962501525879 }, { "epoch": 4.205780029296875e-05, "model_forward_time": 0.02501535415649414, "step": 27563 }, { "epoch": 4.205780029296875e-05, "step": 27563, "training_step_time": 0.10572671890258789 }, { "epoch": 4.2059326171875e-05, "model_forward_time": 0.0252838134765625, "step": 27564 }, { "epoch": 4.2059326171875e-05, "step": 27564, "training_step_time": 0.10776209831237793 }, { "epoch": 4.206085205078125e-05, "model_forward_time": 0.02505016326904297, "step": 27565 }, { "epoch": 4.206085205078125e-05, "step": 27565, "training_step_time": 0.10538935661315918 }, { "epoch": 4.20623779296875e-05, "model_forward_time": 0.026669740676879883, "step": 27566 }, { "epoch": 4.20623779296875e-05, "step": 27566, "training_step_time": 0.10636568069458008 }, { "epoch": 4.206390380859375e-05, "model_forward_time": 0.025302410125732422, "step": 27567 }, { "epoch": 4.206390380859375e-05, "step": 27567, "training_step_time": 0.10702681541442871 }, { "epoch": 4.20654296875e-05, "model_forward_time": 0.024237632751464844, "step": 27568 }, { "epoch": 4.20654296875e-05, "step": 27568, "training_step_time": 0.10477256774902344 }, { "epoch": 4.206695556640625e-05, "model_forward_time": 0.024177074432373047, "step": 27569 }, { "epoch": 4.206695556640625e-05, "step": 27569, "training_step_time": 0.11072707176208496 }, { "epoch": 4.20684814453125e-05, "grad_norm": 0.20210319757461548, "learning_rate": 1.7830532106104747e-06, "loss": 0.0048, "step": 27570 }, { "epoch": 4.20684814453125e-05, "model_forward_time": 0.024779558181762695, "step": 27570 }, { "epoch": 4.20684814453125e-05, "step": 27570, "training_step_time": 0.10695195198059082 }, { "epoch": 4.207000732421875e-05, "model_forward_time": 0.025423526763916016, "step": 27571 }, { "epoch": 4.207000732421875e-05, "step": 27571, "training_step_time": 0.1106729507446289 }, { "epoch": 4.2071533203125e-05, "model_forward_time": 0.02490377426147461, "step": 27572 }, { "epoch": 4.2071533203125e-05, "step": 27572, "training_step_time": 0.10621452331542969 }, { "epoch": 4.207305908203125e-05, "model_forward_time": 0.02536153793334961, "step": 27573 }, { "epoch": 4.207305908203125e-05, "step": 27573, "training_step_time": 0.10739302635192871 }, { "epoch": 4.20745849609375e-05, "model_forward_time": 0.024981975555419922, "step": 27574 }, { "epoch": 4.20745849609375e-05, "step": 27574, "training_step_time": 0.1068265438079834 }, { "epoch": 4.207611083984375e-05, "model_forward_time": 0.0252683162689209, "step": 27575 }, { "epoch": 4.207611083984375e-05, "step": 27575, "training_step_time": 0.11684250831604004 }, { "epoch": 4.207763671875e-05, "model_forward_time": 0.025206804275512695, "step": 27576 }, { "epoch": 4.207763671875e-05, "step": 27576, "training_step_time": 0.1087493896484375 }, { "epoch": 4.207916259765625e-05, "model_forward_time": 0.024904251098632812, "step": 27577 }, { "epoch": 4.207916259765625e-05, "step": 27577, "training_step_time": 0.10351276397705078 }, { "epoch": 4.20806884765625e-05, "model_forward_time": 0.024743318557739258, "step": 27578 }, { "epoch": 4.20806884765625e-05, "step": 27578, "training_step_time": 0.15907716751098633 }, { "epoch": 4.208221435546875e-05, "model_forward_time": 0.024530887603759766, "step": 27579 }, { "epoch": 4.208221435546875e-05, "step": 27579, "training_step_time": 0.1522693634033203 }, { "epoch": 4.2083740234375e-05, "grad_norm": 0.034953873604536057, "learning_rate": 1.7684950240852372e-06, "loss": 0.0036, "step": 27580 }, { "epoch": 4.2083740234375e-05, "model_forward_time": 0.024492740631103516, "step": 27580 }, { "epoch": 4.2083740234375e-05, "step": 27580, "training_step_time": 0.1039276123046875 }, { "epoch": 4.208526611328125e-05, "model_forward_time": 0.024318695068359375, "step": 27581 }, { "epoch": 4.208526611328125e-05, "step": 27581, "training_step_time": 0.13969826698303223 }, { "epoch": 4.20867919921875e-05, "model_forward_time": 0.025610923767089844, "step": 27582 }, { "epoch": 4.20867919921875e-05, "step": 27582, "training_step_time": 0.2103722095489502 }, { "epoch": 4.208831787109375e-05, "model_forward_time": 0.024657726287841797, "step": 27583 }, { "epoch": 4.208831787109375e-05, "step": 27583, "training_step_time": 0.1298537254333496 }, { "epoch": 4.208984375e-05, "model_forward_time": 0.024837017059326172, "step": 27584 }, { "epoch": 4.208984375e-05, "step": 27584, "training_step_time": 0.146087646484375 }, { "epoch": 4.209136962890625e-05, "model_forward_time": 0.02493429183959961, "step": 27585 }, { "epoch": 4.209136962890625e-05, "step": 27585, "training_step_time": 0.1825244426727295 }, { "epoch": 4.20928955078125e-05, "model_forward_time": 0.024359941482543945, "step": 27586 }, { "epoch": 4.20928955078125e-05, "step": 27586, "training_step_time": 0.10082507133483887 }, { "epoch": 4.209442138671875e-05, "model_forward_time": 0.02447199821472168, "step": 27587 }, { "epoch": 4.209442138671875e-05, "step": 27587, "training_step_time": 0.10001969337463379 }, { "epoch": 4.2095947265625e-05, "model_forward_time": 0.024995088577270508, "step": 27588 }, { "epoch": 4.2095947265625e-05, "step": 27588, "training_step_time": 0.10413527488708496 }, { "epoch": 4.209747314453125e-05, "model_forward_time": 0.02521657943725586, "step": 27589 }, { "epoch": 4.209747314453125e-05, "step": 27589, "training_step_time": 0.10712146759033203 }, { "epoch": 4.20989990234375e-05, "grad_norm": 0.10998217761516571, "learning_rate": 1.7539954433864858e-06, "loss": 0.0034, "step": 27590 }, { "epoch": 4.20989990234375e-05, "model_forward_time": 0.025073528289794922, "step": 27590 }, { "epoch": 4.20989990234375e-05, "step": 27590, "training_step_time": 0.10562396049499512 }, { "epoch": 4.210052490234375e-05, "model_forward_time": 0.02523946762084961, "step": 27591 }, { "epoch": 4.210052490234375e-05, "step": 27591, "training_step_time": 0.18592286109924316 }, { "epoch": 4.210205078125e-05, "model_forward_time": 0.024280071258544922, "step": 27592 }, { "epoch": 4.210205078125e-05, "step": 27592, "training_step_time": 0.16002988815307617 }, { "epoch": 4.210357666015625e-05, "model_forward_time": 0.02422499656677246, "step": 27593 }, { "epoch": 4.210357666015625e-05, "step": 27593, "training_step_time": 0.13994431495666504 }, { "epoch": 4.21051025390625e-05, "model_forward_time": 0.024435043334960938, "step": 27594 }, { "epoch": 4.21051025390625e-05, "step": 27594, "training_step_time": 0.1456737518310547 }, { "epoch": 4.210662841796875e-05, "model_forward_time": 0.024753093719482422, "step": 27595 }, { "epoch": 4.210662841796875e-05, "step": 27595, "training_step_time": 0.13695359230041504 }, { "epoch": 4.2108154296875e-05, "model_forward_time": 0.024649381637573242, "step": 27596 }, { "epoch": 4.2108154296875e-05, "step": 27596, "training_step_time": 0.2193615436553955 }, { "epoch": 4.210968017578125e-05, "model_forward_time": 0.025158166885375977, "step": 27597 }, { "epoch": 4.210968017578125e-05, "step": 27597, "training_step_time": 0.1297159194946289 }, { "epoch": 4.21112060546875e-05, "model_forward_time": 0.024258136749267578, "step": 27598 }, { "epoch": 4.21112060546875e-05, "step": 27598, "training_step_time": 0.19692397117614746 }, { "epoch": 4.211273193359375e-05, "model_forward_time": 0.024234771728515625, "step": 27599 }, { "epoch": 4.211273193359375e-05, "step": 27599, "training_step_time": 0.12053132057189941 }, { "epoch": 4.21142578125e-05, "grad_norm": 0.19780333340168, "learning_rate": 1.7395544861325718e-06, "loss": 0.0062, "step": 27600 }, { "epoch": 4.21142578125e-05, "model_forward_time": 0.02364826202392578, "step": 27600 }, { "epoch": 4.21142578125e-05, "step": 27600, "training_step_time": 0.18732953071594238 }, { "epoch": 4.211578369140625e-05, "model_forward_time": 0.02459263801574707, "step": 27601 }, { "epoch": 4.211578369140625e-05, "step": 27601, "training_step_time": 0.11251521110534668 }, { "epoch": 4.21173095703125e-05, "model_forward_time": 0.0247650146484375, "step": 27602 }, { "epoch": 4.21173095703125e-05, "step": 27602, "training_step_time": 0.11029386520385742 }, { "epoch": 4.211883544921875e-05, "model_forward_time": 0.025539398193359375, "step": 27603 }, { "epoch": 4.211883544921875e-05, "step": 27603, "training_step_time": 0.1074066162109375 }, { "epoch": 4.2120361328125e-05, "model_forward_time": 0.0255584716796875, "step": 27604 }, { "epoch": 4.2120361328125e-05, "step": 27604, "training_step_time": 0.10654091835021973 }, { "epoch": 4.212188720703125e-05, "model_forward_time": 0.0254976749420166, "step": 27605 }, { "epoch": 4.212188720703125e-05, "step": 27605, "training_step_time": 0.11065459251403809 }, { "epoch": 4.21234130859375e-05, "model_forward_time": 0.025205373764038086, "step": 27606 }, { "epoch": 4.21234130859375e-05, "step": 27606, "training_step_time": 0.1060938835144043 }, { "epoch": 4.212493896484375e-05, "model_forward_time": 0.0254824161529541, "step": 27607 }, { "epoch": 4.212493896484375e-05, "step": 27607, "training_step_time": 0.10522627830505371 }, { "epoch": 4.212646484375e-05, "model_forward_time": 0.025151968002319336, "step": 27608 }, { "epoch": 4.212646484375e-05, "step": 27608, "training_step_time": 0.10329437255859375 }, { "epoch": 4.212799072265625e-05, "model_forward_time": 0.025222301483154297, "step": 27609 }, { "epoch": 4.212799072265625e-05, "step": 27609, "training_step_time": 0.1039586067199707 }, { "epoch": 4.21295166015625e-05, "grad_norm": 0.15223300457000732, "learning_rate": 1.7251721698706147e-06, "loss": 0.0088, "step": 27610 }, { "epoch": 4.21295166015625e-05, "model_forward_time": 0.025224924087524414, "step": 27610 }, { "epoch": 4.21295166015625e-05, "step": 27610, "training_step_time": 0.11183428764343262 }, { "epoch": 4.213104248046875e-05, "model_forward_time": 0.024543285369873047, "step": 27611 }, { "epoch": 4.213104248046875e-05, "step": 27611, "training_step_time": 0.10614728927612305 }, { "epoch": 4.2132568359375e-05, "model_forward_time": 0.025307178497314453, "step": 27612 }, { "epoch": 4.2132568359375e-05, "step": 27612, "training_step_time": 0.10941624641418457 }, { "epoch": 4.213409423828125e-05, "model_forward_time": 0.025407791137695312, "step": 27613 }, { "epoch": 4.213409423828125e-05, "step": 27613, "training_step_time": 0.10332059860229492 }, { "epoch": 4.21356201171875e-05, "model_forward_time": 0.02514338493347168, "step": 27614 }, { "epoch": 4.21356201171875e-05, "step": 27614, "training_step_time": 0.10655832290649414 }, { "epoch": 4.213714599609375e-05, "model_forward_time": 0.025196313858032227, "step": 27615 }, { "epoch": 4.213714599609375e-05, "step": 27615, "training_step_time": 0.10486698150634766 }, { "epoch": 4.2138671875e-05, "model_forward_time": 0.02527475357055664, "step": 27616 }, { "epoch": 4.2138671875e-05, "step": 27616, "training_step_time": 0.10414671897888184 }, { "epoch": 4.214019775390625e-05, "model_forward_time": 0.025246381759643555, "step": 27617 }, { "epoch": 4.214019775390625e-05, "step": 27617, "training_step_time": 0.10378575325012207 }, { "epoch": 4.21417236328125e-05, "model_forward_time": 0.02530503273010254, "step": 27618 }, { "epoch": 4.21417236328125e-05, "step": 27618, "training_step_time": 0.10435795783996582 }, { "epoch": 4.214324951171875e-05, "model_forward_time": 0.025318622589111328, "step": 27619 }, { "epoch": 4.214324951171875e-05, "step": 27619, "training_step_time": 0.10833263397216797 }, { "epoch": 4.2144775390625e-05, "grad_norm": 0.061246681958436966, "learning_rate": 1.7108485120764905e-06, "loss": 0.0043, "step": 27620 }, { "epoch": 4.2144775390625e-05, "model_forward_time": 0.024988889694213867, "step": 27620 }, { "epoch": 4.2144775390625e-05, "step": 27620, "training_step_time": 0.10388636589050293 }, { "epoch": 4.214630126953125e-05, "model_forward_time": 0.025701284408569336, "step": 27621 }, { "epoch": 4.214630126953125e-05, "step": 27621, "training_step_time": 0.10527634620666504 }, { "epoch": 4.21478271484375e-05, "model_forward_time": 0.024407148361206055, "step": 27622 }, { "epoch": 4.21478271484375e-05, "step": 27622, "training_step_time": 0.14646148681640625 }, { "epoch": 4.214935302734375e-05, "model_forward_time": 0.024723291397094727, "step": 27623 }, { "epoch": 4.214935302734375e-05, "step": 27623, "training_step_time": 0.15848016738891602 }, { "epoch": 4.215087890625e-05, "model_forward_time": 0.024689912796020508, "step": 27624 }, { "epoch": 4.215087890625e-05, "step": 27624, "training_step_time": 0.21657061576843262 }, { "epoch": 4.215240478515625e-05, "model_forward_time": 0.024332046508789062, "step": 27625 }, { "epoch": 4.215240478515625e-05, "step": 27625, "training_step_time": 0.15656328201293945 }, { "epoch": 4.21539306640625e-05, "model_forward_time": 0.024501800537109375, "step": 27626 }, { "epoch": 4.21539306640625e-05, "step": 27626, "training_step_time": 0.14226675033569336 }, { "epoch": 4.215545654296875e-05, "model_forward_time": 0.023906230926513672, "step": 27627 }, { "epoch": 4.215545654296875e-05, "step": 27627, "training_step_time": 0.12164425849914551 }, { "epoch": 4.2156982421875e-05, "model_forward_time": 0.024671077728271484, "step": 27628 }, { "epoch": 4.2156982421875e-05, "step": 27628, "training_step_time": 0.22577667236328125 }, { "epoch": 4.215850830078125e-05, "model_forward_time": 0.02451014518737793, "step": 27629 }, { "epoch": 4.215850830078125e-05, "step": 27629, "training_step_time": 0.11810612678527832 }, { "epoch": 4.21600341796875e-05, "grad_norm": 0.04241884499788284, "learning_rate": 1.696583530154794e-06, "loss": 0.0036, "step": 27630 }, { "epoch": 4.21600341796875e-05, "model_forward_time": 0.024547576904296875, "step": 27630 }, { "epoch": 4.21600341796875e-05, "step": 27630, "training_step_time": 0.13604140281677246 }, { "epoch": 4.216156005859375e-05, "model_forward_time": 0.02526998519897461, "step": 27631 }, { "epoch": 4.216156005859375e-05, "step": 27631, "training_step_time": 0.14240646362304688 }, { "epoch": 4.21630859375e-05, "model_forward_time": 0.024836063385009766, "step": 27632 }, { "epoch": 4.21630859375e-05, "step": 27632, "training_step_time": 0.13510608673095703 }, { "epoch": 4.216461181640625e-05, "model_forward_time": 0.02458477020263672, "step": 27633 }, { "epoch": 4.216461181640625e-05, "step": 27633, "training_step_time": 0.12606143951416016 }, { "epoch": 4.21661376953125e-05, "model_forward_time": 0.02495551109313965, "step": 27634 }, { "epoch": 4.21661376953125e-05, "step": 27634, "training_step_time": 0.12200784683227539 }, { "epoch": 4.216766357421875e-05, "model_forward_time": 0.025063037872314453, "step": 27635 }, { "epoch": 4.216766357421875e-05, "step": 27635, "training_step_time": 0.11873269081115723 }, { "epoch": 4.2169189453125e-05, "model_forward_time": 0.024874210357666016, "step": 27636 }, { "epoch": 4.2169189453125e-05, "step": 27636, "training_step_time": 0.11419558525085449 }, { "epoch": 4.217071533203125e-05, "model_forward_time": 0.025151968002319336, "step": 27637 }, { "epoch": 4.217071533203125e-05, "step": 27637, "training_step_time": 0.11036872863769531 }, { "epoch": 4.21722412109375e-05, "model_forward_time": 0.02498340606689453, "step": 27638 }, { "epoch": 4.21722412109375e-05, "step": 27638, "training_step_time": 0.10825681686401367 }, { "epoch": 4.217376708984375e-05, "model_forward_time": 0.025016307830810547, "step": 27639 }, { "epoch": 4.217376708984375e-05, "step": 27639, "training_step_time": 0.20430684089660645 }, { "epoch": 4.217529296875e-05, "grad_norm": 0.06447423994541168, "learning_rate": 1.682377241438826e-06, "loss": 0.0035, "step": 27640 }, { "epoch": 4.217529296875e-05, "model_forward_time": 0.02485203742980957, "step": 27640 }, { "epoch": 4.217529296875e-05, "step": 27640, "training_step_time": 0.12594294548034668 }, { "epoch": 4.217681884765625e-05, "model_forward_time": 0.023974180221557617, "step": 27641 }, { "epoch": 4.217681884765625e-05, "step": 27641, "training_step_time": 0.1966104507446289 }, { "epoch": 4.21783447265625e-05, "model_forward_time": 0.024400711059570312, "step": 27642 }, { "epoch": 4.21783447265625e-05, "step": 27642, "training_step_time": 0.12387800216674805 }, { "epoch": 4.217987060546875e-05, "model_forward_time": 0.02424454689025879, "step": 27643 }, { "epoch": 4.217987060546875e-05, "step": 27643, "training_step_time": 0.10306358337402344 }, { "epoch": 4.2181396484375e-05, "model_forward_time": 0.02456498146057129, "step": 27644 }, { "epoch": 4.2181396484375e-05, "step": 27644, "training_step_time": 0.1922774314880371 }, { "epoch": 4.218292236328125e-05, "model_forward_time": 0.024243831634521484, "step": 27645 }, { "epoch": 4.218292236328125e-05, "step": 27645, "training_step_time": 0.10531926155090332 }, { "epoch": 4.21844482421875e-05, "model_forward_time": 0.023311376571655273, "step": 27646 }, { "epoch": 4.21844482421875e-05, "step": 27646, "training_step_time": 0.10196924209594727 }, { "epoch": 4.218597412109375e-05, "model_forward_time": 0.02417778968811035, "step": 27647 }, { "epoch": 4.218597412109375e-05, "step": 27647, "training_step_time": 0.10902690887451172 }, { "epoch": 4.21875e-05, "model_forward_time": 0.025592327117919922, "step": 27648 }, { "epoch": 4.21875e-05, "step": 27648, "training_step_time": 0.10908293724060059 }, { "epoch": 4.218902587890625e-05, "model_forward_time": 0.024844646453857422, "step": 27649 }, { "epoch": 4.218902587890625e-05, "step": 27649, "training_step_time": 0.10633730888366699 }, { "epoch": 4.21905517578125e-05, "grad_norm": 0.0660465881228447, "learning_rate": 1.6682296631905626e-06, "loss": 0.0102, "step": 27650 }, { "epoch": 4.21905517578125e-05, "model_forward_time": 0.025337696075439453, "step": 27650 }, { "epoch": 4.21905517578125e-05, "step": 27650, "training_step_time": 0.10471677780151367 }, { "epoch": 4.219207763671875e-05, "model_forward_time": 0.025357961654663086, "step": 27651 }, { "epoch": 4.219207763671875e-05, "step": 27651, "training_step_time": 0.10409045219421387 }, { "epoch": 4.2193603515625e-05, "model_forward_time": 0.024956226348876953, "step": 27652 }, { "epoch": 4.2193603515625e-05, "step": 27652, "training_step_time": 0.10526013374328613 }, { "epoch": 4.219512939453125e-05, "model_forward_time": 0.026139497756958008, "step": 27653 }, { "epoch": 4.219512939453125e-05, "step": 27653, "training_step_time": 0.10611605644226074 }, { "epoch": 4.21966552734375e-05, "model_forward_time": 0.025371789932250977, "step": 27654 }, { "epoch": 4.21966552734375e-05, "step": 27654, "training_step_time": 0.10560846328735352 }, { "epoch": 4.219818115234375e-05, "model_forward_time": 0.02519536018371582, "step": 27655 }, { "epoch": 4.219818115234375e-05, "step": 27655, "training_step_time": 0.10461139678955078 }, { "epoch": 4.219970703125e-05, "model_forward_time": 0.025520801544189453, "step": 27656 }, { "epoch": 4.219970703125e-05, "step": 27656, "training_step_time": 0.1064450740814209 }, { "epoch": 4.220123291015625e-05, "model_forward_time": 0.025105953216552734, "step": 27657 }, { "epoch": 4.220123291015625e-05, "step": 27657, "training_step_time": 0.10495805740356445 }, { "epoch": 4.22027587890625e-05, "model_forward_time": 0.025074481964111328, "step": 27658 }, { "epoch": 4.22027587890625e-05, "step": 27658, "training_step_time": 0.10374188423156738 }, { "epoch": 4.220428466796875e-05, "model_forward_time": 0.025078296661376953, "step": 27659 }, { "epoch": 4.220428466796875e-05, "step": 27659, "training_step_time": 0.10534286499023438 }, { "epoch": 4.2205810546875e-05, "grad_norm": 0.06475159525871277, "learning_rate": 1.6541408126006463e-06, "loss": 0.01, "step": 27660 }, { "epoch": 4.2205810546875e-05, "model_forward_time": 0.025194644927978516, "step": 27660 }, { "epoch": 4.2205810546875e-05, "step": 27660, "training_step_time": 0.10530352592468262 }, { "epoch": 4.220733642578125e-05, "model_forward_time": 0.02490687370300293, "step": 27661 }, { "epoch": 4.220733642578125e-05, "step": 27661, "training_step_time": 0.10345339775085449 }, { "epoch": 4.22088623046875e-05, "model_forward_time": 0.025268077850341797, "step": 27662 }, { "epoch": 4.22088623046875e-05, "step": 27662, "training_step_time": 0.10631561279296875 }, { "epoch": 4.221038818359375e-05, "model_forward_time": 0.025289058685302734, "step": 27663 }, { "epoch": 4.221038818359375e-05, "step": 27663, "training_step_time": 0.12271618843078613 }, { "epoch": 4.22119140625e-05, "model_forward_time": 0.025401592254638672, "step": 27664 }, { "epoch": 4.22119140625e-05, "step": 27664, "training_step_time": 0.12385916709899902 }, { "epoch": 4.221343994140625e-05, "model_forward_time": 0.024962425231933594, "step": 27665 }, { "epoch": 4.221343994140625e-05, "step": 27665, "training_step_time": 0.11117696762084961 }, { "epoch": 4.22149658203125e-05, "model_forward_time": 0.0244596004486084, "step": 27666 }, { "epoch": 4.22149658203125e-05, "step": 27666, "training_step_time": 0.13835668563842773 }, { "epoch": 4.221649169921875e-05, "model_forward_time": 0.02455759048461914, "step": 27667 }, { "epoch": 4.221649169921875e-05, "step": 27667, "training_step_time": 0.16215872764587402 }, { "epoch": 4.2218017578125e-05, "model_forward_time": 0.0248262882232666, "step": 27668 }, { "epoch": 4.2218017578125e-05, "step": 27668, "training_step_time": 0.20669078826904297 }, { "epoch": 4.221954345703125e-05, "model_forward_time": 0.024550437927246094, "step": 27669 }, { "epoch": 4.221954345703125e-05, "step": 27669, "training_step_time": 0.16620492935180664 }, { "epoch": 4.22210693359375e-05, "grad_norm": 0.07270149886608124, "learning_rate": 1.6401107067883559e-06, "loss": 0.0084, "step": 27670 }, { "epoch": 4.22210693359375e-05, "model_forward_time": 0.0258176326751709, "step": 27670 }, { "epoch": 4.22210693359375e-05, "step": 27670, "training_step_time": 0.1777806282043457 }, { "epoch": 4.222259521484375e-05, "model_forward_time": 0.02454853057861328, "step": 27671 }, { "epoch": 4.222259521484375e-05, "step": 27671, "training_step_time": 0.13738679885864258 }, { "epoch": 4.222412109375e-05, "model_forward_time": 0.024824857711791992, "step": 27672 }, { "epoch": 4.222412109375e-05, "step": 27672, "training_step_time": 0.18199920654296875 }, { "epoch": 4.222564697265625e-05, "model_forward_time": 0.024954795837402344, "step": 27673 }, { "epoch": 4.222564697265625e-05, "step": 27673, "training_step_time": 0.15314388275146484 }, { "epoch": 4.22271728515625e-05, "model_forward_time": 0.02451944351196289, "step": 27674 }, { "epoch": 4.22271728515625e-05, "step": 27674, "training_step_time": 0.10490560531616211 }, { "epoch": 4.222869873046875e-05, "model_forward_time": 0.024959564208984375, "step": 27675 }, { "epoch": 4.222869873046875e-05, "step": 27675, "training_step_time": 0.10483050346374512 }, { "epoch": 4.2230224609375e-05, "model_forward_time": 0.02555108070373535, "step": 27676 }, { "epoch": 4.2230224609375e-05, "step": 27676, "training_step_time": 0.10722804069519043 }, { "epoch": 4.223175048828125e-05, "model_forward_time": 0.025754451751708984, "step": 27677 }, { "epoch": 4.223175048828125e-05, "step": 27677, "training_step_time": 0.10642719268798828 }, { "epoch": 4.22332763671875e-05, "model_forward_time": 0.025213956832885742, "step": 27678 }, { "epoch": 4.22332763671875e-05, "step": 27678, "training_step_time": 0.10613417625427246 }, { "epoch": 4.223480224609375e-05, "model_forward_time": 0.02508401870727539, "step": 27679 }, { "epoch": 4.223480224609375e-05, "step": 27679, "training_step_time": 0.11035823822021484 }, { "epoch": 4.2236328125e-05, "grad_norm": 0.1756133884191513, "learning_rate": 1.626139362801604e-06, "loss": 0.0056, "step": 27680 }, { "epoch": 4.2236328125e-05, "model_forward_time": 0.025370359420776367, "step": 27680 }, { "epoch": 4.2236328125e-05, "step": 27680, "training_step_time": 0.10494422912597656 }, { "epoch": 4.223785400390625e-05, "model_forward_time": 0.0251157283782959, "step": 27681 }, { "epoch": 4.223785400390625e-05, "step": 27681, "training_step_time": 0.1048576831817627 }, { "epoch": 4.22393798828125e-05, "model_forward_time": 0.02539992332458496, "step": 27682 }, { "epoch": 4.22393798828125e-05, "step": 27682, "training_step_time": 0.10538291931152344 }, { "epoch": 4.224090576171875e-05, "model_forward_time": 0.026517629623413086, "step": 27683 }, { "epoch": 4.224090576171875e-05, "step": 27683, "training_step_time": 0.10849261283874512 }, { "epoch": 4.2242431640625e-05, "model_forward_time": 0.025059223175048828, "step": 27684 }, { "epoch": 4.2242431640625e-05, "step": 27684, "training_step_time": 0.19494891166687012 }, { "epoch": 4.224395751953125e-05, "model_forward_time": 0.02438521385192871, "step": 27685 }, { "epoch": 4.224395751953125e-05, "step": 27685, "training_step_time": 0.1343832015991211 }, { "epoch": 4.22454833984375e-05, "model_forward_time": 0.02482318878173828, "step": 27686 }, { "epoch": 4.22454833984375e-05, "step": 27686, "training_step_time": 0.10733890533447266 }, { "epoch": 4.224700927734375e-05, "model_forward_time": 0.025352001190185547, "step": 27687 }, { "epoch": 4.224700927734375e-05, "step": 27687, "training_step_time": 0.11685633659362793 }, { "epoch": 4.224853515625e-05, "model_forward_time": 0.02523946762084961, "step": 27688 }, { "epoch": 4.224853515625e-05, "step": 27688, "training_step_time": 0.10916829109191895 }, { "epoch": 4.225006103515625e-05, "model_forward_time": 0.025341272354125977, "step": 27689 }, { "epoch": 4.225006103515625e-05, "step": 27689, "training_step_time": 0.10696029663085938 }, { "epoch": 4.22515869140625e-05, "grad_norm": 0.04706356301903725, "learning_rate": 1.6122267976168781e-06, "loss": 0.0028, "step": 27690 }, { "epoch": 4.22515869140625e-05, "model_forward_time": 0.025202274322509766, "step": 27690 }, { "epoch": 4.22515869140625e-05, "step": 27690, "training_step_time": 0.2031550407409668 }, { "epoch": 4.225311279296875e-05, "model_forward_time": 0.02489185333251953, "step": 27691 }, { "epoch": 4.225311279296875e-05, "step": 27691, "training_step_time": 0.10516738891601562 }, { "epoch": 4.2254638671875e-05, "model_forward_time": 0.024877309799194336, "step": 27692 }, { "epoch": 4.2254638671875e-05, "step": 27692, "training_step_time": 0.11028456687927246 }, { "epoch": 4.225616455078125e-05, "model_forward_time": 0.026930570602416992, "step": 27693 }, { "epoch": 4.225616455078125e-05, "step": 27693, "training_step_time": 0.11659812927246094 }, { "epoch": 4.22576904296875e-05, "model_forward_time": 0.02515101432800293, "step": 27694 }, { "epoch": 4.22576904296875e-05, "step": 27694, "training_step_time": 0.10611438751220703 }, { "epoch": 4.225921630859375e-05, "model_forward_time": 0.025331974029541016, "step": 27695 }, { "epoch": 4.225921630859375e-05, "step": 27695, "training_step_time": 0.1058206558227539 }, { "epoch": 4.22607421875e-05, "model_forward_time": 0.025413990020751953, "step": 27696 }, { "epoch": 4.22607421875e-05, "step": 27696, "training_step_time": 0.10953187942504883 }, { "epoch": 4.226226806640625e-05, "model_forward_time": 0.02500462532043457, "step": 27697 }, { "epoch": 4.226226806640625e-05, "step": 27697, "training_step_time": 0.10342764854431152 }, { "epoch": 4.22637939453125e-05, "model_forward_time": 0.02593207359313965, "step": 27698 }, { "epoch": 4.22637939453125e-05, "step": 27698, "training_step_time": 0.10918784141540527 }, { "epoch": 4.226531982421875e-05, "model_forward_time": 0.024413347244262695, "step": 27699 }, { "epoch": 4.226531982421875e-05, "step": 27699, "training_step_time": 0.10820317268371582 }, { "epoch": 4.2266845703125e-05, "grad_norm": 0.08612319827079773, "learning_rate": 1.5983730281392662e-06, "loss": 0.0047, "step": 27700 }, { "epoch": 4.2266845703125e-05, "model_forward_time": 0.02556633949279785, "step": 27700 }, { "epoch": 4.2266845703125e-05, "step": 27700, "training_step_time": 0.11347150802612305 }, { "epoch": 4.226837158203125e-05, "model_forward_time": 0.02498316764831543, "step": 27701 }, { "epoch": 4.226837158203125e-05, "step": 27701, "training_step_time": 0.10439515113830566 }, { "epoch": 4.22698974609375e-05, "model_forward_time": 0.024886131286621094, "step": 27702 }, { "epoch": 4.22698974609375e-05, "step": 27702, "training_step_time": 0.10900568962097168 }, { "epoch": 4.227142333984375e-05, "model_forward_time": 0.025036096572875977, "step": 27703 }, { "epoch": 4.227142333984375e-05, "step": 27703, "training_step_time": 0.10880541801452637 }, { "epoch": 4.227294921875e-05, "model_forward_time": 0.02498912811279297, "step": 27704 }, { "epoch": 4.227294921875e-05, "step": 27704, "training_step_time": 0.10824990272521973 }, { "epoch": 4.227447509765625e-05, "model_forward_time": 0.025271177291870117, "step": 27705 }, { "epoch": 4.227447509765625e-05, "step": 27705, "training_step_time": 0.10960817337036133 }, { "epoch": 4.22760009765625e-05, "model_forward_time": 0.025146007537841797, "step": 27706 }, { "epoch": 4.22760009765625e-05, "step": 27706, "training_step_time": 0.10738110542297363 }, { "epoch": 4.227752685546875e-05, "model_forward_time": 0.025606870651245117, "step": 27707 }, { "epoch": 4.227752685546875e-05, "step": 27707, "training_step_time": 0.10852336883544922 }, { "epoch": 4.2279052734375e-05, "model_forward_time": 0.02523183822631836, "step": 27708 }, { "epoch": 4.2279052734375e-05, "step": 27708, "training_step_time": 0.10644960403442383 }, { "epoch": 4.228057861328125e-05, "model_forward_time": 0.02565455436706543, "step": 27709 }, { "epoch": 4.228057861328125e-05, "step": 27709, "training_step_time": 0.1053462028503418 }, { "epoch": 4.22821044921875e-05, "grad_norm": 0.12188015878200531, "learning_rate": 1.5845780712023973e-06, "loss": 0.0025, "step": 27710 }, { "epoch": 4.22821044921875e-05, "model_forward_time": 0.025073528289794922, "step": 27710 }, { "epoch": 4.22821044921875e-05, "step": 27710, "training_step_time": 0.10300779342651367 }, { "epoch": 4.228363037109375e-05, "model_forward_time": 0.024135351181030273, "step": 27711 }, { "epoch": 4.228363037109375e-05, "step": 27711, "training_step_time": 0.14708924293518066 }, { "epoch": 4.228515625e-05, "model_forward_time": 0.024503231048583984, "step": 27712 }, { "epoch": 4.228515625e-05, "step": 27712, "training_step_time": 0.15679025650024414 }, { "epoch": 4.228668212890625e-05, "model_forward_time": 0.024532318115234375, "step": 27713 }, { "epoch": 4.228668212890625e-05, "step": 27713, "training_step_time": 0.17716169357299805 }, { "epoch": 4.22882080078125e-05, "model_forward_time": 0.0243985652923584, "step": 27714 }, { "epoch": 4.22882080078125e-05, "step": 27714, "training_step_time": 0.16577982902526855 }, { "epoch": 4.228973388671875e-05, "model_forward_time": 0.024974584579467773, "step": 27715 }, { "epoch": 4.228973388671875e-05, "step": 27715, "training_step_time": 0.18579697608947754 }, { "epoch": 4.2291259765625e-05, "model_forward_time": 0.023831605911254883, "step": 27716 }, { "epoch": 4.2291259765625e-05, "step": 27716, "training_step_time": 0.1444697380065918 }, { "epoch": 4.229278564453125e-05, "model_forward_time": 0.023415327072143555, "step": 27717 }, { "epoch": 4.229278564453125e-05, "step": 27717, "training_step_time": 0.5963430404663086 }, { "epoch": 4.22943115234375e-05, "model_forward_time": 0.021768808364868164, "step": 27718 }, { "epoch": 4.22943115234375e-05, "step": 27718, "training_step_time": 0.10294556617736816 }, { "epoch": 4.229583740234375e-05, "model_forward_time": 0.023662805557250977, "step": 27719 }, { "epoch": 4.229583740234375e-05, "step": 27719, "training_step_time": 0.10234904289245605 }, { "epoch": 4.229736328125e-05, "grad_norm": 0.025142701342701912, "learning_rate": 1.5708419435684462e-06, "loss": 0.003, "step": 27720 }, { "epoch": 4.229736328125e-05, "model_forward_time": 0.02426767349243164, "step": 27720 }, { "epoch": 4.229736328125e-05, "step": 27720, "training_step_time": 0.10664916038513184 }, { "epoch": 4.229888916015625e-05, "model_forward_time": 0.024670124053955078, "step": 27721 }, { "epoch": 4.229888916015625e-05, "step": 27721, "training_step_time": 0.10760378837585449 }, { "epoch": 4.23004150390625e-05, "model_forward_time": 0.02463984489440918, "step": 27722 }, { "epoch": 4.23004150390625e-05, "step": 27722, "training_step_time": 0.11524438858032227 }, { "epoch": 4.230194091796875e-05, "model_forward_time": 0.02450728416442871, "step": 27723 }, { "epoch": 4.230194091796875e-05, "step": 27723, "training_step_time": 0.10560369491577148 }, { "epoch": 4.2303466796875e-05, "model_forward_time": 0.024857282638549805, "step": 27724 }, { "epoch": 4.2303466796875e-05, "step": 27724, "training_step_time": 0.10944819450378418 }, { "epoch": 4.230499267578125e-05, "model_forward_time": 0.02480030059814453, "step": 27725 }, { "epoch": 4.230499267578125e-05, "step": 27725, "training_step_time": 0.10605478286743164 }, { "epoch": 4.23065185546875e-05, "model_forward_time": 0.025127410888671875, "step": 27726 }, { "epoch": 4.23065185546875e-05, "step": 27726, "training_step_time": 0.10770082473754883 }, { "epoch": 4.230804443359375e-05, "model_forward_time": 0.027119159698486328, "step": 27727 }, { "epoch": 4.230804443359375e-05, "step": 27727, "training_step_time": 0.13924479484558105 }, { "epoch": 4.23095703125e-05, "model_forward_time": 0.024493694305419922, "step": 27728 }, { "epoch": 4.23095703125e-05, "step": 27728, "training_step_time": 0.14240503311157227 }, { "epoch": 4.231109619140625e-05, "model_forward_time": 0.023777008056640625, "step": 27729 }, { "epoch": 4.231109619140625e-05, "step": 27729, "training_step_time": 0.10647845268249512 }, { "epoch": 4.23126220703125e-05, "grad_norm": 0.10114025324583054, "learning_rate": 1.5571646619281066e-06, "loss": 0.0027, "step": 27730 }, { "epoch": 4.23126220703125e-05, "model_forward_time": 0.024282455444335938, "step": 27730 }, { "epoch": 4.23126220703125e-05, "step": 27730, "training_step_time": 0.1061711311340332 }, { "epoch": 4.231414794921875e-05, "model_forward_time": 0.024338483810424805, "step": 27731 }, { "epoch": 4.231414794921875e-05, "step": 27731, "training_step_time": 0.11275649070739746 }, { "epoch": 4.2315673828125e-05, "model_forward_time": 0.025748014450073242, "step": 27732 }, { "epoch": 4.2315673828125e-05, "step": 27732, "training_step_time": 0.11031794548034668 }, { "epoch": 4.231719970703125e-05, "model_forward_time": 0.025377273559570312, "step": 27733 }, { "epoch": 4.231719970703125e-05, "step": 27733, "training_step_time": 0.18670272827148438 }, { "epoch": 4.23187255859375e-05, "model_forward_time": 0.02460026741027832, "step": 27734 }, { "epoch": 4.23187255859375e-05, "step": 27734, "training_step_time": 0.10669660568237305 }, { "epoch": 4.232025146484375e-05, "model_forward_time": 0.02521491050720215, "step": 27735 }, { "epoch": 4.232025146484375e-05, "step": 27735, "training_step_time": 0.1014094352722168 }, { "epoch": 4.232177734375e-05, "model_forward_time": 0.025114774703979492, "step": 27736 }, { "epoch": 4.232177734375e-05, "step": 27736, "training_step_time": 0.10554051399230957 }, { "epoch": 4.232330322265625e-05, "model_forward_time": 0.024494409561157227, "step": 27737 }, { "epoch": 4.232330322265625e-05, "step": 27737, "training_step_time": 0.10537910461425781 }, { "epoch": 4.23248291015625e-05, "model_forward_time": 0.024053096771240234, "step": 27738 }, { "epoch": 4.23248291015625e-05, "step": 27738, "training_step_time": 0.10490655899047852 }, { "epoch": 4.232635498046875e-05, "model_forward_time": 0.02542257308959961, "step": 27739 }, { "epoch": 4.232635498046875e-05, "step": 27739, "training_step_time": 0.16970491409301758 }, { "epoch": 4.2327880859375e-05, "grad_norm": 0.03618144243955612, "learning_rate": 1.5435462429005675e-06, "loss": 0.0036, "step": 27740 }, { "epoch": 4.2327880859375e-05, "model_forward_time": 0.02414989471435547, "step": 27740 }, { "epoch": 4.2327880859375e-05, "step": 27740, "training_step_time": 0.17909622192382812 }, { "epoch": 4.232940673828125e-05, "model_forward_time": 0.024317502975463867, "step": 27741 }, { "epoch": 4.232940673828125e-05, "step": 27741, "training_step_time": 0.16564083099365234 }, { "epoch": 4.23309326171875e-05, "model_forward_time": 0.024778127670288086, "step": 27742 }, { "epoch": 4.23309326171875e-05, "step": 27742, "training_step_time": 0.15216350555419922 }, { "epoch": 4.233245849609375e-05, "model_forward_time": 0.024600744247436523, "step": 27743 }, { "epoch": 4.233245849609375e-05, "step": 27743, "training_step_time": 0.1533827781677246 }, { "epoch": 4.2333984375e-05, "model_forward_time": 0.02443981170654297, "step": 27744 }, { "epoch": 4.2333984375e-05, "step": 27744, "training_step_time": 0.1368401050567627 }, { "epoch": 4.233551025390625e-05, "model_forward_time": 0.025228023529052734, "step": 27745 }, { "epoch": 4.233551025390625e-05, "step": 27745, "training_step_time": 0.131392240524292 }, { "epoch": 4.23370361328125e-05, "model_forward_time": 0.02487659454345703, "step": 27746 }, { "epoch": 4.23370361328125e-05, "step": 27746, "training_step_time": 0.1256556510925293 }, { "epoch": 4.233856201171875e-05, "model_forward_time": 0.025441646575927734, "step": 27747 }, { "epoch": 4.233856201171875e-05, "step": 27747, "training_step_time": 0.12474560737609863 }, { "epoch": 4.2340087890625e-05, "model_forward_time": 0.02576446533203125, "step": 27748 }, { "epoch": 4.2340087890625e-05, "step": 27748, "training_step_time": 0.11660218238830566 }, { "epoch": 4.234161376953125e-05, "model_forward_time": 0.02531886100769043, "step": 27749 }, { "epoch": 4.234161376953125e-05, "step": 27749, "training_step_time": 0.11276483535766602 }, { "epoch": 4.23431396484375e-05, "grad_norm": 0.042110636830329895, "learning_rate": 1.5299867030334814e-06, "loss": 0.0032, "step": 27750 }, { "epoch": 4.23431396484375e-05, "model_forward_time": 0.025428056716918945, "step": 27750 }, { "epoch": 4.23431396484375e-05, "step": 27750, "training_step_time": 0.11369776725769043 }, { "epoch": 4.234466552734375e-05, "model_forward_time": 0.02568507194519043, "step": 27751 }, { "epoch": 4.234466552734375e-05, "step": 27751, "training_step_time": 0.10561418533325195 }, { "epoch": 4.234619140625e-05, "model_forward_time": 0.02510523796081543, "step": 27752 }, { "epoch": 4.234619140625e-05, "step": 27752, "training_step_time": 0.14813947677612305 }, { "epoch": 4.234771728515625e-05, "model_forward_time": 0.025078773498535156, "step": 27753 }, { "epoch": 4.234771728515625e-05, "step": 27753, "training_step_time": 0.1586003303527832 }, { "epoch": 4.23492431640625e-05, "model_forward_time": 0.024753093719482422, "step": 27754 }, { "epoch": 4.23492431640625e-05, "step": 27754, "training_step_time": 0.1386735439300537 }, { "epoch": 4.235076904296875e-05, "model_forward_time": 0.02470088005065918, "step": 27755 }, { "epoch": 4.235076904296875e-05, "step": 27755, "training_step_time": 0.13225913047790527 }, { "epoch": 4.2352294921875e-05, "model_forward_time": 0.025704383850097656, "step": 27756 }, { "epoch": 4.2352294921875e-05, "step": 27756, "training_step_time": 0.17214250564575195 }, { "epoch": 4.235382080078125e-05, "model_forward_time": 0.025185346603393555, "step": 27757 }, { "epoch": 4.235382080078125e-05, "step": 27757, "training_step_time": 0.17632579803466797 }, { "epoch": 4.23553466796875e-05, "model_forward_time": 0.02440667152404785, "step": 27758 }, { "epoch": 4.23553466796875e-05, "step": 27758, "training_step_time": 0.1441481113433838 }, { "epoch": 4.235687255859375e-05, "model_forward_time": 0.02455306053161621, "step": 27759 }, { "epoch": 4.235687255859375e-05, "step": 27759, "training_step_time": 0.1085672378540039 }, { "epoch": 4.23583984375e-05, "grad_norm": 0.04515612870454788, "learning_rate": 1.516486058802974e-06, "loss": 0.0027, "step": 27760 }, { "epoch": 4.23583984375e-05, "model_forward_time": 0.024704933166503906, "step": 27760 }, { "epoch": 4.23583984375e-05, "step": 27760, "training_step_time": 0.11039590835571289 }, { "epoch": 4.235992431640625e-05, "model_forward_time": 0.025605201721191406, "step": 27761 }, { "epoch": 4.235992431640625e-05, "step": 27761, "training_step_time": 0.10695195198059082 }, { "epoch": 4.23614501953125e-05, "model_forward_time": 0.025614500045776367, "step": 27762 }, { "epoch": 4.23614501953125e-05, "step": 27762, "training_step_time": 0.10595226287841797 }, { "epoch": 4.236297607421875e-05, "model_forward_time": 0.025545358657836914, "step": 27763 }, { "epoch": 4.236297607421875e-05, "step": 27763, "training_step_time": 0.11027359962463379 }, { "epoch": 4.2364501953125e-05, "model_forward_time": 0.025343656539916992, "step": 27764 }, { "epoch": 4.2364501953125e-05, "step": 27764, "training_step_time": 0.10791611671447754 }, { "epoch": 4.236602783203125e-05, "model_forward_time": 0.025295019149780273, "step": 27765 }, { "epoch": 4.236602783203125e-05, "step": 27765, "training_step_time": 0.10526633262634277 }, { "epoch": 4.23675537109375e-05, "model_forward_time": 0.025359630584716797, "step": 27766 }, { "epoch": 4.23675537109375e-05, "step": 27766, "training_step_time": 0.10560274124145508 }, { "epoch": 4.236907958984375e-05, "model_forward_time": 0.025148630142211914, "step": 27767 }, { "epoch": 4.236907958984375e-05, "step": 27767, "training_step_time": 0.1112675666809082 }, { "epoch": 4.237060546875e-05, "model_forward_time": 0.02546858787536621, "step": 27768 }, { "epoch": 4.237060546875e-05, "step": 27768, "training_step_time": 0.10685563087463379 }, { "epoch": 4.237213134765625e-05, "model_forward_time": 0.02554941177368164, "step": 27769 }, { "epoch": 4.237213134765625e-05, "step": 27769, "training_step_time": 0.10724282264709473 }, { "epoch": 4.23736572265625e-05, "grad_norm": 0.09268509596586227, "learning_rate": 1.5030443266136118e-06, "loss": 0.0118, "step": 27770 }, { "epoch": 4.23736572265625e-05, "model_forward_time": 0.025165081024169922, "step": 27770 }, { "epoch": 4.23736572265625e-05, "step": 27770, "training_step_time": 0.10873174667358398 }, { "epoch": 4.237518310546875e-05, "model_forward_time": 0.026018142700195312, "step": 27771 }, { "epoch": 4.237518310546875e-05, "step": 27771, "training_step_time": 0.10544133186340332 }, { "epoch": 4.2376708984375e-05, "model_forward_time": 0.025185585021972656, "step": 27772 }, { "epoch": 4.2376708984375e-05, "step": 27772, "training_step_time": 0.12476205825805664 }, { "epoch": 4.237823486328125e-05, "model_forward_time": 0.025534391403198242, "step": 27773 }, { "epoch": 4.237823486328125e-05, "step": 27773, "training_step_time": 0.14030790328979492 }, { "epoch": 4.23797607421875e-05, "model_forward_time": 0.025883197784423828, "step": 27774 }, { "epoch": 4.23797607421875e-05, "step": 27774, "training_step_time": 0.10585474967956543 }, { "epoch": 4.238128662109375e-05, "model_forward_time": 0.025811433792114258, "step": 27775 }, { "epoch": 4.238128662109375e-05, "step": 27775, "training_step_time": 0.10695052146911621 }, { "epoch": 4.23828125e-05, "model_forward_time": 0.024994373321533203, "step": 27776 }, { "epoch": 4.23828125e-05, "step": 27776, "training_step_time": 0.1168055534362793 }, { "epoch": 4.238433837890625e-05, "model_forward_time": 0.025405406951904297, "step": 27777 }, { "epoch": 4.238433837890625e-05, "step": 27777, "training_step_time": 0.10906100273132324 }, { "epoch": 4.23858642578125e-05, "model_forward_time": 0.02455282211303711, "step": 27778 }, { "epoch": 4.23858642578125e-05, "step": 27778, "training_step_time": 0.19130706787109375 }, { "epoch": 4.238739013671875e-05, "model_forward_time": 0.02480626106262207, "step": 27779 }, { "epoch": 4.238739013671875e-05, "step": 27779, "training_step_time": 0.1045677661895752 }, { "epoch": 4.2388916015625e-05, "grad_norm": 0.05210455507040024, "learning_rate": 1.4896615227983468e-06, "loss": 0.0039, "step": 27780 }, { "epoch": 4.2388916015625e-05, "model_forward_time": 0.0239102840423584, "step": 27780 }, { "epoch": 4.2388916015625e-05, "step": 27780, "training_step_time": 0.10227751731872559 }, { "epoch": 4.239044189453125e-05, "model_forward_time": 0.02455878257751465, "step": 27781 }, { "epoch": 4.239044189453125e-05, "step": 27781, "training_step_time": 0.10583376884460449 }, { "epoch": 4.23919677734375e-05, "model_forward_time": 0.025266647338867188, "step": 27782 }, { "epoch": 4.23919677734375e-05, "step": 27782, "training_step_time": 0.1070241928100586 }, { "epoch": 4.239349365234375e-05, "model_forward_time": 0.025438308715820312, "step": 27783 }, { "epoch": 4.239349365234375e-05, "step": 27783, "training_step_time": 0.10763788223266602 }, { "epoch": 4.239501953125e-05, "model_forward_time": 0.024671077728271484, "step": 27784 }, { "epoch": 4.239501953125e-05, "step": 27784, "training_step_time": 0.10678482055664062 }, { "epoch": 4.239654541015625e-05, "model_forward_time": 0.025286197662353516, "step": 27785 }, { "epoch": 4.239654541015625e-05, "step": 27785, "training_step_time": 0.10775494575500488 }, { "epoch": 4.23980712890625e-05, "model_forward_time": 0.025350093841552734, "step": 27786 }, { "epoch": 4.23980712890625e-05, "step": 27786, "training_step_time": 0.11060523986816406 }, { "epoch": 4.239959716796875e-05, "model_forward_time": 0.02554488182067871, "step": 27787 }, { "epoch": 4.239959716796875e-05, "step": 27787, "training_step_time": 0.10510087013244629 }, { "epoch": 4.2401123046875e-05, "model_forward_time": 0.02471160888671875, "step": 27788 }, { "epoch": 4.2401123046875e-05, "step": 27788, "training_step_time": 0.10821223258972168 }, { "epoch": 4.240264892578125e-05, "model_forward_time": 0.025285720825195312, "step": 27789 }, { "epoch": 4.240264892578125e-05, "step": 27789, "training_step_time": 0.10730099678039551 }, { "epoch": 4.24041748046875e-05, "grad_norm": 0.1868578940629959, "learning_rate": 1.4763376636185599e-06, "loss": 0.0055, "step": 27790 }, { "epoch": 4.24041748046875e-05, "model_forward_time": 0.025423526763916016, "step": 27790 }, { "epoch": 4.24041748046875e-05, "step": 27790, "training_step_time": 0.10583901405334473 }, { "epoch": 4.240570068359375e-05, "model_forward_time": 0.025251150131225586, "step": 27791 }, { "epoch": 4.240570068359375e-05, "step": 27791, "training_step_time": 0.10694384574890137 }, { "epoch": 4.24072265625e-05, "model_forward_time": 0.025606870651245117, "step": 27792 }, { "epoch": 4.24072265625e-05, "step": 27792, "training_step_time": 0.10620403289794922 }, { "epoch": 4.240875244140625e-05, "model_forward_time": 0.02562427520751953, "step": 27793 }, { "epoch": 4.240875244140625e-05, "step": 27793, "training_step_time": 0.11023759841918945 }, { "epoch": 4.24102783203125e-05, "model_forward_time": 0.025459766387939453, "step": 27794 }, { "epoch": 4.24102783203125e-05, "step": 27794, "training_step_time": 0.10524201393127441 }, { "epoch": 4.241180419921875e-05, "model_forward_time": 0.025812387466430664, "step": 27795 }, { "epoch": 4.241180419921875e-05, "step": 27795, "training_step_time": 0.10813498497009277 }, { "epoch": 4.2413330078125e-05, "model_forward_time": 0.025298118591308594, "step": 27796 }, { "epoch": 4.2413330078125e-05, "step": 27796, "training_step_time": 0.10708022117614746 }, { "epoch": 4.241485595703125e-05, "model_forward_time": 0.025615692138671875, "step": 27797 }, { "epoch": 4.241485595703125e-05, "step": 27797, "training_step_time": 0.10777115821838379 }, { "epoch": 4.24163818359375e-05, "model_forward_time": 0.02490401268005371, "step": 27798 }, { "epoch": 4.24163818359375e-05, "step": 27798, "training_step_time": 0.1049349308013916 }, { "epoch": 4.241790771484375e-05, "model_forward_time": 0.024810791015625, "step": 27799 }, { "epoch": 4.241790771484375e-05, "step": 27799, "training_step_time": 0.149885892868042 }, { "epoch": 4.241943359375e-05, "grad_norm": 0.07447796314954758, "learning_rate": 1.463072765264001e-06, "loss": 0.0049, "step": 27800 }, { "epoch": 4.241943359375e-05, "model_forward_time": 0.02437424659729004, "step": 27800 }, { "epoch": 4.241943359375e-05, "step": 27800, "training_step_time": 0.15484380722045898 }, { "epoch": 4.242095947265625e-05, "model_forward_time": 0.024450302124023438, "step": 27801 }, { "epoch": 4.242095947265625e-05, "step": 27801, "training_step_time": 0.13563823699951172 }, { "epoch": 4.24224853515625e-05, "model_forward_time": 0.02437305450439453, "step": 27802 }, { "epoch": 4.24224853515625e-05, "step": 27802, "training_step_time": 0.21140241622924805 }, { "epoch": 4.242401123046875e-05, "model_forward_time": 0.02472543716430664, "step": 27803 }, { "epoch": 4.242401123046875e-05, "step": 27803, "training_step_time": 0.12787652015686035 }, { "epoch": 4.2425537109375e-05, "model_forward_time": 0.02444911003112793, "step": 27804 }, { "epoch": 4.2425537109375e-05, "step": 27804, "training_step_time": 0.21919870376586914 }, { "epoch": 4.242706298828125e-05, "model_forward_time": 0.024511337280273438, "step": 27805 }, { "epoch": 4.242706298828125e-05, "step": 27805, "training_step_time": 0.17590641975402832 }, { "epoch": 4.24285888671875e-05, "model_forward_time": 0.02406144142150879, "step": 27806 }, { "epoch": 4.24285888671875e-05, "step": 27806, "training_step_time": 0.19149518013000488 }, { "epoch": 4.243011474609375e-05, "model_forward_time": 0.02446889877319336, "step": 27807 }, { "epoch": 4.243011474609375e-05, "step": 27807, "training_step_time": 0.10854840278625488 }, { "epoch": 4.2431640625e-05, "model_forward_time": 0.025229692459106445, "step": 27808 }, { "epoch": 4.2431640625e-05, "step": 27808, "training_step_time": 0.10677576065063477 }, { "epoch": 4.243316650390625e-05, "model_forward_time": 0.0252227783203125, "step": 27809 }, { "epoch": 4.243316650390625e-05, "step": 27809, "training_step_time": 0.10595273971557617 }, { "epoch": 4.24346923828125e-05, "grad_norm": 0.42719563841819763, "learning_rate": 1.4498668438527597e-06, "loss": 0.0082, "step": 27810 }, { "epoch": 4.24346923828125e-05, "model_forward_time": 0.02503657341003418, "step": 27810 }, { "epoch": 4.24346923828125e-05, "step": 27810, "training_step_time": 0.1055910587310791 }, { "epoch": 4.243621826171875e-05, "model_forward_time": 0.02529597282409668, "step": 27811 }, { "epoch": 4.243621826171875e-05, "step": 27811, "training_step_time": 0.10571026802062988 }, { "epoch": 4.2437744140625e-05, "model_forward_time": 0.02472662925720215, "step": 27812 }, { "epoch": 4.2437744140625e-05, "step": 27812, "training_step_time": 0.10520124435424805 }, { "epoch": 4.243927001953125e-05, "model_forward_time": 0.02493000030517578, "step": 27813 }, { "epoch": 4.243927001953125e-05, "step": 27813, "training_step_time": 0.10967206954956055 }, { "epoch": 4.24407958984375e-05, "model_forward_time": 0.025262832641601562, "step": 27814 }, { "epoch": 4.24407958984375e-05, "step": 27814, "training_step_time": 0.1060795783996582 }, { "epoch": 4.244232177734375e-05, "model_forward_time": 0.025254011154174805, "step": 27815 }, { "epoch": 4.244232177734375e-05, "step": 27815, "training_step_time": 0.10849165916442871 }, { "epoch": 4.244384765625e-05, "model_forward_time": 0.025343894958496094, "step": 27816 }, { "epoch": 4.244384765625e-05, "step": 27816, "training_step_time": 0.10576343536376953 }, { "epoch": 4.244537353515625e-05, "model_forward_time": 0.025491714477539062, "step": 27817 }, { "epoch": 4.244537353515625e-05, "step": 27817, "training_step_time": 0.10484504699707031 }, { "epoch": 4.24468994140625e-05, "model_forward_time": 0.02534174919128418, "step": 27818 }, { "epoch": 4.24468994140625e-05, "step": 27818, "training_step_time": 0.12265968322753906 }, { "epoch": 4.244842529296875e-05, "model_forward_time": 0.025585412979125977, "step": 27819 }, { "epoch": 4.244842529296875e-05, "step": 27819, "training_step_time": 0.13701558113098145 }, { "epoch": 4.2449951171875e-05, "grad_norm": 0.029894618317484856, "learning_rate": 1.4367199154312783e-06, "loss": 0.0025, "step": 27820 }, { "epoch": 4.2449951171875e-05, "model_forward_time": 0.027461528778076172, "step": 27820 }, { "epoch": 4.2449951171875e-05, "step": 27820, "training_step_time": 0.10947537422180176 }, { "epoch": 4.245147705078125e-05, "model_forward_time": 0.02530980110168457, "step": 27821 }, { "epoch": 4.245147705078125e-05, "step": 27821, "training_step_time": 0.10671806335449219 }, { "epoch": 4.24530029296875e-05, "model_forward_time": 0.02665114402770996, "step": 27822 }, { "epoch": 4.24530029296875e-05, "step": 27822, "training_step_time": 0.10999488830566406 }, { "epoch": 4.245452880859375e-05, "model_forward_time": 0.02570629119873047, "step": 27823 }, { "epoch": 4.245452880859375e-05, "step": 27823, "training_step_time": 0.1114661693572998 }, { "epoch": 4.24560546875e-05, "model_forward_time": 0.025671720504760742, "step": 27824 }, { "epoch": 4.24560546875e-05, "step": 27824, "training_step_time": 0.20056700706481934 }, { "epoch": 4.245758056640625e-05, "model_forward_time": 0.024576902389526367, "step": 27825 }, { "epoch": 4.245758056640625e-05, "step": 27825, "training_step_time": 0.10237884521484375 }, { "epoch": 4.24591064453125e-05, "model_forward_time": 0.024979352951049805, "step": 27826 }, { "epoch": 4.24591064453125e-05, "step": 27826, "training_step_time": 0.10492539405822754 }, { "epoch": 4.246063232421875e-05, "model_forward_time": 0.024428367614746094, "step": 27827 }, { "epoch": 4.246063232421875e-05, "step": 27827, "training_step_time": 0.10420584678649902 }, { "epoch": 4.2462158203125e-05, "model_forward_time": 0.0242002010345459, "step": 27828 }, { "epoch": 4.2462158203125e-05, "step": 27828, "training_step_time": 0.10303044319152832 }, { "epoch": 4.246368408203125e-05, "model_forward_time": 0.024187803268432617, "step": 27829 }, { "epoch": 4.246368408203125e-05, "step": 27829, "training_step_time": 0.10311603546142578 }, { "epoch": 4.24652099609375e-05, "grad_norm": 0.14968939125537872, "learning_rate": 1.4236319959743227e-06, "loss": 0.0038, "step": 27830 }, { "epoch": 4.24652099609375e-05, "model_forward_time": 0.02434086799621582, "step": 27830 }, { "epoch": 4.24652099609375e-05, "step": 27830, "training_step_time": 0.10735964775085449 }, { "epoch": 4.246673583984375e-05, "model_forward_time": 0.025661945343017578, "step": 27831 }, { "epoch": 4.246673583984375e-05, "step": 27831, "training_step_time": 0.11038899421691895 }, { "epoch": 4.246826171875e-05, "model_forward_time": 0.025493860244750977, "step": 27832 }, { "epoch": 4.246826171875e-05, "step": 27832, "training_step_time": 0.10648226737976074 }, { "epoch": 4.246978759765625e-05, "model_forward_time": 0.025682449340820312, "step": 27833 }, { "epoch": 4.246978759765625e-05, "step": 27833, "training_step_time": 0.10470938682556152 }, { "epoch": 4.24713134765625e-05, "model_forward_time": 0.025580406188964844, "step": 27834 }, { "epoch": 4.24713134765625e-05, "step": 27834, "training_step_time": 0.10486507415771484 }, { "epoch": 4.247283935546875e-05, "model_forward_time": 0.025501012802124023, "step": 27835 }, { "epoch": 4.247283935546875e-05, "step": 27835, "training_step_time": 0.1041867733001709 }, { "epoch": 4.2474365234375e-05, "model_forward_time": 0.0249173641204834, "step": 27836 }, { "epoch": 4.2474365234375e-05, "step": 27836, "training_step_time": 0.10445499420166016 }, { "epoch": 4.247589111328125e-05, "model_forward_time": 0.024964332580566406, "step": 27837 }, { "epoch": 4.247589111328125e-05, "step": 27837, "training_step_time": 0.11200428009033203 }, { "epoch": 4.24774169921875e-05, "model_forward_time": 0.025246143341064453, "step": 27838 }, { "epoch": 4.24774169921875e-05, "step": 27838, "training_step_time": 0.11360526084899902 }, { "epoch": 4.247894287109375e-05, "model_forward_time": 0.025316953659057617, "step": 27839 }, { "epoch": 4.247894287109375e-05, "step": 27839, "training_step_time": 0.10687971115112305 }, { "epoch": 4.248046875e-05, "grad_norm": 0.0546284094452858, "learning_rate": 1.4106031013849496e-06, "loss": 0.0061, "step": 27840 }, { "epoch": 4.248046875e-05, "model_forward_time": 0.02528524398803711, "step": 27840 }, { "epoch": 4.248046875e-05, "step": 27840, "training_step_time": 0.10661196708679199 }, { "epoch": 4.248199462890625e-05, "model_forward_time": 0.025310277938842773, "step": 27841 }, { "epoch": 4.248199462890625e-05, "step": 27841, "training_step_time": 0.10816431045532227 }, { "epoch": 4.24835205078125e-05, "model_forward_time": 0.025682926177978516, "step": 27842 }, { "epoch": 4.24835205078125e-05, "step": 27842, "training_step_time": 0.10800051689147949 }, { "epoch": 4.248504638671875e-05, "model_forward_time": 0.024756193161010742, "step": 27843 }, { "epoch": 4.248504638671875e-05, "step": 27843, "training_step_time": 0.11150908470153809 }, { "epoch": 4.2486572265625e-05, "model_forward_time": 0.025356531143188477, "step": 27844 }, { "epoch": 4.2486572265625e-05, "step": 27844, "training_step_time": 0.10523748397827148 }, { "epoch": 4.248809814453125e-05, "model_forward_time": 0.024932861328125, "step": 27845 }, { "epoch": 4.248809814453125e-05, "step": 27845, "training_step_time": 0.12113285064697266 }, { "epoch": 4.24896240234375e-05, "model_forward_time": 0.025599241256713867, "step": 27846 }, { "epoch": 4.24896240234375e-05, "step": 27846, "training_step_time": 0.11135435104370117 }, { "epoch": 4.249114990234375e-05, "model_forward_time": 0.025439977645874023, "step": 27847 }, { "epoch": 4.249114990234375e-05, "step": 27847, "training_step_time": 0.22305846214294434 }, { "epoch": 4.249267578125e-05, "model_forward_time": 0.024872303009033203, "step": 27848 }, { "epoch": 4.249267578125e-05, "step": 27848, "training_step_time": 0.14758968353271484 }, { "epoch": 4.249420166015625e-05, "model_forward_time": 0.02462029457092285, "step": 27849 }, { "epoch": 4.249420166015625e-05, "step": 27849, "training_step_time": 0.1842501163482666 }, { "epoch": 4.24957275390625e-05, "grad_norm": 0.03513207659125328, "learning_rate": 1.3976332474944843e-06, "loss": 0.0034, "step": 27850 }, { "epoch": 4.24957275390625e-05, "model_forward_time": 0.025066375732421875, "step": 27850 }, { "epoch": 4.24957275390625e-05, "step": 27850, "training_step_time": 0.16826081275939941 }, { "epoch": 4.249725341796875e-05, "model_forward_time": 0.024399995803833008, "step": 27851 }, { "epoch": 4.249725341796875e-05, "step": 27851, "training_step_time": 0.17161297798156738 }, { "epoch": 4.2498779296875e-05, "model_forward_time": 0.02479386329650879, "step": 27852 }, { "epoch": 4.2498779296875e-05, "step": 27852, "training_step_time": 0.1333465576171875 }, { "epoch": 4.250030517578125e-05, "model_forward_time": 0.02429342269897461, "step": 27853 }, { "epoch": 4.250030517578125e-05, "step": 27853, "training_step_time": 0.11715388298034668 }, { "epoch": 4.25018310546875e-05, "model_forward_time": 0.025049209594726562, "step": 27854 }, { "epoch": 4.25018310546875e-05, "step": 27854, "training_step_time": 0.11929464340209961 }, { "epoch": 4.250335693359375e-05, "model_forward_time": 0.025506973266601562, "step": 27855 }, { "epoch": 4.250335693359375e-05, "step": 27855, "training_step_time": 0.11747884750366211 }, { "epoch": 4.25048828125e-05, "model_forward_time": 0.025383710861206055, "step": 27856 }, { "epoch": 4.25048828125e-05, "step": 27856, "training_step_time": 0.11568307876586914 }, { "epoch": 4.250640869140625e-05, "model_forward_time": 0.025324344635009766, "step": 27857 }, { "epoch": 4.250640869140625e-05, "step": 27857, "training_step_time": 0.11356496810913086 }, { "epoch": 4.25079345703125e-05, "model_forward_time": 0.02538132667541504, "step": 27858 }, { "epoch": 4.25079345703125e-05, "step": 27858, "training_step_time": 0.10979771614074707 }, { "epoch": 4.250946044921875e-05, "model_forward_time": 0.02555370330810547, "step": 27859 }, { "epoch": 4.250946044921875e-05, "step": 27859, "training_step_time": 0.10915350914001465 }, { "epoch": 4.2510986328125e-05, "grad_norm": 0.05167609825730324, "learning_rate": 1.3847224500625256e-06, "loss": 0.0039, "step": 27860 }, { "epoch": 4.2510986328125e-05, "model_forward_time": 0.024819612503051758, "step": 27860 }, { "epoch": 4.2510986328125e-05, "step": 27860, "training_step_time": 0.1078941822052002 }, { "epoch": 4.251251220703125e-05, "model_forward_time": 0.025228261947631836, "step": 27861 }, { "epoch": 4.251251220703125e-05, "step": 27861, "training_step_time": 0.10863733291625977 }, { "epoch": 4.25140380859375e-05, "model_forward_time": 0.025562047958374023, "step": 27862 }, { "epoch": 4.25140380859375e-05, "step": 27862, "training_step_time": 0.10815024375915527 }, { "epoch": 4.251556396484375e-05, "model_forward_time": 0.0254671573638916, "step": 27863 }, { "epoch": 4.251556396484375e-05, "step": 27863, "training_step_time": 0.10552644729614258 }, { "epoch": 4.251708984375e-05, "model_forward_time": 0.025632143020629883, "step": 27864 }, { "epoch": 4.251708984375e-05, "step": 27864, "training_step_time": 0.1062624454498291 }, { "epoch": 4.251861572265625e-05, "model_forward_time": 0.02562689781188965, "step": 27865 }, { "epoch": 4.251861572265625e-05, "step": 27865, "training_step_time": 0.10991859436035156 }, { "epoch": 4.25201416015625e-05, "model_forward_time": 0.02547287940979004, "step": 27866 }, { "epoch": 4.25201416015625e-05, "step": 27866, "training_step_time": 0.13116097450256348 }, { "epoch": 4.252166748046875e-05, "model_forward_time": 0.025582313537597656, "step": 27867 }, { "epoch": 4.252166748046875e-05, "step": 27867, "training_step_time": 0.1120157241821289 }, { "epoch": 4.2523193359375e-05, "model_forward_time": 0.02555060386657715, "step": 27868 }, { "epoch": 4.2523193359375e-05, "step": 27868, "training_step_time": 0.1085810661315918 }, { "epoch": 4.252471923828125e-05, "model_forward_time": 0.025282621383666992, "step": 27869 }, { "epoch": 4.252471923828125e-05, "step": 27869, "training_step_time": 0.11660623550415039 }, { "epoch": 4.25262451171875e-05, "grad_norm": 0.03676657751202583, "learning_rate": 1.3718707247769135e-06, "loss": 0.0023, "step": 27870 }, { "epoch": 4.25262451171875e-05, "model_forward_time": 0.02561163902282715, "step": 27870 }, { "epoch": 4.25262451171875e-05, "step": 27870, "training_step_time": 0.18759512901306152 }, { "epoch": 4.252777099609375e-05, "model_forward_time": 0.02464604377746582, "step": 27871 }, { "epoch": 4.252777099609375e-05, "step": 27871, "training_step_time": 0.11059355735778809 }, { "epoch": 4.2529296875e-05, "model_forward_time": 0.024799108505249023, "step": 27872 }, { "epoch": 4.2529296875e-05, "step": 27872, "training_step_time": 0.10209178924560547 }, { "epoch": 4.253082275390625e-05, "model_forward_time": 0.025667428970336914, "step": 27873 }, { "epoch": 4.253082275390625e-05, "step": 27873, "training_step_time": 0.10463213920593262 }, { "epoch": 4.25323486328125e-05, "model_forward_time": 0.027620315551757812, "step": 27874 }, { "epoch": 4.25323486328125e-05, "step": 27874, "training_step_time": 0.10800051689147949 }, { "epoch": 4.253387451171875e-05, "model_forward_time": 0.026723861694335938, "step": 27875 }, { "epoch": 4.253387451171875e-05, "step": 27875, "training_step_time": 0.11061406135559082 }, { "epoch": 4.2535400390625e-05, "model_forward_time": 0.026164531707763672, "step": 27876 }, { "epoch": 4.2535400390625e-05, "step": 27876, "training_step_time": 0.10574626922607422 }, { "epoch": 4.253692626953125e-05, "model_forward_time": 0.025716781616210938, "step": 27877 }, { "epoch": 4.253692626953125e-05, "step": 27877, "training_step_time": 0.1097412109375 }, { "epoch": 4.25384521484375e-05, "model_forward_time": 0.02550649642944336, "step": 27878 }, { "epoch": 4.25384521484375e-05, "step": 27878, "training_step_time": 0.10526251792907715 }, { "epoch": 4.253997802734375e-05, "model_forward_time": 0.02563762664794922, "step": 27879 }, { "epoch": 4.253997802734375e-05, "step": 27879, "training_step_time": 0.10612869262695312 }, { "epoch": 4.254150390625e-05, "grad_norm": 0.31312671303749084, "learning_rate": 1.3590780872536958e-06, "loss": 0.0131, "step": 27880 }, { "epoch": 4.254150390625e-05, "model_forward_time": 0.025370121002197266, "step": 27880 }, { "epoch": 4.254150390625e-05, "step": 27880, "training_step_time": 0.10602498054504395 }, { "epoch": 4.254302978515625e-05, "model_forward_time": 0.02553725242614746, "step": 27881 }, { "epoch": 4.254302978515625e-05, "step": 27881, "training_step_time": 0.10577726364135742 }, { "epoch": 4.25445556640625e-05, "model_forward_time": 0.025724411010742188, "step": 27882 }, { "epoch": 4.25445556640625e-05, "step": 27882, "training_step_time": 0.17434048652648926 }, { "epoch": 4.254608154296875e-05, "model_forward_time": 0.02409648895263672, "step": 27883 }, { "epoch": 4.254608154296875e-05, "step": 27883, "training_step_time": 0.19861888885498047 }, { "epoch": 4.2547607421875e-05, "model_forward_time": 0.02398371696472168, "step": 27884 }, { "epoch": 4.2547607421875e-05, "step": 27884, "training_step_time": 0.18986248970031738 }, { "epoch": 4.254913330078125e-05, "model_forward_time": 0.02332448959350586, "step": 27885 }, { "epoch": 4.254913330078125e-05, "step": 27885, "training_step_time": 0.17373037338256836 }, { "epoch": 4.25506591796875e-05, "model_forward_time": 0.02451634407043457, "step": 27886 }, { "epoch": 4.25506591796875e-05, "step": 27886, "training_step_time": 0.16798830032348633 }, { "epoch": 4.255218505859375e-05, "model_forward_time": 0.023904085159301758, "step": 27887 }, { "epoch": 4.255218505859375e-05, "step": 27887, "training_step_time": 0.11904740333557129 }, { "epoch": 4.25537109375e-05, "model_forward_time": 0.025408029556274414, "step": 27888 }, { "epoch": 4.25537109375e-05, "step": 27888, "training_step_time": 0.10461926460266113 }, { "epoch": 4.255523681640625e-05, "model_forward_time": 0.024646997451782227, "step": 27889 }, { "epoch": 4.255523681640625e-05, "step": 27889, "training_step_time": 0.14524006843566895 }, { "epoch": 4.25567626953125e-05, "grad_norm": 0.07141478359699249, "learning_rate": 1.3463445530371488e-06, "loss": 0.0052, "step": 27890 }, { "epoch": 4.25567626953125e-05, "model_forward_time": 0.026252269744873047, "step": 27890 }, { "epoch": 4.25567626953125e-05, "step": 27890, "training_step_time": 0.19132685661315918 }, { "epoch": 4.255828857421875e-05, "model_forward_time": 0.024653911590576172, "step": 27891 }, { "epoch": 4.255828857421875e-05, "step": 27891, "training_step_time": 0.12134814262390137 }, { "epoch": 4.2559814453125e-05, "model_forward_time": 0.024671077728271484, "step": 27892 }, { "epoch": 4.2559814453125e-05, "step": 27892, "training_step_time": 0.13105273246765137 }, { "epoch": 4.256134033203125e-05, "model_forward_time": 0.025290250778198242, "step": 27893 }, { "epoch": 4.256134033203125e-05, "step": 27893, "training_step_time": 0.1995391845703125 }, { "epoch": 4.25628662109375e-05, "model_forward_time": 0.024758577346801758, "step": 27894 }, { "epoch": 4.25628662109375e-05, "step": 27894, "training_step_time": 0.17871856689453125 }, { "epoch": 4.256439208984375e-05, "model_forward_time": 0.024981021881103516, "step": 27895 }, { "epoch": 4.256439208984375e-05, "step": 27895, "training_step_time": 0.10334467887878418 }, { "epoch": 4.256591796875e-05, "model_forward_time": 0.02443218231201172, "step": 27896 }, { "epoch": 4.256591796875e-05, "step": 27896, "training_step_time": 0.12168192863464355 }, { "epoch": 4.256744384765625e-05, "model_forward_time": 0.025715112686157227, "step": 27897 }, { "epoch": 4.256744384765625e-05, "step": 27897, "training_step_time": 0.10761451721191406 }, { "epoch": 4.25689697265625e-05, "model_forward_time": 0.025214672088623047, "step": 27898 }, { "epoch": 4.25689697265625e-05, "step": 27898, "training_step_time": 0.11822724342346191 }, { "epoch": 4.257049560546875e-05, "model_forward_time": 0.025624752044677734, "step": 27899 }, { "epoch": 4.257049560546875e-05, "step": 27899, "training_step_time": 0.10529327392578125 }, { "epoch": 4.2572021484375e-05, "grad_norm": 0.05580778047442436, "learning_rate": 1.333670137599713e-06, "loss": 0.0045, "step": 27900 }, { "epoch": 4.2572021484375e-05, "model_forward_time": 0.025719165802001953, "step": 27900 }, { "epoch": 4.2572021484375e-05, "step": 27900, "training_step_time": 0.10622739791870117 }, { "epoch": 4.257354736328125e-05, "model_forward_time": 0.025370359420776367, "step": 27901 }, { "epoch": 4.257354736328125e-05, "step": 27901, "training_step_time": 0.10665321350097656 }, { "epoch": 4.25750732421875e-05, "model_forward_time": 0.02575397491455078, "step": 27902 }, { "epoch": 4.25750732421875e-05, "step": 27902, "training_step_time": 0.10714101791381836 }, { "epoch": 4.257659912109375e-05, "model_forward_time": 0.025780916213989258, "step": 27903 }, { "epoch": 4.257659912109375e-05, "step": 27903, "training_step_time": 0.10862016677856445 }, { "epoch": 4.2578125e-05, "model_forward_time": 0.025417566299438477, "step": 27904 }, { "epoch": 4.2578125e-05, "step": 27904, "training_step_time": 0.1513967514038086 }, { "epoch": 4.257965087890625e-05, "model_forward_time": 0.025171279907226562, "step": 27905 }, { "epoch": 4.257965087890625e-05, "step": 27905, "training_step_time": 0.17249774932861328 }, { "epoch": 4.25811767578125e-05, "model_forward_time": 0.02461719512939453, "step": 27906 }, { "epoch": 4.25811767578125e-05, "step": 27906, "training_step_time": 0.17644524574279785 }, { "epoch": 4.258270263671875e-05, "model_forward_time": 0.02466750144958496, "step": 27907 }, { "epoch": 4.258270263671875e-05, "step": 27907, "training_step_time": 0.1753687858581543 }, { "epoch": 4.2584228515625e-05, "model_forward_time": 0.024820327758789062, "step": 27908 }, { "epoch": 4.2584228515625e-05, "step": 27908, "training_step_time": 0.14687514305114746 }, { "epoch": 4.258575439453125e-05, "model_forward_time": 0.024682998657226562, "step": 27909 }, { "epoch": 4.258575439453125e-05, "step": 27909, "training_step_time": 0.2015371322631836 }, { "epoch": 4.25872802734375e-05, "grad_norm": 0.06222119927406311, "learning_rate": 1.3210548563419856e-06, "loss": 0.0041, "step": 27910 }, { "epoch": 4.25872802734375e-05, "model_forward_time": 0.02773594856262207, "step": 27910 }, { "epoch": 4.25872802734375e-05, "step": 27910, "training_step_time": 0.1265099048614502 }, { "epoch": 4.258880615234375e-05, "model_forward_time": 0.02469491958618164, "step": 27911 }, { "epoch": 4.258880615234375e-05, "step": 27911, "training_step_time": 0.19068121910095215 }, { "epoch": 4.259033203125e-05, "model_forward_time": 0.024985313415527344, "step": 27912 }, { "epoch": 4.259033203125e-05, "step": 27912, "training_step_time": 0.11831974983215332 }, { "epoch": 4.259185791015625e-05, "model_forward_time": 0.024536848068237305, "step": 27913 }, { "epoch": 4.259185791015625e-05, "step": 27913, "training_step_time": 0.11262941360473633 }, { "epoch": 4.25933837890625e-05, "model_forward_time": 0.025439739227294922, "step": 27914 }, { "epoch": 4.25933837890625e-05, "step": 27914, "training_step_time": 0.11113357543945312 }, { "epoch": 4.259490966796875e-05, "model_forward_time": 0.025494813919067383, "step": 27915 }, { "epoch": 4.259490966796875e-05, "step": 27915, "training_step_time": 0.10785484313964844 }, { "epoch": 4.2596435546875e-05, "model_forward_time": 0.02586817741394043, "step": 27916 }, { "epoch": 4.2596435546875e-05, "step": 27916, "training_step_time": 0.10970187187194824 }, { "epoch": 4.259796142578125e-05, "model_forward_time": 0.025429725646972656, "step": 27917 }, { "epoch": 4.259796142578125e-05, "step": 27917, "training_step_time": 0.11130952835083008 }, { "epoch": 4.25994873046875e-05, "model_forward_time": 0.025813579559326172, "step": 27918 }, { "epoch": 4.25994873046875e-05, "step": 27918, "training_step_time": 0.10906457901000977 }, { "epoch": 4.260101318359375e-05, "model_forward_time": 0.025560379028320312, "step": 27919 }, { "epoch": 4.260101318359375e-05, "step": 27919, "training_step_time": 0.10883951187133789 }, { "epoch": 4.26025390625e-05, "grad_norm": 0.17487028241157532, "learning_rate": 1.3084987245927383e-06, "loss": 0.0042, "step": 27920 }, { "epoch": 4.26025390625e-05, "model_forward_time": 0.025545597076416016, "step": 27920 }, { "epoch": 4.26025390625e-05, "step": 27920, "training_step_time": 0.10739731788635254 }, { "epoch": 4.260406494140625e-05, "model_forward_time": 0.025815486907958984, "step": 27921 }, { "epoch": 4.260406494140625e-05, "step": 27921, "training_step_time": 0.10610008239746094 }, { "epoch": 4.26055908203125e-05, "model_forward_time": 0.025365114212036133, "step": 27922 }, { "epoch": 4.26055908203125e-05, "step": 27922, "training_step_time": 0.1053469181060791 }, { "epoch": 4.260711669921875e-05, "model_forward_time": 0.025623559951782227, "step": 27923 }, { "epoch": 4.260711669921875e-05, "step": 27923, "training_step_time": 0.10929369926452637 }, { "epoch": 4.2608642578125e-05, "model_forward_time": 0.02564239501953125, "step": 27924 }, { "epoch": 4.2608642578125e-05, "step": 27924, "training_step_time": 0.10677647590637207 }, { "epoch": 4.261016845703125e-05, "model_forward_time": 0.025307655334472656, "step": 27925 }, { "epoch": 4.261016845703125e-05, "step": 27925, "training_step_time": 0.10831093788146973 }, { "epoch": 4.26116943359375e-05, "model_forward_time": 0.026432514190673828, "step": 27926 }, { "epoch": 4.26116943359375e-05, "step": 27926, "training_step_time": 0.10590600967407227 }, { "epoch": 4.261322021484375e-05, "model_forward_time": 0.025585174560546875, "step": 27927 }, { "epoch": 4.261322021484375e-05, "step": 27927, "training_step_time": 0.10826683044433594 }, { "epoch": 4.261474609375e-05, "model_forward_time": 0.02531886100769043, "step": 27928 }, { "epoch": 4.261474609375e-05, "step": 27928, "training_step_time": 0.10998320579528809 }, { "epoch": 4.261627197265625e-05, "model_forward_time": 0.025435686111450195, "step": 27929 }, { "epoch": 4.261627197265625e-05, "step": 27929, "training_step_time": 0.10559892654418945 }, { "epoch": 4.26177978515625e-05, "grad_norm": 0.09832992404699326, "learning_rate": 1.2960017576088446e-06, "loss": 0.005, "step": 27930 }, { "epoch": 4.26177978515625e-05, "model_forward_time": 0.02521824836730957, "step": 27930 }, { "epoch": 4.26177978515625e-05, "step": 27930, "training_step_time": 0.10478949546813965 }, { "epoch": 4.261932373046875e-05, "model_forward_time": 0.02548670768737793, "step": 27931 }, { "epoch": 4.261932373046875e-05, "step": 27931, "training_step_time": 0.10495376586914062 }, { "epoch": 4.2620849609375e-05, "model_forward_time": 0.025649070739746094, "step": 27932 }, { "epoch": 4.2620849609375e-05, "step": 27932, "training_step_time": 0.10551261901855469 }, { "epoch": 4.262237548828125e-05, "model_forward_time": 0.025668621063232422, "step": 27933 }, { "epoch": 4.262237548828125e-05, "step": 27933, "training_step_time": 0.10544824600219727 }, { "epoch": 4.26239013671875e-05, "model_forward_time": 0.02572345733642578, "step": 27934 }, { "epoch": 4.26239013671875e-05, "step": 27934, "training_step_time": 0.14290952682495117 }, { "epoch": 4.262542724609375e-05, "model_forward_time": 0.025204896926879883, "step": 27935 }, { "epoch": 4.262542724609375e-05, "step": 27935, "training_step_time": 0.16504144668579102 }, { "epoch": 4.2626953125e-05, "model_forward_time": 0.02519059181213379, "step": 27936 }, { "epoch": 4.2626953125e-05, "step": 27936, "training_step_time": 0.20573830604553223 }, { "epoch": 4.262847900390625e-05, "model_forward_time": 0.025143146514892578, "step": 27937 }, { "epoch": 4.262847900390625e-05, "step": 27937, "training_step_time": 0.13631677627563477 }, { "epoch": 4.26300048828125e-05, "model_forward_time": 0.02484869956970215, "step": 27938 }, { "epoch": 4.26300048828125e-05, "step": 27938, "training_step_time": 0.18605422973632812 }, { "epoch": 4.263153076171875e-05, "model_forward_time": 0.024816274642944336, "step": 27939 }, { "epoch": 4.263153076171875e-05, "step": 27939, "training_step_time": 0.17425918579101562 }, { "epoch": 4.2633056640625e-05, "grad_norm": 0.0365869477391243, "learning_rate": 1.2835639705753078e-06, "loss": 0.0049, "step": 27940 }, { "epoch": 4.2633056640625e-05, "model_forward_time": 0.025331735610961914, "step": 27940 }, { "epoch": 4.2633056640625e-05, "step": 27940, "training_step_time": 0.19548249244689941 }, { "epoch": 4.263458251953125e-05, "model_forward_time": 0.0250396728515625, "step": 27941 }, { "epoch": 4.263458251953125e-05, "step": 27941, "training_step_time": 0.11956048011779785 }, { "epoch": 4.26361083984375e-05, "model_forward_time": 0.025290250778198242, "step": 27942 }, { "epoch": 4.26361083984375e-05, "step": 27942, "training_step_time": 0.11834287643432617 }, { "epoch": 4.263763427734375e-05, "model_forward_time": 0.02579975128173828, "step": 27943 }, { "epoch": 4.263763427734375e-05, "step": 27943, "training_step_time": 0.1056208610534668 }, { "epoch": 4.263916015625e-05, "model_forward_time": 0.02572035789489746, "step": 27944 }, { "epoch": 4.263916015625e-05, "step": 27944, "training_step_time": 0.10506153106689453 }, { "epoch": 4.264068603515625e-05, "model_forward_time": 0.025712013244628906, "step": 27945 }, { "epoch": 4.264068603515625e-05, "step": 27945, "training_step_time": 0.1472768783569336 }, { "epoch": 4.26422119140625e-05, "model_forward_time": 0.02608776092529297, "step": 27946 }, { "epoch": 4.26422119140625e-05, "step": 27946, "training_step_time": 0.15186285972595215 }, { "epoch": 4.264373779296875e-05, "model_forward_time": 0.024965286254882812, "step": 27947 }, { "epoch": 4.264373779296875e-05, "step": 27947, "training_step_time": 0.14381670951843262 }, { "epoch": 4.2645263671875e-05, "model_forward_time": 0.024871826171875, "step": 27948 }, { "epoch": 4.2645263671875e-05, "step": 27948, "training_step_time": 0.12871646881103516 }, { "epoch": 4.264678955078125e-05, "model_forward_time": 0.025005578994750977, "step": 27949 }, { "epoch": 4.264678955078125e-05, "step": 27949, "training_step_time": 0.1214454174041748 }, { "epoch": 4.26483154296875e-05, "grad_norm": 0.2922270894050598, "learning_rate": 1.2711853786052109e-06, "loss": 0.0056, "step": 27950 }, { "epoch": 4.26483154296875e-05, "model_forward_time": 0.025091171264648438, "step": 27950 }, { "epoch": 4.26483154296875e-05, "step": 27950, "training_step_time": 0.11933326721191406 }, { "epoch": 4.264984130859375e-05, "model_forward_time": 0.02534317970275879, "step": 27951 }, { "epoch": 4.264984130859375e-05, "step": 27951, "training_step_time": 0.13525056838989258 }, { "epoch": 4.26513671875e-05, "model_forward_time": 0.025454282760620117, "step": 27952 }, { "epoch": 4.26513671875e-05, "step": 27952, "training_step_time": 0.14168810844421387 }, { "epoch": 4.265289306640625e-05, "model_forward_time": 0.02507495880126953, "step": 27953 }, { "epoch": 4.265289306640625e-05, "step": 27953, "training_step_time": 0.11379218101501465 }, { "epoch": 4.26544189453125e-05, "model_forward_time": 0.025511741638183594, "step": 27954 }, { "epoch": 4.26544189453125e-05, "step": 27954, "training_step_time": 0.10819888114929199 }, { "epoch": 4.265594482421875e-05, "model_forward_time": 0.0258028507232666, "step": 27955 }, { "epoch": 4.265594482421875e-05, "step": 27955, "training_step_time": 0.11082243919372559 }, { "epoch": 4.2657470703125e-05, "model_forward_time": 0.025349855422973633, "step": 27956 }, { "epoch": 4.2657470703125e-05, "step": 27956, "training_step_time": 0.17798066139221191 }, { "epoch": 4.265899658203125e-05, "model_forward_time": 0.02451801300048828, "step": 27957 }, { "epoch": 4.265899658203125e-05, "step": 27957, "training_step_time": 0.11390304565429688 }, { "epoch": 4.26605224609375e-05, "model_forward_time": 0.025112152099609375, "step": 27958 }, { "epoch": 4.26605224609375e-05, "step": 27958, "training_step_time": 0.10487127304077148 }, { "epoch": 4.266204833984375e-05, "model_forward_time": 0.025587797164916992, "step": 27959 }, { "epoch": 4.266204833984375e-05, "step": 27959, "training_step_time": 0.10451459884643555 }, { "epoch": 4.266357421875e-05, "grad_norm": 0.05221688374876976, "learning_rate": 1.2588659967397e-06, "loss": 0.0114, "step": 27960 }, { "epoch": 4.266357421875e-05, "model_forward_time": 0.025776147842407227, "step": 27960 }, { "epoch": 4.266357421875e-05, "step": 27960, "training_step_time": 0.10680270195007324 }, { "epoch": 4.266510009765625e-05, "model_forward_time": 0.02565145492553711, "step": 27961 }, { "epoch": 4.266510009765625e-05, "step": 27961, "training_step_time": 0.10778188705444336 }, { "epoch": 4.26666259765625e-05, "model_forward_time": 0.025341033935546875, "step": 27962 }, { "epoch": 4.26666259765625e-05, "step": 27962, "training_step_time": 0.10608410835266113 }, { "epoch": 4.266815185546875e-05, "model_forward_time": 0.025365829467773438, "step": 27963 }, { "epoch": 4.266815185546875e-05, "step": 27963, "training_step_time": 0.10579872131347656 }, { "epoch": 4.2669677734375e-05, "model_forward_time": 0.025606393814086914, "step": 27964 }, { "epoch": 4.2669677734375e-05, "step": 27964, "training_step_time": 0.10605478286743164 }, { "epoch": 4.267120361328125e-05, "model_forward_time": 0.02568984031677246, "step": 27965 }, { "epoch": 4.267120361328125e-05, "step": 27965, "training_step_time": 0.10603857040405273 }, { "epoch": 4.26727294921875e-05, "model_forward_time": 0.025866270065307617, "step": 27966 }, { "epoch": 4.26727294921875e-05, "step": 27966, "training_step_time": 0.1074059009552002 }, { "epoch": 4.267425537109375e-05, "model_forward_time": 0.0254061222076416, "step": 27967 }, { "epoch": 4.267425537109375e-05, "step": 27967, "training_step_time": 0.10718178749084473 }, { "epoch": 4.267578125e-05, "model_forward_time": 0.025082111358642578, "step": 27968 }, { "epoch": 4.267578125e-05, "step": 27968, "training_step_time": 0.10847663879394531 }, { "epoch": 4.267730712890625e-05, "model_forward_time": 0.025501012802124023, "step": 27969 }, { "epoch": 4.267730712890625e-05, "step": 27969, "training_step_time": 0.10683178901672363 }, { "epoch": 4.26788330078125e-05, "grad_norm": 0.06859178096055984, "learning_rate": 1.2466058399479952e-06, "loss": 0.0029, "step": 27970 }, { "epoch": 4.26788330078125e-05, "model_forward_time": 0.025496959686279297, "step": 27970 }, { "epoch": 4.26788330078125e-05, "step": 27970, "training_step_time": 0.11376738548278809 }, { "epoch": 4.268035888671875e-05, "model_forward_time": 0.025566577911376953, "step": 27971 }, { "epoch": 4.268035888671875e-05, "step": 27971, "training_step_time": 0.10528016090393066 }, { "epoch": 4.2681884765625e-05, "model_forward_time": 0.02509140968322754, "step": 27972 }, { "epoch": 4.2681884765625e-05, "step": 27972, "training_step_time": 0.10582137107849121 }, { "epoch": 4.268341064453125e-05, "model_forward_time": 0.025210857391357422, "step": 27973 }, { "epoch": 4.268341064453125e-05, "step": 27973, "training_step_time": 0.14858794212341309 }, { "epoch": 4.26849365234375e-05, "model_forward_time": 0.025287151336669922, "step": 27974 }, { "epoch": 4.26849365234375e-05, "step": 27974, "training_step_time": 0.17290425300598145 }, { "epoch": 4.268646240234375e-05, "model_forward_time": 0.024263858795166016, "step": 27975 }, { "epoch": 4.268646240234375e-05, "step": 27975, "training_step_time": 0.1626110076904297 }, { "epoch": 4.268798828125e-05, "model_forward_time": 0.024059295654296875, "step": 27976 }, { "epoch": 4.268798828125e-05, "step": 27976, "training_step_time": 0.1560075283050537 }, { "epoch": 4.268951416015625e-05, "model_forward_time": 0.02505350112915039, "step": 27977 }, { "epoch": 4.268951416015625e-05, "step": 27977, "training_step_time": 0.11144375801086426 }, { "epoch": 4.26910400390625e-05, "model_forward_time": 0.024424076080322266, "step": 27978 }, { "epoch": 4.26910400390625e-05, "step": 27978, "training_step_time": 0.14347386360168457 }, { "epoch": 4.269256591796875e-05, "model_forward_time": 0.024840116500854492, "step": 27979 }, { "epoch": 4.269256591796875e-05, "step": 27979, "training_step_time": 0.1513216495513916 }, { "epoch": 4.2694091796875e-05, "grad_norm": 0.039643775671720505, "learning_rate": 1.2344049231273302e-06, "loss": 0.0021, "step": 27980 }, { "epoch": 4.2694091796875e-05, "model_forward_time": 0.024786710739135742, "step": 27980 }, { "epoch": 4.2694091796875e-05, "step": 27980, "training_step_time": 0.16834235191345215 }, { "epoch": 4.269561767578125e-05, "model_forward_time": 0.024988889694213867, "step": 27981 }, { "epoch": 4.269561767578125e-05, "step": 27981, "training_step_time": 0.16028285026550293 }, { "epoch": 4.26971435546875e-05, "model_forward_time": 0.02411818504333496, "step": 27982 }, { "epoch": 4.26971435546875e-05, "step": 27982, "training_step_time": 0.16425490379333496 }, { "epoch": 4.269866943359375e-05, "model_forward_time": 0.025319576263427734, "step": 27983 }, { "epoch": 4.269866943359375e-05, "step": 27983, "training_step_time": 0.10875105857849121 }, { "epoch": 4.27001953125e-05, "model_forward_time": 0.024596452713012695, "step": 27984 }, { "epoch": 4.27001953125e-05, "step": 27984, "training_step_time": 0.1310865879058838 }, { "epoch": 4.270172119140625e-05, "model_forward_time": 0.02504444122314453, "step": 27985 }, { "epoch": 4.270172119140625e-05, "step": 27985, "training_step_time": 0.14220118522644043 }, { "epoch": 4.27032470703125e-05, "model_forward_time": 0.024613142013549805, "step": 27986 }, { "epoch": 4.27032470703125e-05, "step": 27986, "training_step_time": 0.1565239429473877 }, { "epoch": 4.270477294921875e-05, "model_forward_time": 0.02362799644470215, "step": 27987 }, { "epoch": 4.270477294921875e-05, "step": 27987, "training_step_time": 0.1807713508605957 }, { "epoch": 4.2706298828125e-05, "model_forward_time": 0.02415013313293457, "step": 27988 }, { "epoch": 4.2706298828125e-05, "step": 27988, "training_step_time": 0.1542208194732666 }, { "epoch": 4.270782470703125e-05, "model_forward_time": 0.024315834045410156, "step": 27989 }, { "epoch": 4.270782470703125e-05, "step": 27989, "training_step_time": 0.13875961303710938 }, { "epoch": 4.27093505859375e-05, "grad_norm": 0.05708553269505501, "learning_rate": 1.222263261102985e-06, "loss": 0.004, "step": 27990 }, { "epoch": 4.27093505859375e-05, "model_forward_time": 0.02445673942565918, "step": 27990 }, { "epoch": 4.27093505859375e-05, "step": 27990, "training_step_time": 0.12975358963012695 }, { "epoch": 4.271087646484375e-05, "model_forward_time": 0.024536848068237305, "step": 27991 }, { "epoch": 4.271087646484375e-05, "step": 27991, "training_step_time": 0.13229155540466309 }, { "epoch": 4.271240234375e-05, "model_forward_time": 0.02481389045715332, "step": 27992 }, { "epoch": 4.271240234375e-05, "step": 27992, "training_step_time": 0.1214592456817627 }, { "epoch": 4.271392822265625e-05, "model_forward_time": 0.025017738342285156, "step": 27993 }, { "epoch": 4.271392822265625e-05, "step": 27993, "training_step_time": 0.11903858184814453 }, { "epoch": 4.27154541015625e-05, "model_forward_time": 0.025713443756103516, "step": 27994 }, { "epoch": 4.27154541015625e-05, "step": 27994, "training_step_time": 0.2052774429321289 }, { "epoch": 4.271697998046875e-05, "model_forward_time": 0.02530646324157715, "step": 27995 }, { "epoch": 4.271697998046875e-05, "step": 27995, "training_step_time": 0.1184077262878418 }, { "epoch": 4.2718505859375e-05, "model_forward_time": 0.02584528923034668, "step": 27996 }, { "epoch": 4.2718505859375e-05, "step": 27996, "training_step_time": 0.10634112358093262 }, { "epoch": 4.272003173828125e-05, "model_forward_time": 0.026337623596191406, "step": 27997 }, { "epoch": 4.272003173828125e-05, "step": 27997, "training_step_time": 0.11828494071960449 }, { "epoch": 4.27215576171875e-05, "model_forward_time": 0.025213241577148438, "step": 27998 }, { "epoch": 4.27215576171875e-05, "step": 27998, "training_step_time": 0.11823081970214844 }, { "epoch": 4.272308349609375e-05, "model_forward_time": 0.025218486785888672, "step": 27999 }, { "epoch": 4.272308349609375e-05, "step": 27999, "training_step_time": 0.1120145320892334 }, { "epoch": 4.2724609375e-05, "grad_norm": 0.06876952946186066, "learning_rate": 1.210180868628219e-06, "loss": 0.0072, "step": 28000 }, { "epoch": 4.2724609375e-05, "model_forward_time": 0.025133609771728516, "step": 28000 }, { "epoch": 4.2724609375e-05, "step": 28000, "training_step_time": 0.10908770561218262 }, { "epoch": 4.272613525390625e-05, "model_forward_time": 0.022487640380859375, "step": 28001 }, { "epoch": 4.272613525390625e-05, "step": 28001, "training_step_time": 0.15211749076843262 }, { "epoch": 4.27276611328125e-05, "model_forward_time": 0.024354219436645508, "step": 28002 }, { "epoch": 4.27276611328125e-05, "step": 28002, "training_step_time": 0.10347366333007812 }, { "epoch": 4.272918701171875e-05, "model_forward_time": 0.025212526321411133, "step": 28003 }, { "epoch": 4.272918701171875e-05, "step": 28003, "training_step_time": 0.11535882949829102 }, { "epoch": 4.2730712890625e-05, "model_forward_time": 0.025705575942993164, "step": 28004 }, { "epoch": 4.2730712890625e-05, "step": 28004, "training_step_time": 0.10598611831665039 }, { "epoch": 4.273223876953125e-05, "model_forward_time": 0.025698423385620117, "step": 28005 }, { "epoch": 4.273223876953125e-05, "step": 28005, "training_step_time": 0.10723996162414551 }, { "epoch": 4.27337646484375e-05, "model_forward_time": 0.025939464569091797, "step": 28006 }, { "epoch": 4.27337646484375e-05, "step": 28006, "training_step_time": 0.10460209846496582 }, { "epoch": 4.273529052734375e-05, "model_forward_time": 0.025622129440307617, "step": 28007 }, { "epoch": 4.273529052734375e-05, "step": 28007, "training_step_time": 0.11146187782287598 }, { "epoch": 4.273681640625e-05, "model_forward_time": 0.02557206153869629, "step": 28008 }, { "epoch": 4.273681640625e-05, "step": 28008, "training_step_time": 0.10476064682006836 }, { "epoch": 4.273834228515625e-05, "model_forward_time": 0.025312185287475586, "step": 28009 }, { "epoch": 4.273834228515625e-05, "step": 28009, "training_step_time": 0.10474443435668945 }, { "epoch": 4.27398681640625e-05, "grad_norm": 0.04854021221399307, "learning_rate": 1.1981577603842776e-06, "loss": 0.0023, "step": 28010 }, { "epoch": 4.27398681640625e-05, "model_forward_time": 0.025357484817504883, "step": 28010 }, { "epoch": 4.27398681640625e-05, "step": 28010, "training_step_time": 0.10552358627319336 }, { "epoch": 4.274139404296875e-05, "model_forward_time": 0.02460336685180664, "step": 28011 }, { "epoch": 4.274139404296875e-05, "step": 28011, "training_step_time": 0.10281848907470703 }, { "epoch": 4.2742919921875e-05, "model_forward_time": 0.025520801544189453, "step": 28012 }, { "epoch": 4.2742919921875e-05, "step": 28012, "training_step_time": 0.10708379745483398 }, { "epoch": 4.274444580078125e-05, "model_forward_time": 0.025524616241455078, "step": 28013 }, { "epoch": 4.274444580078125e-05, "step": 28013, "training_step_time": 0.1055593490600586 }, { "epoch": 4.27459716796875e-05, "model_forward_time": 0.025459766387939453, "step": 28014 }, { "epoch": 4.27459716796875e-05, "step": 28014, "training_step_time": 0.10937333106994629 }, { "epoch": 4.274749755859375e-05, "model_forward_time": 0.025835514068603516, "step": 28015 }, { "epoch": 4.274749755859375e-05, "step": 28015, "training_step_time": 0.1066441535949707 }, { "epoch": 4.27490234375e-05, "model_forward_time": 0.02529287338256836, "step": 28016 }, { "epoch": 4.27490234375e-05, "step": 28016, "training_step_time": 0.10380792617797852 }, { "epoch": 4.275054931640625e-05, "model_forward_time": 0.026292085647583008, "step": 28017 }, { "epoch": 4.275054931640625e-05, "step": 28017, "training_step_time": 0.10513162612915039 }, { "epoch": 4.27520751953125e-05, "model_forward_time": 0.025243520736694336, "step": 28018 }, { "epoch": 4.27520751953125e-05, "step": 28018, "training_step_time": 0.10362100601196289 }, { "epoch": 4.275360107421875e-05, "model_forward_time": 0.02588486671447754, "step": 28019 }, { "epoch": 4.275360107421875e-05, "step": 28019, "training_step_time": 0.1051938533782959 }, { "epoch": 4.2755126953125e-05, "grad_norm": 0.44585156440734863, "learning_rate": 1.1861939509803687e-06, "loss": 0.0068, "step": 28020 }, { "epoch": 4.2755126953125e-05, "model_forward_time": 0.025751113891601562, "step": 28020 }, { "epoch": 4.2755126953125e-05, "step": 28020, "training_step_time": 0.10657715797424316 }, { "epoch": 4.275665283203125e-05, "model_forward_time": 0.025300025939941406, "step": 28021 }, { "epoch": 4.275665283203125e-05, "step": 28021, "training_step_time": 0.10511255264282227 }, { "epoch": 4.27581787109375e-05, "model_forward_time": 0.025283098220825195, "step": 28022 }, { "epoch": 4.27581787109375e-05, "step": 28022, "training_step_time": 0.11232757568359375 }, { "epoch": 4.275970458984375e-05, "model_forward_time": 0.025183439254760742, "step": 28023 }, { "epoch": 4.275970458984375e-05, "step": 28023, "training_step_time": 0.1087639331817627 }, { "epoch": 4.276123046875e-05, "model_forward_time": 0.02609395980834961, "step": 28024 }, { "epoch": 4.276123046875e-05, "step": 28024, "training_step_time": 0.11055231094360352 }, { "epoch": 4.276275634765625e-05, "model_forward_time": 0.024733543395996094, "step": 28025 }, { "epoch": 4.276275634765625e-05, "step": 28025, "training_step_time": 0.14400339126586914 }, { "epoch": 4.27642822265625e-05, "model_forward_time": 0.025200366973876953, "step": 28026 }, { "epoch": 4.27642822265625e-05, "step": 28026, "training_step_time": 0.1917896270751953 }, { "epoch": 4.276580810546875e-05, "model_forward_time": 0.025035381317138672, "step": 28027 }, { "epoch": 4.276580810546875e-05, "step": 28027, "training_step_time": 0.21288537979125977 }, { "epoch": 4.2767333984375e-05, "model_forward_time": 0.024612903594970703, "step": 28028 }, { "epoch": 4.2767333984375e-05, "step": 28028, "training_step_time": 0.18929052352905273 }, { "epoch": 4.276885986328125e-05, "model_forward_time": 0.026010751724243164, "step": 28029 }, { "epoch": 4.276885986328125e-05, "step": 28029, "training_step_time": 0.10692119598388672 }, { "epoch": 4.27703857421875e-05, "grad_norm": 0.15105819702148438, "learning_rate": 1.1742894549536477e-06, "loss": 0.0041, "step": 28030 }, { "epoch": 4.27703857421875e-05, "model_forward_time": 0.024358034133911133, "step": 28030 }, { "epoch": 4.27703857421875e-05, "step": 28030, "training_step_time": 0.17907190322875977 }, { "epoch": 4.277191162109375e-05, "model_forward_time": 0.024917125701904297, "step": 28031 }, { "epoch": 4.277191162109375e-05, "step": 28031, "training_step_time": 0.12036561965942383 }, { "epoch": 4.27734375e-05, "model_forward_time": 0.025186538696289062, "step": 28032 }, { "epoch": 4.27734375e-05, "step": 28032, "training_step_time": 0.10973906517028809 }, { "epoch": 4.277496337890625e-05, "model_forward_time": 0.025532007217407227, "step": 28033 }, { "epoch": 4.277496337890625e-05, "step": 28033, "training_step_time": 0.12372922897338867 }, { "epoch": 4.27764892578125e-05, "model_forward_time": 0.025794506072998047, "step": 28034 }, { "epoch": 4.27764892578125e-05, "step": 28034, "training_step_time": 0.14110684394836426 }, { "epoch": 4.277801513671875e-05, "model_forward_time": 0.024763822555541992, "step": 28035 }, { "epoch": 4.277801513671875e-05, "step": 28035, "training_step_time": 0.11424756050109863 }, { "epoch": 4.2779541015625e-05, "model_forward_time": 0.02552652359008789, "step": 28036 }, { "epoch": 4.2779541015625e-05, "step": 28036, "training_step_time": 0.10770750045776367 }, { "epoch": 4.278106689453125e-05, "model_forward_time": 0.025593042373657227, "step": 28037 }, { "epoch": 4.278106689453125e-05, "step": 28037, "training_step_time": 0.13204526901245117 }, { "epoch": 4.27825927734375e-05, "model_forward_time": 0.026675939559936523, "step": 28038 }, { "epoch": 4.27825927734375e-05, "step": 28038, "training_step_time": 0.13980460166931152 }, { "epoch": 4.278411865234375e-05, "model_forward_time": 0.025687694549560547, "step": 28039 }, { "epoch": 4.278411865234375e-05, "step": 28039, "training_step_time": 0.12833142280578613 }, { "epoch": 4.278564453125e-05, "grad_norm": 0.07757231593132019, "learning_rate": 1.16244428676921e-06, "loss": 0.0047, "step": 28040 }, { "epoch": 4.278564453125e-05, "model_forward_time": 0.024254322052001953, "step": 28040 }, { "epoch": 4.278564453125e-05, "step": 28040, "training_step_time": 0.12408614158630371 }, { "epoch": 4.278717041015625e-05, "model_forward_time": 0.024830102920532227, "step": 28041 }, { "epoch": 4.278717041015625e-05, "step": 28041, "training_step_time": 0.12283539772033691 }, { "epoch": 4.27886962890625e-05, "model_forward_time": 0.0246274471282959, "step": 28042 }, { "epoch": 4.27886962890625e-05, "step": 28042, "training_step_time": 0.12130618095397949 }, { "epoch": 4.279022216796875e-05, "model_forward_time": 0.024924755096435547, "step": 28043 }, { "epoch": 4.279022216796875e-05, "step": 28043, "training_step_time": 0.17862915992736816 }, { "epoch": 4.2791748046875e-05, "model_forward_time": 0.025013208389282227, "step": 28044 }, { "epoch": 4.2791748046875e-05, "step": 28044, "training_step_time": 0.14233994483947754 }, { "epoch": 4.279327392578125e-05, "model_forward_time": 0.02472710609436035, "step": 28045 }, { "epoch": 4.279327392578125e-05, "step": 28045, "training_step_time": 0.10658884048461914 }, { "epoch": 4.27947998046875e-05, "model_forward_time": 0.025505781173706055, "step": 28046 }, { "epoch": 4.27947998046875e-05, "step": 28046, "training_step_time": 0.10288357734680176 }, { "epoch": 4.279632568359375e-05, "model_forward_time": 0.025631427764892578, "step": 28047 }, { "epoch": 4.279632568359375e-05, "step": 28047, "training_step_time": 0.11542534828186035 }, { "epoch": 4.27978515625e-05, "model_forward_time": 0.02580428123474121, "step": 28048 }, { "epoch": 4.27978515625e-05, "step": 28048, "training_step_time": 0.1045529842376709 }, { "epoch": 4.279937744140625e-05, "model_forward_time": 0.025458574295043945, "step": 28049 }, { "epoch": 4.279937744140625e-05, "step": 28049, "training_step_time": 0.19089555740356445 }, { "epoch": 4.28009033203125e-05, "grad_norm": 0.024919060990214348, "learning_rate": 1.1506584608200367e-06, "loss": 0.0035, "step": 28050 }, { "epoch": 4.28009033203125e-05, "model_forward_time": 0.024945735931396484, "step": 28050 }, { "epoch": 4.28009033203125e-05, "step": 28050, "training_step_time": 0.10418534278869629 }, { "epoch": 4.280242919921875e-05, "model_forward_time": 0.025121212005615234, "step": 28051 }, { "epoch": 4.280242919921875e-05, "step": 28051, "training_step_time": 0.10232686996459961 }, { "epoch": 4.2803955078125e-05, "model_forward_time": 0.02559828758239746, "step": 28052 }, { "epoch": 4.2803955078125e-05, "step": 28052, "training_step_time": 0.1072854995727539 }, { "epoch": 4.280548095703125e-05, "model_forward_time": 0.02575230598449707, "step": 28053 }, { "epoch": 4.280548095703125e-05, "step": 28053, "training_step_time": 0.10650467872619629 }, { "epoch": 4.28070068359375e-05, "model_forward_time": 0.0249788761138916, "step": 28054 }, { "epoch": 4.28070068359375e-05, "step": 28054, "training_step_time": 0.10440659523010254 }, { "epoch": 4.280853271484375e-05, "model_forward_time": 0.025271892547607422, "step": 28055 }, { "epoch": 4.280853271484375e-05, "step": 28055, "training_step_time": 0.13753819465637207 }, { "epoch": 4.281005859375e-05, "model_forward_time": 0.025707721710205078, "step": 28056 }, { "epoch": 4.281005859375e-05, "step": 28056, "training_step_time": 0.13420724868774414 }, { "epoch": 4.281158447265625e-05, "model_forward_time": 0.02409505844116211, "step": 28057 }, { "epoch": 4.281158447265625e-05, "step": 28057, "training_step_time": 0.12948894500732422 }, { "epoch": 4.28131103515625e-05, "model_forward_time": 0.024275779724121094, "step": 28058 }, { "epoch": 4.28131103515625e-05, "step": 28058, "training_step_time": 0.13346385955810547 }, { "epoch": 4.281463623046875e-05, "model_forward_time": 0.02422165870666504, "step": 28059 }, { "epoch": 4.281463623046875e-05, "step": 28059, "training_step_time": 0.11788535118103027 }, { "epoch": 4.2816162109375e-05, "grad_norm": 0.04625081270933151, "learning_rate": 1.138931991427028e-06, "loss": 0.0027, "step": 28060 }, { "epoch": 4.2816162109375e-05, "model_forward_time": 0.024268388748168945, "step": 28060 }, { "epoch": 4.2816162109375e-05, "step": 28060, "training_step_time": 0.11838340759277344 }, { "epoch": 4.281768798828125e-05, "model_forward_time": 0.025130271911621094, "step": 28061 }, { "epoch": 4.281768798828125e-05, "step": 28061, "training_step_time": 0.11795234680175781 }, { "epoch": 4.28192138671875e-05, "model_forward_time": 0.024408578872680664, "step": 28062 }, { "epoch": 4.28192138671875e-05, "step": 28062, "training_step_time": 0.11512994766235352 }, { "epoch": 4.282073974609375e-05, "model_forward_time": 0.027941226959228516, "step": 28063 }, { "epoch": 4.282073974609375e-05, "step": 28063, "training_step_time": 0.1131432056427002 }, { "epoch": 4.2822265625e-05, "model_forward_time": 0.02573227882385254, "step": 28064 }, { "epoch": 4.2822265625e-05, "step": 28064, "training_step_time": 0.11481738090515137 }, { "epoch": 4.282379150390625e-05, "model_forward_time": 0.02604365348815918, "step": 28065 }, { "epoch": 4.282379150390625e-05, "step": 28065, "training_step_time": 0.11078023910522461 }, { "epoch": 4.28253173828125e-05, "model_forward_time": 0.02553105354309082, "step": 28066 }, { "epoch": 4.28253173828125e-05, "step": 28066, "training_step_time": 0.1105649471282959 }, { "epoch": 4.282684326171875e-05, "model_forward_time": 0.02533578872680664, "step": 28067 }, { "epoch": 4.282684326171875e-05, "step": 28067, "training_step_time": 0.10690593719482422 }, { "epoch": 4.2828369140625e-05, "model_forward_time": 0.026012420654296875, "step": 28068 }, { "epoch": 4.2828369140625e-05, "step": 28068, "training_step_time": 0.15660619735717773 }, { "epoch": 4.282989501953125e-05, "model_forward_time": 0.02501654624938965, "step": 28069 }, { "epoch": 4.282989501953125e-05, "step": 28069, "training_step_time": 0.11046075820922852 }, { "epoch": 4.28314208984375e-05, "grad_norm": 0.07085675746202469, "learning_rate": 1.1272648928389473e-06, "loss": 0.0028, "step": 28070 }, { "epoch": 4.28314208984375e-05, "model_forward_time": 0.025017499923706055, "step": 28070 }, { "epoch": 4.28314208984375e-05, "step": 28070, "training_step_time": 0.15639376640319824 }, { "epoch": 4.283294677734375e-05, "model_forward_time": 0.025296926498413086, "step": 28071 }, { "epoch": 4.283294677734375e-05, "step": 28071, "training_step_time": 0.13969755172729492 }, { "epoch": 4.283447265625e-05, "model_forward_time": 0.024966001510620117, "step": 28072 }, { "epoch": 4.283447265625e-05, "step": 28072, "training_step_time": 0.20888018608093262 }, { "epoch": 4.283599853515625e-05, "model_forward_time": 0.0251462459564209, "step": 28073 }, { "epoch": 4.283599853515625e-05, "step": 28073, "training_step_time": 0.10761117935180664 }, { "epoch": 4.28375244140625e-05, "model_forward_time": 0.025363683700561523, "step": 28074 }, { "epoch": 4.28375244140625e-05, "step": 28074, "training_step_time": 0.10629796981811523 }, { "epoch": 4.283905029296875e-05, "model_forward_time": 0.025469303131103516, "step": 28075 }, { "epoch": 4.283905029296875e-05, "step": 28075, "training_step_time": 0.2293379306793213 }, { "epoch": 4.2840576171875e-05, "model_forward_time": 0.024816036224365234, "step": 28076 }, { "epoch": 4.2840576171875e-05, "step": 28076, "training_step_time": 0.21250391006469727 }, { "epoch": 4.284210205078125e-05, "model_forward_time": 0.024728775024414062, "step": 28077 }, { "epoch": 4.284210205078125e-05, "step": 28077, "training_step_time": 0.17592072486877441 }, { "epoch": 4.28436279296875e-05, "model_forward_time": 0.02487468719482422, "step": 28078 }, { "epoch": 4.28436279296875e-05, "step": 28078, "training_step_time": 0.19633102416992188 }, { "epoch": 4.284515380859375e-05, "model_forward_time": 0.024659395217895508, "step": 28079 }, { "epoch": 4.284515380859375e-05, "step": 28079, "training_step_time": 0.14942717552185059 }, { "epoch": 4.28466796875e-05, "grad_norm": 0.04600098729133606, "learning_rate": 1.1156571792324211e-06, "loss": 0.003, "step": 28080 }, { "epoch": 4.28466796875e-05, "model_forward_time": 0.023808956146240234, "step": 28080 }, { "epoch": 4.28466796875e-05, "step": 28080, "training_step_time": 0.12925004959106445 }, { "epoch": 4.284820556640625e-05, "model_forward_time": 0.02474522590637207, "step": 28081 }, { "epoch": 4.284820556640625e-05, "step": 28081, "training_step_time": 0.1114661693572998 }, { "epoch": 4.28497314453125e-05, "model_forward_time": 0.025533199310302734, "step": 28082 }, { "epoch": 4.28497314453125e-05, "step": 28082, "training_step_time": 0.10246682167053223 }, { "epoch": 4.285125732421875e-05, "model_forward_time": 0.025798559188842773, "step": 28083 }, { "epoch": 4.285125732421875e-05, "step": 28083, "training_step_time": 0.10435366630554199 }, { "epoch": 4.2852783203125e-05, "model_forward_time": 0.025624990463256836, "step": 28084 }, { "epoch": 4.2852783203125e-05, "step": 28084, "training_step_time": 0.103851318359375 }, { "epoch": 4.285430908203125e-05, "model_forward_time": 0.02579045295715332, "step": 28085 }, { "epoch": 4.285430908203125e-05, "step": 28085, "training_step_time": 0.10672783851623535 }, { "epoch": 4.28558349609375e-05, "model_forward_time": 0.025568008422851562, "step": 28086 }, { "epoch": 4.28558349609375e-05, "step": 28086, "training_step_time": 0.15857982635498047 }, { "epoch": 4.285736083984375e-05, "model_forward_time": 0.024938106536865234, "step": 28087 }, { "epoch": 4.285736083984375e-05, "step": 28087, "training_step_time": 0.16930484771728516 }, { "epoch": 4.285888671875e-05, "model_forward_time": 0.024765491485595703, "step": 28088 }, { "epoch": 4.285888671875e-05, "step": 28088, "training_step_time": 0.11368107795715332 }, { "epoch": 4.286041259765625e-05, "model_forward_time": 0.024991512298583984, "step": 28089 }, { "epoch": 4.286041259765625e-05, "step": 28089, "training_step_time": 0.10468292236328125 }, { "epoch": 4.28619384765625e-05, "grad_norm": 0.118675097823143, "learning_rate": 1.1041088647119114e-06, "loss": 0.0046, "step": 28090 }, { "epoch": 4.28619384765625e-05, "model_forward_time": 0.02599048614501953, "step": 28090 }, { "epoch": 4.28619384765625e-05, "step": 28090, "training_step_time": 0.1128849983215332 }, { "epoch": 4.286346435546875e-05, "model_forward_time": 0.02588963508605957, "step": 28091 }, { "epoch": 4.286346435546875e-05, "step": 28091, "training_step_time": 0.10625219345092773 }, { "epoch": 4.2864990234375e-05, "model_forward_time": 0.025443077087402344, "step": 28092 }, { "epoch": 4.2864990234375e-05, "step": 28092, "training_step_time": 0.19758391380310059 }, { "epoch": 4.286651611328125e-05, "model_forward_time": 0.02498316764831543, "step": 28093 }, { "epoch": 4.286651611328125e-05, "step": 28093, "training_step_time": 0.10712242126464844 }, { "epoch": 4.28680419921875e-05, "model_forward_time": 0.02486586570739746, "step": 28094 }, { "epoch": 4.28680419921875e-05, "step": 28094, "training_step_time": 0.10593652725219727 }, { "epoch": 4.286956787109375e-05, "model_forward_time": 0.025663375854492188, "step": 28095 }, { "epoch": 4.286956787109375e-05, "step": 28095, "training_step_time": 0.1056520938873291 }, { "epoch": 4.287109375e-05, "model_forward_time": 0.025650739669799805, "step": 28096 }, { "epoch": 4.287109375e-05, "step": 28096, "training_step_time": 0.10642790794372559 }, { "epoch": 4.287261962890625e-05, "model_forward_time": 0.02581167221069336, "step": 28097 }, { "epoch": 4.287261962890625e-05, "step": 28097, "training_step_time": 0.10577678680419922 }, { "epoch": 4.28741455078125e-05, "model_forward_time": 0.02585744857788086, "step": 28098 }, { "epoch": 4.28741455078125e-05, "step": 28098, "training_step_time": 0.10816359519958496 }, { "epoch": 4.287567138671875e-05, "model_forward_time": 0.025646209716796875, "step": 28099 }, { "epoch": 4.287567138671875e-05, "step": 28099, "training_step_time": 0.11125969886779785 }, { "epoch": 4.2877197265625e-05, "grad_norm": 0.02818923443555832, "learning_rate": 1.0926199633097157e-06, "loss": 0.0042, "step": 28100 }, { "epoch": 4.2877197265625e-05, "model_forward_time": 0.025690317153930664, "step": 28100 }, { "epoch": 4.2877197265625e-05, "step": 28100, "training_step_time": 0.11008667945861816 }, { "epoch": 4.287872314453125e-05, "model_forward_time": 0.029033184051513672, "step": 28101 }, { "epoch": 4.287872314453125e-05, "step": 28101, "training_step_time": 0.1139678955078125 }, { "epoch": 4.28802490234375e-05, "model_forward_time": 0.025434494018554688, "step": 28102 }, { "epoch": 4.28802490234375e-05, "step": 28102, "training_step_time": 0.11111235618591309 }, { "epoch": 4.288177490234375e-05, "model_forward_time": 0.025571823120117188, "step": 28103 }, { "epoch": 4.288177490234375e-05, "step": 28103, "training_step_time": 0.10701942443847656 }, { "epoch": 4.288330078125e-05, "model_forward_time": 0.025650978088378906, "step": 28104 }, { "epoch": 4.288330078125e-05, "step": 28104, "training_step_time": 0.10683321952819824 }, { "epoch": 4.288482666015625e-05, "model_forward_time": 0.025441408157348633, "step": 28105 }, { "epoch": 4.288482666015625e-05, "step": 28105, "training_step_time": 0.10703063011169434 }, { "epoch": 4.28863525390625e-05, "model_forward_time": 0.025362491607666016, "step": 28106 }, { "epoch": 4.28863525390625e-05, "step": 28106, "training_step_time": 0.10633420944213867 }, { "epoch": 4.288787841796875e-05, "model_forward_time": 0.025365352630615234, "step": 28107 }, { "epoch": 4.288787841796875e-05, "step": 28107, "training_step_time": 0.10951447486877441 }, { "epoch": 4.2889404296875e-05, "model_forward_time": 0.02532029151916504, "step": 28108 }, { "epoch": 4.2889404296875e-05, "step": 28108, "training_step_time": 0.10729670524597168 }, { "epoch": 4.289093017578125e-05, "model_forward_time": 0.024563312530517578, "step": 28109 }, { "epoch": 4.289093017578125e-05, "step": 28109, "training_step_time": 0.10837173461914062 }, { "epoch": 4.28924560546875e-05, "grad_norm": 0.05564859136939049, "learning_rate": 1.0811904889859336e-06, "loss": 0.0046, "step": 28110 }, { "epoch": 4.28924560546875e-05, "model_forward_time": 0.02592325210571289, "step": 28110 }, { "epoch": 4.28924560546875e-05, "step": 28110, "training_step_time": 0.10683822631835938 }, { "epoch": 4.289398193359375e-05, "model_forward_time": 0.025578975677490234, "step": 28111 }, { "epoch": 4.289398193359375e-05, "step": 28111, "training_step_time": 0.10595178604125977 }, { "epoch": 4.28955078125e-05, "model_forward_time": 0.025487422943115234, "step": 28112 }, { "epoch": 4.28955078125e-05, "step": 28112, "training_step_time": 0.10527920722961426 }, { "epoch": 4.289703369140625e-05, "model_forward_time": 0.024895906448364258, "step": 28113 }, { "epoch": 4.289703369140625e-05, "step": 28113, "training_step_time": 0.1571955680847168 }, { "epoch": 4.28985595703125e-05, "model_forward_time": 0.024834156036376953, "step": 28114 }, { "epoch": 4.28985595703125e-05, "step": 28114, "training_step_time": 0.16796636581420898 }, { "epoch": 4.290008544921875e-05, "model_forward_time": 0.024730443954467773, "step": 28115 }, { "epoch": 4.290008544921875e-05, "step": 28115, "training_step_time": 0.16473150253295898 }, { "epoch": 4.2901611328125e-05, "model_forward_time": 0.025068283081054688, "step": 28116 }, { "epoch": 4.2901611328125e-05, "step": 28116, "training_step_time": 0.11041641235351562 }, { "epoch": 4.290313720703125e-05, "model_forward_time": 0.02454686164855957, "step": 28117 }, { "epoch": 4.290313720703125e-05, "step": 28117, "training_step_time": 0.13852810859680176 }, { "epoch": 4.29046630859375e-05, "model_forward_time": 0.025707244873046875, "step": 28118 }, { "epoch": 4.29046630859375e-05, "step": 28118, "training_step_time": 0.10743856430053711 }, { "epoch": 4.290618896484375e-05, "model_forward_time": 0.025455951690673828, "step": 28119 }, { "epoch": 4.290618896484375e-05, "step": 28119, "training_step_time": 0.10467886924743652 }, { "epoch": 4.290771484375e-05, "grad_norm": 0.15088430047035217, "learning_rate": 1.0698204556284452e-06, "loss": 0.0053, "step": 28120 }, { "epoch": 4.290771484375e-05, "model_forward_time": 0.024934768676757812, "step": 28120 }, { "epoch": 4.290771484375e-05, "step": 28120, "training_step_time": 0.20907306671142578 }, { "epoch": 4.290924072265625e-05, "model_forward_time": 0.02465057373046875, "step": 28121 }, { "epoch": 4.290924072265625e-05, "step": 28121, "training_step_time": 0.22231125831604004 }, { "epoch": 4.29107666015625e-05, "model_forward_time": 0.02471923828125, "step": 28122 }, { "epoch": 4.29107666015625e-05, "step": 28122, "training_step_time": 0.11757469177246094 }, { "epoch": 4.291229248046875e-05, "model_forward_time": 0.024435043334960938, "step": 28123 }, { "epoch": 4.291229248046875e-05, "step": 28123, "training_step_time": 0.12290835380554199 }, { "epoch": 4.2913818359375e-05, "model_forward_time": 0.025502681732177734, "step": 28124 }, { "epoch": 4.2913818359375e-05, "step": 28124, "training_step_time": 0.1162412166595459 }, { "epoch": 4.291534423828125e-05, "model_forward_time": 0.025265216827392578, "step": 28125 }, { "epoch": 4.291534423828125e-05, "step": 28125, "training_step_time": 0.11341333389282227 }, { "epoch": 4.29168701171875e-05, "model_forward_time": 0.025426149368286133, "step": 28126 }, { "epoch": 4.29168701171875e-05, "step": 28126, "training_step_time": 0.14142131805419922 }, { "epoch": 4.291839599609375e-05, "model_forward_time": 0.025430917739868164, "step": 28127 }, { "epoch": 4.291839599609375e-05, "step": 28127, "training_step_time": 0.103271484375 }, { "epoch": 4.2919921875e-05, "model_forward_time": 0.025682687759399414, "step": 28128 }, { "epoch": 4.2919921875e-05, "step": 28128, "training_step_time": 0.10607409477233887 }, { "epoch": 4.292144775390625e-05, "model_forward_time": 0.0254056453704834, "step": 28129 }, { "epoch": 4.292144775390625e-05, "step": 28129, "training_step_time": 0.10705685615539551 }, { "epoch": 4.29229736328125e-05, "grad_norm": 0.03304993733763695, "learning_rate": 1.0585098770529157e-06, "loss": 0.003, "step": 28130 }, { "epoch": 4.29229736328125e-05, "model_forward_time": 0.02542901039123535, "step": 28130 }, { "epoch": 4.29229736328125e-05, "step": 28130, "training_step_time": 0.10944628715515137 }, { "epoch": 4.292449951171875e-05, "model_forward_time": 0.02570486068725586, "step": 28131 }, { "epoch": 4.292449951171875e-05, "step": 28131, "training_step_time": 0.11533522605895996 }, { "epoch": 4.2926025390625e-05, "model_forward_time": 0.025487661361694336, "step": 28132 }, { "epoch": 4.2926025390625e-05, "step": 28132, "training_step_time": 0.1496903896331787 }, { "epoch": 4.292755126953125e-05, "model_forward_time": 0.025234699249267578, "step": 28133 }, { "epoch": 4.292755126953125e-05, "step": 28133, "training_step_time": 0.14674067497253418 }, { "epoch": 4.29290771484375e-05, "model_forward_time": 0.024352073669433594, "step": 28134 }, { "epoch": 4.29290771484375e-05, "step": 28134, "training_step_time": 0.1492311954498291 }, { "epoch": 4.293060302734375e-05, "model_forward_time": 0.025493860244750977, "step": 28135 }, { "epoch": 4.293060302734375e-05, "step": 28135, "training_step_time": 0.17179393768310547 }, { "epoch": 4.293212890625e-05, "model_forward_time": 0.025066375732421875, "step": 28136 }, { "epoch": 4.293212890625e-05, "step": 28136, "training_step_time": 0.11461687088012695 }, { "epoch": 4.293365478515625e-05, "model_forward_time": 0.023778200149536133, "step": 28137 }, { "epoch": 4.293365478515625e-05, "step": 28137, "training_step_time": 0.18233203887939453 }, { "epoch": 4.29351806640625e-05, "model_forward_time": 0.024322032928466797, "step": 28138 }, { "epoch": 4.29351806640625e-05, "step": 28138, "training_step_time": 0.10688185691833496 }, { "epoch": 4.293670654296875e-05, "model_forward_time": 0.024742841720581055, "step": 28139 }, { "epoch": 4.293670654296875e-05, "step": 28139, "training_step_time": 0.10454940795898438 }, { "epoch": 4.2938232421875e-05, "grad_norm": 0.17801275849342346, "learning_rate": 1.0472587670027678e-06, "loss": 0.0033, "step": 28140 }, { "epoch": 4.2938232421875e-05, "model_forward_time": 0.025681257247924805, "step": 28140 }, { "epoch": 4.2938232421875e-05, "step": 28140, "training_step_time": 0.11020898818969727 }, { "epoch": 4.293975830078125e-05, "model_forward_time": 0.02542281150817871, "step": 28141 }, { "epoch": 4.293975830078125e-05, "step": 28141, "training_step_time": 0.10460519790649414 }, { "epoch": 4.29412841796875e-05, "model_forward_time": 0.0254056453704834, "step": 28142 }, { "epoch": 4.29412841796875e-05, "step": 28142, "training_step_time": 0.10540962219238281 }, { "epoch": 4.294281005859375e-05, "model_forward_time": 0.025315523147583008, "step": 28143 }, { "epoch": 4.294281005859375e-05, "step": 28143, "training_step_time": 0.10575366020202637 }, { "epoch": 4.29443359375e-05, "model_forward_time": 0.02530503273010254, "step": 28144 }, { "epoch": 4.29443359375e-05, "step": 28144, "training_step_time": 0.10960984230041504 }, { "epoch": 4.294586181640625e-05, "model_forward_time": 0.02729511260986328, "step": 28145 }, { "epoch": 4.294586181640625e-05, "step": 28145, "training_step_time": 0.10846996307373047 }, { "epoch": 4.29473876953125e-05, "model_forward_time": 0.025135278701782227, "step": 28146 }, { "epoch": 4.29473876953125e-05, "step": 28146, "training_step_time": 0.10770273208618164 }, { "epoch": 4.294891357421875e-05, "model_forward_time": 0.02505779266357422, "step": 28147 }, { "epoch": 4.294891357421875e-05, "step": 28147, "training_step_time": 0.10571455955505371 }, { "epoch": 4.2950439453125e-05, "model_forward_time": 0.025591373443603516, "step": 28148 }, { "epoch": 4.2950439453125e-05, "step": 28148, "training_step_time": 0.10745716094970703 }, { "epoch": 4.295196533203125e-05, "model_forward_time": 0.02503657341003418, "step": 28149 }, { "epoch": 4.295196533203125e-05, "step": 28149, "training_step_time": 0.10613107681274414 }, { "epoch": 4.29534912109375e-05, "grad_norm": 0.29600000381469727, "learning_rate": 1.0360671391491606e-06, "loss": 0.0131, "step": 28150 }, { "epoch": 4.29534912109375e-05, "model_forward_time": 0.02520442008972168, "step": 28150 }, { "epoch": 4.29534912109375e-05, "step": 28150, "training_step_time": 0.10464811325073242 }, { "epoch": 4.295501708984375e-05, "model_forward_time": 0.0256650447845459, "step": 28151 }, { "epoch": 4.295501708984375e-05, "step": 28151, "training_step_time": 0.10714006423950195 }, { "epoch": 4.295654296875e-05, "model_forward_time": 0.02523517608642578, "step": 28152 }, { "epoch": 4.295654296875e-05, "step": 28152, "training_step_time": 0.10495471954345703 }, { "epoch": 4.295806884765625e-05, "model_forward_time": 0.02512502670288086, "step": 28153 }, { "epoch": 4.295806884765625e-05, "step": 28153, "training_step_time": 0.10732030868530273 }, { "epoch": 4.29595947265625e-05, "model_forward_time": 0.025397300720214844, "step": 28154 }, { "epoch": 4.29595947265625e-05, "step": 28154, "training_step_time": 0.1050872802734375 }, { "epoch": 4.296112060546875e-05, "model_forward_time": 0.025055646896362305, "step": 28155 }, { "epoch": 4.296112060546875e-05, "step": 28155, "training_step_time": 0.10627460479736328 }, { "epoch": 4.2962646484375e-05, "model_forward_time": 0.0256044864654541, "step": 28156 }, { "epoch": 4.2962646484375e-05, "step": 28156, "training_step_time": 0.10723519325256348 }, { "epoch": 4.296417236328125e-05, "model_forward_time": 0.024872779846191406, "step": 28157 }, { "epoch": 4.296417236328125e-05, "step": 28157, "training_step_time": 0.10510730743408203 }, { "epoch": 4.29656982421875e-05, "model_forward_time": 0.025111913681030273, "step": 28158 }, { "epoch": 4.29656982421875e-05, "step": 28158, "training_step_time": 0.10512185096740723 }, { "epoch": 4.296722412109375e-05, "model_forward_time": 0.0239865779876709, "step": 28159 }, { "epoch": 4.296722412109375e-05, "step": 28159, "training_step_time": 0.14714455604553223 }, { "epoch": 4.296875e-05, "grad_norm": 0.04726273566484451, "learning_rate": 1.0249350070909768e-06, "loss": 0.007, "step": 28160 }, { "epoch": 4.296875e-05, "model_forward_time": 0.024842262268066406, "step": 28160 }, { "epoch": 4.296875e-05, "step": 28160, "training_step_time": 0.20943999290466309 }, { "epoch": 4.297027587890625e-05, "model_forward_time": 0.024492263793945312, "step": 28161 }, { "epoch": 4.297027587890625e-05, "step": 28161, "training_step_time": 0.10382938385009766 }, { "epoch": 4.29718017578125e-05, "model_forward_time": 0.024725675582885742, "step": 28162 }, { "epoch": 4.29718017578125e-05, "step": 28162, "training_step_time": 0.17518854141235352 }, { "epoch": 4.297332763671875e-05, "model_forward_time": 0.024048566818237305, "step": 28163 }, { "epoch": 4.297332763671875e-05, "step": 28163, "training_step_time": 0.15068626403808594 }, { "epoch": 4.2974853515625e-05, "model_forward_time": 0.024222135543823242, "step": 28164 }, { "epoch": 4.2974853515625e-05, "step": 28164, "training_step_time": 0.10298681259155273 }, { "epoch": 4.297637939453125e-05, "model_forward_time": 0.02491021156311035, "step": 28165 }, { "epoch": 4.297637939453125e-05, "step": 28165, "training_step_time": 0.10348963737487793 }, { "epoch": 4.29779052734375e-05, "model_forward_time": 0.02529120445251465, "step": 28166 }, { "epoch": 4.29779052734375e-05, "step": 28166, "training_step_time": 0.1744542121887207 }, { "epoch": 4.297943115234375e-05, "model_forward_time": 0.024445295333862305, "step": 28167 }, { "epoch": 4.297943115234375e-05, "step": 28167, "training_step_time": 0.11167669296264648 }, { "epoch": 4.298095703125e-05, "model_forward_time": 0.02465510368347168, "step": 28168 }, { "epoch": 4.298095703125e-05, "step": 28168, "training_step_time": 0.1209871768951416 }, { "epoch": 4.298248291015625e-05, "model_forward_time": 0.025136470794677734, "step": 28169 }, { "epoch": 4.298248291015625e-05, "step": 28169, "training_step_time": 0.1324923038482666 }, { "epoch": 4.29840087890625e-05, "grad_norm": 0.03980742767453194, "learning_rate": 1.0138623843548078e-06, "loss": 0.0037, "step": 28170 }, { "epoch": 4.29840087890625e-05, "model_forward_time": 0.02494025230407715, "step": 28170 }, { "epoch": 4.29840087890625e-05, "step": 28170, "training_step_time": 0.11650586128234863 }, { "epoch": 4.298553466796875e-05, "model_forward_time": 0.02489328384399414, "step": 28171 }, { "epoch": 4.298553466796875e-05, "step": 28171, "training_step_time": 0.11912274360656738 }, { "epoch": 4.2987060546875e-05, "model_forward_time": 0.026041030883789062, "step": 28172 }, { "epoch": 4.2987060546875e-05, "step": 28172, "training_step_time": 0.11481356620788574 }, { "epoch": 4.298858642578125e-05, "model_forward_time": 0.02515125274658203, "step": 28173 }, { "epoch": 4.298858642578125e-05, "step": 28173, "training_step_time": 0.14807891845703125 }, { "epoch": 4.29901123046875e-05, "model_forward_time": 0.024869203567504883, "step": 28174 }, { "epoch": 4.29901123046875e-05, "step": 28174, "training_step_time": 0.10747885704040527 }, { "epoch": 4.299163818359375e-05, "model_forward_time": 0.02606797218322754, "step": 28175 }, { "epoch": 4.299163818359375e-05, "step": 28175, "training_step_time": 0.10799264907836914 }, { "epoch": 4.29931640625e-05, "model_forward_time": 0.025571107864379883, "step": 28176 }, { "epoch": 4.29931640625e-05, "step": 28176, "training_step_time": 0.1104433536529541 }, { "epoch": 4.299468994140625e-05, "model_forward_time": 0.02513432502746582, "step": 28177 }, { "epoch": 4.299468994140625e-05, "step": 28177, "training_step_time": 0.10561108589172363 }, { "epoch": 4.29962158203125e-05, "model_forward_time": 0.02480626106262207, "step": 28178 }, { "epoch": 4.29962158203125e-05, "step": 28178, "training_step_time": 0.1166372299194336 }, { "epoch": 4.299774169921875e-05, "model_forward_time": 0.024896860122680664, "step": 28179 }, { "epoch": 4.299774169921875e-05, "step": 28179, "training_step_time": 0.13382267951965332 }, { "epoch": 4.2999267578125e-05, "grad_norm": 0.05883041396737099, "learning_rate": 1.00284928439493e-06, "loss": 0.0093, "step": 28180 }, { "epoch": 4.2999267578125e-05, "model_forward_time": 0.025124788284301758, "step": 28180 }, { "epoch": 4.2999267578125e-05, "step": 28180, "training_step_time": 0.1123046875 }, { "epoch": 4.300079345703125e-05, "model_forward_time": 0.024877548217773438, "step": 28181 }, { "epoch": 4.300079345703125e-05, "step": 28181, "training_step_time": 0.10957503318786621 }, { "epoch": 4.30023193359375e-05, "model_forward_time": 0.02510380744934082, "step": 28182 }, { "epoch": 4.30023193359375e-05, "step": 28182, "training_step_time": 0.11555647850036621 }, { "epoch": 4.300384521484375e-05, "model_forward_time": 0.024840831756591797, "step": 28183 }, { "epoch": 4.300384521484375e-05, "step": 28183, "training_step_time": 0.10877656936645508 }, { "epoch": 4.300537109375e-05, "model_forward_time": 0.024894237518310547, "step": 28184 }, { "epoch": 4.300537109375e-05, "step": 28184, "training_step_time": 0.19675993919372559 }, { "epoch": 4.300689697265625e-05, "model_forward_time": 0.024432659149169922, "step": 28185 }, { "epoch": 4.300689697265625e-05, "step": 28185, "training_step_time": 0.12584710121154785 }, { "epoch": 4.30084228515625e-05, "model_forward_time": 0.02417445182800293, "step": 28186 }, { "epoch": 4.30084228515625e-05, "step": 28186, "training_step_time": 0.12694740295410156 }, { "epoch": 4.300994873046875e-05, "model_forward_time": 0.02432084083557129, "step": 28187 }, { "epoch": 4.300994873046875e-05, "step": 28187, "training_step_time": 0.12506532669067383 }, { "epoch": 4.3011474609375e-05, "model_forward_time": 0.024622678756713867, "step": 28188 }, { "epoch": 4.3011474609375e-05, "step": 28188, "training_step_time": 0.12342071533203125 }, { "epoch": 4.301300048828125e-05, "model_forward_time": 0.025003671646118164, "step": 28189 }, { "epoch": 4.301300048828125e-05, "step": 28189, "training_step_time": 0.11469721794128418 }, { "epoch": 4.30145263671875e-05, "grad_norm": 0.18981486558914185, "learning_rate": 9.918957205933e-07, "loss": 0.0166, "step": 28190 }, { "epoch": 4.30145263671875e-05, "model_forward_time": 0.025143861770629883, "step": 28190 }, { "epoch": 4.30145263671875e-05, "step": 28190, "training_step_time": 0.11365652084350586 }, { "epoch": 4.301605224609375e-05, "model_forward_time": 0.025329113006591797, "step": 28191 }, { "epoch": 4.301605224609375e-05, "step": 28191, "training_step_time": 0.11486983299255371 }, { "epoch": 4.3017578125e-05, "model_forward_time": 0.025015830993652344, "step": 28192 }, { "epoch": 4.3017578125e-05, "step": 28192, "training_step_time": 0.10884952545166016 }, { "epoch": 4.301910400390625e-05, "model_forward_time": 0.025074005126953125, "step": 28193 }, { "epoch": 4.301910400390625e-05, "step": 28193, "training_step_time": 0.11087918281555176 }, { "epoch": 4.30206298828125e-05, "model_forward_time": 0.024862051010131836, "step": 28194 }, { "epoch": 4.30206298828125e-05, "step": 28194, "training_step_time": 0.10720634460449219 }, { "epoch": 4.302215576171875e-05, "model_forward_time": 0.024864912033081055, "step": 28195 }, { "epoch": 4.302215576171875e-05, "step": 28195, "training_step_time": 0.10701656341552734 }, { "epoch": 4.3023681640625e-05, "model_forward_time": 0.024621248245239258, "step": 28196 }, { "epoch": 4.3023681640625e-05, "step": 28196, "training_step_time": 0.11017298698425293 }, { "epoch": 4.302520751953125e-05, "model_forward_time": 0.02510833740234375, "step": 28197 }, { "epoch": 4.302520751953125e-05, "step": 28197, "training_step_time": 0.10614943504333496 }, { "epoch": 4.30267333984375e-05, "model_forward_time": 0.02577495574951172, "step": 28198 }, { "epoch": 4.30267333984375e-05, "step": 28198, "training_step_time": 0.1085202693939209 }, { "epoch": 4.302825927734375e-05, "model_forward_time": 0.02536773681640625, "step": 28199 }, { "epoch": 4.302825927734375e-05, "step": 28199, "training_step_time": 0.10648727416992188 }, { "epoch": 4.302978515625e-05, "grad_norm": 0.1806434690952301, "learning_rate": 9.810017062595322e-07, "loss": 0.0063, "step": 28200 }, { "epoch": 4.302978515625e-05, "model_forward_time": 0.024773597717285156, "step": 28200 }, { "epoch": 4.302978515625e-05, "step": 28200, "training_step_time": 0.11128950119018555 }, { "epoch": 4.303131103515625e-05, "model_forward_time": 0.024989843368530273, "step": 28201 }, { "epoch": 4.303131103515625e-05, "step": 28201, "training_step_time": 0.10437560081481934 }, { "epoch": 4.30328369140625e-05, "model_forward_time": 0.025533199310302734, "step": 28202 }, { "epoch": 4.30328369140625e-05, "step": 28202, "training_step_time": 0.1064298152923584 }, { "epoch": 4.303436279296875e-05, "model_forward_time": 0.025118350982666016, "step": 28203 }, { "epoch": 4.303436279296875e-05, "step": 28203, "training_step_time": 0.10435366630554199 }, { "epoch": 4.3035888671875e-05, "model_forward_time": 0.02518463134765625, "step": 28204 }, { "epoch": 4.3035888671875e-05, "step": 28204, "training_step_time": 0.1034393310546875 }, { "epoch": 4.303741455078125e-05, "model_forward_time": 0.024702072143554688, "step": 28205 }, { "epoch": 4.303741455078125e-05, "step": 28205, "training_step_time": 0.18328022956848145 }, { "epoch": 4.30389404296875e-05, "model_forward_time": 0.02493882179260254, "step": 28206 }, { "epoch": 4.30389404296875e-05, "step": 28206, "training_step_time": 0.1294238567352295 }, { "epoch": 4.304046630859375e-05, "model_forward_time": 0.024860382080078125, "step": 28207 }, { "epoch": 4.304046630859375e-05, "step": 28207, "training_step_time": 0.15462756156921387 }, { "epoch": 4.30419921875e-05, "model_forward_time": 0.024163246154785156, "step": 28208 }, { "epoch": 4.30419921875e-05, "step": 28208, "training_step_time": 0.21624112129211426 }, { "epoch": 4.304351806640625e-05, "model_forward_time": 0.02478814125061035, "step": 28209 }, { "epoch": 4.304351806640625e-05, "step": 28209, "training_step_time": 0.11119961738586426 }, { "epoch": 4.30450439453125e-05, "grad_norm": 0.05699380114674568, "learning_rate": 9.701672546308827e-07, "loss": 0.0049, "step": 28210 }, { "epoch": 4.30450439453125e-05, "model_forward_time": 0.02462482452392578, "step": 28210 }, { "epoch": 4.30450439453125e-05, "step": 28210, "training_step_time": 0.10605740547180176 }, { "epoch": 4.304656982421875e-05, "model_forward_time": 0.02567124366760254, "step": 28211 }, { "epoch": 4.304656982421875e-05, "step": 28211, "training_step_time": 0.10669803619384766 }, { "epoch": 4.3048095703125e-05, "model_forward_time": 0.026340484619140625, "step": 28212 }, { "epoch": 4.3048095703125e-05, "step": 28212, "training_step_time": 0.10974860191345215 }, { "epoch": 4.304962158203125e-05, "model_forward_time": 0.025995969772338867, "step": 28213 }, { "epoch": 4.304962158203125e-05, "step": 28213, "training_step_time": 0.12052607536315918 }, { "epoch": 4.30511474609375e-05, "model_forward_time": 0.027274370193481445, "step": 28214 }, { "epoch": 4.30511474609375e-05, "step": 28214, "training_step_time": 0.22322678565979004 }, { "epoch": 4.305267333984375e-05, "model_forward_time": 0.024499177932739258, "step": 28215 }, { "epoch": 4.305267333984375e-05, "step": 28215, "training_step_time": 0.11465907096862793 }, { "epoch": 4.305419921875e-05, "model_forward_time": 0.02574634552001953, "step": 28216 }, { "epoch": 4.305419921875e-05, "step": 28216, "training_step_time": 0.11844086647033691 }, { "epoch": 4.305572509765625e-05, "model_forward_time": 0.025334835052490234, "step": 28217 }, { "epoch": 4.305572509765625e-05, "step": 28217, "training_step_time": 0.16774702072143555 }, { "epoch": 4.30572509765625e-05, "model_forward_time": 0.024841785430908203, "step": 28218 }, { "epoch": 4.30572509765625e-05, "step": 28218, "training_step_time": 0.17049193382263184 }, { "epoch": 4.305877685546875e-05, "model_forward_time": 0.024930477142333984, "step": 28219 }, { "epoch": 4.305877685546875e-05, "step": 28219, "training_step_time": 0.10560393333435059 }, { "epoch": 4.3060302734375e-05, "grad_norm": 0.07996619492769241, "learning_rate": 9.593923788722315e-07, "loss": 0.0042, "step": 28220 }, { "epoch": 4.3060302734375e-05, "model_forward_time": 0.024774551391601562, "step": 28220 }, { "epoch": 4.3060302734375e-05, "step": 28220, "training_step_time": 0.10705971717834473 }, { "epoch": 4.306182861328125e-05, "model_forward_time": 0.024884462356567383, "step": 28221 }, { "epoch": 4.306182861328125e-05, "step": 28221, "training_step_time": 0.10380029678344727 }, { "epoch": 4.30633544921875e-05, "model_forward_time": 0.025249481201171875, "step": 28222 }, { "epoch": 4.30633544921875e-05, "step": 28222, "training_step_time": 0.18227505683898926 }, { "epoch": 4.306488037109375e-05, "model_forward_time": 0.024384498596191406, "step": 28223 }, { "epoch": 4.306488037109375e-05, "step": 28223, "training_step_time": 0.14304304122924805 }, { "epoch": 4.306640625e-05, "model_forward_time": 0.024037599563598633, "step": 28224 }, { "epoch": 4.306640625e-05, "step": 28224, "training_step_time": 0.10934042930603027 }, { "epoch": 4.306793212890625e-05, "model_forward_time": 0.024297475814819336, "step": 28225 }, { "epoch": 4.306793212890625e-05, "step": 28225, "training_step_time": 0.10251498222351074 }, { "epoch": 4.30694580078125e-05, "model_forward_time": 0.024562835693359375, "step": 28226 }, { "epoch": 4.30694580078125e-05, "step": 28226, "training_step_time": 0.12615728378295898 }, { "epoch": 4.307098388671875e-05, "model_forward_time": 0.025257110595703125, "step": 28227 }, { "epoch": 4.307098388671875e-05, "step": 28227, "training_step_time": 0.155104398727417 }, { "epoch": 4.3072509765625e-05, "model_forward_time": 0.024361133575439453, "step": 28228 }, { "epoch": 4.3072509765625e-05, "step": 28228, "training_step_time": 0.15401816368103027 }, { "epoch": 4.307403564453125e-05, "model_forward_time": 0.024705886840820312, "step": 28229 }, { "epoch": 4.307403564453125e-05, "step": 28229, "training_step_time": 0.13148045539855957 }, { "epoch": 4.30755615234375e-05, "grad_norm": 0.17489050328731537, "learning_rate": 9.486770920760668e-07, "loss": 0.0044, "step": 28230 }, { "epoch": 4.30755615234375e-05, "model_forward_time": 0.02397632598876953, "step": 28230 }, { "epoch": 4.30755615234375e-05, "step": 28230, "training_step_time": 0.1252427101135254 }, { "epoch": 4.307708740234375e-05, "model_forward_time": 0.024882793426513672, "step": 28231 }, { "epoch": 4.307708740234375e-05, "step": 28231, "training_step_time": 0.1289370059967041 }, { "epoch": 4.307861328125e-05, "model_forward_time": 0.02490520477294922, "step": 28232 }, { "epoch": 4.307861328125e-05, "step": 28232, "training_step_time": 0.1280078887939453 }, { "epoch": 4.308013916015625e-05, "model_forward_time": 0.024621009826660156, "step": 28233 }, { "epoch": 4.308013916015625e-05, "step": 28233, "training_step_time": 0.1309969425201416 }, { "epoch": 4.30816650390625e-05, "model_forward_time": 0.024280071258544922, "step": 28234 }, { "epoch": 4.30816650390625e-05, "step": 28234, "training_step_time": 0.12112188339233398 }, { "epoch": 4.308319091796875e-05, "model_forward_time": 0.02489781379699707, "step": 28235 }, { "epoch": 4.308319091796875e-05, "step": 28235, "training_step_time": 0.11665225028991699 }, { "epoch": 4.3084716796875e-05, "model_forward_time": 0.02522134780883789, "step": 28236 }, { "epoch": 4.3084716796875e-05, "step": 28236, "training_step_time": 0.11767888069152832 }, { "epoch": 4.308624267578125e-05, "model_forward_time": 0.025439739227294922, "step": 28237 }, { "epoch": 4.308624267578125e-05, "step": 28237, "training_step_time": 0.1096193790435791 }, { "epoch": 4.30877685546875e-05, "model_forward_time": 0.02503657341003418, "step": 28238 }, { "epoch": 4.30877685546875e-05, "step": 28238, "training_step_time": 0.11063909530639648 }, { "epoch": 4.308929443359375e-05, "model_forward_time": 0.0253903865814209, "step": 28239 }, { "epoch": 4.308929443359375e-05, "step": 28239, "training_step_time": 0.10936403274536133 }, { "epoch": 4.30908203125e-05, "grad_norm": 0.04891812801361084, "learning_rate": 9.380214072624682e-07, "loss": 0.0025, "step": 28240 }, { "epoch": 4.30908203125e-05, "model_forward_time": 0.025256633758544922, "step": 28240 }, { "epoch": 4.30908203125e-05, "step": 28240, "training_step_time": 0.1112825870513916 }, { "epoch": 4.309234619140625e-05, "model_forward_time": 0.025159597396850586, "step": 28241 }, { "epoch": 4.309234619140625e-05, "step": 28241, "training_step_time": 0.11025381088256836 }, { "epoch": 4.30938720703125e-05, "model_forward_time": 0.024993419647216797, "step": 28242 }, { "epoch": 4.30938720703125e-05, "step": 28242, "training_step_time": 0.10692548751831055 }, { "epoch": 4.309539794921875e-05, "model_forward_time": 0.02507495880126953, "step": 28243 }, { "epoch": 4.309539794921875e-05, "step": 28243, "training_step_time": 0.10691332817077637 }, { "epoch": 4.3096923828125e-05, "model_forward_time": 0.025977611541748047, "step": 28244 }, { "epoch": 4.3096923828125e-05, "step": 28244, "training_step_time": 0.10986566543579102 }, { "epoch": 4.309844970703125e-05, "model_forward_time": 0.024999618530273438, "step": 28245 }, { "epoch": 4.309844970703125e-05, "step": 28245, "training_step_time": 0.1065213680267334 }, { "epoch": 4.30999755859375e-05, "model_forward_time": 0.02481532096862793, "step": 28246 }, { "epoch": 4.30999755859375e-05, "step": 28246, "training_step_time": 0.10678696632385254 }, { "epoch": 4.310150146484375e-05, "model_forward_time": 0.024996519088745117, "step": 28247 }, { "epoch": 4.310150146484375e-05, "step": 28247, "training_step_time": 0.10556387901306152 }, { "epoch": 4.310302734375e-05, "model_forward_time": 0.02518773078918457, "step": 28248 }, { "epoch": 4.310302734375e-05, "step": 28248, "training_step_time": 0.10475993156433105 }, { "epoch": 4.310455322265625e-05, "model_forward_time": 0.02476358413696289, "step": 28249 }, { "epoch": 4.310455322265625e-05, "step": 28249, "training_step_time": 0.16903138160705566 }, { "epoch": 4.31060791015625e-05, "grad_norm": 0.34499987959861755, "learning_rate": 9.274253373791064e-07, "loss": 0.0093, "step": 28250 }, { "epoch": 4.31060791015625e-05, "model_forward_time": 0.024405956268310547, "step": 28250 }, { "epoch": 4.31060791015625e-05, "step": 28250, "training_step_time": 0.1441822052001953 }, { "epoch": 4.310760498046875e-05, "model_forward_time": 0.02486705780029297, "step": 28251 }, { "epoch": 4.310760498046875e-05, "step": 28251, "training_step_time": 0.14620566368103027 }, { "epoch": 4.3109130859375e-05, "model_forward_time": 0.02442789077758789, "step": 28252 }, { "epoch": 4.3109130859375e-05, "step": 28252, "training_step_time": 0.21453332901000977 }, { "epoch": 4.311065673828125e-05, "model_forward_time": 0.02476644515991211, "step": 28253 }, { "epoch": 4.311065673828125e-05, "step": 28253, "training_step_time": 0.11744499206542969 }, { "epoch": 4.31121826171875e-05, "model_forward_time": 0.024611949920654297, "step": 28254 }, { "epoch": 4.31121826171875e-05, "step": 28254, "training_step_time": 0.10311126708984375 }, { "epoch": 4.311370849609375e-05, "model_forward_time": 0.02522110939025879, "step": 28255 }, { "epoch": 4.311370849609375e-05, "step": 28255, "training_step_time": 0.10689926147460938 }, { "epoch": 4.3115234375e-05, "model_forward_time": 0.02557969093322754, "step": 28256 }, { "epoch": 4.3115234375e-05, "step": 28256, "training_step_time": 0.10348844528198242 }, { "epoch": 4.311676025390625e-05, "model_forward_time": 0.02521347999572754, "step": 28257 }, { "epoch": 4.311676025390625e-05, "step": 28257, "training_step_time": 0.1448228359222412 }, { "epoch": 4.31182861328125e-05, "model_forward_time": 0.025263547897338867, "step": 28258 }, { "epoch": 4.31182861328125e-05, "step": 28258, "training_step_time": 0.2192821502685547 }, { "epoch": 4.311981201171875e-05, "model_forward_time": 0.02473282814025879, "step": 28259 }, { "epoch": 4.311981201171875e-05, "step": 28259, "training_step_time": 0.17320942878723145 }, { "epoch": 4.3121337890625e-05, "grad_norm": 0.11382688581943512, "learning_rate": 9.168888953011989e-07, "loss": 0.006, "step": 28260 }, { "epoch": 4.3121337890625e-05, "model_forward_time": 0.024869441986083984, "step": 28260 }, { "epoch": 4.3121337890625e-05, "step": 28260, "training_step_time": 0.14842438697814941 }, { "epoch": 4.312286376953125e-05, "model_forward_time": 0.02479243278503418, "step": 28261 }, { "epoch": 4.312286376953125e-05, "step": 28261, "training_step_time": 0.11565065383911133 }, { "epoch": 4.31243896484375e-05, "model_forward_time": 0.026392698287963867, "step": 28262 }, { "epoch": 4.31243896484375e-05, "step": 28262, "training_step_time": 0.14272475242614746 }, { "epoch": 4.312591552734375e-05, "model_forward_time": 0.025150537490844727, "step": 28263 }, { "epoch": 4.312591552734375e-05, "step": 28263, "training_step_time": 0.10574936866760254 }, { "epoch": 4.312744140625e-05, "model_forward_time": 0.025140047073364258, "step": 28264 }, { "epoch": 4.312744140625e-05, "step": 28264, "training_step_time": 0.10417962074279785 }, { "epoch": 4.312896728515625e-05, "model_forward_time": 0.02566385269165039, "step": 28265 }, { "epoch": 4.312896728515625e-05, "step": 28265, "training_step_time": 0.1063389778137207 }, { "epoch": 4.31304931640625e-05, "model_forward_time": 0.02505803108215332, "step": 28266 }, { "epoch": 4.31304931640625e-05, "step": 28266, "training_step_time": 0.16218280792236328 }, { "epoch": 4.313201904296875e-05, "model_forward_time": 0.024709701538085938, "step": 28267 }, { "epoch": 4.313201904296875e-05, "step": 28267, "training_step_time": 0.13949322700500488 }, { "epoch": 4.3133544921875e-05, "model_forward_time": 0.02446126937866211, "step": 28268 }, { "epoch": 4.3133544921875e-05, "step": 28268, "training_step_time": 0.10626792907714844 }, { "epoch": 4.313507080078125e-05, "model_forward_time": 0.025397777557373047, "step": 28269 }, { "epoch": 4.313507080078125e-05, "step": 28269, "training_step_time": 0.11220073699951172 }, { "epoch": 4.31365966796875e-05, "grad_norm": 0.08717884123325348, "learning_rate": 9.064120938315213e-07, "loss": 0.0023, "step": 28270 }, { "epoch": 4.31365966796875e-05, "model_forward_time": 0.0251922607421875, "step": 28270 }, { "epoch": 4.31365966796875e-05, "step": 28270, "training_step_time": 0.1097097396850586 }, { "epoch": 4.313812255859375e-05, "model_forward_time": 0.02539539337158203, "step": 28271 }, { "epoch": 4.313812255859375e-05, "step": 28271, "training_step_time": 0.10501718521118164 }, { "epoch": 4.31396484375e-05, "model_forward_time": 0.02543354034423828, "step": 28272 }, { "epoch": 4.31396484375e-05, "step": 28272, "training_step_time": 0.19508075714111328 }, { "epoch": 4.314117431640625e-05, "model_forward_time": 0.024466753005981445, "step": 28273 }, { "epoch": 4.314117431640625e-05, "step": 28273, "training_step_time": 0.1120765209197998 }, { "epoch": 4.31427001953125e-05, "model_forward_time": 0.024715900421142578, "step": 28274 }, { "epoch": 4.31427001953125e-05, "step": 28274, "training_step_time": 0.12807583808898926 }, { "epoch": 4.314422607421875e-05, "model_forward_time": 0.02486252784729004, "step": 28275 }, { "epoch": 4.314422607421875e-05, "step": 28275, "training_step_time": 0.12356305122375488 }, { "epoch": 4.3145751953125e-05, "model_forward_time": 0.02499985694885254, "step": 28276 }, { "epoch": 4.3145751953125e-05, "step": 28276, "training_step_time": 0.12183666229248047 }, { "epoch": 4.314727783203125e-05, "model_forward_time": 0.025493144989013672, "step": 28277 }, { "epoch": 4.314727783203125e-05, "step": 28277, "training_step_time": 0.12496089935302734 }, { "epoch": 4.31488037109375e-05, "model_forward_time": 0.025468826293945312, "step": 28278 }, { "epoch": 4.31488037109375e-05, "step": 28278, "training_step_time": 0.11671662330627441 }, { "epoch": 4.315032958984375e-05, "model_forward_time": 0.024949073791503906, "step": 28279 }, { "epoch": 4.315032958984375e-05, "step": 28279, "training_step_time": 0.1119842529296875 }, { "epoch": 4.315185546875e-05, "grad_norm": 0.04625631868839264, "learning_rate": 8.959949457003736e-07, "loss": 0.0054, "step": 28280 }, { "epoch": 4.315185546875e-05, "model_forward_time": 0.025320768356323242, "step": 28280 }, { "epoch": 4.315185546875e-05, "step": 28280, "training_step_time": 0.11227297782897949 }, { "epoch": 4.315338134765625e-05, "model_forward_time": 0.025124073028564453, "step": 28281 }, { "epoch": 4.315338134765625e-05, "step": 28281, "training_step_time": 0.10923576354980469 }, { "epoch": 4.31549072265625e-05, "model_forward_time": 0.02552032470703125, "step": 28282 }, { "epoch": 4.31549072265625e-05, "step": 28282, "training_step_time": 0.1077263355255127 }, { "epoch": 4.315643310546875e-05, "model_forward_time": 0.02720165252685547, "step": 28283 }, { "epoch": 4.315643310546875e-05, "step": 28283, "training_step_time": 0.10877156257629395 }, { "epoch": 4.3157958984375e-05, "model_forward_time": 0.025402545928955078, "step": 28284 }, { "epoch": 4.3157958984375e-05, "step": 28284, "training_step_time": 0.11357951164245605 }, { "epoch": 4.315948486328125e-05, "model_forward_time": 0.027219772338867188, "step": 28285 }, { "epoch": 4.315948486328125e-05, "step": 28285, "training_step_time": 0.10899949073791504 }, { "epoch": 4.31610107421875e-05, "model_forward_time": 0.025385618209838867, "step": 28286 }, { "epoch": 4.31610107421875e-05, "step": 28286, "training_step_time": 0.11057853698730469 }, { "epoch": 4.316253662109375e-05, "model_forward_time": 0.025757551193237305, "step": 28287 }, { "epoch": 4.316253662109375e-05, "step": 28287, "training_step_time": 0.1057596206665039 }, { "epoch": 4.31640625e-05, "model_forward_time": 0.02553415298461914, "step": 28288 }, { "epoch": 4.31640625e-05, "step": 28288, "training_step_time": 0.10589098930358887 }, { "epoch": 4.316558837890625e-05, "model_forward_time": 0.02532505989074707, "step": 28289 }, { "epoch": 4.316558837890625e-05, "step": 28289, "training_step_time": 0.10649704933166504 }, { "epoch": 4.31671142578125e-05, "grad_norm": 0.05212622508406639, "learning_rate": 8.856374635655695e-07, "loss": 0.0025, "step": 28290 }, { "epoch": 4.31671142578125e-05, "model_forward_time": 0.025525331497192383, "step": 28290 }, { "epoch": 4.31671142578125e-05, "step": 28290, "training_step_time": 0.10468745231628418 }, { "epoch": 4.316864013671875e-05, "model_forward_time": 0.025528907775878906, "step": 28291 }, { "epoch": 4.316864013671875e-05, "step": 28291, "training_step_time": 0.11216282844543457 }, { "epoch": 4.3170166015625e-05, "model_forward_time": 0.025467634201049805, "step": 28292 }, { "epoch": 4.3170166015625e-05, "step": 28292, "training_step_time": 0.10926532745361328 }, { "epoch": 4.317169189453125e-05, "model_forward_time": 0.02861166000366211, "step": 28293 }, { "epoch": 4.317169189453125e-05, "step": 28293, "training_step_time": 0.16585516929626465 }, { "epoch": 4.31732177734375e-05, "model_forward_time": 0.024658679962158203, "step": 28294 }, { "epoch": 4.31732177734375e-05, "step": 28294, "training_step_time": 0.16858959197998047 }, { "epoch": 4.317474365234375e-05, "model_forward_time": 0.024713516235351562, "step": 28295 }, { "epoch": 4.317474365234375e-05, "step": 28295, "training_step_time": 0.18735861778259277 }, { "epoch": 4.317626953125e-05, "model_forward_time": 0.026334524154663086, "step": 28296 }, { "epoch": 4.317626953125e-05, "step": 28296, "training_step_time": 0.1381378173828125 }, { "epoch": 4.317779541015625e-05, "model_forward_time": 0.024439096450805664, "step": 28297 }, { "epoch": 4.317779541015625e-05, "step": 28297, "training_step_time": 0.19620060920715332 }, { "epoch": 4.31793212890625e-05, "model_forward_time": 0.024636268615722656, "step": 28298 }, { "epoch": 4.31793212890625e-05, "step": 28298, "training_step_time": 0.18410444259643555 }, { "epoch": 4.318084716796875e-05, "model_forward_time": 0.024605274200439453, "step": 28299 }, { "epoch": 4.318084716796875e-05, "step": 28299, "training_step_time": 0.10127019882202148 }, { "epoch": 4.3182373046875e-05, "grad_norm": 0.0862986296415329, "learning_rate": 8.753396600124253e-07, "loss": 0.003, "step": 28300 }, { "epoch": 4.3182373046875e-05, "model_forward_time": 0.02459716796875, "step": 28300 }, { "epoch": 4.3182373046875e-05, "step": 28300, "training_step_time": 0.10310173034667969 }, { "epoch": 4.318389892578125e-05, "model_forward_time": 0.025064706802368164, "step": 28301 }, { "epoch": 4.318389892578125e-05, "step": 28301, "training_step_time": 0.1896193027496338 }, { "epoch": 4.31854248046875e-05, "model_forward_time": 0.02478647232055664, "step": 28302 }, { "epoch": 4.31854248046875e-05, "step": 28302, "training_step_time": 0.2170579433441162 }, { "epoch": 4.318695068359375e-05, "model_forward_time": 0.024701833724975586, "step": 28303 }, { "epoch": 4.318695068359375e-05, "step": 28303, "training_step_time": 0.1712322235107422 }, { "epoch": 4.31884765625e-05, "model_forward_time": 0.023927927017211914, "step": 28304 }, { "epoch": 4.31884765625e-05, "step": 28304, "training_step_time": 0.14850974082946777 }, { "epoch": 4.319000244140625e-05, "model_forward_time": 0.024541139602661133, "step": 28305 }, { "epoch": 4.319000244140625e-05, "step": 28305, "training_step_time": 0.13044452667236328 }, { "epoch": 4.31915283203125e-05, "model_forward_time": 0.024962663650512695, "step": 28306 }, { "epoch": 4.31915283203125e-05, "step": 28306, "training_step_time": 0.17561054229736328 }, { "epoch": 4.319305419921875e-05, "model_forward_time": 0.024559974670410156, "step": 28307 }, { "epoch": 4.319305419921875e-05, "step": 28307, "training_step_time": 0.1543276309967041 }, { "epoch": 4.3194580078125e-05, "model_forward_time": 0.024391889572143555, "step": 28308 }, { "epoch": 4.3194580078125e-05, "step": 28308, "training_step_time": 0.14554142951965332 }, { "epoch": 4.319610595703125e-05, "model_forward_time": 0.02467036247253418, "step": 28309 }, { "epoch": 4.319610595703125e-05, "step": 28309, "training_step_time": 0.20698904991149902 }, { "epoch": 4.31976318359375e-05, "grad_norm": 0.06878754496574402, "learning_rate": 8.651015475537538e-07, "loss": 0.003, "step": 28310 }, { "epoch": 4.31976318359375e-05, "model_forward_time": 0.024463176727294922, "step": 28310 }, { "epoch": 4.31976318359375e-05, "step": 28310, "training_step_time": 0.1341533660888672 }, { "epoch": 4.319915771484375e-05, "model_forward_time": 0.02340841293334961, "step": 28311 }, { "epoch": 4.319915771484375e-05, "step": 28311, "training_step_time": 0.19932293891906738 }, { "epoch": 4.320068359375e-05, "model_forward_time": 0.02461385726928711, "step": 28312 }, { "epoch": 4.320068359375e-05, "step": 28312, "training_step_time": 0.11487221717834473 }, { "epoch": 4.320220947265625e-05, "model_forward_time": 0.024092435836791992, "step": 28313 }, { "epoch": 4.320220947265625e-05, "step": 28313, "training_step_time": 0.18918967247009277 }, { "epoch": 4.32037353515625e-05, "model_forward_time": 0.024534940719604492, "step": 28314 }, { "epoch": 4.32037353515625e-05, "step": 28314, "training_step_time": 0.10866928100585938 }, { "epoch": 4.320526123046875e-05, "model_forward_time": 0.024505615234375, "step": 28315 }, { "epoch": 4.320526123046875e-05, "step": 28315, "training_step_time": 0.11437535285949707 }, { "epoch": 4.3206787109375e-05, "model_forward_time": 0.02554154396057129, "step": 28316 }, { "epoch": 4.3206787109375e-05, "step": 28316, "training_step_time": 0.10933947563171387 }, { "epoch": 4.320831298828125e-05, "model_forward_time": 0.025414466857910156, "step": 28317 }, { "epoch": 4.320831298828125e-05, "step": 28317, "training_step_time": 0.109588623046875 }, { "epoch": 4.32098388671875e-05, "model_forward_time": 0.025625944137573242, "step": 28318 }, { "epoch": 4.32098388671875e-05, "step": 28318, "training_step_time": 0.10823202133178711 }, { "epoch": 4.321136474609375e-05, "model_forward_time": 0.02571582794189453, "step": 28319 }, { "epoch": 4.321136474609375e-05, "step": 28319, "training_step_time": 0.10779619216918945 }, { "epoch": 4.3212890625e-05, "grad_norm": 0.04035257175564766, "learning_rate": 8.549231386298151e-07, "loss": 0.0038, "step": 28320 }, { "epoch": 4.3212890625e-05, "model_forward_time": 0.025146484375, "step": 28320 }, { "epoch": 4.3212890625e-05, "step": 28320, "training_step_time": 0.10576033592224121 }, { "epoch": 4.321441650390625e-05, "model_forward_time": 0.02530074119567871, "step": 28321 }, { "epoch": 4.321441650390625e-05, "step": 28321, "training_step_time": 0.10698771476745605 }, { "epoch": 4.32159423828125e-05, "model_forward_time": 0.025170326232910156, "step": 28322 }, { "epoch": 4.32159423828125e-05, "step": 28322, "training_step_time": 0.10840225219726562 }, { "epoch": 4.321746826171875e-05, "model_forward_time": 0.025565385818481445, "step": 28323 }, { "epoch": 4.321746826171875e-05, "step": 28323, "training_step_time": 0.10559844970703125 }, { "epoch": 4.3218994140625e-05, "model_forward_time": 0.02537393569946289, "step": 28324 }, { "epoch": 4.3218994140625e-05, "step": 28324, "training_step_time": 0.10785841941833496 }, { "epoch": 4.322052001953125e-05, "model_forward_time": 0.025562047958374023, "step": 28325 }, { "epoch": 4.322052001953125e-05, "step": 28325, "training_step_time": 0.10639643669128418 }, { "epoch": 4.32220458984375e-05, "model_forward_time": 0.027622222900390625, "step": 28326 }, { "epoch": 4.32220458984375e-05, "step": 28326, "training_step_time": 0.10772156715393066 }, { "epoch": 4.322357177734375e-05, "model_forward_time": 0.025440454483032227, "step": 28327 }, { "epoch": 4.322357177734375e-05, "step": 28327, "training_step_time": 0.10530447959899902 }, { "epoch": 4.322509765625e-05, "model_forward_time": 0.02528071403503418, "step": 28328 }, { "epoch": 4.322509765625e-05, "step": 28328, "training_step_time": 0.1056208610534668 }, { "epoch": 4.322662353515625e-05, "model_forward_time": 0.02536630630493164, "step": 28329 }, { "epoch": 4.322662353515625e-05, "step": 28329, "training_step_time": 0.10483694076538086 }, { "epoch": 4.32281494140625e-05, "grad_norm": 0.042170602828264236, "learning_rate": 8.448044456083493e-07, "loss": 0.0049, "step": 28330 }, { "epoch": 4.32281494140625e-05, "model_forward_time": 0.025777339935302734, "step": 28330 }, { "epoch": 4.32281494140625e-05, "step": 28330, "training_step_time": 0.10893654823303223 }, { "epoch": 4.322967529296875e-05, "model_forward_time": 0.02574014663696289, "step": 28331 }, { "epoch": 4.322967529296875e-05, "step": 28331, "training_step_time": 0.10791540145874023 }, { "epoch": 4.3231201171875e-05, "model_forward_time": 0.027769088745117188, "step": 28332 }, { "epoch": 4.3231201171875e-05, "step": 28332, "training_step_time": 0.10796689987182617 }, { "epoch": 4.323272705078125e-05, "model_forward_time": 0.025609254837036133, "step": 28333 }, { "epoch": 4.323272705078125e-05, "step": 28333, "training_step_time": 0.10552978515625 }, { "epoch": 4.32342529296875e-05, "model_forward_time": 0.02552652359008789, "step": 28334 }, { "epoch": 4.32342529296875e-05, "step": 28334, "training_step_time": 0.10676908493041992 }, { "epoch": 4.323577880859375e-05, "model_forward_time": 0.02567267417907715, "step": 28335 }, { "epoch": 4.323577880859375e-05, "step": 28335, "training_step_time": 0.10521364212036133 }, { "epoch": 4.32373046875e-05, "model_forward_time": 0.026295185089111328, "step": 28336 }, { "epoch": 4.32373046875e-05, "step": 28336, "training_step_time": 0.15050673484802246 }, { "epoch": 4.323883056640625e-05, "model_forward_time": 0.025277376174926758, "step": 28337 }, { "epoch": 4.323883056640625e-05, "step": 28337, "training_step_time": 0.1932518482208252 }, { "epoch": 4.32403564453125e-05, "model_forward_time": 0.024654150009155273, "step": 28338 }, { "epoch": 4.32403564453125e-05, "step": 28338, "training_step_time": 0.21532201766967773 }, { "epoch": 4.324188232421875e-05, "model_forward_time": 0.024502992630004883, "step": 28339 }, { "epoch": 4.324188232421875e-05, "step": 28339, "training_step_time": 0.16487741470336914 }, { "epoch": 4.3243408203125e-05, "grad_norm": 0.0799122229218483, "learning_rate": 8.347454807845045e-07, "loss": 0.0043, "step": 28340 }, { "epoch": 4.3243408203125e-05, "model_forward_time": 0.02477741241455078, "step": 28340 }, { "epoch": 4.3243408203125e-05, "step": 28340, "training_step_time": 0.10457611083984375 }, { "epoch": 4.324493408203125e-05, "model_forward_time": 0.025510311126708984, "step": 28341 }, { "epoch": 4.324493408203125e-05, "step": 28341, "training_step_time": 0.10272932052612305 }, { "epoch": 4.32464599609375e-05, "model_forward_time": 0.02563953399658203, "step": 28342 }, { "epoch": 4.32464599609375e-05, "step": 28342, "training_step_time": 0.1034855842590332 }, { "epoch": 4.324798583984375e-05, "model_forward_time": 0.025463104248046875, "step": 28343 }, { "epoch": 4.324798583984375e-05, "step": 28343, "training_step_time": 0.10534262657165527 }, { "epoch": 4.324951171875e-05, "model_forward_time": 0.025864839553833008, "step": 28344 }, { "epoch": 4.324951171875e-05, "step": 28344, "training_step_time": 0.17320752143859863 }, { "epoch": 4.325103759765625e-05, "model_forward_time": 0.024507999420166016, "step": 28345 }, { "epoch": 4.325103759765625e-05, "step": 28345, "training_step_time": 0.22058534622192383 }, { "epoch": 4.32525634765625e-05, "model_forward_time": 0.024581193923950195, "step": 28346 }, { "epoch": 4.32525634765625e-05, "step": 28346, "training_step_time": 0.2081892490386963 }, { "epoch": 4.325408935546875e-05, "model_forward_time": 0.024051189422607422, "step": 28347 }, { "epoch": 4.325408935546875e-05, "step": 28347, "training_step_time": 0.11482667922973633 }, { "epoch": 4.3255615234375e-05, "model_forward_time": 0.024718284606933594, "step": 28348 }, { "epoch": 4.3255615234375e-05, "step": 28348, "training_step_time": 0.12096476554870605 }, { "epoch": 4.325714111328125e-05, "model_forward_time": 0.025110721588134766, "step": 28349 }, { "epoch": 4.325714111328125e-05, "step": 28349, "training_step_time": 0.13621950149536133 }, { "epoch": 4.32586669921875e-05, "grad_norm": 0.05207629129290581, "learning_rate": 8.247462563808817e-07, "loss": 0.0028, "step": 28350 }, { "epoch": 4.32586669921875e-05, "model_forward_time": 0.02513432502746582, "step": 28350 }, { "epoch": 4.32586669921875e-05, "step": 28350, "training_step_time": 0.10732269287109375 }, { "epoch": 4.326019287109375e-05, "model_forward_time": 0.025487422943115234, "step": 28351 }, { "epoch": 4.326019287109375e-05, "step": 28351, "training_step_time": 0.10588431358337402 }, { "epoch": 4.326171875e-05, "model_forward_time": 0.025292396545410156, "step": 28352 }, { "epoch": 4.326171875e-05, "step": 28352, "training_step_time": 0.10665154457092285 }, { "epoch": 4.326324462890625e-05, "model_forward_time": 0.025133371353149414, "step": 28353 }, { "epoch": 4.326324462890625e-05, "step": 28353, "training_step_time": 0.12339425086975098 }, { "epoch": 4.32647705078125e-05, "model_forward_time": 0.02559828758239746, "step": 28354 }, { "epoch": 4.32647705078125e-05, "step": 28354, "training_step_time": 0.14159178733825684 }, { "epoch": 4.326629638671875e-05, "model_forward_time": 0.024936676025390625, "step": 28355 }, { "epoch": 4.326629638671875e-05, "step": 28355, "training_step_time": 0.10797977447509766 }, { "epoch": 4.3267822265625e-05, "model_forward_time": 0.02536797523498535, "step": 28356 }, { "epoch": 4.3267822265625e-05, "step": 28356, "training_step_time": 0.10581254959106445 }, { "epoch": 4.326934814453125e-05, "model_forward_time": 0.02582550048828125, "step": 28357 }, { "epoch": 4.326934814453125e-05, "step": 28357, "training_step_time": 0.11330509185791016 }, { "epoch": 4.32708740234375e-05, "model_forward_time": 0.025530576705932617, "step": 28358 }, { "epoch": 4.32708740234375e-05, "step": 28358, "training_step_time": 0.10529446601867676 }, { "epoch": 4.327239990234375e-05, "model_forward_time": 0.024924516677856445, "step": 28359 }, { "epoch": 4.327239990234375e-05, "step": 28359, "training_step_time": 0.20937108993530273 }, { "epoch": 4.327392578125e-05, "grad_norm": 0.029038051143288612, "learning_rate": 8.148067845474838e-07, "loss": 0.0026, "step": 28360 }, { "epoch": 4.327392578125e-05, "model_forward_time": 0.024399757385253906, "step": 28360 }, { "epoch": 4.327392578125e-05, "step": 28360, "training_step_time": 0.10466861724853516 }, { "epoch": 4.327545166015625e-05, "model_forward_time": 0.024623870849609375, "step": 28361 }, { "epoch": 4.327545166015625e-05, "step": 28361, "training_step_time": 0.10635733604431152 }, { "epoch": 4.32769775390625e-05, "model_forward_time": 0.027372121810913086, "step": 28362 }, { "epoch": 4.32769775390625e-05, "step": 28362, "training_step_time": 0.10723304748535156 }, { "epoch": 4.327850341796875e-05, "model_forward_time": 0.02536940574645996, "step": 28363 }, { "epoch": 4.327850341796875e-05, "step": 28363, "training_step_time": 0.10953235626220703 }, { "epoch": 4.3280029296875e-05, "model_forward_time": 0.02526545524597168, "step": 28364 }, { "epoch": 4.3280029296875e-05, "step": 28364, "training_step_time": 0.10798263549804688 }, { "epoch": 4.328155517578125e-05, "model_forward_time": 0.024599790573120117, "step": 28365 }, { "epoch": 4.328155517578125e-05, "step": 28365, "training_step_time": 0.10374927520751953 }, { "epoch": 4.32830810546875e-05, "model_forward_time": 0.025591373443603516, "step": 28366 }, { "epoch": 4.32830810546875e-05, "step": 28366, "training_step_time": 0.1036214828491211 }, { "epoch": 4.328460693359375e-05, "model_forward_time": 0.025064706802368164, "step": 28367 }, { "epoch": 4.328460693359375e-05, "step": 28367, "training_step_time": 0.1049656867980957 }, { "epoch": 4.32861328125e-05, "model_forward_time": 0.025427579879760742, "step": 28368 }, { "epoch": 4.32861328125e-05, "step": 28368, "training_step_time": 0.10957956314086914 }, { "epoch": 4.328765869140625e-05, "model_forward_time": 0.025300264358520508, "step": 28369 }, { "epoch": 4.328765869140625e-05, "step": 28369, "training_step_time": 0.10419106483459473 }, { "epoch": 4.32891845703125e-05, "grad_norm": 0.0704699456691742, "learning_rate": 8.049270773617057e-07, "loss": 0.0056, "step": 28370 }, { "epoch": 4.32891845703125e-05, "model_forward_time": 0.02546095848083496, "step": 28370 }, { "epoch": 4.32891845703125e-05, "step": 28370, "training_step_time": 0.10808873176574707 }, { "epoch": 4.329071044921875e-05, "model_forward_time": 0.02561163902282715, "step": 28371 }, { "epoch": 4.329071044921875e-05, "step": 28371, "training_step_time": 0.10383391380310059 }, { "epoch": 4.3292236328125e-05, "model_forward_time": 0.02538585662841797, "step": 28372 }, { "epoch": 4.3292236328125e-05, "step": 28372, "training_step_time": 0.10588693618774414 }, { "epoch": 4.329376220703125e-05, "model_forward_time": 0.02519965171813965, "step": 28373 }, { "epoch": 4.329376220703125e-05, "step": 28373, "training_step_time": 0.10465693473815918 }, { "epoch": 4.32952880859375e-05, "model_forward_time": 0.025647878646850586, "step": 28374 }, { "epoch": 4.32952880859375e-05, "step": 28374, "training_step_time": 0.10506415367126465 }, { "epoch": 4.329681396484375e-05, "model_forward_time": 0.025729894638061523, "step": 28375 }, { "epoch": 4.329681396484375e-05, "step": 28375, "training_step_time": 0.10447239875793457 }, { "epoch": 4.329833984375e-05, "model_forward_time": 0.02562689781188965, "step": 28376 }, { "epoch": 4.329833984375e-05, "step": 28376, "training_step_time": 0.10456967353820801 }, { "epoch": 4.329986572265625e-05, "model_forward_time": 0.025527000427246094, "step": 28377 }, { "epoch": 4.329986572265625e-05, "step": 28377, "training_step_time": 0.1053462028503418 }, { "epoch": 4.33013916015625e-05, "model_forward_time": 0.026200532913208008, "step": 28378 }, { "epoch": 4.33013916015625e-05, "step": 28378, "training_step_time": 0.10837292671203613 }, { "epoch": 4.330291748046875e-05, "model_forward_time": 0.02554607391357422, "step": 28379 }, { "epoch": 4.330291748046875e-05, "step": 28379, "training_step_time": 0.11129450798034668 }, { "epoch": 4.3304443359375e-05, "grad_norm": 0.04326881840825081, "learning_rate": 7.951071468283167e-07, "loss": 0.0044, "step": 28380 }, { "epoch": 4.3304443359375e-05, "model_forward_time": 0.024960041046142578, "step": 28380 }, { "epoch": 4.3304443359375e-05, "step": 28380, "training_step_time": 0.17937064170837402 }, { "epoch": 4.330596923828125e-05, "model_forward_time": 0.02476811408996582, "step": 28381 }, { "epoch": 4.330596923828125e-05, "step": 28381, "training_step_time": 0.20872068405151367 }, { "epoch": 4.33074951171875e-05, "model_forward_time": 0.02447199821472168, "step": 28382 }, { "epoch": 4.33074951171875e-05, "step": 28382, "training_step_time": 0.16397500038146973 }, { "epoch": 4.330902099609375e-05, "model_forward_time": 0.024724960327148438, "step": 28383 }, { "epoch": 4.330902099609375e-05, "step": 28383, "training_step_time": 0.16937494277954102 }, { "epoch": 4.3310546875e-05, "model_forward_time": 0.02457714080810547, "step": 28384 }, { "epoch": 4.3310546875e-05, "step": 28384, "training_step_time": 0.2020871639251709 }, { "epoch": 4.331207275390625e-05, "model_forward_time": 0.025063514709472656, "step": 28385 }, { "epoch": 4.331207275390625e-05, "step": 28385, "training_step_time": 0.14789819717407227 }, { "epoch": 4.33135986328125e-05, "model_forward_time": 0.024549484252929688, "step": 28386 }, { "epoch": 4.33135986328125e-05, "step": 28386, "training_step_time": 0.13049674034118652 }, { "epoch": 4.331512451171875e-05, "model_forward_time": 0.0248873233795166, "step": 28387 }, { "epoch": 4.331512451171875e-05, "step": 28387, "training_step_time": 0.12593984603881836 }, { "epoch": 4.3316650390625e-05, "model_forward_time": 0.02491283416748047, "step": 28388 }, { "epoch": 4.3316650390625e-05, "step": 28388, "training_step_time": 0.12424492835998535 }, { "epoch": 4.331817626953125e-05, "model_forward_time": 0.02517104148864746, "step": 28389 }, { "epoch": 4.331817626953125e-05, "step": 28389, "training_step_time": 0.13075041770935059 }, { "epoch": 4.33197021484375e-05, "grad_norm": 0.032751381397247314, "learning_rate": 7.853470048794664e-07, "loss": 0.0081, "step": 28390 }, { "epoch": 4.33197021484375e-05, "model_forward_time": 0.02523064613342285, "step": 28390 }, { "epoch": 4.33197021484375e-05, "step": 28390, "training_step_time": 0.20630574226379395 }, { "epoch": 4.332122802734375e-05, "model_forward_time": 0.023665904998779297, "step": 28391 }, { "epoch": 4.332122802734375e-05, "step": 28391, "training_step_time": 0.11001729965209961 }, { "epoch": 4.332275390625e-05, "model_forward_time": 0.02480292320251465, "step": 28392 }, { "epoch": 4.332275390625e-05, "step": 28392, "training_step_time": 0.10884547233581543 }, { "epoch": 4.332427978515625e-05, "model_forward_time": 0.02496480941772461, "step": 28393 }, { "epoch": 4.332427978515625e-05, "step": 28393, "training_step_time": 0.11189484596252441 }, { "epoch": 4.33258056640625e-05, "model_forward_time": 0.025141239166259766, "step": 28394 }, { "epoch": 4.33258056640625e-05, "step": 28394, "training_step_time": 0.10819745063781738 }, { "epoch": 4.332733154296875e-05, "model_forward_time": 0.02517986297607422, "step": 28395 }, { "epoch": 4.332733154296875e-05, "step": 28395, "training_step_time": 0.13584160804748535 }, { "epoch": 4.3328857421875e-05, "model_forward_time": 0.025691747665405273, "step": 28396 }, { "epoch": 4.3328857421875e-05, "step": 28396, "training_step_time": 0.10487222671508789 }, { "epoch": 4.333038330078125e-05, "model_forward_time": 0.0257570743560791, "step": 28397 }, { "epoch": 4.333038330078125e-05, "step": 28397, "training_step_time": 0.10948920249938965 }, { "epoch": 4.33319091796875e-05, "model_forward_time": 0.02497076988220215, "step": 28398 }, { "epoch": 4.33319091796875e-05, "step": 28398, "training_step_time": 0.10701894760131836 }, { "epoch": 4.333343505859375e-05, "model_forward_time": 0.02463364601135254, "step": 28399 }, { "epoch": 4.333343505859375e-05, "step": 28399, "training_step_time": 0.12427473068237305 }, { "epoch": 4.33349609375e-05, "grad_norm": 0.0698418915271759, "learning_rate": 7.756466633746407e-07, "loss": 0.0028, "step": 28400 }, { "epoch": 4.33349609375e-05, "model_forward_time": 0.024944305419921875, "step": 28400 }, { "epoch": 4.33349609375e-05, "step": 28400, "training_step_time": 0.12619304656982422 }, { "epoch": 4.333648681640625e-05, "model_forward_time": 0.02522134780883789, "step": 28401 }, { "epoch": 4.333648681640625e-05, "step": 28401, "training_step_time": 0.1080789566040039 }, { "epoch": 4.33380126953125e-05, "model_forward_time": 0.0251920223236084, "step": 28402 }, { "epoch": 4.33380126953125e-05, "step": 28402, "training_step_time": 0.10990762710571289 }, { "epoch": 4.333953857421875e-05, "model_forward_time": 0.025344133377075195, "step": 28403 }, { "epoch": 4.333953857421875e-05, "step": 28403, "training_step_time": 0.11408782005310059 }, { "epoch": 4.3341064453125e-05, "model_forward_time": 0.02684807777404785, "step": 28404 }, { "epoch": 4.3341064453125e-05, "step": 28404, "training_step_time": 0.10718750953674316 }, { "epoch": 4.334259033203125e-05, "model_forward_time": 0.025441884994506836, "step": 28405 }, { "epoch": 4.334259033203125e-05, "step": 28405, "training_step_time": 0.19615864753723145 }, { "epoch": 4.33441162109375e-05, "model_forward_time": 0.024446487426757812, "step": 28406 }, { "epoch": 4.33441162109375e-05, "step": 28406, "training_step_time": 0.10939240455627441 }, { "epoch": 4.334564208984375e-05, "model_forward_time": 0.026717185974121094, "step": 28407 }, { "epoch": 4.334564208984375e-05, "step": 28407, "training_step_time": 0.10455703735351562 }, { "epoch": 4.334716796875e-05, "model_forward_time": 0.02548360824584961, "step": 28408 }, { "epoch": 4.334716796875e-05, "step": 28408, "training_step_time": 0.10851883888244629 }, { "epoch": 4.334869384765625e-05, "model_forward_time": 0.025321006774902344, "step": 28409 }, { "epoch": 4.334869384765625e-05, "step": 28409, "training_step_time": 0.10486459732055664 }, { "epoch": 4.33502197265625e-05, "grad_norm": 0.5670133233070374, "learning_rate": 7.66006134100672e-07, "loss": 0.0056, "step": 28410 }, { "epoch": 4.33502197265625e-05, "model_forward_time": 0.025903940200805664, "step": 28410 }, { "epoch": 4.33502197265625e-05, "step": 28410, "training_step_time": 0.10527443885803223 }, { "epoch": 4.335174560546875e-05, "model_forward_time": 0.025406837463378906, "step": 28411 }, { "epoch": 4.335174560546875e-05, "step": 28411, "training_step_time": 0.10618281364440918 }, { "epoch": 4.3353271484375e-05, "model_forward_time": 0.025653839111328125, "step": 28412 }, { "epoch": 4.3353271484375e-05, "step": 28412, "training_step_time": 0.10473847389221191 }, { "epoch": 4.335479736328125e-05, "model_forward_time": 0.025304794311523438, "step": 28413 }, { "epoch": 4.335479736328125e-05, "step": 28413, "training_step_time": 0.10793423652648926 }, { "epoch": 4.33563232421875e-05, "model_forward_time": 0.026701688766479492, "step": 28414 }, { "epoch": 4.33563232421875e-05, "step": 28414, "training_step_time": 0.10609793663024902 }, { "epoch": 4.335784912109375e-05, "model_forward_time": 0.025300979614257812, "step": 28415 }, { "epoch": 4.335784912109375e-05, "step": 28415, "training_step_time": 0.10976386070251465 }, { "epoch": 4.3359375e-05, "model_forward_time": 0.025431394577026367, "step": 28416 }, { "epoch": 4.3359375e-05, "step": 28416, "training_step_time": 0.10615658760070801 }, { "epoch": 4.336090087890625e-05, "model_forward_time": 0.025214672088623047, "step": 28417 }, { "epoch": 4.336090087890625e-05, "step": 28417, "training_step_time": 0.10617351531982422 }, { "epoch": 4.33624267578125e-05, "model_forward_time": 0.025531530380249023, "step": 28418 }, { "epoch": 4.33624267578125e-05, "step": 28418, "training_step_time": 0.10517525672912598 }, { "epoch": 4.336395263671875e-05, "model_forward_time": 0.025285959243774414, "step": 28419 }, { "epoch": 4.336395263671875e-05, "step": 28419, "training_step_time": 0.10478591918945312 }, { "epoch": 4.3365478515625e-05, "grad_norm": 0.13137125968933105, "learning_rate": 7.564254287717176e-07, "loss": 0.008, "step": 28420 }, { "epoch": 4.3365478515625e-05, "model_forward_time": 0.02513718605041504, "step": 28420 }, { "epoch": 4.3365478515625e-05, "step": 28420, "training_step_time": 0.10477781295776367 }, { "epoch": 4.336700439453125e-05, "model_forward_time": 0.02508544921875, "step": 28421 }, { "epoch": 4.336700439453125e-05, "step": 28421, "training_step_time": 0.10487127304077148 }, { "epoch": 4.33685302734375e-05, "model_forward_time": 0.026108980178833008, "step": 28422 }, { "epoch": 4.33685302734375e-05, "step": 28422, "training_step_time": 0.10954022407531738 }, { "epoch": 4.337005615234375e-05, "model_forward_time": 0.025206804275512695, "step": 28423 }, { "epoch": 4.337005615234375e-05, "step": 28423, "training_step_time": 0.10972356796264648 }, { "epoch": 4.337158203125e-05, "model_forward_time": 0.028063535690307617, "step": 28424 }, { "epoch": 4.337158203125e-05, "step": 28424, "training_step_time": 0.11254763603210449 }, { "epoch": 4.337310791015625e-05, "model_forward_time": 0.025130271911621094, "step": 28425 }, { "epoch": 4.337310791015625e-05, "step": 28425, "training_step_time": 0.10553693771362305 }, { "epoch": 4.33746337890625e-05, "model_forward_time": 0.025455713272094727, "step": 28426 }, { "epoch": 4.33746337890625e-05, "step": 28426, "training_step_time": 0.1358485221862793 }, { "epoch": 4.337615966796875e-05, "model_forward_time": 0.025660276412963867, "step": 28427 }, { "epoch": 4.337615966796875e-05, "step": 28427, "training_step_time": 0.11090707778930664 }, { "epoch": 4.3377685546875e-05, "model_forward_time": 0.02532219886779785, "step": 28428 }, { "epoch": 4.3377685546875e-05, "step": 28428, "training_step_time": 0.10703229904174805 }, { "epoch": 4.337921142578125e-05, "model_forward_time": 0.025213956832885742, "step": 28429 }, { "epoch": 4.337921142578125e-05, "step": 28429, "training_step_time": 0.11951375007629395 }, { "epoch": 4.33807373046875e-05, "grad_norm": 0.20636147260665894, "learning_rate": 7.469045590292323e-07, "loss": 0.0043, "step": 28430 }, { "epoch": 4.33807373046875e-05, "model_forward_time": 0.025378942489624023, "step": 28430 }, { "epoch": 4.33807373046875e-05, "step": 28430, "training_step_time": 0.11332273483276367 }, { "epoch": 4.338226318359375e-05, "model_forward_time": 0.02594447135925293, "step": 28431 }, { "epoch": 4.338226318359375e-05, "step": 28431, "training_step_time": 0.12037944793701172 }, { "epoch": 4.33837890625e-05, "model_forward_time": 0.027541637420654297, "step": 28432 }, { "epoch": 4.33837890625e-05, "step": 28432, "training_step_time": 0.16683673858642578 }, { "epoch": 4.338531494140625e-05, "model_forward_time": 0.024903297424316406, "step": 28433 }, { "epoch": 4.338531494140625e-05, "step": 28433, "training_step_time": 0.1829085350036621 }, { "epoch": 4.33868408203125e-05, "model_forward_time": 0.02461409568786621, "step": 28434 }, { "epoch": 4.33868408203125e-05, "step": 28434, "training_step_time": 0.10988569259643555 }, { "epoch": 4.338836669921875e-05, "model_forward_time": 0.024920940399169922, "step": 28435 }, { "epoch": 4.338836669921875e-05, "step": 28435, "training_step_time": 0.11377382278442383 }, { "epoch": 4.3389892578125e-05, "model_forward_time": 0.02543783187866211, "step": 28436 }, { "epoch": 4.3389892578125e-05, "step": 28436, "training_step_time": 0.11171746253967285 }, { "epoch": 4.339141845703125e-05, "model_forward_time": 0.02561497688293457, "step": 28437 }, { "epoch": 4.339141845703125e-05, "step": 28437, "training_step_time": 0.1553177833557129 }, { "epoch": 4.33929443359375e-05, "model_forward_time": 0.024953842163085938, "step": 28438 }, { "epoch": 4.33929443359375e-05, "step": 28438, "training_step_time": 0.10993361473083496 }, { "epoch": 4.339447021484375e-05, "model_forward_time": 0.025770187377929688, "step": 28439 }, { "epoch": 4.339447021484375e-05, "step": 28439, "training_step_time": 0.12657999992370605 }, { "epoch": 4.339599609375e-05, "grad_norm": 0.04000052809715271, "learning_rate": 7.374435364419674e-07, "loss": 0.0032, "step": 28440 }, { "epoch": 4.339599609375e-05, "model_forward_time": 0.025289058685302734, "step": 28440 }, { "epoch": 4.339599609375e-05, "step": 28440, "training_step_time": 0.1298682689666748 }, { "epoch": 4.339752197265625e-05, "model_forward_time": 0.024939537048339844, "step": 28441 }, { "epoch": 4.339752197265625e-05, "step": 28441, "training_step_time": 0.12552380561828613 }, { "epoch": 4.33990478515625e-05, "model_forward_time": 0.02550053596496582, "step": 28442 }, { "epoch": 4.33990478515625e-05, "step": 28442, "training_step_time": 0.11357283592224121 }, { "epoch": 4.340057373046875e-05, "model_forward_time": 0.025386571884155273, "step": 28443 }, { "epoch": 4.340057373046875e-05, "step": 28443, "training_step_time": 0.11450409889221191 }, { "epoch": 4.3402099609375e-05, "model_forward_time": 0.025455713272094727, "step": 28444 }, { "epoch": 4.3402099609375e-05, "step": 28444, "training_step_time": 0.14101576805114746 }, { "epoch": 4.340362548828125e-05, "model_forward_time": 0.024930477142333984, "step": 28445 }, { "epoch": 4.340362548828125e-05, "step": 28445, "training_step_time": 0.10496807098388672 }, { "epoch": 4.34051513671875e-05, "model_forward_time": 0.025425195693969727, "step": 28446 }, { "epoch": 4.34051513671875e-05, "step": 28446, "training_step_time": 0.10696625709533691 }, { "epoch": 4.340667724609375e-05, "model_forward_time": 0.025606155395507812, "step": 28447 }, { "epoch": 4.340667724609375e-05, "step": 28447, "training_step_time": 0.11908602714538574 }, { "epoch": 4.3408203125e-05, "model_forward_time": 0.025002241134643555, "step": 28448 }, { "epoch": 4.3408203125e-05, "step": 28448, "training_step_time": 0.13784241676330566 }, { "epoch": 4.340972900390625e-05, "model_forward_time": 0.025092601776123047, "step": 28449 }, { "epoch": 4.340972900390625e-05, "step": 28449, "training_step_time": 0.10826444625854492 }, { "epoch": 4.34112548828125e-05, "grad_norm": 0.029679667204618454, "learning_rate": 7.280423725059604e-07, "loss": 0.0034, "step": 28450 }, { "epoch": 4.34112548828125e-05, "model_forward_time": 0.02518630027770996, "step": 28450 }, { "epoch": 4.34112548828125e-05, "step": 28450, "training_step_time": 0.10819125175476074 }, { "epoch": 4.341278076171875e-05, "model_forward_time": 0.025093555450439453, "step": 28451 }, { "epoch": 4.341278076171875e-05, "step": 28451, "training_step_time": 0.11107993125915527 }, { "epoch": 4.3414306640625e-05, "model_forward_time": 0.02514958381652832, "step": 28452 }, { "epoch": 4.3414306640625e-05, "step": 28452, "training_step_time": 0.10954093933105469 }, { "epoch": 4.341583251953125e-05, "model_forward_time": 0.025235652923583984, "step": 28453 }, { "epoch": 4.341583251953125e-05, "step": 28453, "training_step_time": 0.19406819343566895 }, { "epoch": 4.34173583984375e-05, "model_forward_time": 0.028592348098754883, "step": 28454 }, { "epoch": 4.34173583984375e-05, "step": 28454, "training_step_time": 0.10696721076965332 }, { "epoch": 4.341888427734375e-05, "model_forward_time": 0.024432897567749023, "step": 28455 }, { "epoch": 4.341888427734375e-05, "step": 28455, "training_step_time": 0.10461091995239258 }, { "epoch": 4.342041015625e-05, "model_forward_time": 0.025321483612060547, "step": 28456 }, { "epoch": 4.342041015625e-05, "step": 28456, "training_step_time": 0.10473322868347168 }, { "epoch": 4.342193603515625e-05, "model_forward_time": 0.024932146072387695, "step": 28457 }, { "epoch": 4.342193603515625e-05, "step": 28457, "training_step_time": 0.1053776741027832 }, { "epoch": 4.34234619140625e-05, "model_forward_time": 0.025384902954101562, "step": 28458 }, { "epoch": 4.34234619140625e-05, "step": 28458, "training_step_time": 0.11069917678833008 }, { "epoch": 4.342498779296875e-05, "model_forward_time": 0.026886940002441406, "step": 28459 }, { "epoch": 4.342498779296875e-05, "step": 28459, "training_step_time": 0.12270545959472656 }, { "epoch": 4.3426513671875e-05, "grad_norm": 0.07157254219055176, "learning_rate": 7.187010786445181e-07, "loss": 0.0056, "step": 28460 }, { "epoch": 4.3426513671875e-05, "model_forward_time": 0.02575206756591797, "step": 28460 }, { "epoch": 4.3426513671875e-05, "step": 28460, "training_step_time": 0.12765192985534668 }, { "epoch": 4.342803955078125e-05, "model_forward_time": 0.024914979934692383, "step": 28461 }, { "epoch": 4.342803955078125e-05, "step": 28461, "training_step_time": 0.12010741233825684 }, { "epoch": 4.34295654296875e-05, "model_forward_time": 0.026850461959838867, "step": 28462 }, { "epoch": 4.34295654296875e-05, "step": 28462, "training_step_time": 0.12210941314697266 }, { "epoch": 4.343109130859375e-05, "model_forward_time": 0.025312185287475586, "step": 28463 }, { "epoch": 4.343109130859375e-05, "step": 28463, "training_step_time": 0.12207746505737305 }, { "epoch": 4.34326171875e-05, "model_forward_time": 0.024826765060424805, "step": 28464 }, { "epoch": 4.34326171875e-05, "step": 28464, "training_step_time": 0.11501002311706543 }, { "epoch": 4.343414306640625e-05, "model_forward_time": 0.02542424201965332, "step": 28465 }, { "epoch": 4.343414306640625e-05, "step": 28465, "training_step_time": 0.11893677711486816 }, { "epoch": 4.34356689453125e-05, "model_forward_time": 0.025875091552734375, "step": 28466 }, { "epoch": 4.34356689453125e-05, "step": 28466, "training_step_time": 0.10900759696960449 }, { "epoch": 4.343719482421875e-05, "model_forward_time": 0.025378942489624023, "step": 28467 }, { "epoch": 4.343719482421875e-05, "step": 28467, "training_step_time": 0.11067676544189453 }, { "epoch": 4.3438720703125e-05, "model_forward_time": 0.025061845779418945, "step": 28468 }, { "epoch": 4.3438720703125e-05, "step": 28468, "training_step_time": 0.1082911491394043 }, { "epoch": 4.344024658203125e-05, "model_forward_time": 0.02568197250366211, "step": 28469 }, { "epoch": 4.344024658203125e-05, "step": 28469, "training_step_time": 0.10946369171142578 }, { "epoch": 4.34417724609375e-05, "grad_norm": 0.09917882084846497, "learning_rate": 7.094196662081831e-07, "loss": 0.0037, "step": 28470 }, { "epoch": 4.34417724609375e-05, "model_forward_time": 0.025172948837280273, "step": 28470 }, { "epoch": 4.34417724609375e-05, "step": 28470, "training_step_time": 0.1076650619506836 }, { "epoch": 4.344329833984375e-05, "model_forward_time": 0.025418519973754883, "step": 28471 }, { "epoch": 4.344329833984375e-05, "step": 28471, "training_step_time": 0.10642600059509277 }, { "epoch": 4.344482421875e-05, "model_forward_time": 0.025420427322387695, "step": 28472 }, { "epoch": 4.344482421875e-05, "step": 28472, "training_step_time": 0.10732269287109375 }, { "epoch": 4.344635009765625e-05, "model_forward_time": 0.025669097900390625, "step": 28473 }, { "epoch": 4.344635009765625e-05, "step": 28473, "training_step_time": 0.16993093490600586 }, { "epoch": 4.34478759765625e-05, "model_forward_time": 0.025479793548583984, "step": 28474 }, { "epoch": 4.34478759765625e-05, "step": 28474, "training_step_time": 0.10505270957946777 }, { "epoch": 4.344940185546875e-05, "model_forward_time": 0.02503490447998047, "step": 28475 }, { "epoch": 4.344940185546875e-05, "step": 28475, "training_step_time": 0.17090129852294922 }, { "epoch": 4.3450927734375e-05, "model_forward_time": 0.024590253829956055, "step": 28476 }, { "epoch": 4.3450927734375e-05, "step": 28476, "training_step_time": 0.16178441047668457 }, { "epoch": 4.345245361328125e-05, "model_forward_time": 0.024831295013427734, "step": 28477 }, { "epoch": 4.345245361328125e-05, "step": 28477, "training_step_time": 0.14457058906555176 }, { "epoch": 4.34539794921875e-05, "model_forward_time": 0.02495551109313965, "step": 28478 }, { "epoch": 4.34539794921875e-05, "step": 28478, "training_step_time": 0.1083688735961914 }, { "epoch": 4.345550537109375e-05, "model_forward_time": 0.025355100631713867, "step": 28479 }, { "epoch": 4.345550537109375e-05, "step": 28479, "training_step_time": 0.19072651863098145 }, { "epoch": 4.345703125e-05, "grad_norm": 0.3075224459171295, "learning_rate": 7.001981464747565e-07, "loss": 0.0089, "step": 28480 }, { "epoch": 4.345703125e-05, "model_forward_time": 0.02473592758178711, "step": 28480 }, { "epoch": 4.345703125e-05, "step": 28480, "training_step_time": 0.10347795486450195 }, { "epoch": 4.345855712890625e-05, "model_forward_time": 0.024626731872558594, "step": 28481 }, { "epoch": 4.345855712890625e-05, "step": 28481, "training_step_time": 0.10383129119873047 }, { "epoch": 4.34600830078125e-05, "model_forward_time": 0.025348663330078125, "step": 28482 }, { "epoch": 4.34600830078125e-05, "step": 28482, "training_step_time": 0.10932469367980957 }, { "epoch": 4.346160888671875e-05, "model_forward_time": 0.025280237197875977, "step": 28483 }, { "epoch": 4.346160888671875e-05, "step": 28483, "training_step_time": 0.10519838333129883 }, { "epoch": 4.3463134765625e-05, "model_forward_time": 0.025243759155273438, "step": 28484 }, { "epoch": 4.3463134765625e-05, "step": 28484, "training_step_time": 0.17499041557312012 }, { "epoch": 4.346466064453125e-05, "model_forward_time": 0.02454066276550293, "step": 28485 }, { "epoch": 4.346466064453125e-05, "step": 28485, "training_step_time": 0.22075915336608887 }, { "epoch": 4.34661865234375e-05, "model_forward_time": 0.0244905948638916, "step": 28486 }, { "epoch": 4.34661865234375e-05, "step": 28486, "training_step_time": 0.10803580284118652 }, { "epoch": 4.346771240234375e-05, "model_forward_time": 0.024473905563354492, "step": 28487 }, { "epoch": 4.346771240234375e-05, "step": 28487, "training_step_time": 0.11919045448303223 }, { "epoch": 4.346923828125e-05, "model_forward_time": 0.025184154510498047, "step": 28488 }, { "epoch": 4.346923828125e-05, "step": 28488, "training_step_time": 0.11977267265319824 }, { "epoch": 4.347076416015625e-05, "model_forward_time": 0.025565624237060547, "step": 28489 }, { "epoch": 4.347076416015625e-05, "step": 28489, "training_step_time": 0.10879993438720703 }, { "epoch": 4.34722900390625e-05, "grad_norm": 0.05301102250814438, "learning_rate": 6.910365306492416e-07, "loss": 0.0019, "step": 28490 }, { "epoch": 4.34722900390625e-05, "model_forward_time": 0.026027202606201172, "step": 28490 }, { "epoch": 4.34722900390625e-05, "step": 28490, "training_step_time": 0.17548179626464844 }, { "epoch": 4.347381591796875e-05, "model_forward_time": 0.024810075759887695, "step": 28491 }, { "epoch": 4.347381591796875e-05, "step": 28491, "training_step_time": 0.17010188102722168 }, { "epoch": 4.3475341796875e-05, "model_forward_time": 0.02449965476989746, "step": 28492 }, { "epoch": 4.3475341796875e-05, "step": 28492, "training_step_time": 0.20152664184570312 }, { "epoch": 4.347686767578125e-05, "model_forward_time": 0.024408578872680664, "step": 28493 }, { "epoch": 4.347686767578125e-05, "step": 28493, "training_step_time": 0.1467597484588623 }, { "epoch": 4.34783935546875e-05, "model_forward_time": 0.024835824966430664, "step": 28494 }, { "epoch": 4.34783935546875e-05, "step": 28494, "training_step_time": 0.1776142120361328 }, { "epoch": 4.347991943359375e-05, "model_forward_time": 0.02476811408996582, "step": 28495 }, { "epoch": 4.347991943359375e-05, "step": 28495, "training_step_time": 0.10346817970275879 }, { "epoch": 4.34814453125e-05, "model_forward_time": 0.024451255798339844, "step": 28496 }, { "epoch": 4.34814453125e-05, "step": 28496, "training_step_time": 0.18772602081298828 }, { "epoch": 4.348297119140625e-05, "model_forward_time": 0.024767160415649414, "step": 28497 }, { "epoch": 4.348297119140625e-05, "step": 28497, "training_step_time": 0.10262608528137207 }, { "epoch": 4.34844970703125e-05, "model_forward_time": 0.024786710739135742, "step": 28498 }, { "epoch": 4.34844970703125e-05, "step": 28498, "training_step_time": 0.10196542739868164 }, { "epoch": 4.348602294921875e-05, "model_forward_time": 0.025360822677612305, "step": 28499 }, { "epoch": 4.348602294921875e-05, "step": 28499, "training_step_time": 0.10585379600524902 }, { "epoch": 4.3487548828125e-05, "grad_norm": 0.04340476915240288, "learning_rate": 6.819348298638839e-07, "loss": 0.0051, "step": 28500 }, { "epoch": 4.3487548828125e-05, "model_forward_time": 0.024919748306274414, "step": 28500 }, { "epoch": 4.3487548828125e-05, "step": 28500, "training_step_time": 0.10794901847839355 }, { "epoch": 4.348907470703125e-05, "model_forward_time": 0.025435924530029297, "step": 28501 }, { "epoch": 4.348907470703125e-05, "step": 28501, "training_step_time": 0.1041259765625 }, { "epoch": 4.34906005859375e-05, "model_forward_time": 0.02566981315612793, "step": 28502 }, { "epoch": 4.34906005859375e-05, "step": 28502, "training_step_time": 0.10534191131591797 }, { "epoch": 4.349212646484375e-05, "model_forward_time": 0.025554656982421875, "step": 28503 }, { "epoch": 4.349212646484375e-05, "step": 28503, "training_step_time": 0.11136603355407715 }, { "epoch": 4.349365234375e-05, "model_forward_time": 0.025473356246948242, "step": 28504 }, { "epoch": 4.349365234375e-05, "step": 28504, "training_step_time": 0.10604190826416016 }, { "epoch": 4.349517822265625e-05, "model_forward_time": 0.027715682983398438, "step": 28505 }, { "epoch": 4.349517822265625e-05, "step": 28505, "training_step_time": 0.10896015167236328 }, { "epoch": 4.34967041015625e-05, "model_forward_time": 0.025409221649169922, "step": 28506 }, { "epoch": 4.34967041015625e-05, "step": 28506, "training_step_time": 0.1063232421875 }, { "epoch": 4.349822998046875e-05, "model_forward_time": 0.025758743286132812, "step": 28507 }, { "epoch": 4.349822998046875e-05, "step": 28507, "training_step_time": 0.10753798484802246 }, { "epoch": 4.3499755859375e-05, "model_forward_time": 0.025473356246948242, "step": 28508 }, { "epoch": 4.3499755859375e-05, "step": 28508, "training_step_time": 0.11147928237915039 }, { "epoch": 4.350128173828125e-05, "model_forward_time": 0.025168418884277344, "step": 28509 }, { "epoch": 4.350128173828125e-05, "step": 28509, "training_step_time": 0.11475610733032227 }, { "epoch": 4.35028076171875e-05, "grad_norm": 0.3876003324985504, "learning_rate": 6.728930551780865e-07, "loss": 0.004, "step": 28510 }, { "epoch": 4.35028076171875e-05, "model_forward_time": 0.02562117576599121, "step": 28510 }, { "epoch": 4.35028076171875e-05, "step": 28510, "training_step_time": 0.10536026954650879 }, { "epoch": 4.350433349609375e-05, "model_forward_time": 0.025583982467651367, "step": 28511 }, { "epoch": 4.350433349609375e-05, "step": 28511, "training_step_time": 0.10833311080932617 }, { "epoch": 4.3505859375e-05, "model_forward_time": 0.026442527770996094, "step": 28512 }, { "epoch": 4.3505859375e-05, "step": 28512, "training_step_time": 0.10732173919677734 }, { "epoch": 4.350738525390625e-05, "model_forward_time": 0.025744199752807617, "step": 28513 }, { "epoch": 4.350738525390625e-05, "step": 28513, "training_step_time": 0.11157751083374023 }, { "epoch": 4.35089111328125e-05, "model_forward_time": 0.025368452072143555, "step": 28514 }, { "epoch": 4.35089111328125e-05, "step": 28514, "training_step_time": 0.10731792449951172 }, { "epoch": 4.351043701171875e-05, "model_forward_time": 0.0254819393157959, "step": 28515 }, { "epoch": 4.351043701171875e-05, "step": 28515, "training_step_time": 0.10789299011230469 }, { "epoch": 4.3511962890625e-05, "model_forward_time": 0.02768397331237793, "step": 28516 }, { "epoch": 4.3511962890625e-05, "step": 28516, "training_step_time": 0.11053299903869629 }, { "epoch": 4.351348876953125e-05, "model_forward_time": 0.027440547943115234, "step": 28517 }, { "epoch": 4.351348876953125e-05, "step": 28517, "training_step_time": 0.17016959190368652 }, { "epoch": 4.35150146484375e-05, "model_forward_time": 0.026053905487060547, "step": 28518 }, { "epoch": 4.35150146484375e-05, "step": 28518, "training_step_time": 0.10873699188232422 }, { "epoch": 4.351654052734375e-05, "model_forward_time": 0.025048255920410156, "step": 28519 }, { "epoch": 4.351654052734375e-05, "step": 28519, "training_step_time": 0.17573928833007812 }, { "epoch": 4.351806640625e-05, "grad_norm": 0.04569575935602188, "learning_rate": 6.639112175784778e-07, "loss": 0.0078, "step": 28520 }, { "epoch": 4.351806640625e-05, "model_forward_time": 0.026684999465942383, "step": 28520 }, { "epoch": 4.351806640625e-05, "step": 28520, "training_step_time": 0.18541431427001953 }, { "epoch": 4.351959228515625e-05, "model_forward_time": 0.024338960647583008, "step": 28521 }, { "epoch": 4.351959228515625e-05, "step": 28521, "training_step_time": 0.18069696426391602 }, { "epoch": 4.35211181640625e-05, "model_forward_time": 0.02463078498840332, "step": 28522 }, { "epoch": 4.35211181640625e-05, "step": 28522, "training_step_time": 0.1444244384765625 }, { "epoch": 4.352264404296875e-05, "model_forward_time": 0.0252532958984375, "step": 28523 }, { "epoch": 4.352264404296875e-05, "step": 28523, "training_step_time": 0.10292387008666992 }, { "epoch": 4.3524169921875e-05, "model_forward_time": 0.025261878967285156, "step": 28524 }, { "epoch": 4.3524169921875e-05, "step": 28524, "training_step_time": 0.10213589668273926 }, { "epoch": 4.352569580078125e-05, "model_forward_time": 0.025493383407592773, "step": 28525 }, { "epoch": 4.352569580078125e-05, "step": 28525, "training_step_time": 0.10588884353637695 }, { "epoch": 4.35272216796875e-05, "model_forward_time": 0.025641918182373047, "step": 28526 }, { "epoch": 4.35272216796875e-05, "step": 28526, "training_step_time": 0.11014914512634277 }, { "epoch": 4.352874755859375e-05, "model_forward_time": 0.024883747100830078, "step": 28527 }, { "epoch": 4.352874755859375e-05, "step": 28527, "training_step_time": 0.11501193046569824 }, { "epoch": 4.35302734375e-05, "model_forward_time": 0.025816679000854492, "step": 28528 }, { "epoch": 4.35302734375e-05, "step": 28528, "training_step_time": 0.2022254467010498 }, { "epoch": 4.353179931640625e-05, "model_forward_time": 0.024451017379760742, "step": 28529 }, { "epoch": 4.353179931640625e-05, "step": 28529, "training_step_time": 0.21092963218688965 }, { "epoch": 4.35333251953125e-05, "grad_norm": 0.15558089315891266, "learning_rate": 6.549893279788277e-07, "loss": 0.0054, "step": 28530 }, { "epoch": 4.35333251953125e-05, "model_forward_time": 0.025069713592529297, "step": 28530 }, { "epoch": 4.35333251953125e-05, "step": 28530, "training_step_time": 0.1679999828338623 }, { "epoch": 4.353485107421875e-05, "model_forward_time": 0.024863243103027344, "step": 28531 }, { "epoch": 4.353485107421875e-05, "step": 28531, "training_step_time": 0.15567612648010254 }, { "epoch": 4.3536376953125e-05, "model_forward_time": 0.025053739547729492, "step": 28532 }, { "epoch": 4.3536376953125e-05, "step": 28532, "training_step_time": 0.11257410049438477 }, { "epoch": 4.353790283203125e-05, "model_forward_time": 0.026127338409423828, "step": 28533 }, { "epoch": 4.353790283203125e-05, "step": 28533, "training_step_time": 0.1369321346282959 }, { "epoch": 4.35394287109375e-05, "model_forward_time": 0.025477886199951172, "step": 28534 }, { "epoch": 4.35394287109375e-05, "step": 28534, "training_step_time": 0.1584947109222412 }, { "epoch": 4.354095458984375e-05, "model_forward_time": 0.025111913681030273, "step": 28535 }, { "epoch": 4.354095458984375e-05, "step": 28535, "training_step_time": 0.17205452919006348 }, { "epoch": 4.354248046875e-05, "model_forward_time": 0.024799346923828125, "step": 28536 }, { "epoch": 4.354248046875e-05, "step": 28536, "training_step_time": 0.11008405685424805 }, { "epoch": 4.354400634765625e-05, "model_forward_time": 0.025413036346435547, "step": 28537 }, { "epoch": 4.354400634765625e-05, "step": 28537, "training_step_time": 0.1068882942199707 }, { "epoch": 4.35455322265625e-05, "model_forward_time": 0.026021957397460938, "step": 28538 }, { "epoch": 4.35455322265625e-05, "step": 28538, "training_step_time": 0.12607479095458984 }, { "epoch": 4.354705810546875e-05, "model_forward_time": 0.025748252868652344, "step": 28539 }, { "epoch": 4.354705810546875e-05, "step": 28539, "training_step_time": 0.10910534858703613 }, { "epoch": 4.3548583984375e-05, "grad_norm": 0.048946134746074677, "learning_rate": 6.461273972200755e-07, "loss": 0.0032, "step": 28540 }, { "epoch": 4.3548583984375e-05, "model_forward_time": 0.026276826858520508, "step": 28540 }, { "epoch": 4.3548583984375e-05, "step": 28540, "training_step_time": 0.1281576156616211 }, { "epoch": 4.355010986328125e-05, "model_forward_time": 0.02550482749938965, "step": 28541 }, { "epoch": 4.355010986328125e-05, "step": 28541, "training_step_time": 0.12551021575927734 }, { "epoch": 4.35516357421875e-05, "model_forward_time": 0.025445222854614258, "step": 28542 }, { "epoch": 4.35516357421875e-05, "step": 28542, "training_step_time": 0.10669088363647461 }, { "epoch": 4.355316162109375e-05, "model_forward_time": 0.02532196044921875, "step": 28543 }, { "epoch": 4.355316162109375e-05, "step": 28543, "training_step_time": 0.10470199584960938 }, { "epoch": 4.35546875e-05, "model_forward_time": 0.025837182998657227, "step": 28544 }, { "epoch": 4.35546875e-05, "step": 28544, "training_step_time": 0.10712647438049316 }, { "epoch": 4.355621337890625e-05, "model_forward_time": 0.025485515594482422, "step": 28545 }, { "epoch": 4.355621337890625e-05, "step": 28545, "training_step_time": 0.1055910587310791 }, { "epoch": 4.35577392578125e-05, "model_forward_time": 0.025990724563598633, "step": 28546 }, { "epoch": 4.35577392578125e-05, "step": 28546, "training_step_time": 0.10730719566345215 }, { "epoch": 4.355926513671875e-05, "model_forward_time": 0.025641441345214844, "step": 28547 }, { "epoch": 4.355926513671875e-05, "step": 28547, "training_step_time": 0.10591316223144531 }, { "epoch": 4.3560791015625e-05, "model_forward_time": 0.025481224060058594, "step": 28548 }, { "epoch": 4.3560791015625e-05, "step": 28548, "training_step_time": 0.11147642135620117 }, { "epoch": 4.356231689453125e-05, "model_forward_time": 0.025760412216186523, "step": 28549 }, { "epoch": 4.356231689453125e-05, "step": 28549, "training_step_time": 0.10542678833007812 }, { "epoch": 4.35638427734375e-05, "grad_norm": 0.12368736416101456, "learning_rate": 6.373254360703018e-07, "loss": 0.0075, "step": 28550 }, { "epoch": 4.35638427734375e-05, "model_forward_time": 0.02554178237915039, "step": 28550 }, { "epoch": 4.35638427734375e-05, "step": 28550, "training_step_time": 0.1055300235748291 }, { "epoch": 4.356536865234375e-05, "model_forward_time": 0.025308847427368164, "step": 28551 }, { "epoch": 4.356536865234375e-05, "step": 28551, "training_step_time": 0.10625481605529785 }, { "epoch": 4.356689453125e-05, "model_forward_time": 0.02552199363708496, "step": 28552 }, { "epoch": 4.356689453125e-05, "step": 28552, "training_step_time": 0.10514521598815918 }, { "epoch": 4.356842041015625e-05, "model_forward_time": 0.025453805923461914, "step": 28553 }, { "epoch": 4.356842041015625e-05, "step": 28553, "training_step_time": 0.10446619987487793 }, { "epoch": 4.35699462890625e-05, "model_forward_time": 0.02574896812438965, "step": 28554 }, { "epoch": 4.35699462890625e-05, "step": 28554, "training_step_time": 0.10512971878051758 }, { "epoch": 4.357147216796875e-05, "model_forward_time": 0.025708436965942383, "step": 28555 }, { "epoch": 4.357147216796875e-05, "step": 28555, "training_step_time": 0.10616421699523926 }, { "epoch": 4.3572998046875e-05, "model_forward_time": 0.02606034278869629, "step": 28556 }, { "epoch": 4.3572998046875e-05, "step": 28556, "training_step_time": 0.11003375053405762 }, { "epoch": 4.357452392578125e-05, "model_forward_time": 0.026695966720581055, "step": 28557 }, { "epoch": 4.357452392578125e-05, "step": 28557, "training_step_time": 0.10884666442871094 }, { "epoch": 4.35760498046875e-05, "model_forward_time": 0.025632619857788086, "step": 28558 }, { "epoch": 4.35760498046875e-05, "step": 28558, "training_step_time": 0.10709595680236816 }, { "epoch": 4.357757568359375e-05, "model_forward_time": 0.025560617446899414, "step": 28559 }, { "epoch": 4.357757568359375e-05, "step": 28559, "training_step_time": 0.10614800453186035 }, { "epoch": 4.35791015625e-05, "grad_norm": 0.0649910569190979, "learning_rate": 6.285834552247128e-07, "loss": 0.006, "step": 28560 }, { "epoch": 4.35791015625e-05, "model_forward_time": 0.02550482749938965, "step": 28560 }, { "epoch": 4.35791015625e-05, "step": 28560, "training_step_time": 0.10710692405700684 }, { "epoch": 4.358062744140625e-05, "model_forward_time": 0.026319503784179688, "step": 28561 }, { "epoch": 4.358062744140625e-05, "step": 28561, "training_step_time": 0.1077280044555664 }, { "epoch": 4.35821533203125e-05, "model_forward_time": 0.025849103927612305, "step": 28562 }, { "epoch": 4.35821533203125e-05, "step": 28562, "training_step_time": 0.13392400741577148 }, { "epoch": 4.358367919921875e-05, "model_forward_time": 0.02629566192626953, "step": 28563 }, { "epoch": 4.358367919921875e-05, "step": 28563, "training_step_time": 0.10712862014770508 }, { "epoch": 4.3585205078125e-05, "model_forward_time": 0.025490283966064453, "step": 28564 }, { "epoch": 4.3585205078125e-05, "step": 28564, "training_step_time": 0.14705657958984375 }, { "epoch": 4.358673095703125e-05, "model_forward_time": 0.02550506591796875, "step": 28565 }, { "epoch": 4.358673095703125e-05, "step": 28565, "training_step_time": 0.1252429485321045 }, { "epoch": 4.35882568359375e-05, "model_forward_time": 0.025360584259033203, "step": 28566 }, { "epoch": 4.35882568359375e-05, "step": 28566, "training_step_time": 0.2146608829498291 }, { "epoch": 4.358978271484375e-05, "model_forward_time": 0.024610280990600586, "step": 28567 }, { "epoch": 4.358978271484375e-05, "step": 28567, "training_step_time": 0.12557053565979004 }, { "epoch": 4.359130859375e-05, "model_forward_time": 0.025031566619873047, "step": 28568 }, { "epoch": 4.359130859375e-05, "step": 28568, "training_step_time": 0.12017822265625 }, { "epoch": 4.359283447265625e-05, "model_forward_time": 0.025310754776000977, "step": 28569 }, { "epoch": 4.359283447265625e-05, "step": 28569, "training_step_time": 0.1103971004486084 }, { "epoch": 4.35943603515625e-05, "grad_norm": 0.0443444661796093, "learning_rate": 6.1990146530565e-07, "loss": 0.0124, "step": 28570 }, { "epoch": 4.35943603515625e-05, "model_forward_time": 0.024916410446166992, "step": 28570 }, { "epoch": 4.35943603515625e-05, "step": 28570, "training_step_time": 0.10602307319641113 }, { "epoch": 4.359588623046875e-05, "model_forward_time": 0.025303125381469727, "step": 28571 }, { "epoch": 4.359588623046875e-05, "step": 28571, "training_step_time": 0.1060328483581543 }, { "epoch": 4.3597412109375e-05, "model_forward_time": 0.025837421417236328, "step": 28572 }, { "epoch": 4.3597412109375e-05, "step": 28572, "training_step_time": 0.10612988471984863 }, { "epoch": 4.359893798828125e-05, "model_forward_time": 0.025573253631591797, "step": 28573 }, { "epoch": 4.359893798828125e-05, "step": 28573, "training_step_time": 0.10785531997680664 }, { "epoch": 4.36004638671875e-05, "model_forward_time": 0.025715112686157227, "step": 28574 }, { "epoch": 4.36004638671875e-05, "step": 28574, "training_step_time": 0.16468501091003418 }, { "epoch": 4.360198974609375e-05, "model_forward_time": 0.02467513084411621, "step": 28575 }, { "epoch": 4.360198974609375e-05, "step": 28575, "training_step_time": 0.10724735260009766 }, { "epoch": 4.3603515625e-05, "model_forward_time": 0.025193214416503906, "step": 28576 }, { "epoch": 4.3603515625e-05, "step": 28576, "training_step_time": 0.11670398712158203 }, { "epoch": 4.360504150390625e-05, "model_forward_time": 0.025644779205322266, "step": 28577 }, { "epoch": 4.360504150390625e-05, "step": 28577, "training_step_time": 0.10608243942260742 }, { "epoch": 4.36065673828125e-05, "model_forward_time": 0.025310516357421875, "step": 28578 }, { "epoch": 4.36065673828125e-05, "step": 28578, "training_step_time": 0.11088323593139648 }, { "epoch": 4.360809326171875e-05, "model_forward_time": 0.025697946548461914, "step": 28579 }, { "epoch": 4.360809326171875e-05, "step": 28579, "training_step_time": 0.12023282051086426 }, { "epoch": 4.3609619140625e-05, "grad_norm": 0.05735481157898903, "learning_rate": 6.11279476862553e-07, "loss": 0.0071, "step": 28580 }, { "epoch": 4.3609619140625e-05, "model_forward_time": 0.025631189346313477, "step": 28580 }, { "epoch": 4.3609619140625e-05, "step": 28580, "training_step_time": 0.10937190055847168 }, { "epoch": 4.361114501953125e-05, "model_forward_time": 0.025699138641357422, "step": 28581 }, { "epoch": 4.361114501953125e-05, "step": 28581, "training_step_time": 0.14316773414611816 }, { "epoch": 4.36126708984375e-05, "model_forward_time": 0.026482105255126953, "step": 28582 }, { "epoch": 4.36126708984375e-05, "step": 28582, "training_step_time": 0.10617661476135254 }, { "epoch": 4.361419677734375e-05, "model_forward_time": 0.02537083625793457, "step": 28583 }, { "epoch": 4.361419677734375e-05, "step": 28583, "training_step_time": 0.16034579277038574 }, { "epoch": 4.361572265625e-05, "model_forward_time": 0.024753332138061523, "step": 28584 }, { "epoch": 4.361572265625e-05, "step": 28584, "training_step_time": 0.10776662826538086 }, { "epoch": 4.361724853515625e-05, "model_forward_time": 0.024502038955688477, "step": 28585 }, { "epoch": 4.361724853515625e-05, "step": 28585, "training_step_time": 0.10992574691772461 }, { "epoch": 4.36187744140625e-05, "model_forward_time": 0.02548360824584961, "step": 28586 }, { "epoch": 4.36187744140625e-05, "step": 28586, "training_step_time": 0.10399746894836426 }, { "epoch": 4.362030029296875e-05, "model_forward_time": 0.025161266326904297, "step": 28587 }, { "epoch": 4.362030029296875e-05, "step": 28587, "training_step_time": 0.1152961254119873 }, { "epoch": 4.3621826171875e-05, "model_forward_time": 0.025491714477539062, "step": 28588 }, { "epoch": 4.3621826171875e-05, "step": 28588, "training_step_time": 0.12170577049255371 }, { "epoch": 4.362335205078125e-05, "model_forward_time": 0.025161266326904297, "step": 28589 }, { "epoch": 4.362335205078125e-05, "step": 28589, "training_step_time": 0.13399744033813477 }, { "epoch": 4.36248779296875e-05, "grad_norm": 0.04751794412732124, "learning_rate": 6.027175003719354e-07, "loss": 0.0042, "step": 28590 }, { "epoch": 4.36248779296875e-05, "model_forward_time": 0.025127172470092773, "step": 28590 }, { "epoch": 4.36248779296875e-05, "step": 28590, "training_step_time": 0.12870049476623535 }, { "epoch": 4.362640380859375e-05, "model_forward_time": 0.024999141693115234, "step": 28591 }, { "epoch": 4.362640380859375e-05, "step": 28591, "training_step_time": 0.1277327537536621 }, { "epoch": 4.36279296875e-05, "model_forward_time": 0.024950504302978516, "step": 28592 }, { "epoch": 4.36279296875e-05, "step": 28592, "training_step_time": 0.12285947799682617 }, { "epoch": 4.362945556640625e-05, "model_forward_time": 0.02522587776184082, "step": 28593 }, { "epoch": 4.362945556640625e-05, "step": 28593, "training_step_time": 0.12109613418579102 }, { "epoch": 4.36309814453125e-05, "model_forward_time": 0.02544379234313965, "step": 28594 }, { "epoch": 4.36309814453125e-05, "step": 28594, "training_step_time": 0.11397051811218262 }, { "epoch": 4.363250732421875e-05, "model_forward_time": 0.026660442352294922, "step": 28595 }, { "epoch": 4.363250732421875e-05, "step": 28595, "training_step_time": 0.11322784423828125 }, { "epoch": 4.3634033203125e-05, "model_forward_time": 0.02526545524597168, "step": 28596 }, { "epoch": 4.3634033203125e-05, "step": 28596, "training_step_time": 0.11051225662231445 }, { "epoch": 4.363555908203125e-05, "model_forward_time": 0.025479555130004883, "step": 28597 }, { "epoch": 4.363555908203125e-05, "step": 28597, "training_step_time": 0.11149311065673828 }, { "epoch": 4.36370849609375e-05, "model_forward_time": 0.025597333908081055, "step": 28598 }, { "epoch": 4.36370849609375e-05, "step": 28598, "training_step_time": 0.10731387138366699 }, { "epoch": 4.363861083984375e-05, "model_forward_time": 0.025788545608520508, "step": 28599 }, { "epoch": 4.363861083984375e-05, "step": 28599, "training_step_time": 0.10622906684875488 }, { "epoch": 4.364013671875e-05, "grad_norm": 0.04318931698799133, "learning_rate": 5.9421554623742e-07, "loss": 0.0024, "step": 28600 }, { "epoch": 4.364013671875e-05, "model_forward_time": 0.024780750274658203, "step": 28600 }, { "epoch": 4.364013671875e-05, "step": 28600, "training_step_time": 0.10535550117492676 }, { "epoch": 4.364166259765625e-05, "model_forward_time": 0.025745153427124023, "step": 28601 }, { "epoch": 4.364166259765625e-05, "step": 28601, "training_step_time": 0.10775208473205566 }, { "epoch": 4.36431884765625e-05, "model_forward_time": 0.025554656982421875, "step": 28602 }, { "epoch": 4.36431884765625e-05, "step": 28602, "training_step_time": 0.10738229751586914 }, { "epoch": 4.364471435546875e-05, "model_forward_time": 0.02525496482849121, "step": 28603 }, { "epoch": 4.364471435546875e-05, "step": 28603, "training_step_time": 0.10945391654968262 }, { "epoch": 4.3646240234375e-05, "model_forward_time": 0.025452375411987305, "step": 28604 }, { "epoch": 4.3646240234375e-05, "step": 28604, "training_step_time": 0.10456585884094238 }, { "epoch": 4.364776611328125e-05, "model_forward_time": 0.024976253509521484, "step": 28605 }, { "epoch": 4.364776611328125e-05, "step": 28605, "training_step_time": 0.10734701156616211 }, { "epoch": 4.36492919921875e-05, "model_forward_time": 0.02535700798034668, "step": 28606 }, { "epoch": 4.36492919921875e-05, "step": 28606, "training_step_time": 0.10553812980651855 }, { "epoch": 4.365081787109375e-05, "model_forward_time": 0.025526046752929688, "step": 28607 }, { "epoch": 4.365081787109375e-05, "step": 28607, "training_step_time": 0.10612058639526367 }, { "epoch": 4.365234375e-05, "model_forward_time": 0.025731325149536133, "step": 28608 }, { "epoch": 4.365234375e-05, "step": 28608, "training_step_time": 0.10743904113769531 }, { "epoch": 4.365386962890625e-05, "model_forward_time": 0.025513648986816406, "step": 28609 }, { "epoch": 4.365386962890625e-05, "step": 28609, "training_step_time": 0.1964414119720459 }, { "epoch": 4.36553955078125e-05, "grad_norm": 0.07380035519599915, "learning_rate": 5.857736247896706e-07, "loss": 0.0121, "step": 28610 }, { "epoch": 4.36553955078125e-05, "model_forward_time": 0.024281024932861328, "step": 28610 }, { "epoch": 4.36553955078125e-05, "step": 28610, "training_step_time": 0.10724282264709473 }, { "epoch": 4.365692138671875e-05, "model_forward_time": 0.024554729461669922, "step": 28611 }, { "epoch": 4.365692138671875e-05, "step": 28611, "training_step_time": 0.14631390571594238 }, { "epoch": 4.3658447265625e-05, "model_forward_time": 0.025051116943359375, "step": 28612 }, { "epoch": 4.3658447265625e-05, "step": 28612, "training_step_time": 0.11396408081054688 }, { "epoch": 4.365997314453125e-05, "model_forward_time": 0.02541327476501465, "step": 28613 }, { "epoch": 4.365997314453125e-05, "step": 28613, "training_step_time": 0.19150447845458984 }, { "epoch": 4.36614990234375e-05, "model_forward_time": 0.024627685546875, "step": 28614 }, { "epoch": 4.36614990234375e-05, "step": 28614, "training_step_time": 0.1318202018737793 }, { "epoch": 4.366302490234375e-05, "model_forward_time": 0.02457594871520996, "step": 28615 }, { "epoch": 4.366302490234375e-05, "step": 28615, "training_step_time": 0.11049270629882812 }, { "epoch": 4.366455078125e-05, "model_forward_time": 0.025348186492919922, "step": 28616 }, { "epoch": 4.366455078125e-05, "step": 28616, "training_step_time": 0.10710954666137695 }, { "epoch": 4.366607666015625e-05, "model_forward_time": 0.0253903865814209, "step": 28617 }, { "epoch": 4.366607666015625e-05, "step": 28617, "training_step_time": 0.10844111442565918 }, { "epoch": 4.36676025390625e-05, "model_forward_time": 0.025473356246948242, "step": 28618 }, { "epoch": 4.36676025390625e-05, "step": 28618, "training_step_time": 0.10583710670471191 }, { "epoch": 4.366912841796875e-05, "model_forward_time": 0.025549650192260742, "step": 28619 }, { "epoch": 4.366912841796875e-05, "step": 28619, "training_step_time": 0.1052405834197998 }, { "epoch": 4.3670654296875e-05, "grad_norm": 0.05241416022181511, "learning_rate": 5.773917462864264e-07, "loss": 0.0082, "step": 28620 }, { "epoch": 4.3670654296875e-05, "model_forward_time": 0.025548219680786133, "step": 28620 }, { "epoch": 4.3670654296875e-05, "step": 28620, "training_step_time": 0.10493206977844238 }, { "epoch": 4.367218017578125e-05, "model_forward_time": 0.025765419006347656, "step": 28621 }, { "epoch": 4.367218017578125e-05, "step": 28621, "training_step_time": 0.2019200325012207 }, { "epoch": 4.36737060546875e-05, "model_forward_time": 0.024622201919555664, "step": 28622 }, { "epoch": 4.36737060546875e-05, "step": 28622, "training_step_time": 0.18738198280334473 }, { "epoch": 4.367523193359375e-05, "model_forward_time": 0.024585485458374023, "step": 28623 }, { "epoch": 4.367523193359375e-05, "step": 28623, "training_step_time": 0.14621329307556152 }, { "epoch": 4.36767578125e-05, "model_forward_time": 0.02507328987121582, "step": 28624 }, { "epoch": 4.36767578125e-05, "step": 28624, "training_step_time": 0.12362337112426758 }, { "epoch": 4.367828369140625e-05, "model_forward_time": 0.024922847747802734, "step": 28625 }, { "epoch": 4.367828369140625e-05, "step": 28625, "training_step_time": 0.11743831634521484 }, { "epoch": 4.36798095703125e-05, "model_forward_time": 0.02505946159362793, "step": 28626 }, { "epoch": 4.36798095703125e-05, "step": 28626, "training_step_time": 0.11562252044677734 }, { "epoch": 4.368133544921875e-05, "model_forward_time": 0.02549123764038086, "step": 28627 }, { "epoch": 4.368133544921875e-05, "step": 28627, "training_step_time": 0.12139701843261719 }, { "epoch": 4.3682861328125e-05, "model_forward_time": 0.02548503875732422, "step": 28628 }, { "epoch": 4.3682861328125e-05, "step": 28628, "training_step_time": 0.10976624488830566 }, { "epoch": 4.368438720703125e-05, "model_forward_time": 0.025452136993408203, "step": 28629 }, { "epoch": 4.368438720703125e-05, "step": 28629, "training_step_time": 0.11101984977722168 }, { "epoch": 4.36859130859375e-05, "grad_norm": 0.02089017629623413, "learning_rate": 5.690699209124573e-07, "loss": 0.0033, "step": 28630 }, { "epoch": 4.36859130859375e-05, "model_forward_time": 0.024380207061767578, "step": 28630 }, { "epoch": 4.36859130859375e-05, "step": 28630, "training_step_time": 0.11689567565917969 }, { "epoch": 4.368743896484375e-05, "model_forward_time": 0.025591611862182617, "step": 28631 }, { "epoch": 4.368743896484375e-05, "step": 28631, "training_step_time": 0.11536622047424316 }, { "epoch": 4.368896484375e-05, "model_forward_time": 0.025465965270996094, "step": 28632 }, { "epoch": 4.368896484375e-05, "step": 28632, "training_step_time": 0.1165170669555664 }, { "epoch": 4.369049072265625e-05, "model_forward_time": 0.025345563888549805, "step": 28633 }, { "epoch": 4.369049072265625e-05, "step": 28633, "training_step_time": 0.12158536911010742 }, { "epoch": 4.36920166015625e-05, "model_forward_time": 0.025699853897094727, "step": 28634 }, { "epoch": 4.36920166015625e-05, "step": 28634, "training_step_time": 0.11202764511108398 }, { "epoch": 4.369354248046875e-05, "model_forward_time": 0.02570509910583496, "step": 28635 }, { "epoch": 4.369354248046875e-05, "step": 28635, "training_step_time": 0.10531783103942871 }, { "epoch": 4.3695068359375e-05, "model_forward_time": 0.02556300163269043, "step": 28636 }, { "epoch": 4.3695068359375e-05, "step": 28636, "training_step_time": 0.10564160346984863 }, { "epoch": 4.369659423828125e-05, "model_forward_time": 0.02572011947631836, "step": 28637 }, { "epoch": 4.369659423828125e-05, "step": 28637, "training_step_time": 0.10660719871520996 }, { "epoch": 4.36981201171875e-05, "model_forward_time": 0.02571702003479004, "step": 28638 }, { "epoch": 4.36981201171875e-05, "step": 28638, "training_step_time": 0.10975003242492676 }, { "epoch": 4.369964599609375e-05, "model_forward_time": 0.02564716339111328, "step": 28639 }, { "epoch": 4.369964599609375e-05, "step": 28639, "training_step_time": 0.10535955429077148 }, { "epoch": 4.3701171875e-05, "grad_norm": 0.02224591188132763, "learning_rate": 5.608081587795688e-07, "loss": 0.0041, "step": 28640 }, { "epoch": 4.3701171875e-05, "model_forward_time": 0.02522587776184082, "step": 28640 }, { "epoch": 4.3701171875e-05, "step": 28640, "training_step_time": 0.10827207565307617 }, { "epoch": 4.370269775390625e-05, "model_forward_time": 0.02577948570251465, "step": 28641 }, { "epoch": 4.370269775390625e-05, "step": 28641, "training_step_time": 0.10598969459533691 }, { "epoch": 4.37042236328125e-05, "model_forward_time": 0.025465965270996094, "step": 28642 }, { "epoch": 4.37042236328125e-05, "step": 28642, "training_step_time": 0.1079413890838623 }, { "epoch": 4.370574951171875e-05, "model_forward_time": 0.02557229995727539, "step": 28643 }, { "epoch": 4.370574951171875e-05, "step": 28643, "training_step_time": 0.10524821281433105 }, { "epoch": 4.3707275390625e-05, "model_forward_time": 0.025959253311157227, "step": 28644 }, { "epoch": 4.3707275390625e-05, "step": 28644, "training_step_time": 0.10634231567382812 }, { "epoch": 4.370880126953125e-05, "model_forward_time": 0.02557373046875, "step": 28645 }, { "epoch": 4.370880126953125e-05, "step": 28645, "training_step_time": 0.10688996315002441 }, { "epoch": 4.37103271484375e-05, "model_forward_time": 0.025282859802246094, "step": 28646 }, { "epoch": 4.37103271484375e-05, "step": 28646, "training_step_time": 0.10584163665771484 }, { "epoch": 4.371185302734375e-05, "model_forward_time": 0.02537059783935547, "step": 28647 }, { "epoch": 4.371185302734375e-05, "step": 28647, "training_step_time": 0.11057186126708984 }, { "epoch": 4.371337890625e-05, "model_forward_time": 0.025252103805541992, "step": 28648 }, { "epoch": 4.371337890625e-05, "step": 28648, "training_step_time": 0.11389875411987305 }, { "epoch": 4.371490478515625e-05, "model_forward_time": 0.02669548988342285, "step": 28649 }, { "epoch": 4.371490478515625e-05, "step": 28649, "training_step_time": 0.10694527626037598 }, { "epoch": 4.37164306640625e-05, "grad_norm": 0.031245356425642967, "learning_rate": 5.526064699265753e-07, "loss": 0.0035, "step": 28650 }, { "epoch": 4.37164306640625e-05, "model_forward_time": 0.027602434158325195, "step": 28650 }, { "epoch": 4.37164306640625e-05, "step": 28650, "training_step_time": 0.10717177391052246 }, { "epoch": 4.371795654296875e-05, "model_forward_time": 0.02560281753540039, "step": 28651 }, { "epoch": 4.371795654296875e-05, "step": 28651, "training_step_time": 0.1047677993774414 }, { "epoch": 4.3719482421875e-05, "model_forward_time": 0.025110244750976562, "step": 28652 }, { "epoch": 4.3719482421875e-05, "step": 28652, "training_step_time": 0.10331916809082031 }, { "epoch": 4.372100830078125e-05, "model_forward_time": 0.025554418563842773, "step": 28653 }, { "epoch": 4.372100830078125e-05, "step": 28653, "training_step_time": 0.10694169998168945 }, { "epoch": 4.37225341796875e-05, "model_forward_time": 0.026206493377685547, "step": 28654 }, { "epoch": 4.37225341796875e-05, "step": 28654, "training_step_time": 0.10876035690307617 }, { "epoch": 4.372406005859375e-05, "model_forward_time": 0.02534341812133789, "step": 28655 }, { "epoch": 4.372406005859375e-05, "step": 28655, "training_step_time": 0.19789481163024902 }, { "epoch": 4.37255859375e-05, "model_forward_time": 0.024993419647216797, "step": 28656 }, { "epoch": 4.37255859375e-05, "step": 28656, "training_step_time": 0.11295914649963379 }, { "epoch": 4.372711181640625e-05, "model_forward_time": 0.025389909744262695, "step": 28657 }, { "epoch": 4.372711181640625e-05, "step": 28657, "training_step_time": 0.10670018196105957 }, { "epoch": 4.37286376953125e-05, "model_forward_time": 0.024616479873657227, "step": 28658 }, { "epoch": 4.37286376953125e-05, "step": 28658, "training_step_time": 0.14543581008911133 }, { "epoch": 4.373016357421875e-05, "model_forward_time": 0.024707317352294922, "step": 28659 }, { "epoch": 4.373016357421875e-05, "step": 28659, "training_step_time": 0.16424298286437988 }, { "epoch": 4.3731689453125e-05, "grad_norm": 0.22222883999347687, "learning_rate": 5.444648643193051e-07, "loss": 0.0053, "step": 28660 }, { "epoch": 4.3731689453125e-05, "model_forward_time": 0.02474188804626465, "step": 28660 }, { "epoch": 4.3731689453125e-05, "step": 28660, "training_step_time": 0.10510492324829102 }, { "epoch": 4.373321533203125e-05, "model_forward_time": 0.024711132049560547, "step": 28661 }, { "epoch": 4.373321533203125e-05, "step": 28661, "training_step_time": 0.12618732452392578 }, { "epoch": 4.37347412109375e-05, "model_forward_time": 0.025019407272338867, "step": 28662 }, { "epoch": 4.37347412109375e-05, "step": 28662, "training_step_time": 0.19892597198486328 }, { "epoch": 4.373626708984375e-05, "model_forward_time": 0.024721860885620117, "step": 28663 }, { "epoch": 4.373626708984375e-05, "step": 28663, "training_step_time": 0.10156416893005371 }, { "epoch": 4.373779296875e-05, "model_forward_time": 0.02506566047668457, "step": 28664 }, { "epoch": 4.373779296875e-05, "step": 28664, "training_step_time": 0.10683441162109375 }, { "epoch": 4.373931884765625e-05, "model_forward_time": 0.025484323501586914, "step": 28665 }, { "epoch": 4.373931884765625e-05, "step": 28665, "training_step_time": 0.10516858100891113 }, { "epoch": 4.37408447265625e-05, "model_forward_time": 0.025399208068847656, "step": 28666 }, { "epoch": 4.37408447265625e-05, "step": 28666, "training_step_time": 0.10379433631896973 }, { "epoch": 4.374237060546875e-05, "model_forward_time": 0.025669574737548828, "step": 28667 }, { "epoch": 4.374237060546875e-05, "step": 28667, "training_step_time": 0.10510706901550293 }, { "epoch": 4.3743896484375e-05, "model_forward_time": 0.02569293975830078, "step": 28668 }, { "epoch": 4.3743896484375e-05, "step": 28668, "training_step_time": 0.10596442222595215 }, { "epoch": 4.374542236328125e-05, "model_forward_time": 0.025362730026245117, "step": 28669 }, { "epoch": 4.374542236328125e-05, "step": 28669, "training_step_time": 0.16637587547302246 }, { "epoch": 4.37469482421875e-05, "grad_norm": 0.030979402363300323, "learning_rate": 5.363833518505834e-07, "loss": 0.0027, "step": 28670 }, { "epoch": 4.37469482421875e-05, "model_forward_time": 0.024903297424316406, "step": 28670 }, { "epoch": 4.37469482421875e-05, "step": 28670, "training_step_time": 0.20902800559997559 }, { "epoch": 4.374847412109375e-05, "model_forward_time": 0.0249326229095459, "step": 28671 }, { "epoch": 4.374847412109375e-05, "step": 28671, "training_step_time": 0.10184240341186523 }, { "epoch": 4.375e-05, "model_forward_time": 0.024610519409179688, "step": 28672 }, { "epoch": 4.375e-05, "step": 28672, "training_step_time": 0.11134147644042969 }, { "epoch": 4.375152587890625e-05, "model_forward_time": 0.026660680770874023, "step": 28673 }, { "epoch": 4.375152587890625e-05, "step": 28673, "training_step_time": 0.13817405700683594 }, { "epoch": 4.37530517578125e-05, "model_forward_time": 0.02506566047668457, "step": 28674 }, { "epoch": 4.37530517578125e-05, "step": 28674, "training_step_time": 0.13202238082885742 }, { "epoch": 4.375457763671875e-05, "model_forward_time": 0.02370929718017578, "step": 28675 }, { "epoch": 4.375457763671875e-05, "step": 28675, "training_step_time": 0.19225335121154785 }, { "epoch": 4.3756103515625e-05, "model_forward_time": 0.025133132934570312, "step": 28676 }, { "epoch": 4.3756103515625e-05, "step": 28676, "training_step_time": 0.15506219863891602 }, { "epoch": 4.375762939453125e-05, "model_forward_time": 0.024753808975219727, "step": 28677 }, { "epoch": 4.375762939453125e-05, "step": 28677, "training_step_time": 0.19497227668762207 }, { "epoch": 4.37591552734375e-05, "model_forward_time": 0.024383068084716797, "step": 28678 }, { "epoch": 4.37591552734375e-05, "step": 28678, "training_step_time": 0.12999725341796875 }, { "epoch": 4.376068115234375e-05, "model_forward_time": 0.024532794952392578, "step": 28679 }, { "epoch": 4.376068115234375e-05, "step": 28679, "training_step_time": 0.18098139762878418 }, { "epoch": 4.376220703125e-05, "grad_norm": 0.05937690660357475, "learning_rate": 5.283619423401998e-07, "loss": 0.0047, "step": 28680 }, { "epoch": 4.376220703125e-05, "model_forward_time": 0.024864673614501953, "step": 28680 }, { "epoch": 4.376220703125e-05, "step": 28680, "training_step_time": 0.11048579216003418 }, { "epoch": 4.376373291015625e-05, "model_forward_time": 0.024738550186157227, "step": 28681 }, { "epoch": 4.376373291015625e-05, "step": 28681, "training_step_time": 0.11281442642211914 }, { "epoch": 4.37652587890625e-05, "model_forward_time": 0.02571272850036621, "step": 28682 }, { "epoch": 4.37652587890625e-05, "step": 28682, "training_step_time": 0.11158180236816406 }, { "epoch": 4.376678466796875e-05, "model_forward_time": 0.025527000427246094, "step": 28683 }, { "epoch": 4.376678466796875e-05, "step": 28683, "training_step_time": 0.10747480392456055 }, { "epoch": 4.3768310546875e-05, "model_forward_time": 0.025927305221557617, "step": 28684 }, { "epoch": 4.3768310546875e-05, "step": 28684, "training_step_time": 0.11036086082458496 }, { "epoch": 4.376983642578125e-05, "model_forward_time": 0.025383472442626953, "step": 28685 }, { "epoch": 4.376983642578125e-05, "step": 28685, "training_step_time": 0.1073920726776123 }, { "epoch": 4.37713623046875e-05, "model_forward_time": 0.025612831115722656, "step": 28686 }, { "epoch": 4.37713623046875e-05, "step": 28686, "training_step_time": 0.10630631446838379 }, { "epoch": 4.377288818359375e-05, "model_forward_time": 0.02557659149169922, "step": 28687 }, { "epoch": 4.377288818359375e-05, "step": 28687, "training_step_time": 0.10571908950805664 }, { "epoch": 4.37744140625e-05, "model_forward_time": 0.025554656982421875, "step": 28688 }, { "epoch": 4.37744140625e-05, "step": 28688, "training_step_time": 0.1061251163482666 }, { "epoch": 4.377593994140625e-05, "model_forward_time": 0.02557206153869629, "step": 28689 }, { "epoch": 4.377593994140625e-05, "step": 28689, "training_step_time": 0.10648465156555176 }, { "epoch": 4.37774658203125e-05, "grad_norm": 0.04048529267311096, "learning_rate": 5.204006455349297e-07, "loss": 0.0048, "step": 28690 }, { "epoch": 4.37774658203125e-05, "model_forward_time": 0.025365591049194336, "step": 28690 }, { "epoch": 4.37774658203125e-05, "step": 28690, "training_step_time": 0.10643601417541504 }, { "epoch": 4.377899169921875e-05, "model_forward_time": 0.025557756423950195, "step": 28691 }, { "epoch": 4.377899169921875e-05, "step": 28691, "training_step_time": 0.10906744003295898 }, { "epoch": 4.3780517578125e-05, "model_forward_time": 0.02510547637939453, "step": 28692 }, { "epoch": 4.3780517578125e-05, "step": 28692, "training_step_time": 0.10975503921508789 }, { "epoch": 4.378204345703125e-05, "model_forward_time": 0.025404930114746094, "step": 28693 }, { "epoch": 4.378204345703125e-05, "step": 28693, "training_step_time": 0.10972857475280762 }, { "epoch": 4.37835693359375e-05, "model_forward_time": 0.02550053596496582, "step": 28694 }, { "epoch": 4.37835693359375e-05, "step": 28694, "training_step_time": 0.10531830787658691 }, { "epoch": 4.378509521484375e-05, "model_forward_time": 0.02547740936279297, "step": 28695 }, { "epoch": 4.378509521484375e-05, "step": 28695, "training_step_time": 0.105316162109375 }, { "epoch": 4.378662109375e-05, "model_forward_time": 0.0255124568939209, "step": 28696 }, { "epoch": 4.378662109375e-05, "step": 28696, "training_step_time": 0.10662174224853516 }, { "epoch": 4.378814697265625e-05, "model_forward_time": 0.02524733543395996, "step": 28697 }, { "epoch": 4.378814697265625e-05, "step": 28697, "training_step_time": 0.10446643829345703 }, { "epoch": 4.37896728515625e-05, "model_forward_time": 0.025721073150634766, "step": 28698 }, { "epoch": 4.37896728515625e-05, "step": 28698, "training_step_time": 0.10605764389038086 }, { "epoch": 4.379119873046875e-05, "model_forward_time": 0.025681018829345703, "step": 28699 }, { "epoch": 4.379119873046875e-05, "step": 28699, "training_step_time": 0.19356274604797363 }, { "epoch": 4.3792724609375e-05, "grad_norm": 0.05778107792139053, "learning_rate": 5.124994711084963e-07, "loss": 0.0065, "step": 28700 }, { "epoch": 4.3792724609375e-05, "model_forward_time": 0.025851011276245117, "step": 28700 }, { "epoch": 4.3792724609375e-05, "step": 28700, "training_step_time": 0.10588860511779785 }, { "epoch": 4.379425048828125e-05, "model_forward_time": 0.0242769718170166, "step": 28701 }, { "epoch": 4.379425048828125e-05, "step": 28701, "training_step_time": 0.12964534759521484 }, { "epoch": 4.37957763671875e-05, "model_forward_time": 0.02562689781188965, "step": 28702 }, { "epoch": 4.37957763671875e-05, "step": 28702, "training_step_time": 0.12981057167053223 }, { "epoch": 4.379730224609375e-05, "model_forward_time": 0.024976015090942383, "step": 28703 }, { "epoch": 4.379730224609375e-05, "step": 28703, "training_step_time": 0.21854662895202637 }, { "epoch": 4.3798828125e-05, "model_forward_time": 0.025269269943237305, "step": 28704 }, { "epoch": 4.3798828125e-05, "step": 28704, "training_step_time": 0.18779921531677246 }, { "epoch": 4.380035400390625e-05, "model_forward_time": 0.0251920223236084, "step": 28705 }, { "epoch": 4.380035400390625e-05, "step": 28705, "training_step_time": 0.12252402305603027 }, { "epoch": 4.38018798828125e-05, "model_forward_time": 0.024760961532592773, "step": 28706 }, { "epoch": 4.38018798828125e-05, "step": 28706, "training_step_time": 0.19674134254455566 }, { "epoch": 4.380340576171875e-05, "model_forward_time": 0.025147438049316406, "step": 28707 }, { "epoch": 4.380340576171875e-05, "step": 28707, "training_step_time": 0.10844016075134277 }, { "epoch": 4.3804931640625e-05, "model_forward_time": 0.024641036987304688, "step": 28708 }, { "epoch": 4.3804931640625e-05, "step": 28708, "training_step_time": 0.10479879379272461 }, { "epoch": 4.380645751953125e-05, "model_forward_time": 0.025074481964111328, "step": 28709 }, { "epoch": 4.380645751953125e-05, "step": 28709, "training_step_time": 0.10576105117797852 }, { "epoch": 4.38079833984375e-05, "grad_norm": 0.03397779166698456, "learning_rate": 5.046584286615697e-07, "loss": 0.003, "step": 28710 }, { "epoch": 4.38079833984375e-05, "model_forward_time": 0.025345563888549805, "step": 28710 }, { "epoch": 4.38079833984375e-05, "step": 28710, "training_step_time": 0.10462260246276855 }, { "epoch": 4.380950927734375e-05, "model_forward_time": 0.025316715240478516, "step": 28711 }, { "epoch": 4.380950927734375e-05, "step": 28711, "training_step_time": 0.10436415672302246 }, { "epoch": 4.381103515625e-05, "model_forward_time": 0.02571725845336914, "step": 28712 }, { "epoch": 4.381103515625e-05, "step": 28712, "training_step_time": 0.10566592216491699 }, { "epoch": 4.381256103515625e-05, "model_forward_time": 0.025564908981323242, "step": 28713 }, { "epoch": 4.381256103515625e-05, "step": 28713, "training_step_time": 0.15004801750183105 }, { "epoch": 4.38140869140625e-05, "model_forward_time": 0.025386571884155273, "step": 28714 }, { "epoch": 4.38140869140625e-05, "step": 28714, "training_step_time": 0.10986542701721191 }, { "epoch": 4.381561279296875e-05, "model_forward_time": 0.025165319442749023, "step": 28715 }, { "epoch": 4.381561279296875e-05, "step": 28715, "training_step_time": 0.11510515213012695 }, { "epoch": 4.3817138671875e-05, "model_forward_time": 0.025249242782592773, "step": 28716 }, { "epoch": 4.3817138671875e-05, "step": 28716, "training_step_time": 0.13802194595336914 }, { "epoch": 4.381866455078125e-05, "model_forward_time": 0.02555084228515625, "step": 28717 }, { "epoch": 4.381866455078125e-05, "step": 28717, "training_step_time": 0.13551115989685059 }, { "epoch": 4.38201904296875e-05, "model_forward_time": 0.02500295639038086, "step": 28718 }, { "epoch": 4.38201904296875e-05, "step": 28718, "training_step_time": 0.1396193504333496 }, { "epoch": 4.382171630859375e-05, "model_forward_time": 0.0247800350189209, "step": 28719 }, { "epoch": 4.382171630859375e-05, "step": 28719, "training_step_time": 0.16705060005187988 }, { "epoch": 4.38232421875e-05, "grad_norm": 0.03233213350176811, "learning_rate": 4.968775277217563e-07, "loss": 0.0067, "step": 28720 }, { "epoch": 4.38232421875e-05, "model_forward_time": 0.024508953094482422, "step": 28720 }, { "epoch": 4.38232421875e-05, "step": 28720, "training_step_time": 0.2248837947845459 }, { "epoch": 4.382476806640625e-05, "model_forward_time": 0.025029897689819336, "step": 28721 }, { "epoch": 4.382476806640625e-05, "step": 28721, "training_step_time": 0.11561918258666992 }, { "epoch": 4.38262939453125e-05, "model_forward_time": 0.02369213104248047, "step": 28722 }, { "epoch": 4.38262939453125e-05, "step": 28722, "training_step_time": 0.11866092681884766 }, { "epoch": 4.382781982421875e-05, "model_forward_time": 0.025710105895996094, "step": 28723 }, { "epoch": 4.382781982421875e-05, "step": 28723, "training_step_time": 0.1869335174560547 }, { "epoch": 4.3829345703125e-05, "model_forward_time": 0.02476358413696289, "step": 28724 }, { "epoch": 4.3829345703125e-05, "step": 28724, "training_step_time": 0.10866451263427734 }, { "epoch": 4.383087158203125e-05, "model_forward_time": 0.0243074893951416, "step": 28725 }, { "epoch": 4.383087158203125e-05, "step": 28725, "training_step_time": 0.1094825267791748 }, { "epoch": 4.38323974609375e-05, "model_forward_time": 0.025636911392211914, "step": 28726 }, { "epoch": 4.38323974609375e-05, "step": 28726, "training_step_time": 0.1097109317779541 }, { "epoch": 4.383392333984375e-05, "model_forward_time": 0.025471925735473633, "step": 28727 }, { "epoch": 4.383392333984375e-05, "step": 28727, "training_step_time": 0.10805296897888184 }, { "epoch": 4.383544921875e-05, "model_forward_time": 0.025635480880737305, "step": 28728 }, { "epoch": 4.383544921875e-05, "step": 28728, "training_step_time": 0.10610580444335938 }, { "epoch": 4.383697509765625e-05, "model_forward_time": 0.026263952255249023, "step": 28729 }, { "epoch": 4.383697509765625e-05, "step": 28729, "training_step_time": 0.10891938209533691 }, { "epoch": 4.38385009765625e-05, "grad_norm": 0.04797426238656044, "learning_rate": 4.891567777435879e-07, "loss": 0.0149, "step": 28730 }, { "epoch": 4.38385009765625e-05, "model_forward_time": 0.024899005889892578, "step": 28730 }, { "epoch": 4.38385009765625e-05, "step": 28730, "training_step_time": 0.10553216934204102 }, { "epoch": 4.384002685546875e-05, "model_forward_time": 0.025508403778076172, "step": 28731 }, { "epoch": 4.384002685546875e-05, "step": 28731, "training_step_time": 0.10512709617614746 }, { "epoch": 4.3841552734375e-05, "model_forward_time": 0.025115251541137695, "step": 28732 }, { "epoch": 4.3841552734375e-05, "step": 28732, "training_step_time": 0.10852861404418945 }, { "epoch": 4.384307861328125e-05, "model_forward_time": 0.02540302276611328, "step": 28733 }, { "epoch": 4.384307861328125e-05, "step": 28733, "training_step_time": 0.106414794921875 }, { "epoch": 4.38446044921875e-05, "model_forward_time": 0.02573537826538086, "step": 28734 }, { "epoch": 4.38446044921875e-05, "step": 28734, "training_step_time": 0.10681629180908203 }, { "epoch": 4.384613037109375e-05, "model_forward_time": 0.02525162696838379, "step": 28735 }, { "epoch": 4.384613037109375e-05, "step": 28735, "training_step_time": 0.10460925102233887 }, { "epoch": 4.384765625e-05, "model_forward_time": 0.025490522384643555, "step": 28736 }, { "epoch": 4.384765625e-05, "step": 28736, "training_step_time": 0.10910987854003906 }, { "epoch": 4.384918212890625e-05, "model_forward_time": 0.025560379028320312, "step": 28737 }, { "epoch": 4.384918212890625e-05, "step": 28737, "training_step_time": 0.1045072078704834 }, { "epoch": 4.38507080078125e-05, "model_forward_time": 0.025290489196777344, "step": 28738 }, { "epoch": 4.38507080078125e-05, "step": 28738, "training_step_time": 0.10716915130615234 }, { "epoch": 4.385223388671875e-05, "model_forward_time": 0.02543783187866211, "step": 28739 }, { "epoch": 4.385223388671875e-05, "step": 28739, "training_step_time": 0.10564160346984863 }, { "epoch": 4.3853759765625e-05, "grad_norm": 0.027422254905104637, "learning_rate": 4.814961881085045e-07, "loss": 0.0023, "step": 28740 }, { "epoch": 4.3853759765625e-05, "model_forward_time": 0.024868011474609375, "step": 28740 }, { "epoch": 4.3853759765625e-05, "step": 28740, "training_step_time": 0.10418844223022461 }, { "epoch": 4.385528564453125e-05, "model_forward_time": 0.02553868293762207, "step": 28741 }, { "epoch": 4.385528564453125e-05, "step": 28741, "training_step_time": 0.106231689453125 }, { "epoch": 4.38568115234375e-05, "model_forward_time": 0.025843143463134766, "step": 28742 }, { "epoch": 4.38568115234375e-05, "step": 28742, "training_step_time": 0.10389828681945801 }, { "epoch": 4.385833740234375e-05, "model_forward_time": 0.02524733543395996, "step": 28743 }, { "epoch": 4.385833740234375e-05, "step": 28743, "training_step_time": 0.10532879829406738 }, { "epoch": 4.385986328125e-05, "model_forward_time": 0.025456905364990234, "step": 28744 }, { "epoch": 4.385986328125e-05, "step": 28744, "training_step_time": 0.1393134593963623 }, { "epoch": 4.386138916015625e-05, "model_forward_time": 0.02593255043029785, "step": 28745 }, { "epoch": 4.386138916015625e-05, "step": 28745, "training_step_time": 0.1181640625 }, { "epoch": 4.38629150390625e-05, "model_forward_time": 0.025081396102905273, "step": 28746 }, { "epoch": 4.38629150390625e-05, "step": 28746, "training_step_time": 0.12436532974243164 }, { "epoch": 4.386444091796875e-05, "model_forward_time": 0.02532172203063965, "step": 28747 }, { "epoch": 4.386444091796875e-05, "step": 28747, "training_step_time": 0.10606884956359863 }, { "epoch": 4.3865966796875e-05, "model_forward_time": 0.0247647762298584, "step": 28748 }, { "epoch": 4.3865966796875e-05, "step": 28748, "training_step_time": 0.2152705192565918 }, { "epoch": 4.386749267578125e-05, "model_forward_time": 0.024791717529296875, "step": 28749 }, { "epoch": 4.386749267578125e-05, "step": 28749, "training_step_time": 0.11233186721801758 }, { "epoch": 4.38690185546875e-05, "grad_norm": 0.050352275371551514, "learning_rate": 4.738957681248379e-07, "loss": 0.0043, "step": 28750 }, { "epoch": 4.38690185546875e-05, "model_forward_time": 0.02482318878173828, "step": 28750 }, { "epoch": 4.38690185546875e-05, "step": 28750, "training_step_time": 0.21964049339294434 }, { "epoch": 4.387054443359375e-05, "model_forward_time": 0.024962186813354492, "step": 28751 }, { "epoch": 4.387054443359375e-05, "step": 28751, "training_step_time": 0.1973869800567627 }, { "epoch": 4.38720703125e-05, "model_forward_time": 0.025477170944213867, "step": 28752 }, { "epoch": 4.38720703125e-05, "step": 28752, "training_step_time": 0.10285401344299316 }, { "epoch": 4.387359619140625e-05, "model_forward_time": 0.025172710418701172, "step": 28753 }, { "epoch": 4.387359619140625e-05, "step": 28753, "training_step_time": 0.1022496223449707 }, { "epoch": 4.38751220703125e-05, "model_forward_time": 0.025309085845947266, "step": 28754 }, { "epoch": 4.38751220703125e-05, "step": 28754, "training_step_time": 0.1055145263671875 }, { "epoch": 4.387664794921875e-05, "model_forward_time": 0.025611162185668945, "step": 28755 }, { "epoch": 4.387664794921875e-05, "step": 28755, "training_step_time": 0.10721397399902344 }, { "epoch": 4.3878173828125e-05, "model_forward_time": 0.024740934371948242, "step": 28756 }, { "epoch": 4.3878173828125e-05, "step": 28756, "training_step_time": 0.10711526870727539 }, { "epoch": 4.387969970703125e-05, "model_forward_time": 0.024724483489990234, "step": 28757 }, { "epoch": 4.387969970703125e-05, "step": 28757, "training_step_time": 0.11022591590881348 }, { "epoch": 4.38812255859375e-05, "model_forward_time": 0.0256197452545166, "step": 28758 }, { "epoch": 4.38812255859375e-05, "step": 28758, "training_step_time": 0.11146116256713867 }, { "epoch": 4.388275146484375e-05, "model_forward_time": 0.02564549446105957, "step": 28759 }, { "epoch": 4.388275146484375e-05, "step": 28759, "training_step_time": 0.15193724632263184 }, { "epoch": 4.388427734375e-05, "grad_norm": 0.07244842499494553, "learning_rate": 4.6635552702782305e-07, "loss": 0.0101, "step": 28760 }, { "epoch": 4.388427734375e-05, "model_forward_time": 0.025867700576782227, "step": 28760 }, { "epoch": 4.388427734375e-05, "step": 28760, "training_step_time": 0.23668694496154785 }, { "epoch": 4.388580322265625e-05, "model_forward_time": 0.024877548217773438, "step": 28761 }, { "epoch": 4.388580322265625e-05, "step": 28761, "training_step_time": 0.22859525680541992 }, { "epoch": 4.38873291015625e-05, "model_forward_time": 0.024869441986083984, "step": 28762 }, { "epoch": 4.38873291015625e-05, "step": 28762, "training_step_time": 0.17171335220336914 }, { "epoch": 4.388885498046875e-05, "model_forward_time": 0.024939298629760742, "step": 28763 }, { "epoch": 4.388885498046875e-05, "step": 28763, "training_step_time": 0.19254827499389648 }, { "epoch": 4.3890380859375e-05, "model_forward_time": 0.025153398513793945, "step": 28764 }, { "epoch": 4.3890380859375e-05, "step": 28764, "training_step_time": 0.1391303539276123 }, { "epoch": 4.389190673828125e-05, "model_forward_time": 0.02534031867980957, "step": 28765 }, { "epoch": 4.389190673828125e-05, "step": 28765, "training_step_time": 0.18651866912841797 }, { "epoch": 4.38934326171875e-05, "model_forward_time": 0.024590015411376953, "step": 28766 }, { "epoch": 4.38934326171875e-05, "step": 28766, "training_step_time": 0.12140154838562012 }, { "epoch": 4.389495849609375e-05, "model_forward_time": 0.024501562118530273, "step": 28767 }, { "epoch": 4.389495849609375e-05, "step": 28767, "training_step_time": 0.17989182472229004 }, { "epoch": 4.3896484375e-05, "model_forward_time": 0.024941444396972656, "step": 28768 }, { "epoch": 4.3896484375e-05, "step": 28768, "training_step_time": 0.11324524879455566 }, { "epoch": 4.389801025390625e-05, "model_forward_time": 0.024997472763061523, "step": 28769 }, { "epoch": 4.389801025390625e-05, "step": 28769, "training_step_time": 0.10687685012817383 }, { "epoch": 4.38995361328125e-05, "grad_norm": 0.22183045744895935, "learning_rate": 4.5887547397955864e-07, "loss": 0.0031, "step": 28770 }, { "epoch": 4.38995361328125e-05, "model_forward_time": 0.02875542640686035, "step": 28770 }, { "epoch": 4.38995361328125e-05, "step": 28770, "training_step_time": 0.10820436477661133 }, { "epoch": 4.390106201171875e-05, "model_forward_time": 0.025441646575927734, "step": 28771 }, { "epoch": 4.390106201171875e-05, "step": 28771, "training_step_time": 0.10809493064880371 }, { "epoch": 4.3902587890625e-05, "model_forward_time": 0.025870800018310547, "step": 28772 }, { "epoch": 4.3902587890625e-05, "step": 28772, "training_step_time": 0.10555553436279297 }, { "epoch": 4.390411376953125e-05, "model_forward_time": 0.02575850486755371, "step": 28773 }, { "epoch": 4.390411376953125e-05, "step": 28773, "training_step_time": 0.10755634307861328 }, { "epoch": 4.39056396484375e-05, "model_forward_time": 0.025369644165039062, "step": 28774 }, { "epoch": 4.39056396484375e-05, "step": 28774, "training_step_time": 0.10852360725402832 }, { "epoch": 4.390716552734375e-05, "model_forward_time": 0.025286436080932617, "step": 28775 }, { "epoch": 4.390716552734375e-05, "step": 28775, "training_step_time": 0.10610795021057129 }, { "epoch": 4.390869140625e-05, "model_forward_time": 0.025269746780395508, "step": 28776 }, { "epoch": 4.390869140625e-05, "step": 28776, "training_step_time": 0.1108560562133789 }, { "epoch": 4.391021728515625e-05, "model_forward_time": 0.02545475959777832, "step": 28777 }, { "epoch": 4.391021728515625e-05, "step": 28777, "training_step_time": 0.10515713691711426 }, { "epoch": 4.39117431640625e-05, "model_forward_time": 0.02550196647644043, "step": 28778 }, { "epoch": 4.39117431640625e-05, "step": 28778, "training_step_time": 0.11182999610900879 }, { "epoch": 4.391326904296875e-05, "model_forward_time": 0.025603294372558594, "step": 28779 }, { "epoch": 4.391326904296875e-05, "step": 28779, "training_step_time": 0.10822176933288574 }, { "epoch": 4.3914794921875e-05, "grad_norm": 0.082331083714962, "learning_rate": 4.514556180690188e-07, "loss": 0.0046, "step": 28780 }, { "epoch": 4.3914794921875e-05, "model_forward_time": 0.025365352630615234, "step": 28780 }, { "epoch": 4.3914794921875e-05, "step": 28780, "training_step_time": 0.10804080963134766 }, { "epoch": 4.391632080078125e-05, "model_forward_time": 0.0254666805267334, "step": 28781 }, { "epoch": 4.391632080078125e-05, "step": 28781, "training_step_time": 0.10747027397155762 }, { "epoch": 4.39178466796875e-05, "model_forward_time": 0.025144100189208984, "step": 28782 }, { "epoch": 4.39178466796875e-05, "step": 28782, "training_step_time": 0.10770010948181152 }, { "epoch": 4.391937255859375e-05, "model_forward_time": 0.02653980255126953, "step": 28783 }, { "epoch": 4.391937255859375e-05, "step": 28783, "training_step_time": 0.10976195335388184 }, { "epoch": 4.39208984375e-05, "model_forward_time": 0.025371789932250977, "step": 28784 }, { "epoch": 4.39208984375e-05, "step": 28784, "training_step_time": 0.1084136962890625 }, { "epoch": 4.392242431640625e-05, "model_forward_time": 0.025032520294189453, "step": 28785 }, { "epoch": 4.392242431640625e-05, "step": 28785, "training_step_time": 0.10915040969848633 }, { "epoch": 4.39239501953125e-05, "model_forward_time": 0.02529764175415039, "step": 28786 }, { "epoch": 4.39239501953125e-05, "step": 28786, "training_step_time": 0.21702051162719727 }, { "epoch": 4.392547607421875e-05, "model_forward_time": 0.024346113204956055, "step": 28787 }, { "epoch": 4.392547607421875e-05, "step": 28787, "training_step_time": 0.11910724639892578 }, { "epoch": 4.3927001953125e-05, "model_forward_time": 0.024736881256103516, "step": 28788 }, { "epoch": 4.3927001953125e-05, "step": 28788, "training_step_time": 0.12718558311462402 }, { "epoch": 4.392852783203125e-05, "model_forward_time": 0.02529144287109375, "step": 28789 }, { "epoch": 4.392852783203125e-05, "step": 28789, "training_step_time": 0.10621905326843262 }, { "epoch": 4.39300537109375e-05, "grad_norm": 0.02719235047698021, "learning_rate": 4.440959683120194e-07, "loss": 0.0064, "step": 28790 }, { "epoch": 4.39300537109375e-05, "model_forward_time": 0.025585174560546875, "step": 28790 }, { "epoch": 4.39300537109375e-05, "step": 28790, "training_step_time": 0.16669607162475586 }, { "epoch": 4.393157958984375e-05, "model_forward_time": 0.02438807487487793, "step": 28791 }, { "epoch": 4.393157958984375e-05, "step": 28791, "training_step_time": 0.14309453964233398 }, { "epoch": 4.393310546875e-05, "model_forward_time": 0.025151968002319336, "step": 28792 }, { "epoch": 4.393310546875e-05, "step": 28792, "training_step_time": 0.11692285537719727 }, { "epoch": 4.393463134765625e-05, "model_forward_time": 0.025306224822998047, "step": 28793 }, { "epoch": 4.393463134765625e-05, "step": 28793, "training_step_time": 0.12682294845581055 }, { "epoch": 4.39361572265625e-05, "model_forward_time": 0.025196313858032227, "step": 28794 }, { "epoch": 4.39361572265625e-05, "step": 28794, "training_step_time": 0.19578099250793457 }, { "epoch": 4.393768310546875e-05, "model_forward_time": 0.025259733200073242, "step": 28795 }, { "epoch": 4.393768310546875e-05, "step": 28795, "training_step_time": 0.10339617729187012 }, { "epoch": 4.3939208984375e-05, "model_forward_time": 0.024817466735839844, "step": 28796 }, { "epoch": 4.3939208984375e-05, "step": 28796, "training_step_time": 0.10300493240356445 }, { "epoch": 4.394073486328125e-05, "model_forward_time": 0.02497243881225586, "step": 28797 }, { "epoch": 4.394073486328125e-05, "step": 28797, "training_step_time": 0.10687494277954102 }, { "epoch": 4.39422607421875e-05, "model_forward_time": 0.02507615089416504, "step": 28798 }, { "epoch": 4.39422607421875e-05, "step": 28798, "training_step_time": 0.10734963417053223 }, { "epoch": 4.394378662109375e-05, "model_forward_time": 0.02510976791381836, "step": 28799 }, { "epoch": 4.394378662109375e-05, "step": 28799, "training_step_time": 0.10620403289794922 }, { "epoch": 4.39453125e-05, "grad_norm": 0.049385394901037216, "learning_rate": 4.367965336512403e-07, "loss": 0.0031, "step": 28800 }, { "epoch": 4.39453125e-05, "model_forward_time": 0.024399280548095703, "step": 28800 }, { "epoch": 4.39453125e-05, "step": 28800, "training_step_time": 0.11004233360290527 }, { "epoch": 4.394683837890625e-05, "model_forward_time": 0.025557756423950195, "step": 28801 }, { "epoch": 4.394683837890625e-05, "step": 28801, "training_step_time": 0.10661673545837402 }, { "epoch": 4.39483642578125e-05, "model_forward_time": 0.02512359619140625, "step": 28802 }, { "epoch": 4.39483642578125e-05, "step": 28802, "training_step_time": 0.20676732063293457 }, { "epoch": 4.394989013671875e-05, "model_forward_time": 0.024614334106445312, "step": 28803 }, { "epoch": 4.394989013671875e-05, "step": 28803, "training_step_time": 0.2124791145324707 }, { "epoch": 4.3951416015625e-05, "model_forward_time": 0.024322032928466797, "step": 28804 }, { "epoch": 4.3951416015625e-05, "step": 28804, "training_step_time": 0.17612934112548828 }, { "epoch": 4.395294189453125e-05, "model_forward_time": 0.024463653564453125, "step": 28805 }, { "epoch": 4.395294189453125e-05, "step": 28805, "training_step_time": 0.1540968418121338 }, { "epoch": 4.39544677734375e-05, "model_forward_time": 0.02434706687927246, "step": 28806 }, { "epoch": 4.39544677734375e-05, "step": 28806, "training_step_time": 0.1662280559539795 }, { "epoch": 4.395599365234375e-05, "model_forward_time": 0.024532556533813477, "step": 28807 }, { "epoch": 4.395599365234375e-05, "step": 28807, "training_step_time": 0.13926935195922852 }, { "epoch": 4.395751953125e-05, "model_forward_time": 0.024302244186401367, "step": 28808 }, { "epoch": 4.395751953125e-05, "step": 28808, "training_step_time": 0.10504961013793945 }, { "epoch": 4.395904541015625e-05, "model_forward_time": 0.02523946762084961, "step": 28809 }, { "epoch": 4.395904541015625e-05, "step": 28809, "training_step_time": 0.10904264450073242 }, { "epoch": 4.39605712890625e-05, "grad_norm": 0.024276915937662125, "learning_rate": 4.2955732295617554e-07, "loss": 0.0022, "step": 28810 }, { "epoch": 4.39605712890625e-05, "model_forward_time": 0.02545452117919922, "step": 28810 }, { "epoch": 4.39605712890625e-05, "step": 28810, "training_step_time": 0.10454940795898438 }, { "epoch": 4.396209716796875e-05, "model_forward_time": 0.025461673736572266, "step": 28811 }, { "epoch": 4.396209716796875e-05, "step": 28811, "training_step_time": 0.10764288902282715 }, { "epoch": 4.3963623046875e-05, "model_forward_time": 0.025270462036132812, "step": 28812 }, { "epoch": 4.3963623046875e-05, "step": 28812, "training_step_time": 0.19205927848815918 }, { "epoch": 4.396514892578125e-05, "model_forward_time": 0.024467945098876953, "step": 28813 }, { "epoch": 4.396514892578125e-05, "step": 28813, "training_step_time": 0.10696792602539062 }, { "epoch": 4.39666748046875e-05, "model_forward_time": 0.02460503578186035, "step": 28814 }, { "epoch": 4.39666748046875e-05, "step": 28814, "training_step_time": 0.10497260093688965 }, { "epoch": 4.396820068359375e-05, "model_forward_time": 0.025258779525756836, "step": 28815 }, { "epoch": 4.396820068359375e-05, "step": 28815, "training_step_time": 0.10583806037902832 }, { "epoch": 4.39697265625e-05, "model_forward_time": 0.025498151779174805, "step": 28816 }, { "epoch": 4.39697265625e-05, "step": 28816, "training_step_time": 0.10678863525390625 }, { "epoch": 4.397125244140625e-05, "model_forward_time": 0.025346994400024414, "step": 28817 }, { "epoch": 4.397125244140625e-05, "step": 28817, "training_step_time": 0.10732054710388184 }, { "epoch": 4.39727783203125e-05, "model_forward_time": 0.025068998336791992, "step": 28818 }, { "epoch": 4.39727783203125e-05, "step": 28818, "training_step_time": 0.10530805587768555 }, { "epoch": 4.397430419921875e-05, "model_forward_time": 0.025461912155151367, "step": 28819 }, { "epoch": 4.397430419921875e-05, "step": 28819, "training_step_time": 0.10402679443359375 }, { "epoch": 4.3975830078125e-05, "grad_norm": 0.025169670581817627, "learning_rate": 4.2237834502314997e-07, "loss": 0.0038, "step": 28820 }, { "epoch": 4.3975830078125e-05, "model_forward_time": 0.025188207626342773, "step": 28820 }, { "epoch": 4.3975830078125e-05, "step": 28820, "training_step_time": 0.10598039627075195 }, { "epoch": 4.397735595703125e-05, "model_forward_time": 0.025551795959472656, "step": 28821 }, { "epoch": 4.397735595703125e-05, "step": 28821, "training_step_time": 0.10508370399475098 }, { "epoch": 4.39788818359375e-05, "model_forward_time": 0.02833247184753418, "step": 28822 }, { "epoch": 4.39788818359375e-05, "step": 28822, "training_step_time": 0.10770201683044434 }, { "epoch": 4.398040771484375e-05, "model_forward_time": 0.024962186813354492, "step": 28823 }, { "epoch": 4.398040771484375e-05, "step": 28823, "training_step_time": 0.10485339164733887 }, { "epoch": 4.398193359375e-05, "model_forward_time": 0.02578258514404297, "step": 28824 }, { "epoch": 4.398193359375e-05, "step": 28824, "training_step_time": 0.10573816299438477 }, { "epoch": 4.398345947265625e-05, "model_forward_time": 0.024971485137939453, "step": 28825 }, { "epoch": 4.398345947265625e-05, "step": 28825, "training_step_time": 0.10591983795166016 }, { "epoch": 4.39849853515625e-05, "model_forward_time": 0.025452136993408203, "step": 28826 }, { "epoch": 4.39849853515625e-05, "step": 28826, "training_step_time": 0.10672640800476074 }, { "epoch": 4.398651123046875e-05, "model_forward_time": 0.025547027587890625, "step": 28827 }, { "epoch": 4.398651123046875e-05, "step": 28827, "training_step_time": 0.10734677314758301 }, { "epoch": 4.3988037109375e-05, "model_forward_time": 0.025259971618652344, "step": 28828 }, { "epoch": 4.3988037109375e-05, "step": 28828, "training_step_time": 0.11012053489685059 }, { "epoch": 4.398956298828125e-05, "model_forward_time": 0.025006532669067383, "step": 28829 }, { "epoch": 4.398956298828125e-05, "step": 28829, "training_step_time": 0.13423705101013184 }, { "epoch": 4.39910888671875e-05, "grad_norm": 0.09788142144680023, "learning_rate": 4.1525960857530243e-07, "loss": 0.0026, "step": 28830 }, { "epoch": 4.39910888671875e-05, "model_forward_time": 0.024151086807250977, "step": 28830 }, { "epoch": 4.39910888671875e-05, "step": 28830, "training_step_time": 0.18470382690429688 }, { "epoch": 4.399261474609375e-05, "model_forward_time": 0.02412700653076172, "step": 28831 }, { "epoch": 4.399261474609375e-05, "step": 28831, "training_step_time": 0.20001959800720215 }, { "epoch": 4.3994140625e-05, "model_forward_time": 0.02448868751525879, "step": 28832 }, { "epoch": 4.3994140625e-05, "step": 28832, "training_step_time": 0.172684907913208 }, { "epoch": 4.399566650390625e-05, "model_forward_time": 0.025416135787963867, "step": 28833 }, { "epoch": 4.399566650390625e-05, "step": 28833, "training_step_time": 0.11399722099304199 }, { "epoch": 4.39971923828125e-05, "model_forward_time": 0.024176359176635742, "step": 28834 }, { "epoch": 4.39971923828125e-05, "step": 28834, "training_step_time": 0.17377471923828125 }, { "epoch": 4.399871826171875e-05, "model_forward_time": 0.02455759048461914, "step": 28835 }, { "epoch": 4.399871826171875e-05, "step": 28835, "training_step_time": 0.1382441520690918 }, { "epoch": 4.4000244140625e-05, "model_forward_time": 0.02526545524597168, "step": 28836 }, { "epoch": 4.4000244140625e-05, "step": 28836, "training_step_time": 0.12614202499389648 }, { "epoch": 4.400177001953125e-05, "model_forward_time": 0.02462315559387207, "step": 28837 }, { "epoch": 4.400177001953125e-05, "step": 28837, "training_step_time": 0.1191110610961914 }, { "epoch": 4.40032958984375e-05, "model_forward_time": 0.02477717399597168, "step": 28838 }, { "epoch": 4.40032958984375e-05, "step": 28838, "training_step_time": 0.18198323249816895 }, { "epoch": 4.400482177734375e-05, "model_forward_time": 0.02429366111755371, "step": 28839 }, { "epoch": 4.400482177734375e-05, "step": 28839, "training_step_time": 0.10352921485900879 }, { "epoch": 4.400634765625e-05, "grad_norm": 0.13595394790172577, "learning_rate": 4.082011222625637e-07, "loss": 0.0024, "step": 28840 }, { "epoch": 4.400634765625e-05, "model_forward_time": 0.024634838104248047, "step": 28840 }, { "epoch": 4.400634765625e-05, "step": 28840, "training_step_time": 0.1066291332244873 }, { "epoch": 4.400787353515625e-05, "model_forward_time": 0.025807619094848633, "step": 28841 }, { "epoch": 4.400787353515625e-05, "step": 28841, "training_step_time": 0.10807108879089355 }, { "epoch": 4.40093994140625e-05, "model_forward_time": 0.0251462459564209, "step": 28842 }, { "epoch": 4.40093994140625e-05, "step": 28842, "training_step_time": 0.10721302032470703 }, { "epoch": 4.401092529296875e-05, "model_forward_time": 0.026604413986206055, "step": 28843 }, { "epoch": 4.401092529296875e-05, "step": 28843, "training_step_time": 0.1101830005645752 }, { "epoch": 4.4012451171875e-05, "model_forward_time": 0.024954795837402344, "step": 28844 }, { "epoch": 4.4012451171875e-05, "step": 28844, "training_step_time": 0.10507941246032715 }, { "epoch": 4.401397705078125e-05, "model_forward_time": 0.025441884994506836, "step": 28845 }, { "epoch": 4.401397705078125e-05, "step": 28845, "training_step_time": 0.10663938522338867 }, { "epoch": 4.40155029296875e-05, "model_forward_time": 0.02576613426208496, "step": 28846 }, { "epoch": 4.40155029296875e-05, "step": 28846, "training_step_time": 0.11036968231201172 }, { "epoch": 4.401702880859375e-05, "model_forward_time": 0.02521491050720215, "step": 28847 }, { "epoch": 4.401702880859375e-05, "step": 28847, "training_step_time": 0.11086630821228027 }, { "epoch": 4.40185546875e-05, "model_forward_time": 0.02510547637939453, "step": 28848 }, { "epoch": 4.40185546875e-05, "step": 28848, "training_step_time": 0.2214219570159912 }, { "epoch": 4.402008056640625e-05, "model_forward_time": 0.024599075317382812, "step": 28849 }, { "epoch": 4.402008056640625e-05, "step": 28849, "training_step_time": 0.16921019554138184 }, { "epoch": 4.40216064453125e-05, "grad_norm": 0.513080358505249, "learning_rate": 4.012028946616675e-07, "loss": 0.0072, "step": 28850 }, { "epoch": 4.40216064453125e-05, "model_forward_time": 0.025159358978271484, "step": 28850 }, { "epoch": 4.40216064453125e-05, "step": 28850, "training_step_time": 0.15341901779174805 }, { "epoch": 4.402313232421875e-05, "model_forward_time": 0.02451300621032715, "step": 28851 }, { "epoch": 4.402313232421875e-05, "step": 28851, "training_step_time": 0.10731053352355957 }, { "epoch": 4.4024658203125e-05, "model_forward_time": 0.02467823028564453, "step": 28852 }, { "epoch": 4.4024658203125e-05, "step": 28852, "training_step_time": 0.12532258033752441 }, { "epoch": 4.402618408203125e-05, "model_forward_time": 0.025277137756347656, "step": 28853 }, { "epoch": 4.402618408203125e-05, "step": 28853, "training_step_time": 0.10787487030029297 }, { "epoch": 4.40277099609375e-05, "model_forward_time": 0.025817155838012695, "step": 28854 }, { "epoch": 4.40277099609375e-05, "step": 28854, "training_step_time": 0.10804390907287598 }, { "epoch": 4.402923583984375e-05, "model_forward_time": 0.024899721145629883, "step": 28855 }, { "epoch": 4.402923583984375e-05, "step": 28855, "training_step_time": 0.11023569107055664 }, { "epoch": 4.403076171875e-05, "model_forward_time": 0.025223493576049805, "step": 28856 }, { "epoch": 4.403076171875e-05, "step": 28856, "training_step_time": 0.10880446434020996 }, { "epoch": 4.403228759765625e-05, "model_forward_time": 0.025037527084350586, "step": 28857 }, { "epoch": 4.403228759765625e-05, "step": 28857, "training_step_time": 0.20793533325195312 }, { "epoch": 4.40338134765625e-05, "model_forward_time": 0.024899721145629883, "step": 28858 }, { "epoch": 4.40338134765625e-05, "step": 28858, "training_step_time": 0.1049356460571289 }, { "epoch": 4.403533935546875e-05, "model_forward_time": 0.026018857955932617, "step": 28859 }, { "epoch": 4.403533935546875e-05, "step": 28859, "training_step_time": 0.11279058456420898 }, { "epoch": 4.4036865234375e-05, "grad_norm": 0.05081988126039505, "learning_rate": 3.9426493427611177e-07, "loss": 0.0045, "step": 28860 }, { "epoch": 4.4036865234375e-05, "model_forward_time": 0.02529621124267578, "step": 28860 }, { "epoch": 4.4036865234375e-05, "step": 28860, "training_step_time": 0.10503101348876953 }, { "epoch": 4.403839111328125e-05, "model_forward_time": 0.025681257247924805, "step": 28861 }, { "epoch": 4.403839111328125e-05, "step": 28861, "training_step_time": 0.10536026954650879 }, { "epoch": 4.40399169921875e-05, "model_forward_time": 0.02542591094970703, "step": 28862 }, { "epoch": 4.40399169921875e-05, "step": 28862, "training_step_time": 0.10457992553710938 }, { "epoch": 4.404144287109375e-05, "model_forward_time": 0.025218725204467773, "step": 28863 }, { "epoch": 4.404144287109375e-05, "step": 28863, "training_step_time": 0.12376117706298828 }, { "epoch": 4.404296875e-05, "model_forward_time": 0.025081634521484375, "step": 28864 }, { "epoch": 4.404296875e-05, "step": 28864, "training_step_time": 0.1279911994934082 }, { "epoch": 4.404449462890625e-05, "model_forward_time": 0.024982213973999023, "step": 28865 }, { "epoch": 4.404449462890625e-05, "step": 28865, "training_step_time": 0.12434577941894531 }, { "epoch": 4.40460205078125e-05, "model_forward_time": 0.02506852149963379, "step": 28866 }, { "epoch": 4.40460205078125e-05, "step": 28866, "training_step_time": 0.12430357933044434 }, { "epoch": 4.404754638671875e-05, "model_forward_time": 0.025437116622924805, "step": 28867 }, { "epoch": 4.404754638671875e-05, "step": 28867, "training_step_time": 0.12047362327575684 }, { "epoch": 4.4049072265625e-05, "model_forward_time": 0.025087356567382812, "step": 28868 }, { "epoch": 4.4049072265625e-05, "step": 28868, "training_step_time": 0.11482882499694824 }, { "epoch": 4.405059814453125e-05, "model_forward_time": 0.02448296546936035, "step": 28869 }, { "epoch": 4.405059814453125e-05, "step": 28869, "training_step_time": 0.11351704597473145 }, { "epoch": 4.40521240234375e-05, "grad_norm": 0.05161585658788681, "learning_rate": 3.873872495361697e-07, "loss": 0.0046, "step": 28870 }, { "epoch": 4.40521240234375e-05, "model_forward_time": 0.024175167083740234, "step": 28870 }, { "epoch": 4.40521240234375e-05, "step": 28870, "training_step_time": 0.11364078521728516 }, { "epoch": 4.405364990234375e-05, "model_forward_time": 0.026335477828979492, "step": 28871 }, { "epoch": 4.405364990234375e-05, "step": 28871, "training_step_time": 0.11411714553833008 }, { "epoch": 4.405517578125e-05, "model_forward_time": 0.024860620498657227, "step": 28872 }, { "epoch": 4.405517578125e-05, "step": 28872, "training_step_time": 0.10787200927734375 }, { "epoch": 4.405670166015625e-05, "model_forward_time": 0.02548360824584961, "step": 28873 }, { "epoch": 4.405670166015625e-05, "step": 28873, "training_step_time": 0.11005330085754395 }, { "epoch": 4.40582275390625e-05, "model_forward_time": 0.025074005126953125, "step": 28874 }, { "epoch": 4.40582275390625e-05, "step": 28874, "training_step_time": 0.10732698440551758 }, { "epoch": 4.405975341796875e-05, "model_forward_time": 0.02527165412902832, "step": 28875 }, { "epoch": 4.405975341796875e-05, "step": 28875, "training_step_time": 0.19524073600769043 }, { "epoch": 4.4061279296875e-05, "model_forward_time": 0.024552583694458008, "step": 28876 }, { "epoch": 4.4061279296875e-05, "step": 28876, "training_step_time": 0.1189119815826416 }, { "epoch": 4.406280517578125e-05, "model_forward_time": 0.024407386779785156, "step": 28877 }, { "epoch": 4.406280517578125e-05, "step": 28877, "training_step_time": 0.12789463996887207 }, { "epoch": 4.40643310546875e-05, "model_forward_time": 0.02468085289001465, "step": 28878 }, { "epoch": 4.40643310546875e-05, "step": 28878, "training_step_time": 0.10458493232727051 }, { "epoch": 4.406585693359375e-05, "model_forward_time": 0.02455282211303711, "step": 28879 }, { "epoch": 4.406585693359375e-05, "step": 28879, "training_step_time": 0.18645238876342773 }, { "epoch": 4.40673828125e-05, "grad_norm": 0.04541734606027603, "learning_rate": 3.805698487988951e-07, "loss": 0.0099, "step": 28880 }, { "epoch": 4.40673828125e-05, "model_forward_time": 0.02471613883972168, "step": 28880 }, { "epoch": 4.40673828125e-05, "step": 28880, "training_step_time": 0.11664962768554688 }, { "epoch": 4.406890869140625e-05, "model_forward_time": 0.024853229522705078, "step": 28881 }, { "epoch": 4.406890869140625e-05, "step": 28881, "training_step_time": 0.11352849006652832 }, { "epoch": 4.40704345703125e-05, "model_forward_time": 0.02517557144165039, "step": 28882 }, { "epoch": 4.40704345703125e-05, "step": 28882, "training_step_time": 0.12951254844665527 }, { "epoch": 4.407196044921875e-05, "model_forward_time": 0.0255124568939209, "step": 28883 }, { "epoch": 4.407196044921875e-05, "step": 28883, "training_step_time": 0.1979384422302246 }, { "epoch": 4.4073486328125e-05, "model_forward_time": 0.02451944351196289, "step": 28884 }, { "epoch": 4.4073486328125e-05, "step": 28884, "training_step_time": 0.10195326805114746 }, { "epoch": 4.407501220703125e-05, "model_forward_time": 0.024811506271362305, "step": 28885 }, { "epoch": 4.407501220703125e-05, "step": 28885, "training_step_time": 0.10289311408996582 }, { "epoch": 4.40765380859375e-05, "model_forward_time": 0.025223255157470703, "step": 28886 }, { "epoch": 4.40765380859375e-05, "step": 28886, "training_step_time": 0.10558891296386719 }, { "epoch": 4.407806396484375e-05, "model_forward_time": 0.029429912567138672, "step": 28887 }, { "epoch": 4.407806396484375e-05, "step": 28887, "training_step_time": 0.11038517951965332 }, { "epoch": 4.407958984375e-05, "model_forward_time": 0.02462029457092285, "step": 28888 }, { "epoch": 4.407958984375e-05, "step": 28888, "training_step_time": 0.10375475883483887 }, { "epoch": 4.408111572265625e-05, "model_forward_time": 0.024369239807128906, "step": 28889 }, { "epoch": 4.408111572265625e-05, "step": 28889, "training_step_time": 0.1042327880859375 }, { "epoch": 4.40826416015625e-05, "grad_norm": 0.06463886052370071, "learning_rate": 3.738127403480507e-07, "loss": 0.0026, "step": 28890 }, { "epoch": 4.40826416015625e-05, "model_forward_time": 0.024164199829101562, "step": 28890 }, { "epoch": 4.40826416015625e-05, "step": 28890, "training_step_time": 0.10666346549987793 }, { "epoch": 4.408416748046875e-05, "model_forward_time": 0.02506232261657715, "step": 28891 }, { "epoch": 4.408416748046875e-05, "step": 28891, "training_step_time": 0.22899746894836426 }, { "epoch": 4.4085693359375e-05, "model_forward_time": 0.02410149574279785, "step": 28892 }, { "epoch": 4.4085693359375e-05, "step": 28892, "training_step_time": 0.20785951614379883 }, { "epoch": 4.408721923828125e-05, "model_forward_time": 0.024324893951416016, "step": 28893 }, { "epoch": 4.408721923828125e-05, "step": 28893, "training_step_time": 0.10620307922363281 }, { "epoch": 4.40887451171875e-05, "model_forward_time": 0.02455306053161621, "step": 28894 }, { "epoch": 4.40887451171875e-05, "step": 28894, "training_step_time": 0.10653328895568848 }, { "epoch": 4.409027099609375e-05, "model_forward_time": 0.02509617805480957, "step": 28895 }, { "epoch": 4.409027099609375e-05, "step": 28895, "training_step_time": 0.1209714412689209 }, { "epoch": 4.4091796875e-05, "model_forward_time": 0.025086402893066406, "step": 28896 }, { "epoch": 4.4091796875e-05, "step": 28896, "training_step_time": 0.10881519317626953 }, { "epoch": 4.409332275390625e-05, "model_forward_time": 0.025372743606567383, "step": 28897 }, { "epoch": 4.409332275390625e-05, "step": 28897, "training_step_time": 0.142564058303833 }, { "epoch": 4.40948486328125e-05, "model_forward_time": 0.024983882904052734, "step": 28898 }, { "epoch": 4.40948486328125e-05, "step": 28898, "training_step_time": 0.19273090362548828 }, { "epoch": 4.409637451171875e-05, "model_forward_time": 0.024141788482666016, "step": 28899 }, { "epoch": 4.409637451171875e-05, "step": 28899, "training_step_time": 0.1181938648223877 }, { "epoch": 4.4097900390625e-05, "grad_norm": 0.04314415156841278, "learning_rate": 3.6711593239417973e-07, "loss": 0.004, "step": 28900 }, { "epoch": 4.4097900390625e-05, "model_forward_time": 0.025745630264282227, "step": 28900 }, { "epoch": 4.4097900390625e-05, "step": 28900, "training_step_time": 0.10544061660766602 }, { "epoch": 4.409942626953125e-05, "model_forward_time": 0.028963327407836914, "step": 28901 }, { "epoch": 4.409942626953125e-05, "step": 28901, "training_step_time": 0.190568208694458 }, { "epoch": 4.41009521484375e-05, "model_forward_time": 0.0243074893951416, "step": 28902 }, { "epoch": 4.41009521484375e-05, "step": 28902, "training_step_time": 0.10541963577270508 }, { "epoch": 4.410247802734375e-05, "model_forward_time": 0.024118661880493164, "step": 28903 }, { "epoch": 4.410247802734375e-05, "step": 28903, "training_step_time": 0.10113000869750977 }, { "epoch": 4.410400390625e-05, "model_forward_time": 0.02498483657836914, "step": 28904 }, { "epoch": 4.410400390625e-05, "step": 28904, "training_step_time": 0.10480904579162598 }, { "epoch": 4.410552978515625e-05, "model_forward_time": 0.024924039840698242, "step": 28905 }, { "epoch": 4.410552978515625e-05, "step": 28905, "training_step_time": 0.10590624809265137 }, { "epoch": 4.41070556640625e-05, "model_forward_time": 0.025211095809936523, "step": 28906 }, { "epoch": 4.41070556640625e-05, "step": 28906, "training_step_time": 0.10602545738220215 }, { "epoch": 4.410858154296875e-05, "model_forward_time": 0.024839401245117188, "step": 28907 }, { "epoch": 4.410858154296875e-05, "step": 28907, "training_step_time": 0.10840249061584473 }, { "epoch": 4.4110107421875e-05, "model_forward_time": 0.02520442008972168, "step": 28908 }, { "epoch": 4.4110107421875e-05, "step": 28908, "training_step_time": 0.10515737533569336 }, { "epoch": 4.411163330078125e-05, "model_forward_time": 0.028593778610229492, "step": 28909 }, { "epoch": 4.411163330078125e-05, "step": 28909, "training_step_time": 0.10853242874145508 }, { "epoch": 4.41131591796875e-05, "grad_norm": 0.0470416285097599, "learning_rate": 3.604794330745176e-07, "loss": 0.004, "step": 28910 }, { "epoch": 4.41131591796875e-05, "model_forward_time": 0.0250546932220459, "step": 28910 }, { "epoch": 4.41131591796875e-05, "step": 28910, "training_step_time": 0.1045522689819336 }, { "epoch": 4.411468505859375e-05, "model_forward_time": 0.025251150131225586, "step": 28911 }, { "epoch": 4.411468505859375e-05, "step": 28911, "training_step_time": 0.10526728630065918 }, { "epoch": 4.41162109375e-05, "model_forward_time": 0.02504134178161621, "step": 28912 }, { "epoch": 4.41162109375e-05, "step": 28912, "training_step_time": 0.10443687438964844 }, { "epoch": 4.411773681640625e-05, "model_forward_time": 0.025326251983642578, "step": 28913 }, { "epoch": 4.411773681640625e-05, "step": 28913, "training_step_time": 0.10687613487243652 }, { "epoch": 4.41192626953125e-05, "model_forward_time": 0.025009870529174805, "step": 28914 }, { "epoch": 4.41192626953125e-05, "step": 28914, "training_step_time": 0.10468673706054688 }, { "epoch": 4.412078857421875e-05, "model_forward_time": 0.0255126953125, "step": 28915 }, { "epoch": 4.412078857421875e-05, "step": 28915, "training_step_time": 0.10990715026855469 }, { "epoch": 4.4122314453125e-05, "model_forward_time": 0.024985551834106445, "step": 28916 }, { "epoch": 4.4122314453125e-05, "step": 28916, "training_step_time": 0.1049659252166748 }, { "epoch": 4.412384033203125e-05, "model_forward_time": 0.02509021759033203, "step": 28917 }, { "epoch": 4.412384033203125e-05, "step": 28917, "training_step_time": 0.10686898231506348 }, { "epoch": 4.41253662109375e-05, "model_forward_time": 0.025774717330932617, "step": 28918 }, { "epoch": 4.41253662109375e-05, "step": 28918, "training_step_time": 0.10689449310302734 }, { "epoch": 4.412689208984375e-05, "model_forward_time": 0.025504589080810547, "step": 28919 }, { "epoch": 4.412689208984375e-05, "step": 28919, "training_step_time": 0.10934948921203613 }, { "epoch": 4.412841796875e-05, "grad_norm": 0.04599743336439133, "learning_rate": 3.5390325045304706e-07, "loss": 0.0025, "step": 28920 }, { "epoch": 4.412841796875e-05, "model_forward_time": 0.025136709213256836, "step": 28920 }, { "epoch": 4.412841796875e-05, "step": 28920, "training_step_time": 0.10533905029296875 }, { "epoch": 4.412994384765625e-05, "model_forward_time": 0.02535247802734375, "step": 28921 }, { "epoch": 4.412994384765625e-05, "step": 28921, "training_step_time": 0.10580635070800781 }, { "epoch": 4.41314697265625e-05, "model_forward_time": 0.025734424591064453, "step": 28922 }, { "epoch": 4.41314697265625e-05, "step": 28922, "training_step_time": 0.139634370803833 }, { "epoch": 4.413299560546875e-05, "model_forward_time": 0.025485992431640625, "step": 28923 }, { "epoch": 4.413299560546875e-05, "step": 28923, "training_step_time": 0.12592744827270508 }, { "epoch": 4.4134521484375e-05, "model_forward_time": 0.025511741638183594, "step": 28924 }, { "epoch": 4.4134521484375e-05, "step": 28924, "training_step_time": 0.10522317886352539 }, { "epoch": 4.413604736328125e-05, "model_forward_time": 0.025289535522460938, "step": 28925 }, { "epoch": 4.413604736328125e-05, "step": 28925, "training_step_time": 0.1590440273284912 }, { "epoch": 4.41375732421875e-05, "model_forward_time": 0.02474808692932129, "step": 28926 }, { "epoch": 4.41375732421875e-05, "step": 28926, "training_step_time": 0.15165472030639648 }, { "epoch": 4.413909912109375e-05, "model_forward_time": 0.02467823028564453, "step": 28927 }, { "epoch": 4.413909912109375e-05, "step": 28927, "training_step_time": 0.10590267181396484 }, { "epoch": 4.4140625e-05, "model_forward_time": 0.0252838134765625, "step": 28928 }, { "epoch": 4.4140625e-05, "step": 28928, "training_step_time": 0.12900424003601074 }, { "epoch": 4.414215087890625e-05, "model_forward_time": 0.027978897094726562, "step": 28929 }, { "epoch": 4.414215087890625e-05, "step": 28929, "training_step_time": 0.165266752243042 }, { "epoch": 4.41436767578125e-05, "grad_norm": 0.041539549827575684, "learning_rate": 3.4738739252045405e-07, "loss": 0.0026, "step": 28930 }, { "epoch": 4.41436767578125e-05, "model_forward_time": 0.024912357330322266, "step": 28930 }, { "epoch": 4.41436767578125e-05, "step": 28930, "training_step_time": 0.10999226570129395 }, { "epoch": 4.414520263671875e-05, "model_forward_time": 0.0251007080078125, "step": 28931 }, { "epoch": 4.414520263671875e-05, "step": 28931, "training_step_time": 0.10576152801513672 }, { "epoch": 4.4146728515625e-05, "model_forward_time": 0.02565479278564453, "step": 28932 }, { "epoch": 4.4146728515625e-05, "step": 28932, "training_step_time": 0.11045002937316895 }, { "epoch": 4.414825439453125e-05, "model_forward_time": 0.025359630584716797, "step": 28933 }, { "epoch": 4.414825439453125e-05, "step": 28933, "training_step_time": 0.10691070556640625 }, { "epoch": 4.41497802734375e-05, "model_forward_time": 0.02516770362854004, "step": 28934 }, { "epoch": 4.41497802734375e-05, "step": 28934, "training_step_time": 0.10630297660827637 }, { "epoch": 4.415130615234375e-05, "model_forward_time": 0.024569272994995117, "step": 28935 }, { "epoch": 4.415130615234375e-05, "step": 28935, "training_step_time": 0.10555052757263184 }, { "epoch": 4.415283203125e-05, "model_forward_time": 0.0256807804107666, "step": 28936 }, { "epoch": 4.415283203125e-05, "step": 28936, "training_step_time": 0.10944628715515137 }, { "epoch": 4.415435791015625e-05, "model_forward_time": 0.02529597282409668, "step": 28937 }, { "epoch": 4.415435791015625e-05, "step": 28937, "training_step_time": 0.1071007251739502 }, { "epoch": 4.41558837890625e-05, "model_forward_time": 0.026287078857421875, "step": 28938 }, { "epoch": 4.41558837890625e-05, "step": 28938, "training_step_time": 0.16593360900878906 }, { "epoch": 4.415740966796875e-05, "model_forward_time": 0.02499699592590332, "step": 28939 }, { "epoch": 4.415740966796875e-05, "step": 28939, "training_step_time": 0.2112419605255127 }, { "epoch": 4.4158935546875e-05, "grad_norm": 0.22400256991386414, "learning_rate": 3.4093186719411085e-07, "loss": 0.0055, "step": 28940 }, { "epoch": 4.4158935546875e-05, "model_forward_time": 0.024495363235473633, "step": 28940 }, { "epoch": 4.4158935546875e-05, "step": 28940, "training_step_time": 0.20941877365112305 }, { "epoch": 4.416046142578125e-05, "model_forward_time": 0.024332046508789062, "step": 28941 }, { "epoch": 4.416046142578125e-05, "step": 28941, "training_step_time": 0.1098325252532959 }, { "epoch": 4.41619873046875e-05, "model_forward_time": 0.024298906326293945, "step": 28942 }, { "epoch": 4.41619873046875e-05, "step": 28942, "training_step_time": 0.11886477470397949 }, { "epoch": 4.416351318359375e-05, "model_forward_time": 0.025374174118041992, "step": 28943 }, { "epoch": 4.416351318359375e-05, "step": 28943, "training_step_time": 0.13247227668762207 }, { "epoch": 4.41650390625e-05, "model_forward_time": 0.025155305862426758, "step": 28944 }, { "epoch": 4.41650390625e-05, "step": 28944, "training_step_time": 0.10594677925109863 }, { "epoch": 4.416656494140625e-05, "model_forward_time": 0.02543807029724121, "step": 28945 }, { "epoch": 4.416656494140625e-05, "step": 28945, "training_step_time": 0.11469078063964844 }, { "epoch": 4.41680908203125e-05, "model_forward_time": 0.025419950485229492, "step": 28946 }, { "epoch": 4.41680908203125e-05, "step": 28946, "training_step_time": 0.10903024673461914 }, { "epoch": 4.416961669921875e-05, "model_forward_time": 0.02522730827331543, "step": 28947 }, { "epoch": 4.416961669921875e-05, "step": 28947, "training_step_time": 0.10511589050292969 }, { "epoch": 4.4171142578125e-05, "model_forward_time": 0.025255203247070312, "step": 28948 }, { "epoch": 4.4171142578125e-05, "step": 28948, "training_step_time": 0.1908271312713623 }, { "epoch": 4.417266845703125e-05, "model_forward_time": 0.024558067321777344, "step": 28949 }, { "epoch": 4.417266845703125e-05, "step": 28949, "training_step_time": 0.10468149185180664 }, { "epoch": 4.41741943359375e-05, "grad_norm": 0.02406979538500309, "learning_rate": 3.3453668231809286e-07, "loss": 0.0061, "step": 28950 }, { "epoch": 4.41741943359375e-05, "model_forward_time": 0.02489471435546875, "step": 28950 }, { "epoch": 4.41741943359375e-05, "step": 28950, "training_step_time": 0.1136469841003418 }, { "epoch": 4.417572021484375e-05, "model_forward_time": 0.025480031967163086, "step": 28951 }, { "epoch": 4.417572021484375e-05, "step": 28951, "training_step_time": 0.10869717597961426 }, { "epoch": 4.417724609375e-05, "model_forward_time": 0.02547454833984375, "step": 28952 }, { "epoch": 4.417724609375e-05, "step": 28952, "training_step_time": 0.10746073722839355 }, { "epoch": 4.417877197265625e-05, "model_forward_time": 0.02521228790283203, "step": 28953 }, { "epoch": 4.417877197265625e-05, "step": 28953, "training_step_time": 0.10619950294494629 }, { "epoch": 4.41802978515625e-05, "model_forward_time": 0.02527165412902832, "step": 28954 }, { "epoch": 4.41802978515625e-05, "step": 28954, "training_step_time": 0.10786700248718262 }, { "epoch": 4.418182373046875e-05, "model_forward_time": 0.02542400360107422, "step": 28955 }, { "epoch": 4.418182373046875e-05, "step": 28955, "training_step_time": 0.10452723503112793 }, { "epoch": 4.4183349609375e-05, "model_forward_time": 0.025342226028442383, "step": 28956 }, { "epoch": 4.4183349609375e-05, "step": 28956, "training_step_time": 0.10795402526855469 }, { "epoch": 4.418487548828125e-05, "model_forward_time": 0.025736093521118164, "step": 28957 }, { "epoch": 4.418487548828125e-05, "step": 28957, "training_step_time": 0.10680675506591797 }, { "epoch": 4.41864013671875e-05, "model_forward_time": 0.025098562240600586, "step": 28958 }, { "epoch": 4.41864013671875e-05, "step": 28958, "training_step_time": 0.10611391067504883 }, { "epoch": 4.418792724609375e-05, "model_forward_time": 0.02525615692138672, "step": 28959 }, { "epoch": 4.418792724609375e-05, "step": 28959, "training_step_time": 0.1047670841217041 }, { "epoch": 4.4189453125e-05, "grad_norm": 0.036531899124383926, "learning_rate": 3.2820184566315084e-07, "loss": 0.0049, "step": 28960 }, { "epoch": 4.4189453125e-05, "model_forward_time": 0.025341272354125977, "step": 28960 }, { "epoch": 4.4189453125e-05, "step": 28960, "training_step_time": 0.10561847686767578 }, { "epoch": 4.419097900390625e-05, "model_forward_time": 0.025133132934570312, "step": 28961 }, { "epoch": 4.419097900390625e-05, "step": 28961, "training_step_time": 0.10742950439453125 }, { "epoch": 4.41925048828125e-05, "model_forward_time": 0.025563478469848633, "step": 28962 }, { "epoch": 4.41925048828125e-05, "step": 28962, "training_step_time": 0.13123226165771484 }, { "epoch": 4.419403076171875e-05, "model_forward_time": 0.02568817138671875, "step": 28963 }, { "epoch": 4.419403076171875e-05, "step": 28963, "training_step_time": 0.12539291381835938 }, { "epoch": 4.4195556640625e-05, "model_forward_time": 0.024170637130737305, "step": 28964 }, { "epoch": 4.4195556640625e-05, "step": 28964, "training_step_time": 0.13669657707214355 }, { "epoch": 4.419708251953125e-05, "model_forward_time": 0.023932933807373047, "step": 28965 }, { "epoch": 4.419708251953125e-05, "step": 28965, "training_step_time": 0.1270308494567871 }, { "epoch": 4.41986083984375e-05, "model_forward_time": 0.023763179779052734, "step": 28966 }, { "epoch": 4.41986083984375e-05, "step": 28966, "training_step_time": 0.11907172203063965 }, { "epoch": 4.420013427734375e-05, "model_forward_time": 0.024312734603881836, "step": 28967 }, { "epoch": 4.420013427734375e-05, "step": 28967, "training_step_time": 0.13876867294311523 }, { "epoch": 4.420166015625e-05, "model_forward_time": 0.02509284019470215, "step": 28968 }, { "epoch": 4.420166015625e-05, "step": 28968, "training_step_time": 0.1332089900970459 }, { "epoch": 4.420318603515625e-05, "model_forward_time": 0.02444624900817871, "step": 28969 }, { "epoch": 4.420318603515625e-05, "step": 28969, "training_step_time": 0.10707497596740723 }, { "epoch": 4.42047119140625e-05, "grad_norm": 0.03241880238056183, "learning_rate": 3.219273649267163e-07, "loss": 0.0043, "step": 28970 }, { "epoch": 4.42047119140625e-05, "model_forward_time": 0.02509760856628418, "step": 28970 }, { "epoch": 4.42047119140625e-05, "step": 28970, "training_step_time": 0.14246392250061035 }, { "epoch": 4.420623779296875e-05, "model_forward_time": 0.025603294372558594, "step": 28971 }, { "epoch": 4.420623779296875e-05, "step": 28971, "training_step_time": 0.15750336647033691 }, { "epoch": 4.4207763671875e-05, "model_forward_time": 0.025864839553833008, "step": 28972 }, { "epoch": 4.4207763671875e-05, "step": 28972, "training_step_time": 0.10809588432312012 }, { "epoch": 4.420928955078125e-05, "model_forward_time": 0.02497100830078125, "step": 28973 }, { "epoch": 4.420928955078125e-05, "step": 28973, "training_step_time": 0.12720489501953125 }, { "epoch": 4.42108154296875e-05, "model_forward_time": 0.02567887306213379, "step": 28974 }, { "epoch": 4.42108154296875e-05, "step": 28974, "training_step_time": 0.17195773124694824 }, { "epoch": 4.421234130859375e-05, "model_forward_time": 0.024714231491088867, "step": 28975 }, { "epoch": 4.421234130859375e-05, "step": 28975, "training_step_time": 0.10414695739746094 }, { "epoch": 4.42138671875e-05, "model_forward_time": 0.024482250213623047, "step": 28976 }, { "epoch": 4.42138671875e-05, "step": 28976, "training_step_time": 0.10502767562866211 }, { "epoch": 4.421539306640625e-05, "model_forward_time": 0.025331735610961914, "step": 28977 }, { "epoch": 4.421539306640625e-05, "step": 28977, "training_step_time": 0.10675430297851562 }, { "epoch": 4.42169189453125e-05, "model_forward_time": 0.024800539016723633, "step": 28978 }, { "epoch": 4.42169189453125e-05, "step": 28978, "training_step_time": 0.1061248779296875 }, { "epoch": 4.421844482421875e-05, "model_forward_time": 0.02580571174621582, "step": 28979 }, { "epoch": 4.421844482421875e-05, "step": 28979, "training_step_time": 0.10585165023803711 }, { "epoch": 4.4219970703125e-05, "grad_norm": 0.03276151791214943, "learning_rate": 3.157132477328628e-07, "loss": 0.0028, "step": 28980 }, { "epoch": 4.4219970703125e-05, "model_forward_time": 0.025708675384521484, "step": 28980 }, { "epoch": 4.4219970703125e-05, "step": 28980, "training_step_time": 0.10717535018920898 }, { "epoch": 4.422149658203125e-05, "model_forward_time": 0.025322437286376953, "step": 28981 }, { "epoch": 4.422149658203125e-05, "step": 28981, "training_step_time": 0.10813021659851074 }, { "epoch": 4.42230224609375e-05, "model_forward_time": 0.025374650955200195, "step": 28982 }, { "epoch": 4.42230224609375e-05, "step": 28982, "training_step_time": 0.10534453392028809 }, { "epoch": 4.422454833984375e-05, "model_forward_time": 0.025620222091674805, "step": 28983 }, { "epoch": 4.422454833984375e-05, "step": 28983, "training_step_time": 0.171766996383667 }, { "epoch": 4.422607421875e-05, "model_forward_time": 0.02486705780029297, "step": 28984 }, { "epoch": 4.422607421875e-05, "step": 28984, "training_step_time": 0.12384343147277832 }, { "epoch": 4.422760009765625e-05, "model_forward_time": 0.025088071823120117, "step": 28985 }, { "epoch": 4.422760009765625e-05, "step": 28985, "training_step_time": 0.11200428009033203 }, { "epoch": 4.42291259765625e-05, "model_forward_time": 0.025839567184448242, "step": 28986 }, { "epoch": 4.42291259765625e-05, "step": 28986, "training_step_time": 0.12949275970458984 }, { "epoch": 4.423065185546875e-05, "model_forward_time": 0.025454282760620117, "step": 28987 }, { "epoch": 4.423065185546875e-05, "step": 28987, "training_step_time": 0.12455344200134277 }, { "epoch": 4.4232177734375e-05, "model_forward_time": 0.025292396545410156, "step": 28988 }, { "epoch": 4.4232177734375e-05, "step": 28988, "training_step_time": 0.19358348846435547 }, { "epoch": 4.423370361328125e-05, "model_forward_time": 0.024565458297729492, "step": 28989 }, { "epoch": 4.423370361328125e-05, "step": 28989, "training_step_time": 0.1805591583251953 }, { "epoch": 4.42352294921875e-05, "grad_norm": 0.034987274557352066, "learning_rate": 3.095595016323394e-07, "loss": 0.003, "step": 28990 }, { "epoch": 4.42352294921875e-05, "model_forward_time": 0.024019241333007812, "step": 28990 }, { "epoch": 4.42352294921875e-05, "step": 28990, "training_step_time": 0.1577620506286621 }, { "epoch": 4.423675537109375e-05, "model_forward_time": 0.023499011993408203, "step": 28991 }, { "epoch": 4.423675537109375e-05, "step": 28991, "training_step_time": 0.12829184532165527 }, { "epoch": 4.423828125e-05, "model_forward_time": 0.024863243103027344, "step": 28992 }, { "epoch": 4.423828125e-05, "step": 28992, "training_step_time": 0.10584640502929688 }, { "epoch": 4.423980712890625e-05, "model_forward_time": 0.025820255279541016, "step": 28993 }, { "epoch": 4.423980712890625e-05, "step": 28993, "training_step_time": 0.1330702304840088 }, { "epoch": 4.42413330078125e-05, "model_forward_time": 0.025403738021850586, "step": 28994 }, { "epoch": 4.42413330078125e-05, "step": 28994, "training_step_time": 0.1217191219329834 }, { "epoch": 4.424285888671875e-05, "model_forward_time": 0.02496170997619629, "step": 28995 }, { "epoch": 4.424285888671875e-05, "step": 28995, "training_step_time": 0.10331368446350098 }, { "epoch": 4.4244384765625e-05, "model_forward_time": 0.025321245193481445, "step": 28996 }, { "epoch": 4.4244384765625e-05, "step": 28996, "training_step_time": 0.10410070419311523 }, { "epoch": 4.424591064453125e-05, "model_forward_time": 0.0251920223236084, "step": 28997 }, { "epoch": 4.424591064453125e-05, "step": 28997, "training_step_time": 0.10428667068481445 }, { "epoch": 4.42474365234375e-05, "model_forward_time": 0.0249941349029541, "step": 28998 }, { "epoch": 4.42474365234375e-05, "step": 28998, "training_step_time": 0.10760021209716797 }, { "epoch": 4.424896240234375e-05, "model_forward_time": 0.025174856185913086, "step": 28999 }, { "epoch": 4.424896240234375e-05, "step": 28999, "training_step_time": 0.10345959663391113 }, { "epoch": 4.425048828125e-05, "grad_norm": 0.10391082614660263, "learning_rate": 3.034661341025258e-07, "loss": 0.0037, "step": 29000 }, { "epoch": 4.425048828125e-05, "model_forward_time": 0.024109601974487305, "step": 29000 }, { "epoch": 4.425048828125e-05, "step": 29000, "training_step_time": 0.09999608993530273 }, { "epoch": 4.425201416015625e-05, "model_forward_time": 0.023409366607666016, "step": 29001 }, { "epoch": 4.425201416015625e-05, "step": 29001, "training_step_time": 0.10257744789123535 }, { "epoch": 4.42535400390625e-05, "model_forward_time": 0.02407670021057129, "step": 29002 }, { "epoch": 4.42535400390625e-05, "step": 29002, "training_step_time": 0.10204052925109863 }, { "epoch": 4.425506591796875e-05, "model_forward_time": 0.025092124938964844, "step": 29003 }, { "epoch": 4.425506591796875e-05, "step": 29003, "training_step_time": 0.10475707054138184 }, { "epoch": 4.4256591796875e-05, "model_forward_time": 0.024931907653808594, "step": 29004 }, { "epoch": 4.4256591796875e-05, "step": 29004, "training_step_time": 0.10244345664978027 }, { "epoch": 4.425811767578125e-05, "model_forward_time": 0.025270700454711914, "step": 29005 }, { "epoch": 4.425811767578125e-05, "step": 29005, "training_step_time": 0.10306048393249512 }, { "epoch": 4.42596435546875e-05, "model_forward_time": 0.024897336959838867, "step": 29006 }, { "epoch": 4.42596435546875e-05, "step": 29006, "training_step_time": 0.1074533462524414 }, { "epoch": 4.426116943359375e-05, "model_forward_time": 0.024905920028686523, "step": 29007 }, { "epoch": 4.426116943359375e-05, "step": 29007, "training_step_time": 0.10587167739868164 }, { "epoch": 4.42626953125e-05, "model_forward_time": 0.02533888816833496, "step": 29008 }, { "epoch": 4.42626953125e-05, "step": 29008, "training_step_time": 0.1386117935180664 }, { "epoch": 4.426422119140625e-05, "model_forward_time": 0.02637505531311035, "step": 29009 }, { "epoch": 4.426422119140625e-05, "step": 29009, "training_step_time": 0.1584486961364746 }, { "epoch": 4.42657470703125e-05, "grad_norm": 0.04666359722614288, "learning_rate": 2.9743315254743833e-07, "loss": 0.0032, "step": 29010 }, { "epoch": 4.42657470703125e-05, "model_forward_time": 0.02333974838256836, "step": 29010 }, { "epoch": 4.42657470703125e-05, "step": 29010, "training_step_time": 0.14909768104553223 }, { "epoch": 4.426727294921875e-05, "model_forward_time": 0.02363729476928711, "step": 29011 }, { "epoch": 4.426727294921875e-05, "step": 29011, "training_step_time": 0.13351821899414062 }, { "epoch": 4.4268798828125e-05, "model_forward_time": 0.023451805114746094, "step": 29012 }, { "epoch": 4.4268798828125e-05, "step": 29012, "training_step_time": 0.10564279556274414 }, { "epoch": 4.427032470703125e-05, "model_forward_time": 0.024298906326293945, "step": 29013 }, { "epoch": 4.427032470703125e-05, "step": 29013, "training_step_time": 0.14726567268371582 }, { "epoch": 4.42718505859375e-05, "model_forward_time": 0.024817943572998047, "step": 29014 }, { "epoch": 4.42718505859375e-05, "step": 29014, "training_step_time": 0.1225738525390625 }, { "epoch": 4.427337646484375e-05, "model_forward_time": 0.024453163146972656, "step": 29015 }, { "epoch": 4.427337646484375e-05, "step": 29015, "training_step_time": 0.12597870826721191 }, { "epoch": 4.427490234375e-05, "model_forward_time": 0.025204181671142578, "step": 29016 }, { "epoch": 4.427490234375e-05, "step": 29016, "training_step_time": 0.10244345664978027 }, { "epoch": 4.427642822265625e-05, "model_forward_time": 0.02431488037109375, "step": 29017 }, { "epoch": 4.427642822265625e-05, "step": 29017, "training_step_time": 0.20625066757202148 }, { "epoch": 4.42779541015625e-05, "model_forward_time": 0.023919343948364258, "step": 29018 }, { "epoch": 4.42779541015625e-05, "step": 29018, "training_step_time": 0.14628887176513672 }, { "epoch": 4.427947998046875e-05, "model_forward_time": 0.02465653419494629, "step": 29019 }, { "epoch": 4.427947998046875e-05, "step": 29019, "training_step_time": 0.11667037010192871 }, { "epoch": 4.4281005859375e-05, "grad_norm": 0.05145147815346718, "learning_rate": 2.9146056429771305e-07, "loss": 0.0051, "step": 29020 }, { "epoch": 4.4281005859375e-05, "model_forward_time": 0.02472853660583496, "step": 29020 }, { "epoch": 4.4281005859375e-05, "step": 29020, "training_step_time": 0.10970616340637207 }, { "epoch": 4.428253173828125e-05, "model_forward_time": 0.025367021560668945, "step": 29021 }, { "epoch": 4.428253173828125e-05, "step": 29021, "training_step_time": 0.12661290168762207 }, { "epoch": 4.42840576171875e-05, "model_forward_time": 0.024908065795898438, "step": 29022 }, { "epoch": 4.42840576171875e-05, "step": 29022, "training_step_time": 0.1276712417602539 }, { "epoch": 4.428558349609375e-05, "model_forward_time": 0.025496721267700195, "step": 29023 }, { "epoch": 4.428558349609375e-05, "step": 29023, "training_step_time": 0.10353803634643555 }, { "epoch": 4.4287109375e-05, "model_forward_time": 0.025337934494018555, "step": 29024 }, { "epoch": 4.4287109375e-05, "step": 29024, "training_step_time": 0.10413122177124023 }, { "epoch": 4.428863525390625e-05, "model_forward_time": 0.02543926239013672, "step": 29025 }, { "epoch": 4.428863525390625e-05, "step": 29025, "training_step_time": 0.1066279411315918 }, { "epoch": 4.42901611328125e-05, "model_forward_time": 0.02537250518798828, "step": 29026 }, { "epoch": 4.42901611328125e-05, "step": 29026, "training_step_time": 0.10367727279663086 }, { "epoch": 4.429168701171875e-05, "model_forward_time": 0.02523183822631836, "step": 29027 }, { "epoch": 4.429168701171875e-05, "step": 29027, "training_step_time": 0.10351181030273438 }, { "epoch": 4.4293212890625e-05, "model_forward_time": 0.025522947311401367, "step": 29028 }, { "epoch": 4.4293212890625e-05, "step": 29028, "training_step_time": 0.1075747013092041 }, { "epoch": 4.429473876953125e-05, "model_forward_time": 0.025315284729003906, "step": 29029 }, { "epoch": 4.429473876953125e-05, "step": 29029, "training_step_time": 0.1068105697631836 }, { "epoch": 4.42962646484375e-05, "grad_norm": 0.0309439804404974, "learning_rate": 2.85548376610606e-07, "loss": 0.0053, "step": 29030 }, { "epoch": 4.42962646484375e-05, "model_forward_time": 0.02505350112915039, "step": 29030 }, { "epoch": 4.42962646484375e-05, "step": 29030, "training_step_time": 0.10500693321228027 }, { "epoch": 4.429779052734375e-05, "model_forward_time": 0.025599241256713867, "step": 29031 }, { "epoch": 4.429779052734375e-05, "step": 29031, "training_step_time": 0.10443305969238281 }, { "epoch": 4.429931640625e-05, "model_forward_time": 0.025168895721435547, "step": 29032 }, { "epoch": 4.429931640625e-05, "step": 29032, "training_step_time": 0.12389183044433594 }, { "epoch": 4.430084228515625e-05, "model_forward_time": 0.02502894401550293, "step": 29033 }, { "epoch": 4.430084228515625e-05, "step": 29033, "training_step_time": 0.11312389373779297 }, { "epoch": 4.43023681640625e-05, "model_forward_time": 0.025398731231689453, "step": 29034 }, { "epoch": 4.43023681640625e-05, "step": 29034, "training_step_time": 0.12799954414367676 }, { "epoch": 4.430389404296875e-05, "model_forward_time": 0.025562763214111328, "step": 29035 }, { "epoch": 4.430389404296875e-05, "step": 29035, "training_step_time": 0.12442755699157715 }, { "epoch": 4.4305419921875e-05, "model_forward_time": 0.025344371795654297, "step": 29036 }, { "epoch": 4.4305419921875e-05, "step": 29036, "training_step_time": 0.20122218132019043 }, { "epoch": 4.430694580078125e-05, "model_forward_time": 0.024864673614501953, "step": 29037 }, { "epoch": 4.430694580078125e-05, "step": 29037, "training_step_time": 0.18426012992858887 }, { "epoch": 4.43084716796875e-05, "model_forward_time": 0.024628400802612305, "step": 29038 }, { "epoch": 4.43084716796875e-05, "step": 29038, "training_step_time": 0.10523366928100586 }, { "epoch": 4.430999755859375e-05, "model_forward_time": 0.02457404136657715, "step": 29039 }, { "epoch": 4.430999755859375e-05, "step": 29039, "training_step_time": 0.12404322624206543 }, { "epoch": 4.43115234375e-05, "grad_norm": 0.22773705422878265, "learning_rate": 2.796965966699927e-07, "loss": 0.0053, "step": 29040 }, { "epoch": 4.43115234375e-05, "model_forward_time": 0.025617599487304688, "step": 29040 }, { "epoch": 4.43115234375e-05, "step": 29040, "training_step_time": 0.22382831573486328 }, { "epoch": 4.431304931640625e-05, "model_forward_time": 0.02447342872619629, "step": 29041 }, { "epoch": 4.431304931640625e-05, "step": 29041, "training_step_time": 0.10941147804260254 }, { "epoch": 4.43145751953125e-05, "model_forward_time": 0.024534940719604492, "step": 29042 }, { "epoch": 4.43145751953125e-05, "step": 29042, "training_step_time": 0.10349750518798828 }, { "epoch": 4.431610107421875e-05, "model_forward_time": 0.025150299072265625, "step": 29043 }, { "epoch": 4.431610107421875e-05, "step": 29043, "training_step_time": 0.10471701622009277 }, { "epoch": 4.4317626953125e-05, "model_forward_time": 0.024787187576293945, "step": 29044 }, { "epoch": 4.4317626953125e-05, "step": 29044, "training_step_time": 0.10744404792785645 }, { "epoch": 4.431915283203125e-05, "model_forward_time": 0.025259733200073242, "step": 29045 }, { "epoch": 4.431915283203125e-05, "step": 29045, "training_step_time": 0.11159062385559082 }, { "epoch": 4.43206787109375e-05, "model_forward_time": 0.02506279945373535, "step": 29046 }, { "epoch": 4.43206787109375e-05, "step": 29046, "training_step_time": 0.1042473316192627 }, { "epoch": 4.432220458984375e-05, "model_forward_time": 0.025090456008911133, "step": 29047 }, { "epoch": 4.432220458984375e-05, "step": 29047, "training_step_time": 0.10808777809143066 }, { "epoch": 4.432373046875e-05, "model_forward_time": 0.025212764739990234, "step": 29048 }, { "epoch": 4.432373046875e-05, "step": 29048, "training_step_time": 0.12201237678527832 }, { "epoch": 4.432525634765625e-05, "model_forward_time": 0.02524542808532715, "step": 29049 }, { "epoch": 4.432525634765625e-05, "step": 29049, "training_step_time": 0.12130022048950195 }, { "epoch": 4.43267822265625e-05, "grad_norm": 0.023993385955691338, "learning_rate": 2.7390523158633554e-07, "loss": 0.0039, "step": 29050 }, { "epoch": 4.43267822265625e-05, "model_forward_time": 0.02635478973388672, "step": 29050 }, { "epoch": 4.43267822265625e-05, "step": 29050, "training_step_time": 0.11919927597045898 }, { "epoch": 4.432830810546875e-05, "model_forward_time": 0.025228261947631836, "step": 29051 }, { "epoch": 4.432830810546875e-05, "step": 29051, "training_step_time": 0.12278008460998535 }, { "epoch": 4.4329833984375e-05, "model_forward_time": 0.025281429290771484, "step": 29052 }, { "epoch": 4.4329833984375e-05, "step": 29052, "training_step_time": 0.1181035041809082 }, { "epoch": 4.433135986328125e-05, "model_forward_time": 0.025684118270874023, "step": 29053 }, { "epoch": 4.433135986328125e-05, "step": 29053, "training_step_time": 0.11938238143920898 }, { "epoch": 4.43328857421875e-05, "model_forward_time": 0.026683568954467773, "step": 29054 }, { "epoch": 4.43328857421875e-05, "step": 29054, "training_step_time": 0.11470770835876465 }, { "epoch": 4.433441162109375e-05, "model_forward_time": 0.025034427642822266, "step": 29055 }, { "epoch": 4.433441162109375e-05, "step": 29055, "training_step_time": 0.10893440246582031 }, { "epoch": 4.43359375e-05, "model_forward_time": 0.025298118591308594, "step": 29056 }, { "epoch": 4.43359375e-05, "step": 29056, "training_step_time": 0.11025643348693848 }, { "epoch": 4.433746337890625e-05, "model_forward_time": 0.025495290756225586, "step": 29057 }, { "epoch": 4.433746337890625e-05, "step": 29057, "training_step_time": 0.10738062858581543 }, { "epoch": 4.43389892578125e-05, "model_forward_time": 0.02548050880432129, "step": 29058 }, { "epoch": 4.43389892578125e-05, "step": 29058, "training_step_time": 0.18467330932617188 }, { "epoch": 4.434051513671875e-05, "model_forward_time": 0.02475261688232422, "step": 29059 }, { "epoch": 4.434051513671875e-05, "step": 29059, "training_step_time": 0.11485576629638672 }, { "epoch": 4.4342041015625e-05, "grad_norm": 0.3404434323310852, "learning_rate": 2.6817428839668315e-07, "loss": 0.0048, "step": 29060 }, { "epoch": 4.4342041015625e-05, "model_forward_time": 0.024493932723999023, "step": 29060 }, { "epoch": 4.4342041015625e-05, "step": 29060, "training_step_time": 0.12158823013305664 }, { "epoch": 4.434356689453125e-05, "model_forward_time": 0.025066614151000977, "step": 29061 }, { "epoch": 4.434356689453125e-05, "step": 29061, "training_step_time": 0.1555635929107666 }, { "epoch": 4.43450927734375e-05, "model_forward_time": 0.024604320526123047, "step": 29062 }, { "epoch": 4.43450927734375e-05, "step": 29062, "training_step_time": 0.1147608757019043 }, { "epoch": 4.434661865234375e-05, "model_forward_time": 0.02434372901916504, "step": 29063 }, { "epoch": 4.434661865234375e-05, "step": 29063, "training_step_time": 0.1413893699645996 }, { "epoch": 4.434814453125e-05, "model_forward_time": 0.024782180786132812, "step": 29064 }, { "epoch": 4.434814453125e-05, "step": 29064, "training_step_time": 0.1541309356689453 }, { "epoch": 4.434967041015625e-05, "model_forward_time": 0.02425861358642578, "step": 29065 }, { "epoch": 4.434967041015625e-05, "step": 29065, "training_step_time": 0.14547419548034668 }, { "epoch": 4.43511962890625e-05, "model_forward_time": 0.024445533752441406, "step": 29066 }, { "epoch": 4.43511962890625e-05, "step": 29066, "training_step_time": 0.13780736923217773 }, { "epoch": 4.435272216796875e-05, "model_forward_time": 0.024811267852783203, "step": 29067 }, { "epoch": 4.435272216796875e-05, "step": 29067, "training_step_time": 0.1019437313079834 }, { "epoch": 4.4354248046875e-05, "model_forward_time": 0.02548670768737793, "step": 29068 }, { "epoch": 4.4354248046875e-05, "step": 29068, "training_step_time": 0.11330342292785645 }, { "epoch": 4.435577392578125e-05, "model_forward_time": 0.02496194839477539, "step": 29069 }, { "epoch": 4.435577392578125e-05, "step": 29069, "training_step_time": 0.10464215278625488 }, { "epoch": 4.43572998046875e-05, "grad_norm": 0.08030667901039124, "learning_rate": 2.625037740646763e-07, "loss": 0.0042, "step": 29070 }, { "epoch": 4.43572998046875e-05, "model_forward_time": 0.02514338493347168, "step": 29070 }, { "epoch": 4.43572998046875e-05, "step": 29070, "training_step_time": 0.10356783866882324 }, { "epoch": 4.435882568359375e-05, "model_forward_time": 0.025549650192260742, "step": 29071 }, { "epoch": 4.435882568359375e-05, "step": 29071, "training_step_time": 0.10477137565612793 }, { "epoch": 4.43603515625e-05, "model_forward_time": 0.0253448486328125, "step": 29072 }, { "epoch": 4.43603515625e-05, "step": 29072, "training_step_time": 0.10596418380737305 }, { "epoch": 4.436187744140625e-05, "model_forward_time": 0.025366783142089844, "step": 29073 }, { "epoch": 4.436187744140625e-05, "step": 29073, "training_step_time": 0.10829806327819824 }, { "epoch": 4.43634033203125e-05, "model_forward_time": 0.024848222732543945, "step": 29074 }, { "epoch": 4.43634033203125e-05, "step": 29074, "training_step_time": 0.18905878067016602 }, { "epoch": 4.436492919921875e-05, "model_forward_time": 0.024756431579589844, "step": 29075 }, { "epoch": 4.436492919921875e-05, "step": 29075, "training_step_time": 0.20671701431274414 }, { "epoch": 4.4366455078125e-05, "model_forward_time": 0.02454662322998047, "step": 29076 }, { "epoch": 4.4366455078125e-05, "step": 29076, "training_step_time": 0.21623730659484863 }, { "epoch": 4.436798095703125e-05, "model_forward_time": 0.0242459774017334, "step": 29077 }, { "epoch": 4.436798095703125e-05, "step": 29077, "training_step_time": 0.21062755584716797 }, { "epoch": 4.43695068359375e-05, "model_forward_time": 0.024140357971191406, "step": 29078 }, { "epoch": 4.43695068359375e-05, "step": 29078, "training_step_time": 0.20864343643188477 }, { "epoch": 4.437103271484375e-05, "model_forward_time": 0.024688243865966797, "step": 29079 }, { "epoch": 4.437103271484375e-05, "step": 29079, "training_step_time": 0.176499605178833 }, { "epoch": 4.437255859375e-05, "grad_norm": 0.05033090338110924, "learning_rate": 2.568936954805201e-07, "loss": 0.0042, "step": 29080 }, { "epoch": 4.437255859375e-05, "model_forward_time": 0.024112462997436523, "step": 29080 }, { "epoch": 4.437255859375e-05, "step": 29080, "training_step_time": 0.16588997840881348 }, { "epoch": 4.437408447265625e-05, "model_forward_time": 0.02400946617126465, "step": 29081 }, { "epoch": 4.437408447265625e-05, "step": 29081, "training_step_time": 0.17953181266784668 }, { "epoch": 4.43756103515625e-05, "model_forward_time": 0.02435922622680664, "step": 29082 }, { "epoch": 4.43756103515625e-05, "step": 29082, "training_step_time": 0.13510823249816895 }, { "epoch": 4.437713623046875e-05, "model_forward_time": 0.024512529373168945, "step": 29083 }, { "epoch": 4.437713623046875e-05, "step": 29083, "training_step_time": 0.11664414405822754 }, { "epoch": 4.4378662109375e-05, "model_forward_time": 0.024792194366455078, "step": 29084 }, { "epoch": 4.4378662109375e-05, "step": 29084, "training_step_time": 0.10439538955688477 }, { "epoch": 4.438018798828125e-05, "model_forward_time": 0.024926185607910156, "step": 29085 }, { "epoch": 4.438018798828125e-05, "step": 29085, "training_step_time": 0.10226249694824219 }, { "epoch": 4.43817138671875e-05, "model_forward_time": 0.02522420883178711, "step": 29086 }, { "epoch": 4.43817138671875e-05, "step": 29086, "training_step_time": 0.10770225524902344 }, { "epoch": 4.438323974609375e-05, "model_forward_time": 0.025551557540893555, "step": 29087 }, { "epoch": 4.438323974609375e-05, "step": 29087, "training_step_time": 0.10296130180358887 }, { "epoch": 4.4384765625e-05, "model_forward_time": 0.0253450870513916, "step": 29088 }, { "epoch": 4.4384765625e-05, "step": 29088, "training_step_time": 0.10489773750305176 }, { "epoch": 4.438629150390625e-05, "model_forward_time": 0.02557659149169922, "step": 29089 }, { "epoch": 4.438629150390625e-05, "step": 29089, "training_step_time": 0.10745716094970703 }, { "epoch": 4.43878173828125e-05, "grad_norm": 0.03882180526852608, "learning_rate": 2.51344059460995e-07, "loss": 0.0029, "step": 29090 }, { "epoch": 4.43878173828125e-05, "model_forward_time": 0.025041580200195312, "step": 29090 }, { "epoch": 4.43878173828125e-05, "step": 29090, "training_step_time": 0.1091463565826416 }, { "epoch": 4.438934326171875e-05, "model_forward_time": 0.025408029556274414, "step": 29091 }, { "epoch": 4.438934326171875e-05, "step": 29091, "training_step_time": 0.10446882247924805 }, { "epoch": 4.4390869140625e-05, "model_forward_time": 0.02408742904663086, "step": 29092 }, { "epoch": 4.4390869140625e-05, "step": 29092, "training_step_time": 0.12284255027770996 }, { "epoch": 4.439239501953125e-05, "model_forward_time": 0.02529764175415039, "step": 29093 }, { "epoch": 4.439239501953125e-05, "step": 29093, "training_step_time": 0.13942790031433105 }, { "epoch": 4.43939208984375e-05, "model_forward_time": 0.02500295639038086, "step": 29094 }, { "epoch": 4.43939208984375e-05, "step": 29094, "training_step_time": 0.14726018905639648 }, { "epoch": 4.439544677734375e-05, "model_forward_time": 0.024327993392944336, "step": 29095 }, { "epoch": 4.439544677734375e-05, "step": 29095, "training_step_time": 0.13877534866333008 }, { "epoch": 4.439697265625e-05, "model_forward_time": 0.024378538131713867, "step": 29096 }, { "epoch": 4.439697265625e-05, "step": 29096, "training_step_time": 0.13673067092895508 }, { "epoch": 4.439849853515625e-05, "model_forward_time": 0.024726390838623047, "step": 29097 }, { "epoch": 4.439849853515625e-05, "step": 29097, "training_step_time": 0.12851285934448242 }, { "epoch": 4.44000244140625e-05, "model_forward_time": 0.024462223052978516, "step": 29098 }, { "epoch": 4.44000244140625e-05, "step": 29098, "training_step_time": 0.12666034698486328 }, { "epoch": 4.440155029296875e-05, "model_forward_time": 0.024619340896606445, "step": 29099 }, { "epoch": 4.440155029296875e-05, "step": 29099, "training_step_time": 0.22264647483825684 }, { "epoch": 4.4403076171875e-05, "grad_norm": 0.08160519599914551, "learning_rate": 2.458548727494292e-07, "loss": 0.0076, "step": 29100 }, { "epoch": 4.4403076171875e-05, "model_forward_time": 0.02480006217956543, "step": 29100 }, { "epoch": 4.4403076171875e-05, "step": 29100, "training_step_time": 0.1168978214263916 }, { "epoch": 4.440460205078125e-05, "model_forward_time": 0.025147676467895508, "step": 29101 }, { "epoch": 4.440460205078125e-05, "step": 29101, "training_step_time": 0.12009835243225098 }, { "epoch": 4.44061279296875e-05, "model_forward_time": 0.025388240814208984, "step": 29102 }, { "epoch": 4.44061279296875e-05, "step": 29102, "training_step_time": 0.1540079116821289 }, { "epoch": 4.440765380859375e-05, "model_forward_time": 0.025020122528076172, "step": 29103 }, { "epoch": 4.440765380859375e-05, "step": 29103, "training_step_time": 0.21747255325317383 }, { "epoch": 4.44091796875e-05, "model_forward_time": 0.024644136428833008, "step": 29104 }, { "epoch": 4.44091796875e-05, "step": 29104, "training_step_time": 0.17318153381347656 }, { "epoch": 4.441070556640625e-05, "model_forward_time": 0.023979902267456055, "step": 29105 }, { "epoch": 4.441070556640625e-05, "step": 29105, "training_step_time": 0.16699957847595215 }, { "epoch": 4.44122314453125e-05, "model_forward_time": 0.024883508682250977, "step": 29106 }, { "epoch": 4.44122314453125e-05, "step": 29106, "training_step_time": 0.1152486801147461 }, { "epoch": 4.441375732421875e-05, "model_forward_time": 0.02495098114013672, "step": 29107 }, { "epoch": 4.441375732421875e-05, "step": 29107, "training_step_time": 0.12934494018554688 }, { "epoch": 4.4415283203125e-05, "model_forward_time": 0.024727344512939453, "step": 29108 }, { "epoch": 4.4415283203125e-05, "step": 29108, "training_step_time": 0.19962835311889648 }, { "epoch": 4.441680908203125e-05, "model_forward_time": 0.02460193634033203, "step": 29109 }, { "epoch": 4.441680908203125e-05, "step": 29109, "training_step_time": 0.10294318199157715 }, { "epoch": 4.44183349609375e-05, "grad_norm": 0.04695097729563713, "learning_rate": 2.404261420157039e-07, "loss": 0.0026, "step": 29110 }, { "epoch": 4.44183349609375e-05, "model_forward_time": 0.02520442008972168, "step": 29110 }, { "epoch": 4.44183349609375e-05, "step": 29110, "training_step_time": 0.10407471656799316 }, { "epoch": 4.441986083984375e-05, "model_forward_time": 0.025218963623046875, "step": 29111 }, { "epoch": 4.441986083984375e-05, "step": 29111, "training_step_time": 0.10590052604675293 }, { "epoch": 4.442138671875e-05, "model_forward_time": 0.02543020248413086, "step": 29112 }, { "epoch": 4.442138671875e-05, "step": 29112, "training_step_time": 0.10853838920593262 }, { "epoch": 4.442291259765625e-05, "model_forward_time": 0.024783849716186523, "step": 29113 }, { "epoch": 4.442291259765625e-05, "step": 29113, "training_step_time": 0.10422873497009277 }, { "epoch": 4.44244384765625e-05, "model_forward_time": 0.025430917739868164, "step": 29114 }, { "epoch": 4.44244384765625e-05, "step": 29114, "training_step_time": 0.11363792419433594 }, { "epoch": 4.442596435546875e-05, "model_forward_time": 0.025423765182495117, "step": 29115 }, { "epoch": 4.442596435546875e-05, "step": 29115, "training_step_time": 0.10458850860595703 }, { "epoch": 4.4427490234375e-05, "model_forward_time": 0.025220394134521484, "step": 29116 }, { "epoch": 4.4427490234375e-05, "step": 29116, "training_step_time": 0.10871195793151855 }, { "epoch": 4.442901611328125e-05, "model_forward_time": 0.025431394577026367, "step": 29117 }, { "epoch": 4.442901611328125e-05, "step": 29117, "training_step_time": 0.10499715805053711 }, { "epoch": 4.44305419921875e-05, "model_forward_time": 0.02538156509399414, "step": 29118 }, { "epoch": 4.44305419921875e-05, "step": 29118, "training_step_time": 0.21206164360046387 }, { "epoch": 4.443206787109375e-05, "model_forward_time": 0.02494192123413086, "step": 29119 }, { "epoch": 4.443206787109375e-05, "step": 29119, "training_step_time": 0.22455859184265137 }, { "epoch": 4.443359375e-05, "grad_norm": 0.07692018151283264, "learning_rate": 2.3505787385623702e-07, "loss": 0.0041, "step": 29120 }, { "epoch": 4.443359375e-05, "model_forward_time": 0.024222373962402344, "step": 29120 }, { "epoch": 4.443359375e-05, "step": 29120, "training_step_time": 0.19524073600769043 }, { "epoch": 4.443511962890625e-05, "model_forward_time": 0.024816036224365234, "step": 29121 }, { "epoch": 4.443511962890625e-05, "step": 29121, "training_step_time": 0.15548396110534668 }, { "epoch": 4.44366455078125e-05, "model_forward_time": 0.02508068084716797, "step": 29122 }, { "epoch": 4.44366455078125e-05, "step": 29122, "training_step_time": 0.1475663185119629 }, { "epoch": 4.443817138671875e-05, "model_forward_time": 0.024451255798339844, "step": 29123 }, { "epoch": 4.443817138671875e-05, "step": 29123, "training_step_time": 0.11400413513183594 }, { "epoch": 4.4439697265625e-05, "model_forward_time": 0.024725675582885742, "step": 29124 }, { "epoch": 4.4439697265625e-05, "step": 29124, "training_step_time": 0.10689258575439453 }, { "epoch": 4.444122314453125e-05, "model_forward_time": 0.02590656280517578, "step": 29125 }, { "epoch": 4.444122314453125e-05, "step": 29125, "training_step_time": 0.13040876388549805 }, { "epoch": 4.44427490234375e-05, "model_forward_time": 0.02533555030822754, "step": 29126 }, { "epoch": 4.44427490234375e-05, "step": 29126, "training_step_time": 0.11995410919189453 }, { "epoch": 4.444427490234375e-05, "model_forward_time": 0.0250394344329834, "step": 29127 }, { "epoch": 4.444427490234375e-05, "step": 29127, "training_step_time": 0.1077110767364502 }, { "epoch": 4.444580078125e-05, "model_forward_time": 0.025464296340942383, "step": 29128 }, { "epoch": 4.444580078125e-05, "step": 29128, "training_step_time": 0.10610413551330566 }, { "epoch": 4.444732666015625e-05, "model_forward_time": 0.02468419075012207, "step": 29129 }, { "epoch": 4.444732666015625e-05, "step": 29129, "training_step_time": 0.10801529884338379 }, { "epoch": 4.44488525390625e-05, "grad_norm": 0.04510524496436119, "learning_rate": 2.2975007479397738e-07, "loss": 0.0021, "step": 29130 }, { "epoch": 4.44488525390625e-05, "model_forward_time": 0.025274276733398438, "step": 29130 }, { "epoch": 4.44488525390625e-05, "step": 29130, "training_step_time": 0.11323881149291992 }, { "epoch": 4.445037841796875e-05, "model_forward_time": 0.025043725967407227, "step": 29131 }, { "epoch": 4.445037841796875e-05, "step": 29131, "training_step_time": 0.10579228401184082 }, { "epoch": 4.4451904296875e-05, "model_forward_time": 0.025148868560791016, "step": 29132 }, { "epoch": 4.4451904296875e-05, "step": 29132, "training_step_time": 0.10499954223632812 }, { "epoch": 4.445343017578125e-05, "model_forward_time": 0.02529120445251465, "step": 29133 }, { "epoch": 4.445343017578125e-05, "step": 29133, "training_step_time": 0.10580039024353027 }, { "epoch": 4.44549560546875e-05, "model_forward_time": 0.025068044662475586, "step": 29134 }, { "epoch": 4.44549560546875e-05, "step": 29134, "training_step_time": 0.1089785099029541 }, { "epoch": 4.445648193359375e-05, "model_forward_time": 0.0252530574798584, "step": 29135 }, { "epoch": 4.445648193359375e-05, "step": 29135, "training_step_time": 0.10535717010498047 }, { "epoch": 4.44580078125e-05, "model_forward_time": 0.02528071403503418, "step": 29136 }, { "epoch": 4.44580078125e-05, "step": 29136, "training_step_time": 0.11004066467285156 }, { "epoch": 4.445953369140625e-05, "model_forward_time": 0.025387048721313477, "step": 29137 }, { "epoch": 4.445953369140625e-05, "step": 29137, "training_step_time": 0.10559248924255371 }, { "epoch": 4.44610595703125e-05, "model_forward_time": 0.02553248405456543, "step": 29138 }, { "epoch": 4.44610595703125e-05, "step": 29138, "training_step_time": 0.107025146484375 }, { "epoch": 4.446258544921875e-05, "model_forward_time": 0.025373458862304688, "step": 29139 }, { "epoch": 4.446258544921875e-05, "step": 29139, "training_step_time": 0.10643148422241211 }, { "epoch": 4.4464111328125e-05, "grad_norm": 0.07037770748138428, "learning_rate": 2.2450275127841036e-07, "loss": 0.0036, "step": 29140 }, { "epoch": 4.4464111328125e-05, "model_forward_time": 0.025453567504882812, "step": 29140 }, { "epoch": 4.4464111328125e-05, "step": 29140, "training_step_time": 0.10630440711975098 }, { "epoch": 4.446563720703125e-05, "model_forward_time": 0.025014877319335938, "step": 29141 }, { "epoch": 4.446563720703125e-05, "step": 29141, "training_step_time": 0.10526394844055176 }, { "epoch": 4.44671630859375e-05, "model_forward_time": 0.024930953979492188, "step": 29142 }, { "epoch": 4.44671630859375e-05, "step": 29142, "training_step_time": 0.10628724098205566 }, { "epoch": 4.446868896484375e-05, "model_forward_time": 0.02547430992126465, "step": 29143 }, { "epoch": 4.446868896484375e-05, "step": 29143, "training_step_time": 0.1433579921722412 }, { "epoch": 4.447021484375e-05, "model_forward_time": 0.025321245193481445, "step": 29144 }, { "epoch": 4.447021484375e-05, "step": 29144, "training_step_time": 0.11835265159606934 }, { "epoch": 4.447174072265625e-05, "model_forward_time": 0.02506279945373535, "step": 29145 }, { "epoch": 4.447174072265625e-05, "step": 29145, "training_step_time": 0.12581515312194824 }, { "epoch": 4.44732666015625e-05, "model_forward_time": 0.025551557540893555, "step": 29146 }, { "epoch": 4.44732666015625e-05, "step": 29146, "training_step_time": 0.15565061569213867 }, { "epoch": 4.447479248046875e-05, "model_forward_time": 0.02461409568786621, "step": 29147 }, { "epoch": 4.447479248046875e-05, "step": 29147, "training_step_time": 0.21603035926818848 }, { "epoch": 4.4476318359375e-05, "model_forward_time": 0.02523946762084961, "step": 29148 }, { "epoch": 4.4476318359375e-05, "step": 29148, "training_step_time": 0.11180758476257324 }, { "epoch": 4.447784423828125e-05, "model_forward_time": 0.024934768676757812, "step": 29149 }, { "epoch": 4.447784423828125e-05, "step": 29149, "training_step_time": 0.1334092617034912 }, { "epoch": 4.44793701171875e-05, "grad_norm": 0.04141829535365105, "learning_rate": 2.1931590968551908e-07, "loss": 0.0036, "step": 29150 }, { "epoch": 4.44793701171875e-05, "model_forward_time": 0.025337696075439453, "step": 29150 }, { "epoch": 4.44793701171875e-05, "step": 29150, "training_step_time": 0.10977888107299805 }, { "epoch": 4.448089599609375e-05, "model_forward_time": 0.02550983428955078, "step": 29151 }, { "epoch": 4.448089599609375e-05, "step": 29151, "training_step_time": 0.12189102172851562 }, { "epoch": 4.4482421875e-05, "model_forward_time": 0.02548670768737793, "step": 29152 }, { "epoch": 4.4482421875e-05, "step": 29152, "training_step_time": 0.12258553504943848 }, { "epoch": 4.448394775390625e-05, "model_forward_time": 0.025317668914794922, "step": 29153 }, { "epoch": 4.448394775390625e-05, "step": 29153, "training_step_time": 0.11857938766479492 }, { "epoch": 4.44854736328125e-05, "model_forward_time": 0.025322914123535156, "step": 29154 }, { "epoch": 4.44854736328125e-05, "step": 29154, "training_step_time": 0.18203043937683105 }, { "epoch": 4.448699951171875e-05, "model_forward_time": 0.024985074996948242, "step": 29155 }, { "epoch": 4.448699951171875e-05, "step": 29155, "training_step_time": 0.10677886009216309 }, { "epoch": 4.4488525390625e-05, "model_forward_time": 0.025141477584838867, "step": 29156 }, { "epoch": 4.4488525390625e-05, "step": 29156, "training_step_time": 0.10850119590759277 }, { "epoch": 4.449005126953125e-05, "model_forward_time": 0.025155305862426758, "step": 29157 }, { "epoch": 4.449005126953125e-05, "step": 29157, "training_step_time": 0.1083366870880127 }, { "epoch": 4.44915771484375e-05, "model_forward_time": 0.025179386138916016, "step": 29158 }, { "epoch": 4.44915771484375e-05, "step": 29158, "training_step_time": 0.10811495780944824 }, { "epoch": 4.449310302734375e-05, "model_forward_time": 0.025850296020507812, "step": 29159 }, { "epoch": 4.449310302734375e-05, "step": 29159, "training_step_time": 0.11030793190002441 }, { "epoch": 4.449462890625e-05, "grad_norm": 0.056449707597494125, "learning_rate": 2.1418955631781202e-07, "loss": 0.008, "step": 29160 }, { "epoch": 4.449462890625e-05, "model_forward_time": 0.025135278701782227, "step": 29160 }, { "epoch": 4.449462890625e-05, "step": 29160, "training_step_time": 0.1101830005645752 }, { "epoch": 4.449615478515625e-05, "model_forward_time": 0.02523040771484375, "step": 29161 }, { "epoch": 4.449615478515625e-05, "step": 29161, "training_step_time": 0.10569095611572266 }, { "epoch": 4.44976806640625e-05, "model_forward_time": 0.025432109832763672, "step": 29162 }, { "epoch": 4.44976806640625e-05, "step": 29162, "training_step_time": 0.11112070083618164 }, { "epoch": 4.449920654296875e-05, "model_forward_time": 0.025574922561645508, "step": 29163 }, { "epoch": 4.449920654296875e-05, "step": 29163, "training_step_time": 0.1077265739440918 }, { "epoch": 4.4500732421875e-05, "model_forward_time": 0.025356054306030273, "step": 29164 }, { "epoch": 4.4500732421875e-05, "step": 29164, "training_step_time": 0.1809539794921875 }, { "epoch": 4.450225830078125e-05, "model_forward_time": 0.024548768997192383, "step": 29165 }, { "epoch": 4.450225830078125e-05, "step": 29165, "training_step_time": 0.20938801765441895 }, { "epoch": 4.45037841796875e-05, "model_forward_time": 0.024847745895385742, "step": 29166 }, { "epoch": 4.45037841796875e-05, "step": 29166, "training_step_time": 0.12308073043823242 }, { "epoch": 4.450531005859375e-05, "model_forward_time": 0.024587631225585938, "step": 29167 }, { "epoch": 4.450531005859375e-05, "step": 29167, "training_step_time": 0.14258837699890137 }, { "epoch": 4.45068359375e-05, "model_forward_time": 0.024997711181640625, "step": 29168 }, { "epoch": 4.45068359375e-05, "step": 29168, "training_step_time": 0.20114874839782715 }, { "epoch": 4.450836181640625e-05, "model_forward_time": 0.024848222732543945, "step": 29169 }, { "epoch": 4.450836181640625e-05, "step": 29169, "training_step_time": 0.15875792503356934 }, { "epoch": 4.45098876953125e-05, "grad_norm": 0.33454886078834534, "learning_rate": 2.0912369740428983e-07, "loss": 0.0097, "step": 29170 }, { "epoch": 4.45098876953125e-05, "model_forward_time": 0.024692773818969727, "step": 29170 }, { "epoch": 4.45098876953125e-05, "step": 29170, "training_step_time": 0.10991859436035156 }, { "epoch": 4.451141357421875e-05, "model_forward_time": 0.024778127670288086, "step": 29171 }, { "epoch": 4.451141357421875e-05, "step": 29171, "training_step_time": 0.10628128051757812 }, { "epoch": 4.4512939453125e-05, "model_forward_time": 0.02541637420654297, "step": 29172 }, { "epoch": 4.4512939453125e-05, "step": 29172, "training_step_time": 0.11589574813842773 }, { "epoch": 4.451446533203125e-05, "model_forward_time": 0.02544879913330078, "step": 29173 }, { "epoch": 4.451446533203125e-05, "step": 29173, "training_step_time": 0.10677027702331543 }, { "epoch": 4.45159912109375e-05, "model_forward_time": 0.02527594566345215, "step": 29174 }, { "epoch": 4.45159912109375e-05, "step": 29174, "training_step_time": 0.10689544677734375 }, { "epoch": 4.451751708984375e-05, "model_forward_time": 0.02545166015625, "step": 29175 }, { "epoch": 4.451751708984375e-05, "step": 29175, "training_step_time": 0.10645008087158203 }, { "epoch": 4.451904296875e-05, "model_forward_time": 0.025403499603271484, "step": 29176 }, { "epoch": 4.451904296875e-05, "step": 29176, "training_step_time": 0.10621428489685059 }, { "epoch": 4.452056884765625e-05, "model_forward_time": 0.025614023208618164, "step": 29177 }, { "epoch": 4.452056884765625e-05, "step": 29177, "training_step_time": 0.10514426231384277 }, { "epoch": 4.45220947265625e-05, "model_forward_time": 0.02497076988220215, "step": 29178 }, { "epoch": 4.45220947265625e-05, "step": 29178, "training_step_time": 0.10712051391601562 }, { "epoch": 4.452362060546875e-05, "model_forward_time": 0.025425195693969727, "step": 29179 }, { "epoch": 4.452362060546875e-05, "step": 29179, "training_step_time": 0.10726785659790039 }, { "epoch": 4.4525146484375e-05, "grad_norm": 0.07406570762395859, "learning_rate": 2.041183391004453e-07, "loss": 0.0046, "step": 29180 }, { "epoch": 4.4525146484375e-05, "model_forward_time": 0.02498030662536621, "step": 29180 }, { "epoch": 4.4525146484375e-05, "step": 29180, "training_step_time": 0.10393023490905762 }, { "epoch": 4.452667236328125e-05, "model_forward_time": 0.02504110336303711, "step": 29181 }, { "epoch": 4.452667236328125e-05, "step": 29181, "training_step_time": 0.1090540885925293 }, { "epoch": 4.45281982421875e-05, "model_forward_time": 0.025954484939575195, "step": 29182 }, { "epoch": 4.45281982421875e-05, "step": 29182, "training_step_time": 0.10615181922912598 }, { "epoch": 4.452972412109375e-05, "model_forward_time": 0.025450944900512695, "step": 29183 }, { "epoch": 4.452972412109375e-05, "step": 29183, "training_step_time": 0.10523056983947754 }, { "epoch": 4.453125e-05, "model_forward_time": 0.0277864933013916, "step": 29184 }, { "epoch": 4.453125e-05, "step": 29184, "training_step_time": 0.10680460929870605 }, { "epoch": 4.453277587890625e-05, "model_forward_time": 0.02624225616455078, "step": 29185 }, { "epoch": 4.453277587890625e-05, "step": 29185, "training_step_time": 0.10616755485534668 }, { "epoch": 4.45343017578125e-05, "model_forward_time": 0.025322437286376953, "step": 29186 }, { "epoch": 4.45343017578125e-05, "step": 29186, "training_step_time": 0.10539865493774414 }, { "epoch": 4.453582763671875e-05, "model_forward_time": 0.025300979614257812, "step": 29187 }, { "epoch": 4.453582763671875e-05, "step": 29187, "training_step_time": 0.10503077507019043 }, { "epoch": 4.4537353515625e-05, "model_forward_time": 0.02532029151916504, "step": 29188 }, { "epoch": 4.4537353515625e-05, "step": 29188, "training_step_time": 0.10577821731567383 }, { "epoch": 4.453887939453125e-05, "model_forward_time": 0.025055408477783203, "step": 29189 }, { "epoch": 4.453887939453125e-05, "step": 29189, "training_step_time": 0.12801909446716309 }, { "epoch": 4.45404052734375e-05, "grad_norm": 0.16133560240268707, "learning_rate": 1.9917348748826335e-07, "loss": 0.0054, "step": 29190 }, { "epoch": 4.45404052734375e-05, "model_forward_time": 0.027102231979370117, "step": 29190 }, { "epoch": 4.45404052734375e-05, "step": 29190, "training_step_time": 0.11545085906982422 }, { "epoch": 4.454193115234375e-05, "model_forward_time": 0.025290489196777344, "step": 29191 }, { "epoch": 4.454193115234375e-05, "step": 29191, "training_step_time": 0.11910319328308105 }, { "epoch": 4.454345703125e-05, "model_forward_time": 0.02520918846130371, "step": 29192 }, { "epoch": 4.454345703125e-05, "step": 29192, "training_step_time": 0.13320589065551758 }, { "epoch": 4.454498291015625e-05, "model_forward_time": 0.0253756046295166, "step": 29193 }, { "epoch": 4.454498291015625e-05, "step": 29193, "training_step_time": 0.10840129852294922 }, { "epoch": 4.45465087890625e-05, "model_forward_time": 0.02539539337158203, "step": 29194 }, { "epoch": 4.45465087890625e-05, "step": 29194, "training_step_time": 0.13821911811828613 }, { "epoch": 4.454803466796875e-05, "model_forward_time": 0.025503873825073242, "step": 29195 }, { "epoch": 4.454803466796875e-05, "step": 29195, "training_step_time": 0.10474467277526855 }, { "epoch": 4.4549560546875e-05, "model_forward_time": 0.024520397186279297, "step": 29196 }, { "epoch": 4.4549560546875e-05, "step": 29196, "training_step_time": 0.14089322090148926 }, { "epoch": 4.455108642578125e-05, "model_forward_time": 0.02516460418701172, "step": 29197 }, { "epoch": 4.455108642578125e-05, "step": 29197, "training_step_time": 0.11853504180908203 }, { "epoch": 4.45526123046875e-05, "model_forward_time": 0.024899959564208984, "step": 29198 }, { "epoch": 4.45526123046875e-05, "step": 29198, "training_step_time": 0.19440293312072754 }, { "epoch": 4.455413818359375e-05, "model_forward_time": 0.024455547332763672, "step": 29199 }, { "epoch": 4.455413818359375e-05, "step": 29199, "training_step_time": 0.1593494415283203 }, { "epoch": 4.45556640625e-05, "grad_norm": 0.04935256764292717, "learning_rate": 1.942891485762044e-07, "loss": 0.0042, "step": 29200 }, { "epoch": 4.45556640625e-05, "model_forward_time": 0.024286746978759766, "step": 29200 }, { "epoch": 4.45556640625e-05, "step": 29200, "training_step_time": 0.20178890228271484 }, { "epoch": 4.455718994140625e-05, "model_forward_time": 0.02457714080810547, "step": 29201 }, { "epoch": 4.455718994140625e-05, "step": 29201, "training_step_time": 0.14940643310546875 }, { "epoch": 4.45587158203125e-05, "model_forward_time": 0.024638652801513672, "step": 29202 }, { "epoch": 4.45587158203125e-05, "step": 29202, "training_step_time": 0.1255781650543213 }, { "epoch": 4.456024169921875e-05, "model_forward_time": 0.024190187454223633, "step": 29203 }, { "epoch": 4.456024169921875e-05, "step": 29203, "training_step_time": 0.12446022033691406 }, { "epoch": 4.4561767578125e-05, "model_forward_time": 0.024898529052734375, "step": 29204 }, { "epoch": 4.4561767578125e-05, "step": 29204, "training_step_time": 0.12291383743286133 }, { "epoch": 4.456329345703125e-05, "model_forward_time": 0.0251162052154541, "step": 29205 }, { "epoch": 4.456329345703125e-05, "step": 29205, "training_step_time": 0.11806607246398926 }, { "epoch": 4.45648193359375e-05, "model_forward_time": 0.025507211685180664, "step": 29206 }, { "epoch": 4.45648193359375e-05, "step": 29206, "training_step_time": 0.1146240234375 }, { "epoch": 4.456634521484375e-05, "model_forward_time": 0.025272130966186523, "step": 29207 }, { "epoch": 4.456634521484375e-05, "step": 29207, "training_step_time": 0.11224746704101562 }, { "epoch": 4.456787109375e-05, "model_forward_time": 0.02498340606689453, "step": 29208 }, { "epoch": 4.456787109375e-05, "step": 29208, "training_step_time": 0.10706496238708496 }, { "epoch": 4.456939697265625e-05, "model_forward_time": 0.024964570999145508, "step": 29209 }, { "epoch": 4.456939697265625e-05, "step": 29209, "training_step_time": 0.20986461639404297 }, { "epoch": 4.45709228515625e-05, "grad_norm": 0.30231958627700806, "learning_rate": 1.8946532829920426e-07, "loss": 0.0043, "step": 29210 }, { "epoch": 4.45709228515625e-05, "model_forward_time": 0.024324417114257812, "step": 29210 }, { "epoch": 4.45709228515625e-05, "step": 29210, "training_step_time": 0.20853281021118164 }, { "epoch": 4.457244873046875e-05, "model_forward_time": 0.024558544158935547, "step": 29211 }, { "epoch": 4.457244873046875e-05, "step": 29211, "training_step_time": 0.21250414848327637 }, { "epoch": 4.4573974609375e-05, "model_forward_time": 0.02430438995361328, "step": 29212 }, { "epoch": 4.4573974609375e-05, "step": 29212, "training_step_time": 0.11509418487548828 }, { "epoch": 4.457550048828125e-05, "model_forward_time": 0.02454090118408203, "step": 29213 }, { "epoch": 4.457550048828125e-05, "step": 29213, "training_step_time": 0.11182641983032227 }, { "epoch": 4.45770263671875e-05, "model_forward_time": 0.025635242462158203, "step": 29214 }, { "epoch": 4.45770263671875e-05, "step": 29214, "training_step_time": 0.14321184158325195 }, { "epoch": 4.457855224609375e-05, "model_forward_time": 0.02536749839782715, "step": 29215 }, { "epoch": 4.457855224609375e-05, "step": 29215, "training_step_time": 0.15502190589904785 }, { "epoch": 4.4580078125e-05, "model_forward_time": 0.024321794509887695, "step": 29216 }, { "epoch": 4.4580078125e-05, "step": 29216, "training_step_time": 0.12062668800354004 }, { "epoch": 4.458160400390625e-05, "model_forward_time": 0.024708271026611328, "step": 29217 }, { "epoch": 4.458160400390625e-05, "step": 29217, "training_step_time": 0.10248684883117676 }, { "epoch": 4.45831298828125e-05, "model_forward_time": 0.025120019912719727, "step": 29218 }, { "epoch": 4.45831298828125e-05, "step": 29218, "training_step_time": 0.10790634155273438 }, { "epoch": 4.458465576171875e-05, "model_forward_time": 0.025448322296142578, "step": 29219 }, { "epoch": 4.458465576171875e-05, "step": 29219, "training_step_time": 0.10582756996154785 }, { "epoch": 4.4586181640625e-05, "grad_norm": 0.04679938778281212, "learning_rate": 1.847020325186577e-07, "loss": 0.0103, "step": 29220 }, { "epoch": 4.4586181640625e-05, "model_forward_time": 0.02504873275756836, "step": 29220 }, { "epoch": 4.4586181640625e-05, "step": 29220, "training_step_time": 0.10419988632202148 }, { "epoch": 4.458770751953125e-05, "model_forward_time": 0.025397539138793945, "step": 29221 }, { "epoch": 4.458770751953125e-05, "step": 29221, "training_step_time": 0.10366654396057129 }, { "epoch": 4.45892333984375e-05, "model_forward_time": 0.024834156036376953, "step": 29222 }, { "epoch": 4.45892333984375e-05, "step": 29222, "training_step_time": 0.10530328750610352 }, { "epoch": 4.459075927734375e-05, "model_forward_time": 0.025173425674438477, "step": 29223 }, { "epoch": 4.459075927734375e-05, "step": 29223, "training_step_time": 0.10408830642700195 }, { "epoch": 4.459228515625e-05, "model_forward_time": 0.025326251983642578, "step": 29224 }, { "epoch": 4.459228515625e-05, "step": 29224, "training_step_time": 0.10784053802490234 }, { "epoch": 4.459381103515625e-05, "model_forward_time": 0.02575850486755371, "step": 29225 }, { "epoch": 4.459381103515625e-05, "step": 29225, "training_step_time": 0.105438232421875 }, { "epoch": 4.45953369140625e-05, "model_forward_time": 0.025055646896362305, "step": 29226 }, { "epoch": 4.45953369140625e-05, "step": 29226, "training_step_time": 0.10932159423828125 }, { "epoch": 4.459686279296875e-05, "model_forward_time": 0.024958133697509766, "step": 29227 }, { "epoch": 4.459686279296875e-05, "step": 29227, "training_step_time": 0.10515761375427246 }, { "epoch": 4.4598388671875e-05, "model_forward_time": 0.02505660057067871, "step": 29228 }, { "epoch": 4.4598388671875e-05, "step": 29228, "training_step_time": 0.10516762733459473 }, { "epoch": 4.459991455078125e-05, "model_forward_time": 0.025411367416381836, "step": 29229 }, { "epoch": 4.459991455078125e-05, "step": 29229, "training_step_time": 0.10290980339050293 }, { "epoch": 4.46014404296875e-05, "grad_norm": 0.047037262469530106, "learning_rate": 1.799992670224182e-07, "loss": 0.003, "step": 29230 }, { "epoch": 4.46014404296875e-05, "model_forward_time": 0.02561664581298828, "step": 29230 }, { "epoch": 4.46014404296875e-05, "step": 29230, "training_step_time": 0.1042780876159668 }, { "epoch": 4.460296630859375e-05, "model_forward_time": 0.02519369125366211, "step": 29231 }, { "epoch": 4.460296630859375e-05, "step": 29231, "training_step_time": 0.10624241828918457 }, { "epoch": 4.46044921875e-05, "model_forward_time": 0.02541041374206543, "step": 29232 }, { "epoch": 4.46044921875e-05, "step": 29232, "training_step_time": 0.10304951667785645 }, { "epoch": 4.460601806640625e-05, "model_forward_time": 0.02568507194519043, "step": 29233 }, { "epoch": 4.460601806640625e-05, "step": 29233, "training_step_time": 0.22035741806030273 }, { "epoch": 4.46075439453125e-05, "model_forward_time": 0.02461099624633789, "step": 29234 }, { "epoch": 4.46075439453125e-05, "step": 29234, "training_step_time": 0.12034082412719727 }, { "epoch": 4.460906982421875e-05, "model_forward_time": 0.024698257446289062, "step": 29235 }, { "epoch": 4.460906982421875e-05, "step": 29235, "training_step_time": 0.1255486011505127 }, { "epoch": 4.4610595703125e-05, "model_forward_time": 0.025273799896240234, "step": 29236 }, { "epoch": 4.4610595703125e-05, "step": 29236, "training_step_time": 0.20974516868591309 }, { "epoch": 4.461212158203125e-05, "model_forward_time": 0.024103403091430664, "step": 29237 }, { "epoch": 4.461212158203125e-05, "step": 29237, "training_step_time": 0.22941160202026367 }, { "epoch": 4.46136474609375e-05, "model_forward_time": 0.02493143081665039, "step": 29238 }, { "epoch": 4.46136474609375e-05, "step": 29238, "training_step_time": 0.17444443702697754 }, { "epoch": 4.461517333984375e-05, "model_forward_time": 0.025133848190307617, "step": 29239 }, { "epoch": 4.461517333984375e-05, "step": 29239, "training_step_time": 0.18807530403137207 }, { "epoch": 4.461669921875e-05, "grad_norm": 0.06753551214933395, "learning_rate": 1.753570375247815e-07, "loss": 0.0103, "step": 29240 }, { "epoch": 4.461669921875e-05, "model_forward_time": 0.024422645568847656, "step": 29240 }, { "epoch": 4.461669921875e-05, "step": 29240, "training_step_time": 0.17309999465942383 }, { "epoch": 4.461822509765625e-05, "model_forward_time": 0.024614572525024414, "step": 29241 }, { "epoch": 4.461822509765625e-05, "step": 29241, "training_step_time": 0.1656970977783203 }, { "epoch": 4.46197509765625e-05, "model_forward_time": 0.024381399154663086, "step": 29242 }, { "epoch": 4.46197509765625e-05, "step": 29242, "training_step_time": 0.13591575622558594 }, { "epoch": 4.462127685546875e-05, "model_forward_time": 0.024503707885742188, "step": 29243 }, { "epoch": 4.462127685546875e-05, "step": 29243, "training_step_time": 0.12748312950134277 }, { "epoch": 4.4622802734375e-05, "model_forward_time": 0.024832963943481445, "step": 29244 }, { "epoch": 4.4622802734375e-05, "step": 29244, "training_step_time": 0.11878538131713867 }, { "epoch": 4.462432861328125e-05, "model_forward_time": 0.0252227783203125, "step": 29245 }, { "epoch": 4.462432861328125e-05, "step": 29245, "training_step_time": 0.11771202087402344 }, { "epoch": 4.46258544921875e-05, "model_forward_time": 0.025090932846069336, "step": 29246 }, { "epoch": 4.46258544921875e-05, "step": 29246, "training_step_time": 0.11478972434997559 }, { "epoch": 4.462738037109375e-05, "model_forward_time": 0.025682449340820312, "step": 29247 }, { "epoch": 4.462738037109375e-05, "step": 29247, "training_step_time": 0.11609292030334473 }, { "epoch": 4.462890625e-05, "model_forward_time": 0.02508091926574707, "step": 29248 }, { "epoch": 4.462890625e-05, "step": 29248, "training_step_time": 0.10498404502868652 }, { "epoch": 4.463043212890625e-05, "model_forward_time": 0.02520895004272461, "step": 29249 }, { "epoch": 4.463043212890625e-05, "step": 29249, "training_step_time": 0.10349464416503906 }, { "epoch": 4.46319580078125e-05, "grad_norm": 0.08164948225021362, "learning_rate": 1.7077534966650766e-07, "loss": 0.0029, "step": 29250 }, { "epoch": 4.46319580078125e-05, "model_forward_time": 0.025195598602294922, "step": 29250 }, { "epoch": 4.46319580078125e-05, "step": 29250, "training_step_time": 0.1051490306854248 }, { "epoch": 4.463348388671875e-05, "model_forward_time": 0.025407075881958008, "step": 29251 }, { "epoch": 4.463348388671875e-05, "step": 29251, "training_step_time": 0.10389518737792969 }, { "epoch": 4.4635009765625e-05, "model_forward_time": 0.02539229393005371, "step": 29252 }, { "epoch": 4.4635009765625e-05, "step": 29252, "training_step_time": 0.1061701774597168 }, { "epoch": 4.463653564453125e-05, "model_forward_time": 0.025336265563964844, "step": 29253 }, { "epoch": 4.463653564453125e-05, "step": 29253, "training_step_time": 0.13142704963684082 }, { "epoch": 4.46380615234375e-05, "model_forward_time": 0.024914026260375977, "step": 29254 }, { "epoch": 4.46380615234375e-05, "step": 29254, "training_step_time": 0.10991072654724121 }, { "epoch": 4.463958740234375e-05, "model_forward_time": 0.026196956634521484, "step": 29255 }, { "epoch": 4.463958740234375e-05, "step": 29255, "training_step_time": 0.11872339248657227 }, { "epoch": 4.464111328125e-05, "model_forward_time": 0.025101661682128906, "step": 29256 }, { "epoch": 4.464111328125e-05, "step": 29256, "training_step_time": 0.13953566551208496 }, { "epoch": 4.464263916015625e-05, "model_forward_time": 0.02496814727783203, "step": 29257 }, { "epoch": 4.464263916015625e-05, "step": 29257, "training_step_time": 0.15061140060424805 }, { "epoch": 4.46441650390625e-05, "model_forward_time": 0.024850130081176758, "step": 29258 }, { "epoch": 4.46441650390625e-05, "step": 29258, "training_step_time": 0.19939279556274414 }, { "epoch": 4.464569091796875e-05, "model_forward_time": 0.024552583694458008, "step": 29259 }, { "epoch": 4.464569091796875e-05, "step": 29259, "training_step_time": 0.17019081115722656 }, { "epoch": 4.4647216796875e-05, "grad_norm": 0.044074781239032745, "learning_rate": 1.662542090147712e-07, "loss": 0.0053, "step": 29260 }, { "epoch": 4.4647216796875e-05, "model_forward_time": 0.025516986846923828, "step": 29260 }, { "epoch": 4.4647216796875e-05, "step": 29260, "training_step_time": 0.10477018356323242 }, { "epoch": 4.464874267578125e-05, "model_forward_time": 0.024930715560913086, "step": 29261 }, { "epoch": 4.464874267578125e-05, "step": 29261, "training_step_time": 0.10395383834838867 }, { "epoch": 4.46502685546875e-05, "model_forward_time": 0.0251162052154541, "step": 29262 }, { "epoch": 4.46502685546875e-05, "step": 29262, "training_step_time": 0.10530376434326172 }, { "epoch": 4.465179443359375e-05, "model_forward_time": 0.025175094604492188, "step": 29263 }, { "epoch": 4.465179443359375e-05, "step": 29263, "training_step_time": 0.10660362243652344 }, { "epoch": 4.46533203125e-05, "model_forward_time": 0.025259971618652344, "step": 29264 }, { "epoch": 4.46533203125e-05, "step": 29264, "training_step_time": 0.10516238212585449 }, { "epoch": 4.465484619140625e-05, "model_forward_time": 0.025018692016601562, "step": 29265 }, { "epoch": 4.465484619140625e-05, "step": 29265, "training_step_time": 0.107025146484375 }, { "epoch": 4.46563720703125e-05, "model_forward_time": 0.025271892547607422, "step": 29266 }, { "epoch": 4.46563720703125e-05, "step": 29266, "training_step_time": 0.10708355903625488 }, { "epoch": 4.465789794921875e-05, "model_forward_time": 0.025465965270996094, "step": 29267 }, { "epoch": 4.465789794921875e-05, "step": 29267, "training_step_time": 0.10401201248168945 }, { "epoch": 4.4659423828125e-05, "model_forward_time": 0.025221586227416992, "step": 29268 }, { "epoch": 4.4659423828125e-05, "step": 29268, "training_step_time": 0.10880565643310547 }, { "epoch": 4.466094970703125e-05, "model_forward_time": 0.02512955665588379, "step": 29269 }, { "epoch": 4.466094970703125e-05, "step": 29269, "training_step_time": 0.1041872501373291 }, { "epoch": 4.46624755859375e-05, "grad_norm": 0.04014989733695984, "learning_rate": 1.6179362106318874e-07, "loss": 0.0035, "step": 29270 }, { "epoch": 4.46624755859375e-05, "model_forward_time": 0.025320053100585938, "step": 29270 }, { "epoch": 4.46624755859375e-05, "step": 29270, "training_step_time": 0.10244536399841309 }, { "epoch": 4.466400146484375e-05, "model_forward_time": 0.025294065475463867, "step": 29271 }, { "epoch": 4.466400146484375e-05, "step": 29271, "training_step_time": 0.10302543640136719 }, { "epoch": 4.466552734375e-05, "model_forward_time": 0.025515079498291016, "step": 29272 }, { "epoch": 4.466552734375e-05, "step": 29272, "training_step_time": 0.10659074783325195 }, { "epoch": 4.466705322265625e-05, "model_forward_time": 0.025066375732421875, "step": 29273 }, { "epoch": 4.466705322265625e-05, "step": 29273, "training_step_time": 0.10381865501403809 }, { "epoch": 4.46685791015625e-05, "model_forward_time": 0.02525162696838379, "step": 29274 }, { "epoch": 4.46685791015625e-05, "step": 29274, "training_step_time": 0.10404229164123535 }, { "epoch": 4.467010498046875e-05, "model_forward_time": 0.02462291717529297, "step": 29275 }, { "epoch": 4.467010498046875e-05, "step": 29275, "training_step_time": 0.10335850715637207 }, { "epoch": 4.4671630859375e-05, "model_forward_time": 0.02446913719177246, "step": 29276 }, { "epoch": 4.4671630859375e-05, "step": 29276, "training_step_time": 0.10891079902648926 }, { "epoch": 4.467315673828125e-05, "model_forward_time": 0.02479100227355957, "step": 29277 }, { "epoch": 4.467315673828125e-05, "step": 29277, "training_step_time": 0.20103740692138672 }, { "epoch": 4.46746826171875e-05, "model_forward_time": 0.0239255428314209, "step": 29278 }, { "epoch": 4.46746826171875e-05, "step": 29278, "training_step_time": 0.1177988052368164 }, { "epoch": 4.467620849609375e-05, "model_forward_time": 0.024171113967895508, "step": 29279 }, { "epoch": 4.467620849609375e-05, "step": 29279, "training_step_time": 0.1255195140838623 }, { "epoch": 4.4677734375e-05, "grad_norm": 0.13707120716571808, "learning_rate": 1.5739359123178587e-07, "loss": 0.0044, "step": 29280 }, { "epoch": 4.4677734375e-05, "model_forward_time": 0.025776386260986328, "step": 29280 }, { "epoch": 4.4677734375e-05, "step": 29280, "training_step_time": 0.15343093872070312 }, { "epoch": 4.467926025390625e-05, "model_forward_time": 0.02449965476989746, "step": 29281 }, { "epoch": 4.467926025390625e-05, "step": 29281, "training_step_time": 0.21709203720092773 }, { "epoch": 4.46807861328125e-05, "model_forward_time": 0.02377486228942871, "step": 29282 }, { "epoch": 4.46807861328125e-05, "step": 29282, "training_step_time": 0.10887479782104492 }, { "epoch": 4.468231201171875e-05, "model_forward_time": 0.025783061981201172, "step": 29283 }, { "epoch": 4.468231201171875e-05, "step": 29283, "training_step_time": 0.10442757606506348 }, { "epoch": 4.4683837890625e-05, "model_forward_time": 0.024457454681396484, "step": 29284 }, { "epoch": 4.4683837890625e-05, "step": 29284, "training_step_time": 0.14114928245544434 }, { "epoch": 4.468536376953125e-05, "model_forward_time": 0.025577545166015625, "step": 29285 }, { "epoch": 4.468536376953125e-05, "step": 29285, "training_step_time": 0.10525345802307129 }, { "epoch": 4.46868896484375e-05, "model_forward_time": 0.02523207664489746, "step": 29286 }, { "epoch": 4.46868896484375e-05, "step": 29286, "training_step_time": 0.2073674201965332 }, { "epoch": 4.468841552734375e-05, "model_forward_time": 0.02429366111755371, "step": 29287 }, { "epoch": 4.468841552734375e-05, "step": 29287, "training_step_time": 0.1457967758178711 }, { "epoch": 4.468994140625e-05, "model_forward_time": 0.024573802947998047, "step": 29288 }, { "epoch": 4.468994140625e-05, "step": 29288, "training_step_time": 0.1308450698852539 }, { "epoch": 4.469146728515625e-05, "model_forward_time": 0.024330854415893555, "step": 29289 }, { "epoch": 4.469146728515625e-05, "step": 29289, "training_step_time": 0.13199377059936523 }, { "epoch": 4.46929931640625e-05, "grad_norm": 0.27505090832710266, "learning_rate": 1.5305412486702474e-07, "loss": 0.0052, "step": 29290 }, { "epoch": 4.46929931640625e-05, "model_forward_time": 0.024813175201416016, "step": 29290 }, { "epoch": 4.46929931640625e-05, "step": 29290, "training_step_time": 0.12634062767028809 }, { "epoch": 4.469451904296875e-05, "model_forward_time": 0.024620532989501953, "step": 29291 }, { "epoch": 4.469451904296875e-05, "step": 29291, "training_step_time": 0.12334704399108887 }, { "epoch": 4.4696044921875e-05, "model_forward_time": 0.02477407455444336, "step": 29292 }, { "epoch": 4.4696044921875e-05, "step": 29292, "training_step_time": 0.12033224105834961 }, { "epoch": 4.469757080078125e-05, "model_forward_time": 0.024919986724853516, "step": 29293 }, { "epoch": 4.469757080078125e-05, "step": 29293, "training_step_time": 0.11629605293273926 }, { "epoch": 4.46990966796875e-05, "model_forward_time": 0.024993419647216797, "step": 29294 }, { "epoch": 4.46990966796875e-05, "step": 29294, "training_step_time": 0.1137688159942627 }, { "epoch": 4.470062255859375e-05, "model_forward_time": 0.02485346794128418, "step": 29295 }, { "epoch": 4.470062255859375e-05, "step": 29295, "training_step_time": 0.11148858070373535 }, { "epoch": 4.47021484375e-05, "model_forward_time": 0.025007963180541992, "step": 29296 }, { "epoch": 4.47021484375e-05, "step": 29296, "training_step_time": 0.10565757751464844 }, { "epoch": 4.470367431640625e-05, "model_forward_time": 0.02490401268005371, "step": 29297 }, { "epoch": 4.470367431640625e-05, "step": 29297, "training_step_time": 0.11085367202758789 }, { "epoch": 4.47052001953125e-05, "model_forward_time": 0.02491617202758789, "step": 29298 }, { "epoch": 4.47052001953125e-05, "step": 29298, "training_step_time": 0.10672163963317871 }, { "epoch": 4.470672607421875e-05, "model_forward_time": 0.025066614151000977, "step": 29299 }, { "epoch": 4.470672607421875e-05, "step": 29299, "training_step_time": 0.12346649169921875 }, { "epoch": 4.4708251953125e-05, "grad_norm": 0.22749656438827515, "learning_rate": 1.4877522724175973e-07, "loss": 0.0039, "step": 29300 }, { "epoch": 4.4708251953125e-05, "model_forward_time": 0.025049448013305664, "step": 29300 }, { "epoch": 4.4708251953125e-05, "step": 29300, "training_step_time": 0.22768306732177734 }, { "epoch": 4.470977783203125e-05, "model_forward_time": 0.0242002010345459, "step": 29301 }, { "epoch": 4.470977783203125e-05, "step": 29301, "training_step_time": 0.1381394863128662 }, { "epoch": 4.47113037109375e-05, "model_forward_time": 0.02422642707824707, "step": 29302 }, { "epoch": 4.47113037109375e-05, "step": 29302, "training_step_time": 0.2046375274658203 }, { "epoch": 4.471282958984375e-05, "model_forward_time": 0.024184703826904297, "step": 29303 }, { "epoch": 4.471282958984375e-05, "step": 29303, "training_step_time": 0.1348421573638916 }, { "epoch": 4.471435546875e-05, "model_forward_time": 0.024637937545776367, "step": 29304 }, { "epoch": 4.471435546875e-05, "step": 29304, "training_step_time": 0.1162862777709961 }, { "epoch": 4.471588134765625e-05, "model_forward_time": 0.02466559410095215, "step": 29305 }, { "epoch": 4.471588134765625e-05, "step": 29305, "training_step_time": 0.10253214836120605 }, { "epoch": 4.47174072265625e-05, "model_forward_time": 0.024791955947875977, "step": 29306 }, { "epoch": 4.47174072265625e-05, "step": 29306, "training_step_time": 0.10318541526794434 }, { "epoch": 4.471893310546875e-05, "model_forward_time": 0.024979829788208008, "step": 29307 }, { "epoch": 4.471893310546875e-05, "step": 29307, "training_step_time": 0.10462594032287598 }, { "epoch": 4.4720458984375e-05, "model_forward_time": 0.024505615234375, "step": 29308 }, { "epoch": 4.4720458984375e-05, "step": 29308, "training_step_time": 0.1066582202911377 }, { "epoch": 4.472198486328125e-05, "model_forward_time": 0.02563643455505371, "step": 29309 }, { "epoch": 4.472198486328125e-05, "step": 29309, "training_step_time": 0.11033797264099121 }, { "epoch": 4.47235107421875e-05, "grad_norm": 0.04503900930285454, "learning_rate": 1.4455690355525964e-07, "loss": 0.0096, "step": 29310 }, { "epoch": 4.47235107421875e-05, "model_forward_time": 0.024950265884399414, "step": 29310 }, { "epoch": 4.47235107421875e-05, "step": 29310, "training_step_time": 0.10849785804748535 }, { "epoch": 4.472503662109375e-05, "model_forward_time": 0.02501821517944336, "step": 29311 }, { "epoch": 4.472503662109375e-05, "step": 29311, "training_step_time": 0.12184810638427734 }, { "epoch": 4.47265625e-05, "model_forward_time": 0.025444746017456055, "step": 29312 }, { "epoch": 4.47265625e-05, "step": 29312, "training_step_time": 0.10687923431396484 }, { "epoch": 4.472808837890625e-05, "model_forward_time": 0.025232553482055664, "step": 29313 }, { "epoch": 4.472808837890625e-05, "step": 29313, "training_step_time": 0.10608410835266113 }, { "epoch": 4.47296142578125e-05, "model_forward_time": 0.025468111038208008, "step": 29314 }, { "epoch": 4.47296142578125e-05, "step": 29314, "training_step_time": 0.10686111450195312 }, { "epoch": 4.473114013671875e-05, "model_forward_time": 0.02501392364501953, "step": 29315 }, { "epoch": 4.473114013671875e-05, "step": 29315, "training_step_time": 0.10678863525390625 }, { "epoch": 4.4732666015625e-05, "model_forward_time": 0.02531909942626953, "step": 29316 }, { "epoch": 4.4732666015625e-05, "step": 29316, "training_step_time": 0.10640597343444824 }, { "epoch": 4.473419189453125e-05, "model_forward_time": 0.025478601455688477, "step": 29317 }, { "epoch": 4.473419189453125e-05, "step": 29317, "training_step_time": 0.10679912567138672 }, { "epoch": 4.47357177734375e-05, "model_forward_time": 0.0250089168548584, "step": 29318 }, { "epoch": 4.47357177734375e-05, "step": 29318, "training_step_time": 0.10621905326843262 }, { "epoch": 4.473724365234375e-05, "model_forward_time": 0.02537822723388672, "step": 29319 }, { "epoch": 4.473724365234375e-05, "step": 29319, "training_step_time": 0.10548281669616699 }, { "epoch": 4.473876953125e-05, "grad_norm": 0.056688860058784485, "learning_rate": 1.4039915893318544e-07, "loss": 0.0055, "step": 29320 }, { "epoch": 4.473876953125e-05, "model_forward_time": 0.025515317916870117, "step": 29320 }, { "epoch": 4.473876953125e-05, "step": 29320, "training_step_time": 0.11110639572143555 }, { "epoch": 4.474029541015625e-05, "model_forward_time": 0.025371313095092773, "step": 29321 }, { "epoch": 4.474029541015625e-05, "step": 29321, "training_step_time": 0.211134672164917 }, { "epoch": 4.47418212890625e-05, "model_forward_time": 0.02447986602783203, "step": 29322 }, { "epoch": 4.47418212890625e-05, "step": 29322, "training_step_time": 0.1211540699005127 }, { "epoch": 4.474334716796875e-05, "model_forward_time": 0.023842573165893555, "step": 29323 }, { "epoch": 4.474334716796875e-05, "step": 29323, "training_step_time": 0.1299142837524414 }, { "epoch": 4.4744873046875e-05, "model_forward_time": 0.024712800979614258, "step": 29324 }, { "epoch": 4.4744873046875e-05, "step": 29324, "training_step_time": 0.15225958824157715 }, { "epoch": 4.474639892578125e-05, "model_forward_time": 0.024759769439697266, "step": 29325 }, { "epoch": 4.474639892578125e-05, "step": 29325, "training_step_time": 0.21504831314086914 }, { "epoch": 4.47479248046875e-05, "model_forward_time": 0.024817943572998047, "step": 29326 }, { "epoch": 4.47479248046875e-05, "step": 29326, "training_step_time": 0.11866593360900879 }, { "epoch": 4.474945068359375e-05, "model_forward_time": 0.024143457412719727, "step": 29327 }, { "epoch": 4.474945068359375e-05, "step": 29327, "training_step_time": 0.10236763954162598 }, { "epoch": 4.47509765625e-05, "model_forward_time": 0.025563955307006836, "step": 29328 }, { "epoch": 4.47509765625e-05, "step": 29328, "training_step_time": 0.10315775871276855 }, { "epoch": 4.475250244140625e-05, "model_forward_time": 0.024791240692138672, "step": 29329 }, { "epoch": 4.475250244140625e-05, "step": 29329, "training_step_time": 0.1415700912475586 }, { "epoch": 4.47540283203125e-05, "grad_norm": 0.07716532051563263, "learning_rate": 1.3630199842758484e-07, "loss": 0.0044, "step": 29330 }, { "epoch": 4.47540283203125e-05, "model_forward_time": 0.024802207946777344, "step": 29330 }, { "epoch": 4.47540283203125e-05, "step": 29330, "training_step_time": 0.15965914726257324 }, { "epoch": 4.475555419921875e-05, "model_forward_time": 0.02447342872619629, "step": 29331 }, { "epoch": 4.475555419921875e-05, "step": 29331, "training_step_time": 0.12029671669006348 }, { "epoch": 4.4757080078125e-05, "model_forward_time": 0.024561166763305664, "step": 29332 }, { "epoch": 4.4757080078125e-05, "step": 29332, "training_step_time": 0.12207198143005371 }, { "epoch": 4.475860595703125e-05, "model_forward_time": 0.025473833084106445, "step": 29333 }, { "epoch": 4.475860595703125e-05, "step": 29333, "training_step_time": 0.10641264915466309 }, { "epoch": 4.47601318359375e-05, "model_forward_time": 0.025437355041503906, "step": 29334 }, { "epoch": 4.47601318359375e-05, "step": 29334, "training_step_time": 0.11220908164978027 }, { "epoch": 4.476165771484375e-05, "model_forward_time": 0.025599241256713867, "step": 29335 }, { "epoch": 4.476165771484375e-05, "step": 29335, "training_step_time": 0.11154818534851074 }, { "epoch": 4.476318359375e-05, "model_forward_time": 0.024888277053833008, "step": 29336 }, { "epoch": 4.476318359375e-05, "step": 29336, "training_step_time": 0.12497949600219727 }, { "epoch": 4.476470947265625e-05, "model_forward_time": 0.025096654891967773, "step": 29337 }, { "epoch": 4.476470947265625e-05, "step": 29337, "training_step_time": 0.12675237655639648 }, { "epoch": 4.47662353515625e-05, "model_forward_time": 0.0248262882232666, "step": 29338 }, { "epoch": 4.47662353515625e-05, "step": 29338, "training_step_time": 0.12268280982971191 }, { "epoch": 4.476776123046875e-05, "model_forward_time": 0.024593114852905273, "step": 29339 }, { "epoch": 4.476776123046875e-05, "step": 29339, "training_step_time": 0.12254953384399414 }, { "epoch": 4.4769287109375e-05, "grad_norm": 0.04653387889266014, "learning_rate": 1.3226542701689215e-07, "loss": 0.0027, "step": 29340 }, { "epoch": 4.4769287109375e-05, "model_forward_time": 0.02493762969970703, "step": 29340 }, { "epoch": 4.4769287109375e-05, "step": 29340, "training_step_time": 0.1229238510131836 }, { "epoch": 4.477081298828125e-05, "model_forward_time": 0.024954557418823242, "step": 29341 }, { "epoch": 4.477081298828125e-05, "step": 29341, "training_step_time": 0.12147760391235352 }, { "epoch": 4.47723388671875e-05, "model_forward_time": 0.025064945220947266, "step": 29342 }, { "epoch": 4.47723388671875e-05, "step": 29342, "training_step_time": 0.11735248565673828 }, { "epoch": 4.477386474609375e-05, "model_forward_time": 0.02505636215209961, "step": 29343 }, { "epoch": 4.477386474609375e-05, "step": 29343, "training_step_time": 0.11221003532409668 }, { "epoch": 4.4775390625e-05, "model_forward_time": 0.025596141815185547, "step": 29344 }, { "epoch": 4.4775390625e-05, "step": 29344, "training_step_time": 0.15523958206176758 }, { "epoch": 4.477691650390625e-05, "model_forward_time": 0.024835824966430664, "step": 29345 }, { "epoch": 4.477691650390625e-05, "step": 29345, "training_step_time": 0.19050073623657227 }, { "epoch": 4.47784423828125e-05, "model_forward_time": 0.02443552017211914, "step": 29346 }, { "epoch": 4.47784423828125e-05, "step": 29346, "training_step_time": 0.1771383285522461 }, { "epoch": 4.477996826171875e-05, "model_forward_time": 0.02448296546936035, "step": 29347 }, { "epoch": 4.477996826171875e-05, "step": 29347, "training_step_time": 0.16420888900756836 }, { "epoch": 4.4781494140625e-05, "model_forward_time": 0.02460312843322754, "step": 29348 }, { "epoch": 4.4781494140625e-05, "step": 29348, "training_step_time": 0.10727739334106445 }, { "epoch": 4.478302001953125e-05, "model_forward_time": 0.024475574493408203, "step": 29349 }, { "epoch": 4.478302001953125e-05, "step": 29349, "training_step_time": 0.19325852394104004 }, { "epoch": 4.47845458984375e-05, "grad_norm": 0.19449864327907562, "learning_rate": 1.2828944960592836e-07, "loss": 0.0076, "step": 29350 }, { "epoch": 4.47845458984375e-05, "model_forward_time": 0.02480149269104004, "step": 29350 }, { "epoch": 4.47845458984375e-05, "step": 29350, "training_step_time": 0.10398125648498535 }, { "epoch": 4.478607177734375e-05, "model_forward_time": 0.024389982223510742, "step": 29351 }, { "epoch": 4.478607177734375e-05, "step": 29351, "training_step_time": 0.10195040702819824 }, { "epoch": 4.478759765625e-05, "model_forward_time": 0.025147676467895508, "step": 29352 }, { "epoch": 4.478759765625e-05, "step": 29352, "training_step_time": 0.10646390914916992 }, { "epoch": 4.478912353515625e-05, "model_forward_time": 0.025377750396728516, "step": 29353 }, { "epoch": 4.478912353515625e-05, "step": 29353, "training_step_time": 0.10848593711853027 }, { "epoch": 4.47906494140625e-05, "model_forward_time": 0.025032997131347656, "step": 29354 }, { "epoch": 4.47906494140625e-05, "step": 29354, "training_step_time": 0.1073918342590332 }, { "epoch": 4.479217529296875e-05, "model_forward_time": 0.02521681785583496, "step": 29355 }, { "epoch": 4.479217529296875e-05, "step": 29355, "training_step_time": 0.1096649169921875 }, { "epoch": 4.4793701171875e-05, "model_forward_time": 0.025640249252319336, "step": 29356 }, { "epoch": 4.4793701171875e-05, "step": 29356, "training_step_time": 0.10517334938049316 }, { "epoch": 4.479522705078125e-05, "model_forward_time": 0.02477431297302246, "step": 29357 }, { "epoch": 4.479522705078125e-05, "step": 29357, "training_step_time": 0.10350608825683594 }, { "epoch": 4.47967529296875e-05, "model_forward_time": 0.025552749633789062, "step": 29358 }, { "epoch": 4.47967529296875e-05, "step": 29358, "training_step_time": 0.10907411575317383 }, { "epoch": 4.479827880859375e-05, "model_forward_time": 0.025232791900634766, "step": 29359 }, { "epoch": 4.479827880859375e-05, "step": 29359, "training_step_time": 0.105926513671875 }, { "epoch": 4.47998046875e-05, "grad_norm": 0.24660265445709229, "learning_rate": 1.243740710258734e-07, "loss": 0.004, "step": 29360 }, { "epoch": 4.47998046875e-05, "model_forward_time": 0.02512073516845703, "step": 29360 }, { "epoch": 4.47998046875e-05, "step": 29360, "training_step_time": 0.10642004013061523 }, { "epoch": 4.480133056640625e-05, "model_forward_time": 0.025548934936523438, "step": 29361 }, { "epoch": 4.480133056640625e-05, "step": 29361, "training_step_time": 0.10434293746948242 }, { "epoch": 4.48028564453125e-05, "model_forward_time": 0.025104999542236328, "step": 29362 }, { "epoch": 4.48028564453125e-05, "step": 29362, "training_step_time": 0.10578060150146484 }, { "epoch": 4.480438232421875e-05, "model_forward_time": 0.025576353073120117, "step": 29363 }, { "epoch": 4.480438232421875e-05, "step": 29363, "training_step_time": 0.10851120948791504 }, { "epoch": 4.4805908203125e-05, "model_forward_time": 0.025470256805419922, "step": 29364 }, { "epoch": 4.4805908203125e-05, "step": 29364, "training_step_time": 0.10692811012268066 }, { "epoch": 4.480743408203125e-05, "model_forward_time": 0.025290489196777344, "step": 29365 }, { "epoch": 4.480743408203125e-05, "step": 29365, "training_step_time": 0.11270308494567871 }, { "epoch": 4.48089599609375e-05, "model_forward_time": 0.02540731430053711, "step": 29366 }, { "epoch": 4.48089599609375e-05, "step": 29366, "training_step_time": 0.14193296432495117 }, { "epoch": 4.481048583984375e-05, "model_forward_time": 0.025343656539916992, "step": 29367 }, { "epoch": 4.481048583984375e-05, "step": 29367, "training_step_time": 0.1177818775177002 }, { "epoch": 4.481201171875e-05, "model_forward_time": 0.025026321411132812, "step": 29368 }, { "epoch": 4.481201171875e-05, "step": 29368, "training_step_time": 0.12896466255187988 }, { "epoch": 4.481353759765625e-05, "model_forward_time": 0.025910139083862305, "step": 29369 }, { "epoch": 4.481353759765625e-05, "step": 29369, "training_step_time": 0.15584993362426758 }, { "epoch": 4.48150634765625e-05, "grad_norm": 0.13673478364944458, "learning_rate": 1.2051929603428825e-07, "loss": 0.0062, "step": 29370 }, { "epoch": 4.48150634765625e-05, "model_forward_time": 0.024271249771118164, "step": 29370 }, { "epoch": 4.48150634765625e-05, "step": 29370, "training_step_time": 0.21133208274841309 }, { "epoch": 4.481658935546875e-05, "model_forward_time": 0.025204896926879883, "step": 29371 }, { "epoch": 4.481658935546875e-05, "step": 29371, "training_step_time": 0.10173773765563965 }, { "epoch": 4.4818115234375e-05, "model_forward_time": 0.024027585983276367, "step": 29372 }, { "epoch": 4.4818115234375e-05, "step": 29372, "training_step_time": 0.10189962387084961 }, { "epoch": 4.481964111328125e-05, "model_forward_time": 0.02635812759399414, "step": 29373 }, { "epoch": 4.481964111328125e-05, "step": 29373, "training_step_time": 0.10457754135131836 }, { "epoch": 4.48211669921875e-05, "model_forward_time": 0.02486419677734375, "step": 29374 }, { "epoch": 4.48211669921875e-05, "step": 29374, "training_step_time": 0.1481611728668213 }, { "epoch": 4.482269287109375e-05, "model_forward_time": 0.025624513626098633, "step": 29375 }, { "epoch": 4.482269287109375e-05, "step": 29375, "training_step_time": 0.16077733039855957 }, { "epoch": 4.482421875e-05, "model_forward_time": 0.02535104751586914, "step": 29376 }, { "epoch": 4.482421875e-05, "step": 29376, "training_step_time": 0.11684322357177734 }, { "epoch": 4.482574462890625e-05, "model_forward_time": 0.024559974670410156, "step": 29377 }, { "epoch": 4.482574462890625e-05, "step": 29377, "training_step_time": 0.13288187980651855 }, { "epoch": 4.48272705078125e-05, "model_forward_time": 0.026221513748168945, "step": 29378 }, { "epoch": 4.48272705078125e-05, "step": 29378, "training_step_time": 0.19694876670837402 }, { "epoch": 4.482879638671875e-05, "model_forward_time": 0.025516986846923828, "step": 29379 }, { "epoch": 4.482879638671875e-05, "step": 29379, "training_step_time": 0.10409426689147949 }, { "epoch": 4.4830322265625e-05, "grad_norm": 0.09571550041437149, "learning_rate": 1.1672512931509283e-07, "loss": 0.0033, "step": 29380 }, { "epoch": 4.4830322265625e-05, "model_forward_time": 0.025953054428100586, "step": 29380 }, { "epoch": 4.4830322265625e-05, "step": 29380, "training_step_time": 0.10621023178100586 }, { "epoch": 4.483184814453125e-05, "model_forward_time": 0.025391340255737305, "step": 29381 }, { "epoch": 4.483184814453125e-05, "step": 29381, "training_step_time": 0.10582971572875977 }, { "epoch": 4.48333740234375e-05, "model_forward_time": 0.02524399757385254, "step": 29382 }, { "epoch": 4.48333740234375e-05, "step": 29382, "training_step_time": 0.13315868377685547 }, { "epoch": 4.483489990234375e-05, "model_forward_time": 0.025664091110229492, "step": 29383 }, { "epoch": 4.483489990234375e-05, "step": 29383, "training_step_time": 0.16475868225097656 }, { "epoch": 4.483642578125e-05, "model_forward_time": 0.02571725845336914, "step": 29384 }, { "epoch": 4.483642578125e-05, "step": 29384, "training_step_time": 0.1621706485748291 }, { "epoch": 4.483795166015625e-05, "model_forward_time": 0.023891925811767578, "step": 29385 }, { "epoch": 4.483795166015625e-05, "step": 29385, "training_step_time": 0.16046619415283203 }, { "epoch": 4.48394775390625e-05, "model_forward_time": 0.02487492561340332, "step": 29386 }, { "epoch": 4.48394775390625e-05, "step": 29386, "training_step_time": 0.14921927452087402 }, { "epoch": 4.484100341796875e-05, "model_forward_time": 0.025042295455932617, "step": 29387 }, { "epoch": 4.484100341796875e-05, "step": 29387, "training_step_time": 0.13836145401000977 }, { "epoch": 4.4842529296875e-05, "model_forward_time": 0.024645566940307617, "step": 29388 }, { "epoch": 4.4842529296875e-05, "step": 29388, "training_step_time": 0.22414875030517578 }, { "epoch": 4.484405517578125e-05, "model_forward_time": 0.02482128143310547, "step": 29389 }, { "epoch": 4.484405517578125e-05, "step": 29389, "training_step_time": 0.12362360954284668 }, { "epoch": 4.48455810546875e-05, "grad_norm": 0.06256138533353806, "learning_rate": 1.1299157547854377e-07, "loss": 0.0019, "step": 29390 }, { "epoch": 4.48455810546875e-05, "model_forward_time": 0.023352622985839844, "step": 29390 }, { "epoch": 4.48455810546875e-05, "step": 29390, "training_step_time": 0.2050638198852539 }, { "epoch": 4.484710693359375e-05, "model_forward_time": 0.024926424026489258, "step": 29391 }, { "epoch": 4.484710693359375e-05, "step": 29391, "training_step_time": 0.1689913272857666 }, { "epoch": 4.48486328125e-05, "model_forward_time": 0.024693727493286133, "step": 29392 }, { "epoch": 4.48486328125e-05, "step": 29392, "training_step_time": 0.14019131660461426 }, { "epoch": 4.485015869140625e-05, "model_forward_time": 0.024831295013427734, "step": 29393 }, { "epoch": 4.485015869140625e-05, "step": 29393, "training_step_time": 0.10866999626159668 }, { "epoch": 4.48516845703125e-05, "model_forward_time": 0.02533411979675293, "step": 29394 }, { "epoch": 4.48516845703125e-05, "step": 29394, "training_step_time": 0.10558748245239258 }, { "epoch": 4.485321044921875e-05, "model_forward_time": 0.02636408805847168, "step": 29395 }, { "epoch": 4.485321044921875e-05, "step": 29395, "training_step_time": 0.1079709529876709 }, { "epoch": 4.4854736328125e-05, "model_forward_time": 0.025482177734375, "step": 29396 }, { "epoch": 4.4854736328125e-05, "step": 29396, "training_step_time": 0.10696840286254883 }, { "epoch": 4.485626220703125e-05, "model_forward_time": 0.02669525146484375, "step": 29397 }, { "epoch": 4.485626220703125e-05, "step": 29397, "training_step_time": 0.11041784286499023 }, { "epoch": 4.48577880859375e-05, "model_forward_time": 0.02595353126525879, "step": 29398 }, { "epoch": 4.48577880859375e-05, "step": 29398, "training_step_time": 0.10701441764831543 }, { "epoch": 4.485931396484375e-05, "model_forward_time": 0.025667905807495117, "step": 29399 }, { "epoch": 4.485931396484375e-05, "step": 29399, "training_step_time": 0.10563087463378906 }, { "epoch": 4.486083984375e-05, "grad_norm": 0.03418092057108879, "learning_rate": 1.0931863906127327e-07, "loss": 0.0051, "step": 29400 }, { "epoch": 4.486083984375e-05, "model_forward_time": 0.02523040771484375, "step": 29400 }, { "epoch": 4.486083984375e-05, "step": 29400, "training_step_time": 0.1109151840209961 }, { "epoch": 4.486236572265625e-05, "model_forward_time": 0.025670528411865234, "step": 29401 }, { "epoch": 4.486236572265625e-05, "step": 29401, "training_step_time": 0.10586166381835938 }, { "epoch": 4.48638916015625e-05, "model_forward_time": 0.025612592697143555, "step": 29402 }, { "epoch": 4.48638916015625e-05, "step": 29402, "training_step_time": 0.10492444038391113 }, { "epoch": 4.486541748046875e-05, "model_forward_time": 0.026044368743896484, "step": 29403 }, { "epoch": 4.486541748046875e-05, "step": 29403, "training_step_time": 0.10693025588989258 }, { "epoch": 4.4866943359375e-05, "model_forward_time": 0.02565455436706543, "step": 29404 }, { "epoch": 4.4866943359375e-05, "step": 29404, "training_step_time": 0.10594749450683594 }, { "epoch": 4.486846923828125e-05, "model_forward_time": 0.026036977767944336, "step": 29405 }, { "epoch": 4.486846923828125e-05, "step": 29405, "training_step_time": 0.10925412178039551 }, { "epoch": 4.48699951171875e-05, "model_forward_time": 0.029282569885253906, "step": 29406 }, { "epoch": 4.48699951171875e-05, "step": 29406, "training_step_time": 0.11032700538635254 }, { "epoch": 4.487152099609375e-05, "model_forward_time": 0.025209665298461914, "step": 29407 }, { "epoch": 4.487152099609375e-05, "step": 29407, "training_step_time": 0.10767579078674316 }, { "epoch": 4.4873046875e-05, "model_forward_time": 0.02625274658203125, "step": 29408 }, { "epoch": 4.4873046875e-05, "step": 29408, "training_step_time": 0.10532498359680176 }, { "epoch": 4.487457275390625e-05, "model_forward_time": 0.02665114402770996, "step": 29409 }, { "epoch": 4.487457275390625e-05, "step": 29409, "training_step_time": 0.16915082931518555 }, { "epoch": 4.48760986328125e-05, "grad_norm": 0.17175669968128204, "learning_rate": 1.0570632452623353e-07, "loss": 0.0045, "step": 29410 }, { "epoch": 4.48760986328125e-05, "model_forward_time": 0.025166749954223633, "step": 29410 }, { "epoch": 4.48760986328125e-05, "step": 29410, "training_step_time": 0.12027359008789062 }, { "epoch": 4.487762451171875e-05, "model_forward_time": 0.025444984436035156, "step": 29411 }, { "epoch": 4.487762451171875e-05, "step": 29411, "training_step_time": 0.11290574073791504 }, { "epoch": 4.4879150390625e-05, "model_forward_time": 0.027086496353149414, "step": 29412 }, { "epoch": 4.4879150390625e-05, "step": 29412, "training_step_time": 0.14971017837524414 }, { "epoch": 4.488067626953125e-05, "model_forward_time": 0.025290727615356445, "step": 29413 }, { "epoch": 4.488067626953125e-05, "step": 29413, "training_step_time": 0.21808147430419922 }, { "epoch": 4.48822021484375e-05, "model_forward_time": 0.024555206298828125, "step": 29414 }, { "epoch": 4.48822021484375e-05, "step": 29414, "training_step_time": 0.11126422882080078 }, { "epoch": 4.488372802734375e-05, "model_forward_time": 0.02469348907470703, "step": 29415 }, { "epoch": 4.488372802734375e-05, "step": 29415, "training_step_time": 0.10843229293823242 }, { "epoch": 4.488525390625e-05, "model_forward_time": 0.025799036026000977, "step": 29416 }, { "epoch": 4.488525390625e-05, "step": 29416, "training_step_time": 0.11892032623291016 }, { "epoch": 4.488677978515625e-05, "model_forward_time": 0.02448415756225586, "step": 29417 }, { "epoch": 4.488677978515625e-05, "step": 29417, "training_step_time": 0.14757204055786133 }, { "epoch": 4.48883056640625e-05, "model_forward_time": 0.025730371475219727, "step": 29418 }, { "epoch": 4.48883056640625e-05, "step": 29418, "training_step_time": 0.1570439338684082 }, { "epoch": 4.488983154296875e-05, "model_forward_time": 0.024774551391601562, "step": 29419 }, { "epoch": 4.488983154296875e-05, "step": 29419, "training_step_time": 0.12609076499938965 }, { "epoch": 4.4891357421875e-05, "grad_norm": 0.0336330346763134, "learning_rate": 1.0215463626274125e-07, "loss": 0.0022, "step": 29420 }, { "epoch": 4.4891357421875e-05, "model_forward_time": 0.025694847106933594, "step": 29420 }, { "epoch": 4.4891357421875e-05, "step": 29420, "training_step_time": 0.11999082565307617 }, { "epoch": 4.489288330078125e-05, "model_forward_time": 0.02543473243713379, "step": 29421 }, { "epoch": 4.489288330078125e-05, "step": 29421, "training_step_time": 0.18630027770996094 }, { "epoch": 4.48944091796875e-05, "model_forward_time": 0.024593353271484375, "step": 29422 }, { "epoch": 4.48944091796875e-05, "step": 29422, "training_step_time": 0.10984396934509277 }, { "epoch": 4.489593505859375e-05, "model_forward_time": 0.025817155838012695, "step": 29423 }, { "epoch": 4.489593505859375e-05, "step": 29423, "training_step_time": 0.10954904556274414 }, { "epoch": 4.48974609375e-05, "model_forward_time": 0.026928424835205078, "step": 29424 }, { "epoch": 4.48974609375e-05, "step": 29424, "training_step_time": 0.11081457138061523 }, { "epoch": 4.489898681640625e-05, "model_forward_time": 0.02454543113708496, "step": 29425 }, { "epoch": 4.489898681640625e-05, "step": 29425, "training_step_time": 0.11007428169250488 }, { "epoch": 4.49005126953125e-05, "model_forward_time": 0.024797916412353516, "step": 29426 }, { "epoch": 4.49005126953125e-05, "step": 29426, "training_step_time": 0.10775136947631836 }, { "epoch": 4.490203857421875e-05, "model_forward_time": 0.02487802505493164, "step": 29427 }, { "epoch": 4.490203857421875e-05, "step": 29427, "training_step_time": 0.10765814781188965 }, { "epoch": 4.4903564453125e-05, "model_forward_time": 0.025678634643554688, "step": 29428 }, { "epoch": 4.4903564453125e-05, "step": 29428, "training_step_time": 0.10901188850402832 }, { "epoch": 4.490509033203125e-05, "model_forward_time": 0.02556896209716797, "step": 29429 }, { "epoch": 4.490509033203125e-05, "step": 29429, "training_step_time": 0.10471272468566895 }, { "epoch": 4.49066162109375e-05, "grad_norm": 0.05490518733859062, "learning_rate": 9.866357858642205e-08, "loss": 0.005, "step": 29430 }, { "epoch": 4.49066162109375e-05, "model_forward_time": 0.02416539192199707, "step": 29430 }, { "epoch": 4.49066162109375e-05, "step": 29430, "training_step_time": 0.10748982429504395 }, { "epoch": 4.490814208984375e-05, "model_forward_time": 0.02450251579284668, "step": 29431 }, { "epoch": 4.490814208984375e-05, "step": 29431, "training_step_time": 0.10341930389404297 }, { "epoch": 4.490966796875e-05, "model_forward_time": 0.025054931640625, "step": 29432 }, { "epoch": 4.490966796875e-05, "step": 29432, "training_step_time": 0.2066783905029297 }, { "epoch": 4.491119384765625e-05, "model_forward_time": 0.024718523025512695, "step": 29433 }, { "epoch": 4.491119384765625e-05, "step": 29433, "training_step_time": 0.11582636833190918 }, { "epoch": 4.49127197265625e-05, "model_forward_time": 0.024502277374267578, "step": 29434 }, { "epoch": 4.49127197265625e-05, "step": 29434, "training_step_time": 0.22933101654052734 }, { "epoch": 4.491424560546875e-05, "model_forward_time": 0.025376319885253906, "step": 29435 }, { "epoch": 4.491424560546875e-05, "step": 29435, "training_step_time": 0.11725831031799316 }, { "epoch": 4.4915771484375e-05, "model_forward_time": 0.024591445922851562, "step": 29436 }, { "epoch": 4.4915771484375e-05, "step": 29436, "training_step_time": 0.11391377449035645 }, { "epoch": 4.491729736328125e-05, "model_forward_time": 0.025354862213134766, "step": 29437 }, { "epoch": 4.491729736328125e-05, "step": 29437, "training_step_time": 0.1900019645690918 }, { "epoch": 4.49188232421875e-05, "model_forward_time": 0.024646520614624023, "step": 29438 }, { "epoch": 4.49188232421875e-05, "step": 29438, "training_step_time": 0.20105743408203125 }, { "epoch": 4.492034912109375e-05, "model_forward_time": 0.0247344970703125, "step": 29439 }, { "epoch": 4.492034912109375e-05, "step": 29439, "training_step_time": 0.10267257690429688 }, { "epoch": 4.4921875e-05, "grad_norm": 0.036482006311416626, "learning_rate": 9.523315573924385e-08, "loss": 0.0064, "step": 29440 }, { "epoch": 4.4921875e-05, "model_forward_time": 0.024335145950317383, "step": 29440 }, { "epoch": 4.4921875e-05, "step": 29440, "training_step_time": 0.10899829864501953 }, { "epoch": 4.492340087890625e-05, "model_forward_time": 0.026276588439941406, "step": 29441 }, { "epoch": 4.492340087890625e-05, "step": 29441, "training_step_time": 0.10861968994140625 }, { "epoch": 4.49249267578125e-05, "model_forward_time": 0.02548670768737793, "step": 29442 }, { "epoch": 4.49249267578125e-05, "step": 29442, "training_step_time": 0.10769820213317871 }, { "epoch": 4.492645263671875e-05, "model_forward_time": 0.025188922882080078, "step": 29443 }, { "epoch": 4.492645263671875e-05, "step": 29443, "training_step_time": 0.10910940170288086 }, { "epoch": 4.4927978515625e-05, "model_forward_time": 0.02538776397705078, "step": 29444 }, { "epoch": 4.4927978515625e-05, "step": 29444, "training_step_time": 0.10652279853820801 }, { "epoch": 4.492950439453125e-05, "model_forward_time": 0.025638103485107422, "step": 29445 }, { "epoch": 4.492950439453125e-05, "step": 29445, "training_step_time": 0.11357355117797852 }, { "epoch": 4.49310302734375e-05, "model_forward_time": 0.02532029151916504, "step": 29446 }, { "epoch": 4.49310302734375e-05, "step": 29446, "training_step_time": 0.10700798034667969 }, { "epoch": 4.493255615234375e-05, "model_forward_time": 0.025609254837036133, "step": 29447 }, { "epoch": 4.493255615234375e-05, "step": 29447, "training_step_time": 0.10924243927001953 }, { "epoch": 4.493408203125e-05, "model_forward_time": 0.026072263717651367, "step": 29448 }, { "epoch": 4.493408203125e-05, "step": 29448, "training_step_time": 0.10736083984375 }, { "epoch": 4.493560791015625e-05, "model_forward_time": 0.025032997131347656, "step": 29449 }, { "epoch": 4.493560791015625e-05, "step": 29449, "training_step_time": 0.1064748764038086 }, { "epoch": 4.49371337890625e-05, "grad_norm": 0.032262369990348816, "learning_rate": 9.186337188949457e-08, "loss": 0.0054, "step": 29450 }, { "epoch": 4.49371337890625e-05, "model_forward_time": 0.02523970603942871, "step": 29450 }, { "epoch": 4.49371337890625e-05, "step": 29450, "training_step_time": 0.10555768013000488 }, { "epoch": 4.493865966796875e-05, "model_forward_time": 0.025784730911254883, "step": 29451 }, { "epoch": 4.493865966796875e-05, "step": 29451, "training_step_time": 0.10557794570922852 }, { "epoch": 4.4940185546875e-05, "model_forward_time": 0.025780439376831055, "step": 29452 }, { "epoch": 4.4940185546875e-05, "step": 29452, "training_step_time": 0.1052999496459961 }, { "epoch": 4.494171142578125e-05, "model_forward_time": 0.025603055953979492, "step": 29453 }, { "epoch": 4.494171142578125e-05, "step": 29453, "training_step_time": 0.1891028881072998 }, { "epoch": 4.49432373046875e-05, "model_forward_time": 0.024623394012451172, "step": 29454 }, { "epoch": 4.49432373046875e-05, "step": 29454, "training_step_time": 0.12005233764648438 }, { "epoch": 4.494476318359375e-05, "model_forward_time": 0.024707555770874023, "step": 29455 }, { "epoch": 4.494476318359375e-05, "step": 29455, "training_step_time": 0.1356675624847412 }, { "epoch": 4.49462890625e-05, "model_forward_time": 0.02519369125366211, "step": 29456 }, { "epoch": 4.49462890625e-05, "step": 29456, "training_step_time": 0.1589033603668213 }, { "epoch": 4.494781494140625e-05, "model_forward_time": 0.02423095703125, "step": 29457 }, { "epoch": 4.494781494140625e-05, "step": 29457, "training_step_time": 0.21025347709655762 }, { "epoch": 4.49493408203125e-05, "model_forward_time": 0.024885892868041992, "step": 29458 }, { "epoch": 4.49493408203125e-05, "step": 29458, "training_step_time": 0.11473727226257324 }, { "epoch": 4.495086669921875e-05, "model_forward_time": 0.02506732940673828, "step": 29459 }, { "epoch": 4.495086669921875e-05, "step": 29459, "training_step_time": 0.10505080223083496 }, { "epoch": 4.4952392578125e-05, "grad_norm": 0.03905640169978142, "learning_rate": 8.855423113177664e-08, "loss": 0.002, "step": 29460 }, { "epoch": 4.4952392578125e-05, "model_forward_time": 0.025784730911254883, "step": 29460 }, { "epoch": 4.4952392578125e-05, "step": 29460, "training_step_time": 0.10622811317443848 }, { "epoch": 4.495391845703125e-05, "model_forward_time": 0.024966001510620117, "step": 29461 }, { "epoch": 4.495391845703125e-05, "step": 29461, "training_step_time": 0.15274357795715332 }, { "epoch": 4.49554443359375e-05, "model_forward_time": 0.024703502655029297, "step": 29462 }, { "epoch": 4.49554443359375e-05, "step": 29462, "training_step_time": 0.1620626449584961 }, { "epoch": 4.495697021484375e-05, "model_forward_time": 0.02467203140258789, "step": 29463 }, { "epoch": 4.495697021484375e-05, "step": 29463, "training_step_time": 0.10690450668334961 }, { "epoch": 4.495849609375e-05, "model_forward_time": 0.024972915649414062, "step": 29464 }, { "epoch": 4.495849609375e-05, "step": 29464, "training_step_time": 0.12940526008605957 }, { "epoch": 4.496002197265625e-05, "model_forward_time": 0.0255889892578125, "step": 29465 }, { "epoch": 4.496002197265625e-05, "step": 29465, "training_step_time": 0.16599678993225098 }, { "epoch": 4.49615478515625e-05, "model_forward_time": 0.024616479873657227, "step": 29466 }, { "epoch": 4.49615478515625e-05, "step": 29466, "training_step_time": 0.10270428657531738 }, { "epoch": 4.496307373046875e-05, "model_forward_time": 0.024338722229003906, "step": 29467 }, { "epoch": 4.496307373046875e-05, "step": 29467, "training_step_time": 0.10360097885131836 }, { "epoch": 4.4964599609375e-05, "model_forward_time": 0.025195598602294922, "step": 29468 }, { "epoch": 4.4964599609375e-05, "step": 29468, "training_step_time": 0.10978460311889648 }, { "epoch": 4.496612548828125e-05, "model_forward_time": 0.025525569915771484, "step": 29469 }, { "epoch": 4.496612548828125e-05, "step": 29469, "training_step_time": 0.18714070320129395 }, { "epoch": 4.49676513671875e-05, "grad_norm": 0.1374875009059906, "learning_rate": 8.530573748701253e-08, "loss": 0.0035, "step": 29470 }, { "epoch": 4.49676513671875e-05, "model_forward_time": 0.02646160125732422, "step": 29470 }, { "epoch": 4.49676513671875e-05, "step": 29470, "training_step_time": 0.20954585075378418 }, { "epoch": 4.496917724609375e-05, "model_forward_time": 0.023836612701416016, "step": 29471 }, { "epoch": 4.496917724609375e-05, "step": 29471, "training_step_time": 0.21194839477539062 }, { "epoch": 4.4970703125e-05, "model_forward_time": 0.024641752243041992, "step": 29472 }, { "epoch": 4.4970703125e-05, "step": 29472, "training_step_time": 0.21080231666564941 }, { "epoch": 4.497222900390625e-05, "model_forward_time": 0.024664878845214844, "step": 29473 }, { "epoch": 4.497222900390625e-05, "step": 29473, "training_step_time": 0.22679853439331055 }, { "epoch": 4.49737548828125e-05, "model_forward_time": 0.024332046508789062, "step": 29474 }, { "epoch": 4.49737548828125e-05, "step": 29474, "training_step_time": 0.2137765884399414 }, { "epoch": 4.497528076171875e-05, "model_forward_time": 0.024348735809326172, "step": 29475 }, { "epoch": 4.497528076171875e-05, "step": 29475, "training_step_time": 0.22655749320983887 }, { "epoch": 4.4976806640625e-05, "model_forward_time": 0.02452254295349121, "step": 29476 }, { "epoch": 4.4976806640625e-05, "step": 29476, "training_step_time": 0.17042064666748047 }, { "epoch": 4.497833251953125e-05, "model_forward_time": 0.024749040603637695, "step": 29477 }, { "epoch": 4.497833251953125e-05, "step": 29477, "training_step_time": 0.15212535858154297 }, { "epoch": 4.49798583984375e-05, "model_forward_time": 0.024416208267211914, "step": 29478 }, { "epoch": 4.49798583984375e-05, "step": 29478, "training_step_time": 0.13606572151184082 }, { "epoch": 4.498138427734375e-05, "model_forward_time": 0.024572372436523438, "step": 29479 }, { "epoch": 4.498138427734375e-05, "step": 29479, "training_step_time": 0.10658812522888184 }, { "epoch": 4.498291015625e-05, "grad_norm": 0.03711557388305664, "learning_rate": 8.211789490242261e-08, "loss": 0.0077, "step": 29480 }, { "epoch": 4.498291015625e-05, "model_forward_time": 0.025015592575073242, "step": 29480 }, { "epoch": 4.498291015625e-05, "step": 29480, "training_step_time": 0.10750842094421387 }, { "epoch": 4.498443603515625e-05, "model_forward_time": 0.02528238296508789, "step": 29481 }, { "epoch": 4.498443603515625e-05, "step": 29481, "training_step_time": 0.10809707641601562 }, { "epoch": 4.49859619140625e-05, "model_forward_time": 0.02579665184020996, "step": 29482 }, { "epoch": 4.49859619140625e-05, "step": 29482, "training_step_time": 0.10844707489013672 }, { "epoch": 4.498748779296875e-05, "model_forward_time": 0.025348424911499023, "step": 29483 }, { "epoch": 4.498748779296875e-05, "step": 29483, "training_step_time": 0.10738515853881836 }, { "epoch": 4.4989013671875e-05, "model_forward_time": 0.025777101516723633, "step": 29484 }, { "epoch": 4.4989013671875e-05, "step": 29484, "training_step_time": 0.10957574844360352 }, { "epoch": 4.499053955078125e-05, "model_forward_time": 0.02539372444152832, "step": 29485 }, { "epoch": 4.499053955078125e-05, "step": 29485, "training_step_time": 0.10946273803710938 }, { "epoch": 4.49920654296875e-05, "model_forward_time": 0.02535557746887207, "step": 29486 }, { "epoch": 4.49920654296875e-05, "step": 29486, "training_step_time": 0.10946774482727051 }, { "epoch": 4.499359130859375e-05, "model_forward_time": 0.025066137313842773, "step": 29487 }, { "epoch": 4.499359130859375e-05, "step": 29487, "training_step_time": 0.10408854484558105 }, { "epoch": 4.49951171875e-05, "model_forward_time": 0.025450468063354492, "step": 29488 }, { "epoch": 4.49951171875e-05, "step": 29488, "training_step_time": 0.10880017280578613 }, { "epoch": 4.499664306640625e-05, "model_forward_time": 0.027222633361816406, "step": 29489 }, { "epoch": 4.499664306640625e-05, "step": 29489, "training_step_time": 0.10672712326049805 }, { "epoch": 4.49981689453125e-05, "grad_norm": 0.07051877677440643, "learning_rate": 7.899070725153613e-08, "loss": 0.0041, "step": 29490 }, { "epoch": 4.49981689453125e-05, "model_forward_time": 0.025384902954101562, "step": 29490 }, { "epoch": 4.49981689453125e-05, "step": 29490, "training_step_time": 0.10701346397399902 }, { "epoch": 4.499969482421875e-05, "model_forward_time": 0.028430700302124023, "step": 29491 }, { "epoch": 4.499969482421875e-05, "step": 29491, "training_step_time": 0.10626935958862305 }, { "epoch": 4.5001220703125e-05, "model_forward_time": 0.025125503540039062, "step": 29492 }, { "epoch": 4.5001220703125e-05, "step": 29492, "training_step_time": 0.1042487621307373 }, { "epoch": 4.500274658203125e-05, "model_forward_time": 0.02514815330505371, "step": 29493 }, { "epoch": 4.500274658203125e-05, "step": 29493, "training_step_time": 0.10399985313415527 }, { "epoch": 4.50042724609375e-05, "model_forward_time": 0.02517414093017578, "step": 29494 }, { "epoch": 4.50042724609375e-05, "step": 29494, "training_step_time": 0.1473388671875 }, { "epoch": 4.500579833984375e-05, "model_forward_time": 0.025235891342163086, "step": 29495 }, { "epoch": 4.500579833984375e-05, "step": 29495, "training_step_time": 0.1195220947265625 }, { "epoch": 4.500732421875e-05, "model_forward_time": 0.024762392044067383, "step": 29496 }, { "epoch": 4.500732421875e-05, "step": 29496, "training_step_time": 0.13084673881530762 }, { "epoch": 4.500885009765625e-05, "model_forward_time": 0.02756810188293457, "step": 29497 }, { "epoch": 4.500885009765625e-05, "step": 29497, "training_step_time": 0.21385765075683594 }, { "epoch": 4.50103759765625e-05, "model_forward_time": 0.0243072509765625, "step": 29498 }, { "epoch": 4.50103759765625e-05, "step": 29498, "training_step_time": 0.24418091773986816 }, { "epoch": 4.501190185546875e-05, "model_forward_time": 0.024507999420166016, "step": 29499 }, { "epoch": 4.501190185546875e-05, "step": 29499, "training_step_time": 0.20441913604736328 }, { "epoch": 4.5013427734375e-05, "grad_norm": 0.03889711946249008, "learning_rate": 7.59241783341913e-08, "loss": 0.0071, "step": 29500 }, { "epoch": 4.5013427734375e-05, "model_forward_time": 0.024188995361328125, "step": 29500 }, { "epoch": 4.5013427734375e-05, "step": 29500, "training_step_time": 0.1918184757232666 }, { "epoch": 4.501495361328125e-05, "model_forward_time": 0.024157285690307617, "step": 29501 }, { "epoch": 4.501495361328125e-05, "step": 29501, "training_step_time": 0.20156264305114746 }, { "epoch": 4.50164794921875e-05, "model_forward_time": 0.024899959564208984, "step": 29502 }, { "epoch": 4.50164794921875e-05, "step": 29502, "training_step_time": 0.1621389389038086 }, { "epoch": 4.501800537109375e-05, "model_forward_time": 0.0247647762298584, "step": 29503 }, { "epoch": 4.501800537109375e-05, "step": 29503, "training_step_time": 0.11053681373596191 }, { "epoch": 4.501953125e-05, "model_forward_time": 0.024320602416992188, "step": 29504 }, { "epoch": 4.501953125e-05, "step": 29504, "training_step_time": 0.1356067657470703 }, { "epoch": 4.502105712890625e-05, "model_forward_time": 0.025597572326660156, "step": 29505 }, { "epoch": 4.502105712890625e-05, "step": 29505, "training_step_time": 0.19828391075134277 }, { "epoch": 4.50225830078125e-05, "model_forward_time": 0.024166584014892578, "step": 29506 }, { "epoch": 4.50225830078125e-05, "step": 29506, "training_step_time": 0.12172150611877441 }, { "epoch": 4.502410888671875e-05, "model_forward_time": 0.024943828582763672, "step": 29507 }, { "epoch": 4.502410888671875e-05, "step": 29507, "training_step_time": 0.14272189140319824 }, { "epoch": 4.5025634765625e-05, "model_forward_time": 0.02544379234313965, "step": 29508 }, { "epoch": 4.5025634765625e-05, "step": 29508, "training_step_time": 0.1332087516784668 }, { "epoch": 4.502716064453125e-05, "model_forward_time": 0.024133682250976562, "step": 29509 }, { "epoch": 4.502716064453125e-05, "step": 29509, "training_step_time": 0.12697267532348633 }, { "epoch": 4.50286865234375e-05, "grad_norm": 0.07993664592504501, "learning_rate": 7.291831187649645e-08, "loss": 0.0036, "step": 29510 }, { "epoch": 4.50286865234375e-05, "model_forward_time": 0.024898529052734375, "step": 29510 }, { "epoch": 4.50286865234375e-05, "step": 29510, "training_step_time": 0.12034821510314941 }, { "epoch": 4.503021240234375e-05, "model_forward_time": 0.024936437606811523, "step": 29511 }, { "epoch": 4.503021240234375e-05, "step": 29511, "training_step_time": 0.11817789077758789 }, { "epoch": 4.503173828125e-05, "model_forward_time": 0.025554656982421875, "step": 29512 }, { "epoch": 4.503173828125e-05, "step": 29512, "training_step_time": 0.11660552024841309 }, { "epoch": 4.503326416015625e-05, "model_forward_time": 0.02495741844177246, "step": 29513 }, { "epoch": 4.503326416015625e-05, "step": 29513, "training_step_time": 0.17430377006530762 }, { "epoch": 4.50347900390625e-05, "model_forward_time": 0.024595022201538086, "step": 29514 }, { "epoch": 4.50347900390625e-05, "step": 29514, "training_step_time": 0.1102595329284668 }, { "epoch": 4.503631591796875e-05, "model_forward_time": 0.025981426239013672, "step": 29515 }, { "epoch": 4.503631591796875e-05, "step": 29515, "training_step_time": 0.1913909912109375 }, { "epoch": 4.5037841796875e-05, "model_forward_time": 0.025408506393432617, "step": 29516 }, { "epoch": 4.5037841796875e-05, "step": 29516, "training_step_time": 0.14170575141906738 }, { "epoch": 4.503936767578125e-05, "model_forward_time": 0.02487635612487793, "step": 29517 }, { "epoch": 4.503936767578125e-05, "step": 29517, "training_step_time": 0.20699381828308105 }, { "epoch": 4.50408935546875e-05, "model_forward_time": 0.024484872817993164, "step": 29518 }, { "epoch": 4.50408935546875e-05, "step": 29518, "training_step_time": 0.20782995223999023 }, { "epoch": 4.504241943359375e-05, "model_forward_time": 0.024587154388427734, "step": 29519 }, { "epoch": 4.504241943359375e-05, "step": 29519, "training_step_time": 0.1246800422668457 }, { "epoch": 4.50439453125e-05, "grad_norm": 0.05316556245088577, "learning_rate": 6.997311153086883e-08, "loss": 0.0031, "step": 29520 }, { "epoch": 4.50439453125e-05, "model_forward_time": 0.02485966682434082, "step": 29520 }, { "epoch": 4.50439453125e-05, "step": 29520, "training_step_time": 0.12635016441345215 }, { "epoch": 4.504547119140625e-05, "model_forward_time": 0.025443553924560547, "step": 29521 }, { "epoch": 4.504547119140625e-05, "step": 29521, "training_step_time": 0.10416936874389648 }, { "epoch": 4.50469970703125e-05, "model_forward_time": 0.025396347045898438, "step": 29522 }, { "epoch": 4.50469970703125e-05, "step": 29522, "training_step_time": 0.10674047470092773 }, { "epoch": 4.504852294921875e-05, "model_forward_time": 0.025795936584472656, "step": 29523 }, { "epoch": 4.504852294921875e-05, "step": 29523, "training_step_time": 0.10595059394836426 }, { "epoch": 4.5050048828125e-05, "model_forward_time": 0.02550196647644043, "step": 29524 }, { "epoch": 4.5050048828125e-05, "step": 29524, "training_step_time": 0.10480141639709473 }, { "epoch": 4.505157470703125e-05, "model_forward_time": 0.02515101432800293, "step": 29525 }, { "epoch": 4.505157470703125e-05, "step": 29525, "training_step_time": 0.10499286651611328 }, { "epoch": 4.50531005859375e-05, "model_forward_time": 0.024863243103027344, "step": 29526 }, { "epoch": 4.50531005859375e-05, "step": 29526, "training_step_time": 0.10365819931030273 }, { "epoch": 4.505462646484375e-05, "model_forward_time": 0.0252988338470459, "step": 29527 }, { "epoch": 4.505462646484375e-05, "step": 29527, "training_step_time": 0.1051180362701416 }, { "epoch": 4.505615234375e-05, "model_forward_time": 0.025111675262451172, "step": 29528 }, { "epoch": 4.505615234375e-05, "step": 29528, "training_step_time": 0.11017584800720215 }, { "epoch": 4.505767822265625e-05, "model_forward_time": 0.025771379470825195, "step": 29529 }, { "epoch": 4.505767822265625e-05, "step": 29529, "training_step_time": 0.11001253128051758 }, { "epoch": 4.50592041015625e-05, "grad_norm": 0.03150056302547455, "learning_rate": 6.708858087601244e-08, "loss": 0.0058, "step": 29530 }, { "epoch": 4.50592041015625e-05, "model_forward_time": 0.025542497634887695, "step": 29530 }, { "epoch": 4.50592041015625e-05, "step": 29530, "training_step_time": 0.10853767395019531 }, { "epoch": 4.506072998046875e-05, "model_forward_time": 0.025412797927856445, "step": 29531 }, { "epoch": 4.506072998046875e-05, "step": 29531, "training_step_time": 0.10640072822570801 }, { "epoch": 4.5062255859375e-05, "model_forward_time": 0.024363994598388672, "step": 29532 }, { "epoch": 4.5062255859375e-05, "step": 29532, "training_step_time": 0.10201001167297363 }, { "epoch": 4.506378173828125e-05, "model_forward_time": 0.02498936653137207, "step": 29533 }, { "epoch": 4.506378173828125e-05, "step": 29533, "training_step_time": 0.10501909255981445 }, { "epoch": 4.50653076171875e-05, "model_forward_time": 0.025330781936645508, "step": 29534 }, { "epoch": 4.50653076171875e-05, "step": 29534, "training_step_time": 0.10584712028503418 }, { "epoch": 4.506683349609375e-05, "model_forward_time": 0.025069475173950195, "step": 29535 }, { "epoch": 4.506683349609375e-05, "step": 29535, "training_step_time": 0.13206076622009277 }, { "epoch": 4.5068359375e-05, "model_forward_time": 0.02539682388305664, "step": 29536 }, { "epoch": 4.5068359375e-05, "step": 29536, "training_step_time": 0.11951661109924316 }, { "epoch": 4.506988525390625e-05, "model_forward_time": 0.024988651275634766, "step": 29537 }, { "epoch": 4.506988525390625e-05, "step": 29537, "training_step_time": 0.12917017936706543 }, { "epoch": 4.50714111328125e-05, "model_forward_time": 0.02502751350402832, "step": 29538 }, { "epoch": 4.50714111328125e-05, "step": 29538, "training_step_time": 0.15400075912475586 }, { "epoch": 4.507293701171875e-05, "model_forward_time": 0.02503514289855957, "step": 29539 }, { "epoch": 4.507293701171875e-05, "step": 29539, "training_step_time": 0.21957707405090332 }, { "epoch": 4.5074462890625e-05, "grad_norm": 0.022882062941789627, "learning_rate": 6.426472341689027e-08, "loss": 0.0123, "step": 29540 }, { "epoch": 4.5074462890625e-05, "model_forward_time": 0.02451300621032715, "step": 29540 }, { "epoch": 4.5074462890625e-05, "step": 29540, "training_step_time": 0.10738420486450195 }, { "epoch": 4.507598876953125e-05, "model_forward_time": 0.0245819091796875, "step": 29541 }, { "epoch": 4.507598876953125e-05, "step": 29541, "training_step_time": 0.10121941566467285 }, { "epoch": 4.50775146484375e-05, "model_forward_time": 0.02523064613342285, "step": 29542 }, { "epoch": 4.50775146484375e-05, "step": 29542, "training_step_time": 0.10693168640136719 }, { "epoch": 4.507904052734375e-05, "model_forward_time": 0.025423049926757812, "step": 29543 }, { "epoch": 4.507904052734375e-05, "step": 29543, "training_step_time": 0.13923025131225586 }, { "epoch": 4.508056640625e-05, "model_forward_time": 0.024567842483520508, "step": 29544 }, { "epoch": 4.508056640625e-05, "step": 29544, "training_step_time": 0.2061014175415039 }, { "epoch": 4.508209228515625e-05, "model_forward_time": 0.024405717849731445, "step": 29545 }, { "epoch": 4.508209228515625e-05, "step": 29545, "training_step_time": 0.22521543502807617 }, { "epoch": 4.50836181640625e-05, "model_forward_time": 0.02455282211303711, "step": 29546 }, { "epoch": 4.50836181640625e-05, "step": 29546, "training_step_time": 0.19358158111572266 }, { "epoch": 4.508514404296875e-05, "model_forward_time": 0.02466416358947754, "step": 29547 }, { "epoch": 4.508514404296875e-05, "step": 29547, "training_step_time": 0.17590022087097168 }, { "epoch": 4.5086669921875e-05, "model_forward_time": 0.0241241455078125, "step": 29548 }, { "epoch": 4.5086669921875e-05, "step": 29548, "training_step_time": 0.16410207748413086 }, { "epoch": 4.508819580078125e-05, "model_forward_time": 0.02409958839416504, "step": 29549 }, { "epoch": 4.508819580078125e-05, "step": 29549, "training_step_time": 0.15457534790039062 }, { "epoch": 4.50897216796875e-05, "grad_norm": 0.027359770610928535, "learning_rate": 6.150154258476315e-08, "loss": 0.0042, "step": 29550 }, { "epoch": 4.50897216796875e-05, "model_forward_time": 0.024699926376342773, "step": 29550 }, { "epoch": 4.50897216796875e-05, "step": 29550, "training_step_time": 0.10277676582336426 }, { "epoch": 4.509124755859375e-05, "model_forward_time": 0.024931669235229492, "step": 29551 }, { "epoch": 4.509124755859375e-05, "step": 29551, "training_step_time": 0.10227799415588379 }, { "epoch": 4.50927734375e-05, "model_forward_time": 0.025644302368164062, "step": 29552 }, { "epoch": 4.50927734375e-05, "step": 29552, "training_step_time": 0.10514211654663086 }, { "epoch": 4.509429931640625e-05, "model_forward_time": 0.025731801986694336, "step": 29553 }, { "epoch": 4.509429931640625e-05, "step": 29553, "training_step_time": 0.1040337085723877 }, { "epoch": 4.50958251953125e-05, "model_forward_time": 0.025430917739868164, "step": 29554 }, { "epoch": 4.50958251953125e-05, "step": 29554, "training_step_time": 0.10729837417602539 }, { "epoch": 4.509735107421875e-05, "model_forward_time": 0.02573871612548828, "step": 29555 }, { "epoch": 4.509735107421875e-05, "step": 29555, "training_step_time": 0.12199234962463379 }, { "epoch": 4.5098876953125e-05, "model_forward_time": 0.025090456008911133, "step": 29556 }, { "epoch": 4.5098876953125e-05, "step": 29556, "training_step_time": 0.10915231704711914 }, { "epoch": 4.510040283203125e-05, "model_forward_time": 0.025728940963745117, "step": 29557 }, { "epoch": 4.510040283203125e-05, "step": 29557, "training_step_time": 0.18514370918273926 }, { "epoch": 4.51019287109375e-05, "model_forward_time": 0.024450063705444336, "step": 29558 }, { "epoch": 4.51019287109375e-05, "step": 29558, "training_step_time": 0.20688199996948242 }, { "epoch": 4.510345458984375e-05, "model_forward_time": 0.024922847747802734, "step": 29559 }, { "epoch": 4.510345458984375e-05, "step": 29559, "training_step_time": 0.22192049026489258 }, { "epoch": 4.510498046875e-05, "grad_norm": 0.02034337818622589, "learning_rate": 5.8799041737150896e-08, "loss": 0.0025, "step": 29560 }, { "epoch": 4.510498046875e-05, "model_forward_time": 0.02508687973022461, "step": 29560 }, { "epoch": 4.510498046875e-05, "step": 29560, "training_step_time": 0.13025856018066406 }, { "epoch": 4.510650634765625e-05, "model_forward_time": 0.02457284927368164, "step": 29561 }, { "epoch": 4.510650634765625e-05, "step": 29561, "training_step_time": 0.12818384170532227 }, { "epoch": 4.51080322265625e-05, "model_forward_time": 0.02541184425354004, "step": 29562 }, { "epoch": 4.51080322265625e-05, "step": 29562, "training_step_time": 0.12350058555603027 }, { "epoch": 4.510955810546875e-05, "model_forward_time": 0.024986982345581055, "step": 29563 }, { "epoch": 4.510955810546875e-05, "step": 29563, "training_step_time": 0.11614370346069336 }, { "epoch": 4.5111083984375e-05, "model_forward_time": 0.027311086654663086, "step": 29564 }, { "epoch": 4.5111083984375e-05, "step": 29564, "training_step_time": 0.1427912712097168 }, { "epoch": 4.511260986328125e-05, "model_forward_time": 0.02537822723388672, "step": 29565 }, { "epoch": 4.511260986328125e-05, "step": 29565, "training_step_time": 0.14483189582824707 }, { "epoch": 4.51141357421875e-05, "model_forward_time": 0.02461385726928711, "step": 29566 }, { "epoch": 4.51141357421875e-05, "step": 29566, "training_step_time": 0.1836996078491211 }, { "epoch": 4.511566162109375e-05, "model_forward_time": 0.02485799789428711, "step": 29567 }, { "epoch": 4.511566162109375e-05, "step": 29567, "training_step_time": 0.1797933578491211 }, { "epoch": 4.51171875e-05, "model_forward_time": 0.024726152420043945, "step": 29568 }, { "epoch": 4.51171875e-05, "step": 29568, "training_step_time": 0.18574166297912598 }, { "epoch": 4.511871337890625e-05, "model_forward_time": 0.025325536727905273, "step": 29569 }, { "epoch": 4.511871337890625e-05, "step": 29569, "training_step_time": 0.1748056411743164 }, { "epoch": 4.51202392578125e-05, "grad_norm": 0.03413934260606766, "learning_rate": 5.615722415785451e-08, "loss": 0.0064, "step": 29570 }, { "epoch": 4.51202392578125e-05, "model_forward_time": 0.024906635284423828, "step": 29570 }, { "epoch": 4.51202392578125e-05, "step": 29570, "training_step_time": 0.15620708465576172 }, { "epoch": 4.512176513671875e-05, "model_forward_time": 0.024425268173217773, "step": 29571 }, { "epoch": 4.512176513671875e-05, "step": 29571, "training_step_time": 0.13498377799987793 }, { "epoch": 4.5123291015625e-05, "model_forward_time": 0.024925947189331055, "step": 29572 }, { "epoch": 4.5123291015625e-05, "step": 29572, "training_step_time": 0.13255739212036133 }, { "epoch": 4.512481689453125e-05, "model_forward_time": 0.024873971939086914, "step": 29573 }, { "epoch": 4.512481689453125e-05, "step": 29573, "training_step_time": 0.1482563018798828 }, { "epoch": 4.51263427734375e-05, "model_forward_time": 0.025078296661376953, "step": 29574 }, { "epoch": 4.51263427734375e-05, "step": 29574, "training_step_time": 0.11328339576721191 }, { "epoch": 4.512786865234375e-05, "model_forward_time": 0.025264263153076172, "step": 29575 }, { "epoch": 4.512786865234375e-05, "step": 29575, "training_step_time": 0.12916111946105957 }, { "epoch": 4.512939453125e-05, "model_forward_time": 0.025290966033935547, "step": 29576 }, { "epoch": 4.512939453125e-05, "step": 29576, "training_step_time": 0.15732574462890625 }, { "epoch": 4.513092041015625e-05, "model_forward_time": 0.024350643157958984, "step": 29577 }, { "epoch": 4.513092041015625e-05, "step": 29577, "training_step_time": 0.21616196632385254 }, { "epoch": 4.51324462890625e-05, "model_forward_time": 0.024686098098754883, "step": 29578 }, { "epoch": 4.51324462890625e-05, "step": 29578, "training_step_time": 0.11483907699584961 }, { "epoch": 4.513397216796875e-05, "model_forward_time": 0.025191783905029297, "step": 29579 }, { "epoch": 4.513397216796875e-05, "step": 29579, "training_step_time": 0.10467362403869629 }, { "epoch": 4.5135498046875e-05, "grad_norm": 0.03096199594438076, "learning_rate": 5.3576093056922906e-08, "loss": 0.0061, "step": 29580 }, { "epoch": 4.5135498046875e-05, "model_forward_time": 0.024056673049926758, "step": 29580 }, { "epoch": 4.5135498046875e-05, "step": 29580, "training_step_time": 0.10682868957519531 }, { "epoch": 4.513702392578125e-05, "model_forward_time": 0.02612137794494629, "step": 29581 }, { "epoch": 4.513702392578125e-05, "step": 29581, "training_step_time": 0.10569047927856445 }, { "epoch": 4.51385498046875e-05, "model_forward_time": 0.026274442672729492, "step": 29582 }, { "epoch": 4.51385498046875e-05, "step": 29582, "training_step_time": 0.14847588539123535 }, { "epoch": 4.514007568359375e-05, "model_forward_time": 0.025204181671142578, "step": 29583 }, { "epoch": 4.514007568359375e-05, "step": 29583, "training_step_time": 0.16208243370056152 }, { "epoch": 4.51416015625e-05, "model_forward_time": 0.02490854263305664, "step": 29584 }, { "epoch": 4.51416015625e-05, "step": 29584, "training_step_time": 0.1108555793762207 }, { "epoch": 4.514312744140625e-05, "model_forward_time": 0.02445054054260254, "step": 29585 }, { "epoch": 4.514312744140625e-05, "step": 29585, "training_step_time": 0.14454984664916992 }, { "epoch": 4.51446533203125e-05, "model_forward_time": 0.025461196899414062, "step": 29586 }, { "epoch": 4.51446533203125e-05, "step": 29586, "training_step_time": 0.20086145401000977 }, { "epoch": 4.514617919921875e-05, "model_forward_time": 0.025015830993652344, "step": 29587 }, { "epoch": 4.514617919921875e-05, "step": 29587, "training_step_time": 0.10629844665527344 }, { "epoch": 4.5147705078125e-05, "model_forward_time": 0.025182247161865234, "step": 29588 }, { "epoch": 4.5147705078125e-05, "step": 29588, "training_step_time": 0.10264205932617188 }, { "epoch": 4.514923095703125e-05, "model_forward_time": 0.02554774284362793, "step": 29589 }, { "epoch": 4.514923095703125e-05, "step": 29589, "training_step_time": 0.1043248176574707 }, { "epoch": 4.51507568359375e-05, "grad_norm": 0.03093460574746132, "learning_rate": 5.105565157068615e-08, "loss": 0.0046, "step": 29590 }, { "epoch": 4.51507568359375e-05, "model_forward_time": 0.025167226791381836, "step": 29590 }, { "epoch": 4.51507568359375e-05, "step": 29590, "training_step_time": 0.10372567176818848 }, { "epoch": 4.515228271484375e-05, "model_forward_time": 0.025574922561645508, "step": 29591 }, { "epoch": 4.515228271484375e-05, "step": 29591, "training_step_time": 0.1045234203338623 }, { "epoch": 4.515380859375e-05, "model_forward_time": 0.025262117385864258, "step": 29592 }, { "epoch": 4.515380859375e-05, "step": 29592, "training_step_time": 0.10402798652648926 }, { "epoch": 4.515533447265625e-05, "model_forward_time": 0.025628328323364258, "step": 29593 }, { "epoch": 4.515533447265625e-05, "step": 29593, "training_step_time": 0.10593795776367188 }, { "epoch": 4.51568603515625e-05, "model_forward_time": 0.025805950164794922, "step": 29594 }, { "epoch": 4.51568603515625e-05, "step": 29594, "training_step_time": 0.10753583908081055 }, { "epoch": 4.515838623046875e-05, "model_forward_time": 0.025567054748535156, "step": 29595 }, { "epoch": 4.515838623046875e-05, "step": 29595, "training_step_time": 0.10724234580993652 }, { "epoch": 4.5159912109375e-05, "model_forward_time": 0.025744915008544922, "step": 29596 }, { "epoch": 4.5159912109375e-05, "step": 29596, "training_step_time": 0.17777085304260254 }, { "epoch": 4.516143798828125e-05, "model_forward_time": 0.024701356887817383, "step": 29597 }, { "epoch": 4.516143798828125e-05, "step": 29597, "training_step_time": 0.15043044090270996 }, { "epoch": 4.51629638671875e-05, "model_forward_time": 0.024866342544555664, "step": 29598 }, { "epoch": 4.51629638671875e-05, "step": 29598, "training_step_time": 0.13985466957092285 }, { "epoch": 4.516448974609375e-05, "model_forward_time": 0.024860382080078125, "step": 29599 }, { "epoch": 4.516448974609375e-05, "step": 29599, "training_step_time": 0.21168065071105957 }, { "epoch": 4.5166015625e-05, "grad_norm": 0.10285698622465134, "learning_rate": 4.859590276170556e-08, "loss": 0.0025, "step": 29600 }, { "epoch": 4.5166015625e-05, "model_forward_time": 0.024628877639770508, "step": 29600 }, { "epoch": 4.5166015625e-05, "step": 29600, "training_step_time": 0.20354580879211426 }, { "epoch": 4.516754150390625e-05, "model_forward_time": 0.0248110294342041, "step": 29601 }, { "epoch": 4.516754150390625e-05, "step": 29601, "training_step_time": 0.10490560531616211 }, { "epoch": 4.51690673828125e-05, "model_forward_time": 0.027085065841674805, "step": 29602 }, { "epoch": 4.51690673828125e-05, "step": 29602, "training_step_time": 0.1075284481048584 }, { "epoch": 4.517059326171875e-05, "model_forward_time": 0.025545835494995117, "step": 29603 }, { "epoch": 4.517059326171875e-05, "step": 29603, "training_step_time": 0.11116170883178711 }, { "epoch": 4.5172119140625e-05, "model_forward_time": 0.025629758834838867, "step": 29604 }, { "epoch": 4.5172119140625e-05, "step": 29604, "training_step_time": 0.11196136474609375 }, { "epoch": 4.517364501953125e-05, "model_forward_time": 0.025400876998901367, "step": 29605 }, { "epoch": 4.517364501953125e-05, "step": 29605, "training_step_time": 0.1323997974395752 }, { "epoch": 4.51751708984375e-05, "model_forward_time": 0.02507615089416504, "step": 29606 }, { "epoch": 4.51751708984375e-05, "step": 29606, "training_step_time": 0.10466551780700684 }, { "epoch": 4.517669677734375e-05, "model_forward_time": 0.025432348251342773, "step": 29607 }, { "epoch": 4.517669677734375e-05, "step": 29607, "training_step_time": 0.10953330993652344 }, { "epoch": 4.517822265625e-05, "model_forward_time": 0.02554011344909668, "step": 29608 }, { "epoch": 4.517822265625e-05, "step": 29608, "training_step_time": 0.10809111595153809 }, { "epoch": 4.517974853515625e-05, "model_forward_time": 0.025523900985717773, "step": 29609 }, { "epoch": 4.517974853515625e-05, "step": 29609, "training_step_time": 0.11200356483459473 }, { "epoch": 4.51812744140625e-05, "grad_norm": 0.03034748136997223, "learning_rate": 4.619684961881254e-08, "loss": 0.0018, "step": 29610 }, { "epoch": 4.51812744140625e-05, "model_forward_time": 0.02568197250366211, "step": 29610 }, { "epoch": 4.51812744140625e-05, "step": 29610, "training_step_time": 0.10518407821655273 }, { "epoch": 4.518280029296875e-05, "model_forward_time": 0.025279521942138672, "step": 29611 }, { "epoch": 4.518280029296875e-05, "step": 29611, "training_step_time": 0.10691499710083008 }, { "epoch": 4.5184326171875e-05, "model_forward_time": 0.025506258010864258, "step": 29612 }, { "epoch": 4.5184326171875e-05, "step": 29612, "training_step_time": 0.10376715660095215 }, { "epoch": 4.518585205078125e-05, "model_forward_time": 0.025234460830688477, "step": 29613 }, { "epoch": 4.518585205078125e-05, "step": 29613, "training_step_time": 0.10405778884887695 }, { "epoch": 4.51873779296875e-05, "model_forward_time": 0.025545597076416016, "step": 29614 }, { "epoch": 4.51873779296875e-05, "step": 29614, "training_step_time": 0.10643482208251953 }, { "epoch": 4.518890380859375e-05, "model_forward_time": 0.026504993438720703, "step": 29615 }, { "epoch": 4.518890380859375e-05, "step": 29615, "training_step_time": 0.10744071006774902 }, { "epoch": 4.51904296875e-05, "model_forward_time": 0.025057554244995117, "step": 29616 }, { "epoch": 4.51904296875e-05, "step": 29616, "training_step_time": 0.10919523239135742 }, { "epoch": 4.519195556640625e-05, "model_forward_time": 0.025006771087646484, "step": 29617 }, { "epoch": 4.519195556640625e-05, "step": 29617, "training_step_time": 0.10374331474304199 }, { "epoch": 4.51934814453125e-05, "model_forward_time": 0.025271177291870117, "step": 29618 }, { "epoch": 4.51934814453125e-05, "step": 29618, "training_step_time": 0.15247654914855957 }, { "epoch": 4.519500732421875e-05, "model_forward_time": 0.025696992874145508, "step": 29619 }, { "epoch": 4.519500732421875e-05, "step": 29619, "training_step_time": 0.1170954704284668 }, { "epoch": 4.5196533203125e-05, "grad_norm": 0.082671158015728, "learning_rate": 4.385849505708084e-08, "loss": 0.0028, "step": 29620 }, { "epoch": 4.5196533203125e-05, "model_forward_time": 0.024617433547973633, "step": 29620 }, { "epoch": 4.5196533203125e-05, "step": 29620, "training_step_time": 0.15830063819885254 }, { "epoch": 4.519805908203125e-05, "model_forward_time": 0.02474188804626465, "step": 29621 }, { "epoch": 4.519805908203125e-05, "step": 29621, "training_step_time": 0.18254399299621582 }, { "epoch": 4.51995849609375e-05, "model_forward_time": 0.024033784866333008, "step": 29622 }, { "epoch": 4.51995849609375e-05, "step": 29622, "training_step_time": 0.15720224380493164 }, { "epoch": 4.520111083984375e-05, "model_forward_time": 0.024799108505249023, "step": 29623 }, { "epoch": 4.520111083984375e-05, "step": 29623, "training_step_time": 0.11114501953125 }, { "epoch": 4.520263671875e-05, "model_forward_time": 0.025066375732421875, "step": 29624 }, { "epoch": 4.520263671875e-05, "step": 29624, "training_step_time": 0.10458064079284668 }, { "epoch": 4.520416259765625e-05, "model_forward_time": 0.025496482849121094, "step": 29625 }, { "epoch": 4.520416259765625e-05, "step": 29625, "training_step_time": 0.10793375968933105 }, { "epoch": 4.52056884765625e-05, "model_forward_time": 0.025423049926757812, "step": 29626 }, { "epoch": 4.52056884765625e-05, "step": 29626, "training_step_time": 0.10202431678771973 }, { "epoch": 4.520721435546875e-05, "model_forward_time": 0.025261640548706055, "step": 29627 }, { "epoch": 4.520721435546875e-05, "step": 29627, "training_step_time": 0.15013718605041504 }, { "epoch": 4.5208740234375e-05, "model_forward_time": 0.02523207664489746, "step": 29628 }, { "epoch": 4.5208740234375e-05, "step": 29628, "training_step_time": 0.15972375869750977 }, { "epoch": 4.521026611328125e-05, "model_forward_time": 0.02500176429748535, "step": 29629 }, { "epoch": 4.521026611328125e-05, "step": 29629, "training_step_time": 0.11524581909179688 }, { "epoch": 4.52117919921875e-05, "grad_norm": 0.06548045575618744, "learning_rate": 4.158084191783762e-08, "loss": 0.0072, "step": 29630 }, { "epoch": 4.52117919921875e-05, "model_forward_time": 0.025054454803466797, "step": 29630 }, { "epoch": 4.52117919921875e-05, "step": 29630, "training_step_time": 0.12883996963500977 }, { "epoch": 4.521331787109375e-05, "model_forward_time": 0.02620863914489746, "step": 29631 }, { "epoch": 4.521331787109375e-05, "step": 29631, "training_step_time": 0.13492131233215332 }, { "epoch": 4.521484375e-05, "model_forward_time": 0.025491952896118164, "step": 29632 }, { "epoch": 4.521484375e-05, "step": 29632, "training_step_time": 0.13121461868286133 }, { "epoch": 4.521636962890625e-05, "model_forward_time": 0.025008440017700195, "step": 29633 }, { "epoch": 4.521636962890625e-05, "step": 29633, "training_step_time": 0.1312880516052246 }, { "epoch": 4.52178955078125e-05, "model_forward_time": 0.02568984031677246, "step": 29634 }, { "epoch": 4.52178955078125e-05, "step": 29634, "training_step_time": 0.12571215629577637 }, { "epoch": 4.521942138671875e-05, "model_forward_time": 0.0250089168548584, "step": 29635 }, { "epoch": 4.521942138671875e-05, "step": 29635, "training_step_time": 0.12392401695251465 }, { "epoch": 4.5220947265625e-05, "model_forward_time": 0.02521538734436035, "step": 29636 }, { "epoch": 4.5220947265625e-05, "step": 29636, "training_step_time": 0.12266349792480469 }, { "epoch": 4.522247314453125e-05, "model_forward_time": 0.025313615798950195, "step": 29637 }, { "epoch": 4.522247314453125e-05, "step": 29637, "training_step_time": 0.12054300308227539 }, { "epoch": 4.52239990234375e-05, "model_forward_time": 0.02552962303161621, "step": 29638 }, { "epoch": 4.52239990234375e-05, "step": 29638, "training_step_time": 0.11164546012878418 }, { "epoch": 4.522552490234375e-05, "model_forward_time": 0.025350093841552734, "step": 29639 }, { "epoch": 4.522552490234375e-05, "step": 29639, "training_step_time": 0.11586546897888184 }, { "epoch": 4.522705078125e-05, "grad_norm": 0.09248725324869156, "learning_rate": 3.936389296864129e-08, "loss": 0.0031, "step": 29640 }, { "epoch": 4.522705078125e-05, "model_forward_time": 0.025119304656982422, "step": 29640 }, { "epoch": 4.522705078125e-05, "step": 29640, "training_step_time": 0.13777756690979004 }, { "epoch": 4.522857666015625e-05, "model_forward_time": 0.02499556541442871, "step": 29641 }, { "epoch": 4.522857666015625e-05, "step": 29641, "training_step_time": 0.13263726234436035 }, { "epoch": 4.52301025390625e-05, "model_forward_time": 0.024793386459350586, "step": 29642 }, { "epoch": 4.52301025390625e-05, "step": 29642, "training_step_time": 0.10691165924072266 }, { "epoch": 4.523162841796875e-05, "model_forward_time": 0.02570033073425293, "step": 29643 }, { "epoch": 4.523162841796875e-05, "step": 29643, "training_step_time": 0.11609125137329102 }, { "epoch": 4.5233154296875e-05, "model_forward_time": 0.02536320686340332, "step": 29644 }, { "epoch": 4.5233154296875e-05, "step": 29644, "training_step_time": 0.17969465255737305 }, { "epoch": 4.523468017578125e-05, "model_forward_time": 0.025518178939819336, "step": 29645 }, { "epoch": 4.523468017578125e-05, "step": 29645, "training_step_time": 0.21148180961608887 }, { "epoch": 4.52362060546875e-05, "model_forward_time": 0.02523660659790039, "step": 29646 }, { "epoch": 4.52362060546875e-05, "step": 29646, "training_step_time": 0.17023825645446777 }, { "epoch": 4.523773193359375e-05, "model_forward_time": 0.02474236488342285, "step": 29647 }, { "epoch": 4.523773193359375e-05, "step": 29647, "training_step_time": 0.15087556838989258 }, { "epoch": 4.52392578125e-05, "model_forward_time": 0.025370359420776367, "step": 29648 }, { "epoch": 4.52392578125e-05, "step": 29648, "training_step_time": 0.1161494255065918 }, { "epoch": 4.524078369140625e-05, "model_forward_time": 0.025065898895263672, "step": 29649 }, { "epoch": 4.524078369140625e-05, "step": 29649, "training_step_time": 0.13740086555480957 }, { "epoch": 4.52423095703125e-05, "grad_norm": 0.024560654535889626, "learning_rate": 3.720765090329814e-08, "loss": 0.0065, "step": 29650 }, { "epoch": 4.52423095703125e-05, "model_forward_time": 0.025783538818359375, "step": 29650 }, { "epoch": 4.52423095703125e-05, "step": 29650, "training_step_time": 0.10371065139770508 }, { "epoch": 4.524383544921875e-05, "model_forward_time": 0.025590181350708008, "step": 29651 }, { "epoch": 4.524383544921875e-05, "step": 29651, "training_step_time": 0.11135268211364746 }, { "epoch": 4.5245361328125e-05, "model_forward_time": 0.025798559188842773, "step": 29652 }, { "epoch": 4.5245361328125e-05, "step": 29652, "training_step_time": 0.10965442657470703 }, { "epoch": 4.524688720703125e-05, "model_forward_time": 0.028910160064697266, "step": 29653 }, { "epoch": 4.524688720703125e-05, "step": 29653, "training_step_time": 0.10869574546813965 }, { "epoch": 4.52484130859375e-05, "model_forward_time": 0.025638580322265625, "step": 29654 }, { "epoch": 4.52484130859375e-05, "step": 29654, "training_step_time": 0.10786223411560059 }, { "epoch": 4.524993896484375e-05, "model_forward_time": 0.02565622329711914, "step": 29655 }, { "epoch": 4.524993896484375e-05, "step": 29655, "training_step_time": 0.10527157783508301 }, { "epoch": 4.525146484375e-05, "model_forward_time": 0.027047395706176758, "step": 29656 }, { "epoch": 4.525146484375e-05, "step": 29656, "training_step_time": 0.11253166198730469 }, { "epoch": 4.525299072265625e-05, "model_forward_time": 0.025848388671875, "step": 29657 }, { "epoch": 4.525299072265625e-05, "step": 29657, "training_step_time": 0.10574674606323242 }, { "epoch": 4.52545166015625e-05, "model_forward_time": 0.02568221092224121, "step": 29658 }, { "epoch": 4.52545166015625e-05, "step": 29658, "training_step_time": 0.1050422191619873 }, { "epoch": 4.525604248046875e-05, "model_forward_time": 0.025738239288330078, "step": 29659 }, { "epoch": 4.525604248046875e-05, "step": 29659, "training_step_time": 0.10905933380126953 }, { "epoch": 4.5257568359375e-05, "grad_norm": 0.02502184920012951, "learning_rate": 3.511211834184014e-08, "loss": 0.0081, "step": 29660 }, { "epoch": 4.5257568359375e-05, "model_forward_time": 0.025752544403076172, "step": 29660 }, { "epoch": 4.5257568359375e-05, "step": 29660, "training_step_time": 0.1080021858215332 }, { "epoch": 4.525909423828125e-05, "model_forward_time": 0.025728464126586914, "step": 29661 }, { "epoch": 4.525909423828125e-05, "step": 29661, "training_step_time": 0.2097759246826172 }, { "epoch": 4.52606201171875e-05, "model_forward_time": 0.024498462677001953, "step": 29662 }, { "epoch": 4.52606201171875e-05, "step": 29662, "training_step_time": 0.12109589576721191 }, { "epoch": 4.526214599609375e-05, "model_forward_time": 0.024850845336914062, "step": 29663 }, { "epoch": 4.526214599609375e-05, "step": 29663, "training_step_time": 0.12487363815307617 }, { "epoch": 4.5263671875e-05, "model_forward_time": 0.026038646697998047, "step": 29664 }, { "epoch": 4.5263671875e-05, "step": 29664, "training_step_time": 0.1618044376373291 }, { "epoch": 4.526519775390625e-05, "model_forward_time": 0.025014877319335938, "step": 29665 }, { "epoch": 4.526519775390625e-05, "step": 29665, "training_step_time": 0.21860194206237793 }, { "epoch": 4.52667236328125e-05, "model_forward_time": 0.026047945022583008, "step": 29666 }, { "epoch": 4.52667236328125e-05, "step": 29666, "training_step_time": 0.10300517082214355 }, { "epoch": 4.526824951171875e-05, "model_forward_time": 0.024447202682495117, "step": 29667 }, { "epoch": 4.526824951171875e-05, "step": 29667, "training_step_time": 0.10590958595275879 }, { "epoch": 4.5269775390625e-05, "model_forward_time": 0.025720596313476562, "step": 29668 }, { "epoch": 4.5269775390625e-05, "step": 29668, "training_step_time": 0.10593175888061523 }, { "epoch": 4.527130126953125e-05, "model_forward_time": 0.025516748428344727, "step": 29669 }, { "epoch": 4.527130126953125e-05, "step": 29669, "training_step_time": 0.10470247268676758 }, { "epoch": 4.52728271484375e-05, "grad_norm": 0.11808640509843826, "learning_rate": 3.3077297830541584e-08, "loss": 0.0051, "step": 29670 }, { "epoch": 4.52728271484375e-05, "model_forward_time": 0.025986194610595703, "step": 29670 }, { "epoch": 4.52728271484375e-05, "step": 29670, "training_step_time": 0.10517525672912598 }, { "epoch": 4.527435302734375e-05, "model_forward_time": 0.025160789489746094, "step": 29671 }, { "epoch": 4.527435302734375e-05, "step": 29671, "training_step_time": 0.13320612907409668 }, { "epoch": 4.527587890625e-05, "model_forward_time": 0.02528095245361328, "step": 29672 }, { "epoch": 4.527587890625e-05, "step": 29672, "training_step_time": 0.1121985912322998 }, { "epoch": 4.527740478515625e-05, "model_forward_time": 0.025724411010742188, "step": 29673 }, { "epoch": 4.527740478515625e-05, "step": 29673, "training_step_time": 0.11493659019470215 }, { "epoch": 4.52789306640625e-05, "model_forward_time": 0.025823593139648438, "step": 29674 }, { "epoch": 4.52789306640625e-05, "step": 29674, "training_step_time": 0.12138032913208008 }, { "epoch": 4.528045654296875e-05, "model_forward_time": 0.0260159969329834, "step": 29675 }, { "epoch": 4.528045654296875e-05, "step": 29675, "training_step_time": 0.10980749130249023 }, { "epoch": 4.5281982421875e-05, "model_forward_time": 0.025948762893676758, "step": 29676 }, { "epoch": 4.5281982421875e-05, "step": 29676, "training_step_time": 0.12473344802856445 }, { "epoch": 4.528350830078125e-05, "model_forward_time": 0.025831937789916992, "step": 29677 }, { "epoch": 4.528350830078125e-05, "step": 29677, "training_step_time": 0.1088247299194336 }, { "epoch": 4.52850341796875e-05, "model_forward_time": 0.025553464889526367, "step": 29678 }, { "epoch": 4.52850341796875e-05, "step": 29678, "training_step_time": 0.10664176940917969 }, { "epoch": 4.528656005859375e-05, "model_forward_time": 0.025921106338500977, "step": 29679 }, { "epoch": 4.528656005859375e-05, "step": 29679, "training_step_time": 0.10860466957092285 }, { "epoch": 4.52880859375e-05, "grad_norm": 0.05965844541788101, "learning_rate": 3.110319184189692e-08, "loss": 0.0047, "step": 29680 }, { "epoch": 4.52880859375e-05, "model_forward_time": 0.02552032470703125, "step": 29680 }, { "epoch": 4.52880859375e-05, "step": 29680, "training_step_time": 0.10634803771972656 }, { "epoch": 4.528961181640625e-05, "model_forward_time": 0.02579021453857422, "step": 29681 }, { "epoch": 4.528961181640625e-05, "step": 29681, "training_step_time": 0.1104276180267334 }, { "epoch": 4.52911376953125e-05, "model_forward_time": 0.02559494972229004, "step": 29682 }, { "epoch": 4.52911376953125e-05, "step": 29682, "training_step_time": 0.10550880432128906 }, { "epoch": 4.529266357421875e-05, "model_forward_time": 0.025532007217407227, "step": 29683 }, { "epoch": 4.529266357421875e-05, "step": 29683, "training_step_time": 0.10835838317871094 }, { "epoch": 4.5294189453125e-05, "model_forward_time": 0.02557539939880371, "step": 29684 }, { "epoch": 4.5294189453125e-05, "step": 29684, "training_step_time": 0.10563278198242188 }, { "epoch": 4.529571533203125e-05, "model_forward_time": 0.026108980178833008, "step": 29685 }, { "epoch": 4.529571533203125e-05, "step": 29685, "training_step_time": 0.15536189079284668 }, { "epoch": 4.52972412109375e-05, "model_forward_time": 0.024858713150024414, "step": 29686 }, { "epoch": 4.52972412109375e-05, "step": 29686, "training_step_time": 0.1387336254119873 }, { "epoch": 4.529876708984375e-05, "model_forward_time": 0.024775028228759766, "step": 29687 }, { "epoch": 4.529876708984375e-05, "step": 29687, "training_step_time": 0.10977816581726074 }, { "epoch": 4.530029296875e-05, "model_forward_time": 0.0258333683013916, "step": 29688 }, { "epoch": 4.530029296875e-05, "step": 29688, "training_step_time": 0.18958187103271484 }, { "epoch": 4.530181884765625e-05, "model_forward_time": 0.024982690811157227, "step": 29689 }, { "epoch": 4.530181884765625e-05, "step": 29689, "training_step_time": 0.2292780876159668 }, { "epoch": 4.53033447265625e-05, "grad_norm": 0.09275636821985245, "learning_rate": 2.9189802774631792e-08, "loss": 0.0054, "step": 29690 }, { "epoch": 4.53033447265625e-05, "model_forward_time": 0.025409936904907227, "step": 29690 }, { "epoch": 4.53033447265625e-05, "step": 29690, "training_step_time": 0.23215198516845703 }, { "epoch": 4.530487060546875e-05, "model_forward_time": 0.024776220321655273, "step": 29691 }, { "epoch": 4.530487060546875e-05, "step": 29691, "training_step_time": 0.20402002334594727 }, { "epoch": 4.5306396484375e-05, "model_forward_time": 0.0251009464263916, "step": 29692 }, { "epoch": 4.5306396484375e-05, "step": 29692, "training_step_time": 0.20563030242919922 }, { "epoch": 4.530792236328125e-05, "model_forward_time": 0.025626420974731445, "step": 29693 }, { "epoch": 4.530792236328125e-05, "step": 29693, "training_step_time": 0.1843571662902832 }, { "epoch": 4.53094482421875e-05, "model_forward_time": 0.025150060653686523, "step": 29694 }, { "epoch": 4.53094482421875e-05, "step": 29694, "training_step_time": 0.16259241104125977 }, { "epoch": 4.531097412109375e-05, "model_forward_time": 0.024699687957763672, "step": 29695 }, { "epoch": 4.531097412109375e-05, "step": 29695, "training_step_time": 0.1451706886291504 }, { "epoch": 4.53125e-05, "model_forward_time": 0.02496170997619629, "step": 29696 }, { "epoch": 4.53125e-05, "step": 29696, "training_step_time": 0.10522031784057617 }, { "epoch": 4.531402587890625e-05, "model_forward_time": 0.02544999122619629, "step": 29697 }, { "epoch": 4.531402587890625e-05, "step": 29697, "training_step_time": 0.10150575637817383 }, { "epoch": 4.53155517578125e-05, "model_forward_time": 0.025568008422851562, "step": 29698 }, { "epoch": 4.53155517578125e-05, "step": 29698, "training_step_time": 0.1040036678314209 }, { "epoch": 4.531707763671875e-05, "model_forward_time": 0.025693178176879883, "step": 29699 }, { "epoch": 4.531707763671875e-05, "step": 29699, "training_step_time": 0.10436415672302246 }, { "epoch": 4.5318603515625e-05, "grad_norm": 0.047349270433187485, "learning_rate": 2.7337132953697554e-08, "loss": 0.0024, "step": 29700 }, { "epoch": 4.5318603515625e-05, "model_forward_time": 0.026383399963378906, "step": 29700 }, { "epoch": 4.5318603515625e-05, "step": 29700, "training_step_time": 0.1092383861541748 }, { "epoch": 4.532012939453125e-05, "model_forward_time": 0.025580883026123047, "step": 29701 }, { "epoch": 4.532012939453125e-05, "step": 29701, "training_step_time": 0.10425519943237305 }, { "epoch": 4.53216552734375e-05, "model_forward_time": 0.025387287139892578, "step": 29702 }, { "epoch": 4.53216552734375e-05, "step": 29702, "training_step_time": 0.10656929016113281 }, { "epoch": 4.532318115234375e-05, "model_forward_time": 0.025997161865234375, "step": 29703 }, { "epoch": 4.532318115234375e-05, "step": 29703, "training_step_time": 0.10522937774658203 }, { "epoch": 4.532470703125e-05, "model_forward_time": 0.025434255599975586, "step": 29704 }, { "epoch": 4.532470703125e-05, "step": 29704, "training_step_time": 0.16423559188842773 }, { "epoch": 4.532623291015625e-05, "model_forward_time": 0.02486443519592285, "step": 29705 }, { "epoch": 4.532623291015625e-05, "step": 29705, "training_step_time": 0.1191549301147461 }, { "epoch": 4.53277587890625e-05, "model_forward_time": 0.024611711502075195, "step": 29706 }, { "epoch": 4.53277587890625e-05, "step": 29706, "training_step_time": 0.1169428825378418 }, { "epoch": 4.532928466796875e-05, "model_forward_time": 0.025295734405517578, "step": 29707 }, { "epoch": 4.532928466796875e-05, "step": 29707, "training_step_time": 0.15642118453979492 }, { "epoch": 4.5330810546875e-05, "model_forward_time": 0.025137662887573242, "step": 29708 }, { "epoch": 4.5330810546875e-05, "step": 29708, "training_step_time": 0.2115638256072998 }, { "epoch": 4.533233642578125e-05, "model_forward_time": 0.024944543838500977, "step": 29709 }, { "epoch": 4.533233642578125e-05, "step": 29709, "training_step_time": 0.11227607727050781 }, { "epoch": 4.53338623046875e-05, "grad_norm": 0.2016042172908783, "learning_rate": 2.5545184630265672e-08, "loss": 0.0059, "step": 29710 }, { "epoch": 4.53338623046875e-05, "model_forward_time": 0.024680137634277344, "step": 29710 }, { "epoch": 4.53338623046875e-05, "step": 29710, "training_step_time": 0.1050412654876709 }, { "epoch": 4.533538818359375e-05, "model_forward_time": 0.025360822677612305, "step": 29711 }, { "epoch": 4.533538818359375e-05, "step": 29711, "training_step_time": 0.10575556755065918 }, { "epoch": 4.53369140625e-05, "model_forward_time": 0.02600860595703125, "step": 29712 }, { "epoch": 4.53369140625e-05, "step": 29712, "training_step_time": 0.10495972633361816 }, { "epoch": 4.533843994140625e-05, "model_forward_time": 0.02580738067626953, "step": 29713 }, { "epoch": 4.533843994140625e-05, "step": 29713, "training_step_time": 0.10371923446655273 }, { "epoch": 4.53399658203125e-05, "model_forward_time": 0.025251388549804688, "step": 29714 }, { "epoch": 4.53399658203125e-05, "step": 29714, "training_step_time": 0.10804605484008789 }, { "epoch": 4.534149169921875e-05, "model_forward_time": 0.025064706802368164, "step": 29715 }, { "epoch": 4.534149169921875e-05, "step": 29715, "training_step_time": 0.11443710327148438 }, { "epoch": 4.5343017578125e-05, "model_forward_time": 0.0256807804107666, "step": 29716 }, { "epoch": 4.5343017578125e-05, "step": 29716, "training_step_time": 0.11960506439208984 }, { "epoch": 4.534454345703125e-05, "model_forward_time": 0.02819085121154785, "step": 29717 }, { "epoch": 4.534454345703125e-05, "step": 29717, "training_step_time": 0.10873699188232422 }, { "epoch": 4.53460693359375e-05, "model_forward_time": 0.026059389114379883, "step": 29718 }, { "epoch": 4.53460693359375e-05, "step": 29718, "training_step_time": 0.21387195587158203 }, { "epoch": 4.534759521484375e-05, "model_forward_time": 0.025065183639526367, "step": 29719 }, { "epoch": 4.534759521484375e-05, "step": 29719, "training_step_time": 0.1087651252746582 }, { "epoch": 4.534912109375e-05, "grad_norm": 0.04984891787171364, "learning_rate": 2.3813959981711097e-08, "loss": 0.0059, "step": 29720 }, { "epoch": 4.534912109375e-05, "model_forward_time": 0.02567744255065918, "step": 29720 }, { "epoch": 4.534912109375e-05, "step": 29720, "training_step_time": 0.10394477844238281 }, { "epoch": 4.535064697265625e-05, "model_forward_time": 0.025539636611938477, "step": 29721 }, { "epoch": 4.535064697265625e-05, "step": 29721, "training_step_time": 0.10599923133850098 }, { "epoch": 4.53521728515625e-05, "model_forward_time": 0.02572345733642578, "step": 29722 }, { "epoch": 4.53521728515625e-05, "step": 29722, "training_step_time": 0.10699701309204102 }, { "epoch": 4.535369873046875e-05, "model_forward_time": 0.02574777603149414, "step": 29723 }, { "epoch": 4.535369873046875e-05, "step": 29723, "training_step_time": 0.10710597038269043 }, { "epoch": 4.5355224609375e-05, "model_forward_time": 0.025864124298095703, "step": 29724 }, { "epoch": 4.5355224609375e-05, "step": 29724, "training_step_time": 0.1065073013305664 }, { "epoch": 4.535675048828125e-05, "model_forward_time": 0.0259554386138916, "step": 29725 }, { "epoch": 4.535675048828125e-05, "step": 29725, "training_step_time": 0.10703587532043457 }, { "epoch": 4.53582763671875e-05, "model_forward_time": 0.025142431259155273, "step": 29726 }, { "epoch": 4.53582763671875e-05, "step": 29726, "training_step_time": 0.1086885929107666 }, { "epoch": 4.535980224609375e-05, "model_forward_time": 0.02535104751586914, "step": 29727 }, { "epoch": 4.535980224609375e-05, "step": 29727, "training_step_time": 0.10484886169433594 }, { "epoch": 4.5361328125e-05, "model_forward_time": 0.025358915328979492, "step": 29728 }, { "epoch": 4.5361328125e-05, "step": 29728, "training_step_time": 0.14243268966674805 }, { "epoch": 4.536285400390625e-05, "model_forward_time": 0.025272369384765625, "step": 29729 }, { "epoch": 4.536285400390625e-05, "step": 29729, "training_step_time": 0.11534452438354492 }, { "epoch": 4.53643798828125e-05, "grad_norm": 0.039226289838552475, "learning_rate": 2.214346111164556e-08, "loss": 0.0122, "step": 29730 }, { "epoch": 4.53643798828125e-05, "model_forward_time": 0.02506089210510254, "step": 29730 }, { "epoch": 4.53643798828125e-05, "step": 29730, "training_step_time": 0.10279297828674316 }, { "epoch": 4.536590576171875e-05, "model_forward_time": 0.02518773078918457, "step": 29731 }, { "epoch": 4.536590576171875e-05, "step": 29731, "training_step_time": 0.10540199279785156 }, { "epoch": 4.5367431640625e-05, "model_forward_time": 0.025439977645874023, "step": 29732 }, { "epoch": 4.5367431640625e-05, "step": 29732, "training_step_time": 0.20416831970214844 }, { "epoch": 4.536895751953125e-05, "model_forward_time": 0.024324893951416016, "step": 29733 }, { "epoch": 4.536895751953125e-05, "step": 29733, "training_step_time": 0.14699244499206543 }, { "epoch": 4.53704833984375e-05, "model_forward_time": 0.024393558502197266, "step": 29734 }, { "epoch": 4.53704833984375e-05, "step": 29734, "training_step_time": 0.21265959739685059 }, { "epoch": 4.537200927734375e-05, "model_forward_time": 0.02414679527282715, "step": 29735 }, { "epoch": 4.537200927734375e-05, "step": 29735, "training_step_time": 0.149916410446167 }, { "epoch": 4.537353515625e-05, "model_forward_time": 0.023150205612182617, "step": 29736 }, { "epoch": 4.537353515625e-05, "step": 29736, "training_step_time": 0.12035989761352539 }, { "epoch": 4.537506103515625e-05, "model_forward_time": 0.024893522262573242, "step": 29737 }, { "epoch": 4.537506103515625e-05, "step": 29737, "training_step_time": 0.1407489776611328 }, { "epoch": 4.53765869140625e-05, "model_forward_time": 0.024173736572265625, "step": 29738 }, { "epoch": 4.53765869140625e-05, "step": 29738, "training_step_time": 0.1465003490447998 }, { "epoch": 4.537811279296875e-05, "model_forward_time": 0.024143457412719727, "step": 29739 }, { "epoch": 4.537811279296875e-05, "step": 29739, "training_step_time": 0.13616204261779785 }, { "epoch": 4.5379638671875e-05, "grad_norm": 0.04317305609583855, "learning_rate": 2.0533690049878707e-08, "loss": 0.0029, "step": 29740 }, { "epoch": 4.5379638671875e-05, "model_forward_time": 0.02652430534362793, "step": 29740 }, { "epoch": 4.5379638671875e-05, "step": 29740, "training_step_time": 0.1352841854095459 }, { "epoch": 4.538116455078125e-05, "model_forward_time": 0.024184465408325195, "step": 29741 }, { "epoch": 4.538116455078125e-05, "step": 29741, "training_step_time": 0.12969541549682617 }, { "epoch": 4.53826904296875e-05, "model_forward_time": 0.024678945541381836, "step": 29742 }, { "epoch": 4.53826904296875e-05, "step": 29742, "training_step_time": 0.1267850399017334 }, { "epoch": 4.538421630859375e-05, "model_forward_time": 0.02490544319152832, "step": 29743 }, { "epoch": 4.538421630859375e-05, "step": 29743, "training_step_time": 0.12626957893371582 }, { "epoch": 4.53857421875e-05, "model_forward_time": 0.024782896041870117, "step": 29744 }, { "epoch": 4.53857421875e-05, "step": 29744, "training_step_time": 0.12142038345336914 }, { "epoch": 4.538726806640625e-05, "model_forward_time": 0.025026321411132812, "step": 29745 }, { "epoch": 4.538726806640625e-05, "step": 29745, "training_step_time": 0.11894798278808594 }, { "epoch": 4.53887939453125e-05, "model_forward_time": 0.024596452713012695, "step": 29746 }, { "epoch": 4.53887939453125e-05, "step": 29746, "training_step_time": 0.11337661743164062 }, { "epoch": 4.539031982421875e-05, "model_forward_time": 0.02541518211364746, "step": 29747 }, { "epoch": 4.539031982421875e-05, "step": 29747, "training_step_time": 0.10958600044250488 }, { "epoch": 4.5391845703125e-05, "model_forward_time": 0.025316715240478516, "step": 29748 }, { "epoch": 4.5391845703125e-05, "step": 29748, "training_step_time": 0.13230347633361816 }, { "epoch": 4.539337158203125e-05, "model_forward_time": 0.025249481201171875, "step": 29749 }, { "epoch": 4.539337158203125e-05, "step": 29749, "training_step_time": 0.11367607116699219 }, { "epoch": 4.53948974609375e-05, "grad_norm": 0.062019359320402145, "learning_rate": 1.8984648752429225e-08, "loss": 0.0065, "step": 29750 }, { "epoch": 4.53948974609375e-05, "model_forward_time": 0.024881362915039062, "step": 29750 }, { "epoch": 4.53948974609375e-05, "step": 29750, "training_step_time": 0.1212923526763916 }, { "epoch": 4.539642333984375e-05, "model_forward_time": 0.025597572326660156, "step": 29751 }, { "epoch": 4.539642333984375e-05, "step": 29751, "training_step_time": 0.13914871215820312 }, { "epoch": 4.539794921875e-05, "model_forward_time": 0.02506875991821289, "step": 29752 }, { "epoch": 4.539794921875e-05, "step": 29752, "training_step_time": 0.10605287551879883 }, { "epoch": 4.539947509765625e-05, "model_forward_time": 0.02693486213684082, "step": 29753 }, { "epoch": 4.539947509765625e-05, "step": 29753, "training_step_time": 0.13351106643676758 }, { "epoch": 4.54010009765625e-05, "model_forward_time": 0.025110960006713867, "step": 29754 }, { "epoch": 4.54010009765625e-05, "step": 29754, "training_step_time": 0.11182737350463867 }, { "epoch": 4.540252685546875e-05, "model_forward_time": 0.024997711181640625, "step": 29755 }, { "epoch": 4.540252685546875e-05, "step": 29755, "training_step_time": 0.10388636589050293 }, { "epoch": 4.5404052734375e-05, "model_forward_time": 0.024947166442871094, "step": 29756 }, { "epoch": 4.5404052734375e-05, "step": 29756, "training_step_time": 0.10567069053649902 }, { "epoch": 4.540557861328125e-05, "model_forward_time": 0.025150537490844727, "step": 29757 }, { "epoch": 4.540557861328125e-05, "step": 29757, "training_step_time": 0.10843157768249512 }, { "epoch": 4.54071044921875e-05, "model_forward_time": 0.025532245635986328, "step": 29758 }, { "epoch": 4.54071044921875e-05, "step": 29758, "training_step_time": 0.10559701919555664 }, { "epoch": 4.540863037109375e-05, "model_forward_time": 0.0243380069732666, "step": 29759 }, { "epoch": 4.540863037109375e-05, "step": 29759, "training_step_time": 0.10448431968688965 }, { "epoch": 4.541015625e-05, "grad_norm": 0.03795764967799187, "learning_rate": 1.749633910153592e-08, "loss": 0.0036, "step": 29760 }, { "epoch": 4.541015625e-05, "model_forward_time": 0.025029897689819336, "step": 29760 }, { "epoch": 4.541015625e-05, "step": 29760, "training_step_time": 0.11104607582092285 }, { "epoch": 4.541168212890625e-05, "model_forward_time": 0.025667667388916016, "step": 29761 }, { "epoch": 4.541168212890625e-05, "step": 29761, "training_step_time": 0.10853362083435059 }, { "epoch": 4.54132080078125e-05, "model_forward_time": 0.025260210037231445, "step": 29762 }, { "epoch": 4.54132080078125e-05, "step": 29762, "training_step_time": 0.10883784294128418 }, { "epoch": 4.541473388671875e-05, "model_forward_time": 0.02529621124267578, "step": 29763 }, { "epoch": 4.541473388671875e-05, "step": 29763, "training_step_time": 0.21544289588928223 }, { "epoch": 4.5416259765625e-05, "model_forward_time": 0.025112152099609375, "step": 29764 }, { "epoch": 4.5416259765625e-05, "step": 29764, "training_step_time": 0.11326026916503906 }, { "epoch": 4.541778564453125e-05, "model_forward_time": 0.025382518768310547, "step": 29765 }, { "epoch": 4.541778564453125e-05, "step": 29765, "training_step_time": 0.10379528999328613 }, { "epoch": 4.54193115234375e-05, "model_forward_time": 0.025300264358520508, "step": 29766 }, { "epoch": 4.54193115234375e-05, "step": 29766, "training_step_time": 0.10572576522827148 }, { "epoch": 4.542083740234375e-05, "model_forward_time": 0.024794578552246094, "step": 29767 }, { "epoch": 4.542083740234375e-05, "step": 29767, "training_step_time": 0.1032106876373291 }, { "epoch": 4.542236328125e-05, "model_forward_time": 0.025298118591308594, "step": 29768 }, { "epoch": 4.542236328125e-05, "step": 29768, "training_step_time": 0.10455775260925293 }, { "epoch": 4.542388916015625e-05, "model_forward_time": 0.025965452194213867, "step": 29769 }, { "epoch": 4.542388916015625e-05, "step": 29769, "training_step_time": 0.10513448715209961 }, { "epoch": 4.54254150390625e-05, "grad_norm": 0.027176687493920326, "learning_rate": 1.6068762905635527e-08, "loss": 0.0056, "step": 29770 }, { "epoch": 4.54254150390625e-05, "model_forward_time": 0.02522420883178711, "step": 29770 }, { "epoch": 4.54254150390625e-05, "step": 29770, "training_step_time": 0.10556864738464355 }, { "epoch": 4.542694091796875e-05, "model_forward_time": 0.0240938663482666, "step": 29771 }, { "epoch": 4.542694091796875e-05, "step": 29771, "training_step_time": 0.10435605049133301 }, { "epoch": 4.5428466796875e-05, "model_forward_time": 0.025252819061279297, "step": 29772 }, { "epoch": 4.5428466796875e-05, "step": 29772, "training_step_time": 0.10599827766418457 }, { "epoch": 4.542999267578125e-05, "model_forward_time": 0.02544426918029785, "step": 29773 }, { "epoch": 4.542999267578125e-05, "step": 29773, "training_step_time": 0.1511244773864746 }, { "epoch": 4.54315185546875e-05, "model_forward_time": 0.02490544319152832, "step": 29774 }, { "epoch": 4.54315185546875e-05, "step": 29774, "training_step_time": 0.14748072624206543 }, { "epoch": 4.543304443359375e-05, "model_forward_time": 0.024646520614624023, "step": 29775 }, { "epoch": 4.543304443359375e-05, "step": 29775, "training_step_time": 0.10059785842895508 }, { "epoch": 4.54345703125e-05, "model_forward_time": 0.025406837463378906, "step": 29776 }, { "epoch": 4.54345703125e-05, "step": 29776, "training_step_time": 0.12291479110717773 }, { "epoch": 4.543609619140625e-05, "model_forward_time": 0.02568197250366211, "step": 29777 }, { "epoch": 4.543609619140625e-05, "step": 29777, "training_step_time": 0.12318825721740723 }, { "epoch": 4.54376220703125e-05, "model_forward_time": 0.025368213653564453, "step": 29778 }, { "epoch": 4.54376220703125e-05, "step": 29778, "training_step_time": 0.21992874145507812 }, { "epoch": 4.543914794921875e-05, "model_forward_time": 0.024649620056152344, "step": 29779 }, { "epoch": 4.543914794921875e-05, "step": 29779, "training_step_time": 0.22472476959228516 }, { "epoch": 4.5440673828125e-05, "grad_norm": 0.04830660670995712, "learning_rate": 1.4701921899362703e-08, "loss": 0.0038, "step": 29780 }, { "epoch": 4.5440673828125e-05, "model_forward_time": 0.024690628051757812, "step": 29780 }, { "epoch": 4.5440673828125e-05, "step": 29780, "training_step_time": 0.10743284225463867 }, { "epoch": 4.544219970703125e-05, "model_forward_time": 0.02454090118408203, "step": 29781 }, { "epoch": 4.544219970703125e-05, "step": 29781, "training_step_time": 0.1349046230316162 }, { "epoch": 4.54437255859375e-05, "model_forward_time": 0.025215864181518555, "step": 29782 }, { "epoch": 4.54437255859375e-05, "step": 29782, "training_step_time": 0.15891623497009277 }, { "epoch": 4.544525146484375e-05, "model_forward_time": 0.025135517120361328, "step": 29783 }, { "epoch": 4.544525146484375e-05, "step": 29783, "training_step_time": 0.22401165962219238 }, { "epoch": 4.544677734375e-05, "model_forward_time": 0.024492263793945312, "step": 29784 }, { "epoch": 4.544677734375e-05, "step": 29784, "training_step_time": 0.13244152069091797 }, { "epoch": 4.544830322265625e-05, "model_forward_time": 0.02503037452697754, "step": 29785 }, { "epoch": 4.544830322265625e-05, "step": 29785, "training_step_time": 0.1214301586151123 }, { "epoch": 4.54498291015625e-05, "model_forward_time": 0.02538895606994629, "step": 29786 }, { "epoch": 4.54498291015625e-05, "step": 29786, "training_step_time": 0.12203431129455566 }, { "epoch": 4.545135498046875e-05, "model_forward_time": 0.025394678115844727, "step": 29787 }, { "epoch": 4.545135498046875e-05, "step": 29787, "training_step_time": 0.11734390258789062 }, { "epoch": 4.5452880859375e-05, "model_forward_time": 0.025045394897460938, "step": 29788 }, { "epoch": 4.5452880859375e-05, "step": 29788, "training_step_time": 0.11571145057678223 }, { "epoch": 4.545440673828125e-05, "model_forward_time": 0.025708675384521484, "step": 29789 }, { "epoch": 4.545440673828125e-05, "step": 29789, "training_step_time": 0.11198616027832031 }, { "epoch": 4.54559326171875e-05, "grad_norm": 0.03371327370405197, "learning_rate": 1.3395817743561134e-08, "loss": 0.0023, "step": 29790 }, { "epoch": 4.54559326171875e-05, "model_forward_time": 0.0252230167388916, "step": 29790 }, { "epoch": 4.54559326171875e-05, "step": 29790, "training_step_time": 0.10803437232971191 }, { "epoch": 4.545745849609375e-05, "model_forward_time": 0.025005102157592773, "step": 29791 }, { "epoch": 4.545745849609375e-05, "step": 29791, "training_step_time": 0.10507869720458984 }, { "epoch": 4.5458984375e-05, "model_forward_time": 0.025203943252563477, "step": 29792 }, { "epoch": 4.5458984375e-05, "step": 29792, "training_step_time": 0.20252323150634766 }, { "epoch": 4.546051025390625e-05, "model_forward_time": 0.024335145950317383, "step": 29793 }, { "epoch": 4.546051025390625e-05, "step": 29793, "training_step_time": 0.1155710220336914 }, { "epoch": 4.54620361328125e-05, "model_forward_time": 0.02441883087158203, "step": 29794 }, { "epoch": 4.54620361328125e-05, "step": 29794, "training_step_time": 0.1305527687072754 }, { "epoch": 4.546356201171875e-05, "model_forward_time": 0.02538323402404785, "step": 29795 }, { "epoch": 4.546356201171875e-05, "step": 29795, "training_step_time": 0.15538716316223145 }, { "epoch": 4.5465087890625e-05, "model_forward_time": 0.0249025821685791, "step": 29796 }, { "epoch": 4.5465087890625e-05, "step": 29796, "training_step_time": 0.2151353359222412 }, { "epoch": 4.546661376953125e-05, "model_forward_time": 0.024242162704467773, "step": 29797 }, { "epoch": 4.546661376953125e-05, "step": 29797, "training_step_time": 0.10626959800720215 }, { "epoch": 4.54681396484375e-05, "model_forward_time": 0.024651050567626953, "step": 29798 }, { "epoch": 4.54681396484375e-05, "step": 29798, "training_step_time": 0.1042943000793457 }, { "epoch": 4.546966552734375e-05, "model_forward_time": 0.025501728057861328, "step": 29799 }, { "epoch": 4.546966552734375e-05, "step": 29799, "training_step_time": 0.10441112518310547 }, { "epoch": 4.547119140625e-05, "grad_norm": 0.07544019818305969, "learning_rate": 1.215045202527243e-08, "loss": 0.0039, "step": 29800 }, { "epoch": 4.547119140625e-05, "model_forward_time": 0.02561187744140625, "step": 29800 }, { "epoch": 4.547119140625e-05, "step": 29800, "training_step_time": 0.10857152938842773 }, { "epoch": 4.547271728515625e-05, "model_forward_time": 0.025725603103637695, "step": 29801 }, { "epoch": 4.547271728515625e-05, "step": 29801, "training_step_time": 0.10497689247131348 }, { "epoch": 4.54742431640625e-05, "model_forward_time": 0.0261690616607666, "step": 29802 }, { "epoch": 4.54742431640625e-05, "step": 29802, "training_step_time": 0.1060631275177002 }, { "epoch": 4.547576904296875e-05, "model_forward_time": 0.025210142135620117, "step": 29803 }, { "epoch": 4.547576904296875e-05, "step": 29803, "training_step_time": 0.14506292343139648 }, { "epoch": 4.5477294921875e-05, "model_forward_time": 0.02478933334350586, "step": 29804 }, { "epoch": 4.5477294921875e-05, "step": 29804, "training_step_time": 0.1537313461303711 }, { "epoch": 4.547882080078125e-05, "model_forward_time": 0.024748802185058594, "step": 29805 }, { "epoch": 4.547882080078125e-05, "step": 29805, "training_step_time": 0.14660048484802246 }, { "epoch": 4.54803466796875e-05, "model_forward_time": 0.0249941349029541, "step": 29806 }, { "epoch": 4.54803466796875e-05, "step": 29806, "training_step_time": 0.13646578788757324 }, { "epoch": 4.548187255859375e-05, "model_forward_time": 0.0247342586517334, "step": 29807 }, { "epoch": 4.548187255859375e-05, "step": 29807, "training_step_time": 0.10711455345153809 }, { "epoch": 4.54833984375e-05, "model_forward_time": 0.025429725646972656, "step": 29808 }, { "epoch": 4.54833984375e-05, "step": 29808, "training_step_time": 0.11243414878845215 }, { "epoch": 4.548492431640625e-05, "model_forward_time": 0.02510547637939453, "step": 29809 }, { "epoch": 4.548492431640625e-05, "step": 29809, "training_step_time": 0.11042451858520508 }, { "epoch": 4.54864501953125e-05, "grad_norm": 0.029395155608654022, "learning_rate": 1.096582625772502e-08, "loss": 0.0026, "step": 29810 }, { "epoch": 4.54864501953125e-05, "model_forward_time": 0.025696754455566406, "step": 29810 }, { "epoch": 4.54864501953125e-05, "step": 29810, "training_step_time": 0.10933256149291992 }, { "epoch": 4.548797607421875e-05, "model_forward_time": 0.02564263343811035, "step": 29811 }, { "epoch": 4.548797607421875e-05, "step": 29811, "training_step_time": 0.10943603515625 }, { "epoch": 4.5489501953125e-05, "model_forward_time": 0.026520967483520508, "step": 29812 }, { "epoch": 4.5489501953125e-05, "step": 29812, "training_step_time": 0.11083364486694336 }, { "epoch": 4.549102783203125e-05, "model_forward_time": 0.024502277374267578, "step": 29813 }, { "epoch": 4.549102783203125e-05, "step": 29813, "training_step_time": 0.1049489974975586 }, { "epoch": 4.54925537109375e-05, "model_forward_time": 0.024890422821044922, "step": 29814 }, { "epoch": 4.54925537109375e-05, "step": 29814, "training_step_time": 0.10541272163391113 }, { "epoch": 4.549407958984375e-05, "model_forward_time": 0.02503657341003418, "step": 29815 }, { "epoch": 4.549407958984375e-05, "step": 29815, "training_step_time": 0.1040651798248291 }, { "epoch": 4.549560546875e-05, "model_forward_time": 0.024611949920654297, "step": 29816 }, { "epoch": 4.549560546875e-05, "step": 29816, "training_step_time": 0.1051797866821289 }, { "epoch": 4.549713134765625e-05, "model_forward_time": 0.02532649040222168, "step": 29817 }, { "epoch": 4.549713134765625e-05, "step": 29817, "training_step_time": 0.10568785667419434 }, { "epoch": 4.54986572265625e-05, "model_forward_time": 0.025198936462402344, "step": 29818 }, { "epoch": 4.54986572265625e-05, "step": 29818, "training_step_time": 0.10795974731445312 }, { "epoch": 4.550018310546875e-05, "model_forward_time": 0.02524590492248535, "step": 29819 }, { "epoch": 4.550018310546875e-05, "step": 29819, "training_step_time": 0.10977602005004883 }, { "epoch": 4.5501708984375e-05, "grad_norm": 0.029612349346280098, "learning_rate": 9.841941880361916e-09, "loss": 0.0024, "step": 29820 }, { "epoch": 4.5501708984375e-05, "model_forward_time": 0.025239229202270508, "step": 29820 }, { "epoch": 4.5501708984375e-05, "step": 29820, "training_step_time": 0.10514068603515625 }, { "epoch": 4.550323486328125e-05, "model_forward_time": 0.02533411979675293, "step": 29821 }, { "epoch": 4.550323486328125e-05, "step": 29821, "training_step_time": 0.11825037002563477 }, { "epoch": 4.55047607421875e-05, "model_forward_time": 0.025560855865478516, "step": 29822 }, { "epoch": 4.55047607421875e-05, "step": 29822, "training_step_time": 0.1090996265411377 }, { "epoch": 4.550628662109375e-05, "model_forward_time": 0.02523946762084961, "step": 29823 }, { "epoch": 4.550628662109375e-05, "step": 29823, "training_step_time": 0.1298837661743164 }, { "epoch": 4.55078125e-05, "model_forward_time": 0.025089502334594727, "step": 29824 }, { "epoch": 4.55078125e-05, "step": 29824, "training_step_time": 0.11962127685546875 }, { "epoch": 4.550933837890625e-05, "model_forward_time": 0.025135040283203125, "step": 29825 }, { "epoch": 4.550933837890625e-05, "step": 29825, "training_step_time": 0.20621418952941895 }, { "epoch": 4.55108642578125e-05, "model_forward_time": 0.024346351623535156, "step": 29826 }, { "epoch": 4.55108642578125e-05, "step": 29826, "training_step_time": 0.1054377555847168 }, { "epoch": 4.551239013671875e-05, "model_forward_time": 0.025043487548828125, "step": 29827 }, { "epoch": 4.551239013671875e-05, "step": 29827, "training_step_time": 0.1144716739654541 }, { "epoch": 4.5513916015625e-05, "model_forward_time": 0.025482892990112305, "step": 29828 }, { "epoch": 4.5513916015625e-05, "step": 29828, "training_step_time": 0.11162424087524414 }, { "epoch": 4.551544189453125e-05, "model_forward_time": 0.025158166885375977, "step": 29829 }, { "epoch": 4.551544189453125e-05, "step": 29829, "training_step_time": 0.10683345794677734 }, { "epoch": 4.55169677734375e-05, "grad_norm": 0.4647483825683594, "learning_rate": 8.778800258801844e-09, "loss": 0.0119, "step": 29830 }, { "epoch": 4.55169677734375e-05, "model_forward_time": 0.025578975677490234, "step": 29830 }, { "epoch": 4.55169677734375e-05, "step": 29830, "training_step_time": 0.13682055473327637 }, { "epoch": 4.551849365234375e-05, "model_forward_time": 0.024731874465942383, "step": 29831 }, { "epoch": 4.551849365234375e-05, "step": 29831, "training_step_time": 0.10227799415588379 }, { "epoch": 4.552001953125e-05, "model_forward_time": 0.025095224380493164, "step": 29832 }, { "epoch": 4.552001953125e-05, "step": 29832, "training_step_time": 0.1206061840057373 }, { "epoch": 4.552154541015625e-05, "model_forward_time": 0.025343656539916992, "step": 29833 }, { "epoch": 4.552154541015625e-05, "step": 29833, "training_step_time": 0.13239383697509766 }, { "epoch": 4.55230712890625e-05, "model_forward_time": 0.025284290313720703, "step": 29834 }, { "epoch": 4.55230712890625e-05, "step": 29834, "training_step_time": 0.13172507286071777 }, { "epoch": 4.552459716796875e-05, "model_forward_time": 0.025725841522216797, "step": 29835 }, { "epoch": 4.552459716796875e-05, "step": 29835, "training_step_time": 0.12440061569213867 }, { "epoch": 4.5526123046875e-05, "model_forward_time": 0.025058984756469727, "step": 29836 }, { "epoch": 4.5526123046875e-05, "step": 29836, "training_step_time": 0.11696410179138184 }, { "epoch": 4.552764892578125e-05, "model_forward_time": 0.025155067443847656, "step": 29837 }, { "epoch": 4.552764892578125e-05, "step": 29837, "training_step_time": 0.11116313934326172 }, { "epoch": 4.55291748046875e-05, "model_forward_time": 0.024876117706298828, "step": 29838 }, { "epoch": 4.55291748046875e-05, "step": 29838, "training_step_time": 0.17532849311828613 }, { "epoch": 4.553070068359375e-05, "model_forward_time": 0.02466440200805664, "step": 29839 }, { "epoch": 4.553070068359375e-05, "step": 29839, "training_step_time": 0.12402939796447754 }, { "epoch": 4.55322265625e-05, "grad_norm": 0.032721392810344696, "learning_rate": 7.77640268486146e-09, "loss": 0.0091, "step": 29840 }, { "epoch": 4.55322265625e-05, "model_forward_time": 0.024419546127319336, "step": 29840 }, { "epoch": 4.55322265625e-05, "step": 29840, "training_step_time": 0.10987544059753418 }, { "epoch": 4.553375244140625e-05, "model_forward_time": 0.025065898895263672, "step": 29841 }, { "epoch": 4.553375244140625e-05, "step": 29841, "training_step_time": 0.10781073570251465 }, { "epoch": 4.55352783203125e-05, "model_forward_time": 0.02496933937072754, "step": 29842 }, { "epoch": 4.55352783203125e-05, "step": 29842, "training_step_time": 0.1091301441192627 }, { "epoch": 4.553680419921875e-05, "model_forward_time": 0.025188684463500977, "step": 29843 }, { "epoch": 4.553680419921875e-05, "step": 29843, "training_step_time": 0.10757565498352051 }, { "epoch": 4.5538330078125e-05, "model_forward_time": 0.025395631790161133, "step": 29844 }, { "epoch": 4.5538330078125e-05, "step": 29844, "training_step_time": 0.11426615715026855 }, { "epoch": 4.553985595703125e-05, "model_forward_time": 0.025185346603393555, "step": 29845 }, { "epoch": 4.553985595703125e-05, "step": 29845, "training_step_time": 0.11373162269592285 }, { "epoch": 4.55413818359375e-05, "model_forward_time": 0.025087356567382812, "step": 29846 }, { "epoch": 4.55413818359375e-05, "step": 29846, "training_step_time": 0.10472798347473145 }, { "epoch": 4.554290771484375e-05, "model_forward_time": 0.024913787841796875, "step": 29847 }, { "epoch": 4.554290771484375e-05, "step": 29847, "training_step_time": 0.10458898544311523 }, { "epoch": 4.554443359375e-05, "model_forward_time": 0.024812936782836914, "step": 29848 }, { "epoch": 4.554443359375e-05, "step": 29848, "training_step_time": 0.1044011116027832 }, { "epoch": 4.554595947265625e-05, "model_forward_time": 0.024538516998291016, "step": 29849 }, { "epoch": 4.554595947265625e-05, "step": 29849, "training_step_time": 0.10287642478942871 }, { "epoch": 4.55474853515625e-05, "grad_norm": 0.15437576174736023, "learning_rate": 6.834750376549792e-09, "loss": 0.0063, "step": 29850 }, { "epoch": 4.55474853515625e-05, "model_forward_time": 0.024811506271362305, "step": 29850 }, { "epoch": 4.55474853515625e-05, "step": 29850, "training_step_time": 0.1455671787261963 }, { "epoch": 4.554901123046875e-05, "model_forward_time": 0.025674104690551758, "step": 29851 }, { "epoch": 4.554901123046875e-05, "step": 29851, "training_step_time": 0.15994048118591309 }, { "epoch": 4.5550537109375e-05, "model_forward_time": 0.024443626403808594, "step": 29852 }, { "epoch": 4.5550537109375e-05, "step": 29852, "training_step_time": 0.11379861831665039 }, { "epoch": 4.555206298828125e-05, "model_forward_time": 0.024624347686767578, "step": 29853 }, { "epoch": 4.555206298828125e-05, "step": 29853, "training_step_time": 0.12799954414367676 }, { "epoch": 4.55535888671875e-05, "model_forward_time": 0.02533864974975586, "step": 29854 }, { "epoch": 4.55535888671875e-05, "step": 29854, "training_step_time": 0.2027287483215332 }, { "epoch": 4.555511474609375e-05, "model_forward_time": 0.024456024169921875, "step": 29855 }, { "epoch": 4.555511474609375e-05, "step": 29855, "training_step_time": 0.10149312019348145 }, { "epoch": 4.5556640625e-05, "model_forward_time": 0.02468419075012207, "step": 29856 }, { "epoch": 4.5556640625e-05, "step": 29856, "training_step_time": 0.10879015922546387 }, { "epoch": 4.555816650390625e-05, "model_forward_time": 0.025716066360473633, "step": 29857 }, { "epoch": 4.555816650390625e-05, "step": 29857, "training_step_time": 0.10439038276672363 }, { "epoch": 4.55596923828125e-05, "model_forward_time": 0.025579214096069336, "step": 29858 }, { "epoch": 4.55596923828125e-05, "step": 29858, "training_step_time": 0.10485124588012695 }, { "epoch": 4.556121826171875e-05, "model_forward_time": 0.02590799331665039, "step": 29859 }, { "epoch": 4.556121826171875e-05, "step": 29859, "training_step_time": 0.11098670959472656 }, { "epoch": 4.5562744140625e-05, "grad_norm": 0.11889815330505371, "learning_rate": 5.953844478068238e-09, "loss": 0.0048, "step": 29860 }, { "epoch": 4.5562744140625e-05, "model_forward_time": 0.025787830352783203, "step": 29860 }, { "epoch": 4.5562744140625e-05, "step": 29860, "training_step_time": 0.11009430885314941 }, { "epoch": 4.556427001953125e-05, "model_forward_time": 0.02519512176513672, "step": 29861 }, { "epoch": 4.556427001953125e-05, "step": 29861, "training_step_time": 0.10363125801086426 }, { "epoch": 4.55657958984375e-05, "model_forward_time": 0.025161027908325195, "step": 29862 }, { "epoch": 4.55657958984375e-05, "step": 29862, "training_step_time": 0.10190534591674805 }, { "epoch": 4.556732177734375e-05, "model_forward_time": 0.02541971206665039, "step": 29863 }, { "epoch": 4.556732177734375e-05, "step": 29863, "training_step_time": 0.17733097076416016 }, { "epoch": 4.556884765625e-05, "model_forward_time": 0.024814367294311523, "step": 29864 }, { "epoch": 4.556884765625e-05, "step": 29864, "training_step_time": 0.15914678573608398 }, { "epoch": 4.557037353515625e-05, "model_forward_time": 0.024339675903320312, "step": 29865 }, { "epoch": 4.557037353515625e-05, "step": 29865, "training_step_time": 0.17061281204223633 }, { "epoch": 4.55718994140625e-05, "model_forward_time": 0.024552583694458008, "step": 29866 }, { "epoch": 4.55718994140625e-05, "step": 29866, "training_step_time": 0.19593214988708496 }, { "epoch": 4.557342529296875e-05, "model_forward_time": 0.024770259857177734, "step": 29867 }, { "epoch": 4.557342529296875e-05, "step": 29867, "training_step_time": 0.16433930397033691 }, { "epoch": 4.5574951171875e-05, "model_forward_time": 0.02445673942565918, "step": 29868 }, { "epoch": 4.5574951171875e-05, "step": 29868, "training_step_time": 0.14076566696166992 }, { "epoch": 4.557647705078125e-05, "model_forward_time": 0.024722576141357422, "step": 29869 }, { "epoch": 4.557647705078125e-05, "step": 29869, "training_step_time": 0.2085716724395752 }, { "epoch": 4.55780029296875e-05, "grad_norm": 0.061708930879831314, "learning_rate": 5.133686059793918e-09, "loss": 0.0026, "step": 29870 }, { "epoch": 4.55780029296875e-05, "model_forward_time": 0.025552988052368164, "step": 29870 }, { "epoch": 4.55780029296875e-05, "step": 29870, "training_step_time": 0.21052312850952148 }, { "epoch": 4.557952880859375e-05, "model_forward_time": 0.025050640106201172, "step": 29871 }, { "epoch": 4.557952880859375e-05, "step": 29871, "training_step_time": 0.11556434631347656 }, { "epoch": 4.55810546875e-05, "model_forward_time": 0.024522781372070312, "step": 29872 }, { "epoch": 4.55810546875e-05, "step": 29872, "training_step_time": 0.11649179458618164 }, { "epoch": 4.558258056640625e-05, "model_forward_time": 0.0255277156829834, "step": 29873 }, { "epoch": 4.558258056640625e-05, "step": 29873, "training_step_time": 0.13058733940124512 }, { "epoch": 4.55841064453125e-05, "model_forward_time": 0.025236129760742188, "step": 29874 }, { "epoch": 4.55841064453125e-05, "step": 29874, "training_step_time": 0.1099398136138916 }, { "epoch": 4.558563232421875e-05, "model_forward_time": 0.025589704513549805, "step": 29875 }, { "epoch": 4.558563232421875e-05, "step": 29875, "training_step_time": 0.10824131965637207 }, { "epoch": 4.5587158203125e-05, "model_forward_time": 0.024853229522705078, "step": 29876 }, { "epoch": 4.5587158203125e-05, "step": 29876, "training_step_time": 0.1059272289276123 }, { "epoch": 4.558868408203125e-05, "model_forward_time": 0.02524709701538086, "step": 29877 }, { "epoch": 4.558868408203125e-05, "step": 29877, "training_step_time": 0.10860228538513184 }, { "epoch": 4.55902099609375e-05, "model_forward_time": 0.026285648345947266, "step": 29878 }, { "epoch": 4.55902099609375e-05, "step": 29878, "training_step_time": 0.10982656478881836 }, { "epoch": 4.559173583984375e-05, "model_forward_time": 0.02453017234802246, "step": 29879 }, { "epoch": 4.559173583984375e-05, "step": 29879, "training_step_time": 0.10590934753417969 }, { "epoch": 4.559326171875e-05, "grad_norm": 0.15741203725337982, "learning_rate": 4.3742761183018784e-09, "loss": 0.0053, "step": 29880 }, { "epoch": 4.559326171875e-05, "model_forward_time": 0.02405261993408203, "step": 29880 }, { "epoch": 4.559326171875e-05, "step": 29880, "training_step_time": 0.10668325424194336 }, { "epoch": 4.559478759765625e-05, "model_forward_time": 0.024428606033325195, "step": 29881 }, { "epoch": 4.559478759765625e-05, "step": 29881, "training_step_time": 0.10592341423034668 }, { "epoch": 4.55963134765625e-05, "model_forward_time": 0.024007320404052734, "step": 29882 }, { "epoch": 4.55963134765625e-05, "step": 29882, "training_step_time": 0.17463970184326172 }, { "epoch": 4.559783935546875e-05, "model_forward_time": 0.024793624877929688, "step": 29883 }, { "epoch": 4.559783935546875e-05, "step": 29883, "training_step_time": 0.11826205253601074 }, { "epoch": 4.5599365234375e-05, "model_forward_time": 0.0247189998626709, "step": 29884 }, { "epoch": 4.5599365234375e-05, "step": 29884, "training_step_time": 0.13265275955200195 }, { "epoch": 4.560089111328125e-05, "model_forward_time": 0.025522232055664062, "step": 29885 }, { "epoch": 4.560089111328125e-05, "step": 29885, "training_step_time": 0.10564398765563965 }, { "epoch": 4.56024169921875e-05, "model_forward_time": 0.025332927703857422, "step": 29886 }, { "epoch": 4.56024169921875e-05, "step": 29886, "training_step_time": 0.1683807373046875 }, { "epoch": 4.560394287109375e-05, "model_forward_time": 0.024429798126220703, "step": 29887 }, { "epoch": 4.560394287109375e-05, "step": 29887, "training_step_time": 0.13535571098327637 }, { "epoch": 4.560546875e-05, "model_forward_time": 0.024456262588500977, "step": 29888 }, { "epoch": 4.560546875e-05, "step": 29888, "training_step_time": 0.10717988014221191 }, { "epoch": 4.560699462890625e-05, "model_forward_time": 0.025233983993530273, "step": 29889 }, { "epoch": 4.560699462890625e-05, "step": 29889, "training_step_time": 0.10336875915527344 }, { "epoch": 4.56085205078125e-05, "grad_norm": 0.03077687695622444, "learning_rate": 3.6756155763373323e-09, "loss": 0.0025, "step": 29890 }, { "epoch": 4.56085205078125e-05, "model_forward_time": 0.0254213809967041, "step": 29890 }, { "epoch": 4.56085205078125e-05, "step": 29890, "training_step_time": 0.1055135726928711 }, { "epoch": 4.561004638671875e-05, "model_forward_time": 0.02512216567993164, "step": 29891 }, { "epoch": 4.561004638671875e-05, "step": 29891, "training_step_time": 0.10843396186828613 }, { "epoch": 4.5611572265625e-05, "model_forward_time": 0.025209665298461914, "step": 29892 }, { "epoch": 4.5611572265625e-05, "step": 29892, "training_step_time": 0.10658574104309082 }, { "epoch": 4.561309814453125e-05, "model_forward_time": 0.025867223739624023, "step": 29893 }, { "epoch": 4.561309814453125e-05, "step": 29893, "training_step_time": 0.10712766647338867 }, { "epoch": 4.56146240234375e-05, "model_forward_time": 0.02500176429748535, "step": 29894 }, { "epoch": 4.56146240234375e-05, "step": 29894, "training_step_time": 0.14104533195495605 }, { "epoch": 4.561614990234375e-05, "model_forward_time": 0.02465963363647461, "step": 29895 }, { "epoch": 4.561614990234375e-05, "step": 29895, "training_step_time": 0.15581536293029785 }, { "epoch": 4.561767578125e-05, "model_forward_time": 0.025221824645996094, "step": 29896 }, { "epoch": 4.561767578125e-05, "step": 29896, "training_step_time": 0.11303591728210449 }, { "epoch": 4.561920166015625e-05, "model_forward_time": 0.024544239044189453, "step": 29897 }, { "epoch": 4.561920166015625e-05, "step": 29897, "training_step_time": 0.1325218677520752 }, { "epoch": 4.56207275390625e-05, "model_forward_time": 0.025615692138671875, "step": 29898 }, { "epoch": 4.56207275390625e-05, "step": 29898, "training_step_time": 0.1951141357421875 }, { "epoch": 4.562225341796875e-05, "model_forward_time": 0.0241239070892334, "step": 29899 }, { "epoch": 4.562225341796875e-05, "step": 29899, "training_step_time": 0.10769081115722656 }, { "epoch": 4.5623779296875e-05, "grad_norm": 0.10912805050611496, "learning_rate": 3.0377052828489683e-09, "loss": 0.0129, "step": 29900 }, { "epoch": 4.5623779296875e-05, "model_forward_time": 0.02468132972717285, "step": 29900 }, { "epoch": 4.5623779296875e-05, "step": 29900, "training_step_time": 0.10791420936584473 }, { "epoch": 4.562530517578125e-05, "model_forward_time": 0.025246858596801758, "step": 29901 }, { "epoch": 4.562530517578125e-05, "step": 29901, "training_step_time": 0.10767698287963867 }, { "epoch": 4.56268310546875e-05, "model_forward_time": 0.025555133819580078, "step": 29902 }, { "epoch": 4.56268310546875e-05, "step": 29902, "training_step_time": 0.10740232467651367 }, { "epoch": 4.562835693359375e-05, "model_forward_time": 0.0254056453704834, "step": 29903 }, { "epoch": 4.562835693359375e-05, "step": 29903, "training_step_time": 0.10549807548522949 }, { "epoch": 4.56298828125e-05, "model_forward_time": 0.025142431259155273, "step": 29904 }, { "epoch": 4.56298828125e-05, "step": 29904, "training_step_time": 0.105438232421875 }, { "epoch": 4.563140869140625e-05, "model_forward_time": 0.0251920223236084, "step": 29905 }, { "epoch": 4.563140869140625e-05, "step": 29905, "training_step_time": 0.10515737533569336 }, { "epoch": 4.56329345703125e-05, "model_forward_time": 0.024955034255981445, "step": 29906 }, { "epoch": 4.56329345703125e-05, "step": 29906, "training_step_time": 0.18176889419555664 }, { "epoch": 4.563446044921875e-05, "model_forward_time": 0.024888277053833008, "step": 29907 }, { "epoch": 4.563446044921875e-05, "step": 29907, "training_step_time": 0.11118674278259277 }, { "epoch": 4.5635986328125e-05, "model_forward_time": 0.024846792221069336, "step": 29908 }, { "epoch": 4.5635986328125e-05, "step": 29908, "training_step_time": 0.18247151374816895 }, { "epoch": 4.563751220703125e-05, "model_forward_time": 0.024693727493286133, "step": 29909 }, { "epoch": 4.563751220703125e-05, "step": 29909, "training_step_time": 0.12749028205871582 }, { "epoch": 4.56390380859375e-05, "grad_norm": 0.06053199619054794, "learning_rate": 2.4605460129556445e-09, "loss": 0.0034, "step": 29910 }, { "epoch": 4.56390380859375e-05, "model_forward_time": 0.024350881576538086, "step": 29910 }, { "epoch": 4.56390380859375e-05, "step": 29910, "training_step_time": 0.10874342918395996 }, { "epoch": 4.564056396484375e-05, "model_forward_time": 0.025455236434936523, "step": 29911 }, { "epoch": 4.564056396484375e-05, "step": 29911, "training_step_time": 0.10852837562561035 }, { "epoch": 4.564208984375e-05, "model_forward_time": 0.025014400482177734, "step": 29912 }, { "epoch": 4.564208984375e-05, "step": 29912, "training_step_time": 0.12110638618469238 }, { "epoch": 4.564361572265625e-05, "model_forward_time": 0.025246858596801758, "step": 29913 }, { "epoch": 4.564361572265625e-05, "step": 29913, "training_step_time": 0.14872527122497559 }, { "epoch": 4.56451416015625e-05, "model_forward_time": 0.025379657745361328, "step": 29914 }, { "epoch": 4.56451416015625e-05, "step": 29914, "training_step_time": 0.21952438354492188 }, { "epoch": 4.564666748046875e-05, "model_forward_time": 0.024655818939208984, "step": 29915 }, { "epoch": 4.564666748046875e-05, "step": 29915, "training_step_time": 0.17305517196655273 }, { "epoch": 4.5648193359375e-05, "model_forward_time": 0.02428746223449707, "step": 29916 }, { "epoch": 4.5648193359375e-05, "step": 29916, "training_step_time": 0.14905071258544922 }, { "epoch": 4.564971923828125e-05, "model_forward_time": 0.0247652530670166, "step": 29917 }, { "epoch": 4.564971923828125e-05, "step": 29917, "training_step_time": 0.11669349670410156 }, { "epoch": 4.56512451171875e-05, "model_forward_time": 0.024723291397094727, "step": 29918 }, { "epoch": 4.56512451171875e-05, "step": 29918, "training_step_time": 0.14152979850769043 }, { "epoch": 4.565277099609375e-05, "model_forward_time": 0.02499675750732422, "step": 29919 }, { "epoch": 4.565277099609375e-05, "step": 29919, "training_step_time": 0.10398554801940918 }, { "epoch": 4.5654296875e-05, "grad_norm": 0.07553080469369888, "learning_rate": 1.9441384679574903e-09, "loss": 0.0047, "step": 29920 }, { "epoch": 4.5654296875e-05, "model_forward_time": 0.025573253631591797, "step": 29920 }, { "epoch": 4.5654296875e-05, "step": 29920, "training_step_time": 0.10768938064575195 }, { "epoch": 4.565582275390625e-05, "model_forward_time": 0.025451183319091797, "step": 29921 }, { "epoch": 4.565582275390625e-05, "step": 29921, "training_step_time": 0.10346627235412598 }, { "epoch": 4.56573486328125e-05, "model_forward_time": 0.025549650192260742, "step": 29922 }, { "epoch": 4.56573486328125e-05, "step": 29922, "training_step_time": 0.10369133949279785 }, { "epoch": 4.565887451171875e-05, "model_forward_time": 0.025081157684326172, "step": 29923 }, { "epoch": 4.565887451171875e-05, "step": 29923, "training_step_time": 0.1023261547088623 }, { "epoch": 4.5660400390625e-05, "model_forward_time": 0.025149822235107422, "step": 29924 }, { "epoch": 4.5660400390625e-05, "step": 29924, "training_step_time": 0.10977435111999512 }, { "epoch": 4.566192626953125e-05, "model_forward_time": 0.02494192123413086, "step": 29925 }, { "epoch": 4.566192626953125e-05, "step": 29925, "training_step_time": 0.10996079444885254 }, { "epoch": 4.56634521484375e-05, "model_forward_time": 0.02515721321105957, "step": 29926 }, { "epoch": 4.56634521484375e-05, "step": 29926, "training_step_time": 0.18644475936889648 }, { "epoch": 4.566497802734375e-05, "model_forward_time": 0.023569822311401367, "step": 29927 }, { "epoch": 4.566497802734375e-05, "step": 29927, "training_step_time": 0.23588109016418457 }, { "epoch": 4.566650390625e-05, "model_forward_time": 0.0247647762298584, "step": 29928 }, { "epoch": 4.566650390625e-05, "step": 29928, "training_step_time": 0.2453474998474121 }, { "epoch": 4.566802978515625e-05, "model_forward_time": 0.024123430252075195, "step": 29929 }, { "epoch": 4.566802978515625e-05, "step": 29929, "training_step_time": 0.19994688034057617 }, { "epoch": 4.56695556640625e-05, "grad_norm": 0.1345215141773224, "learning_rate": 1.4884832753414569e-09, "loss": 0.0107, "step": 29930 }, { "epoch": 4.56695556640625e-05, "model_forward_time": 0.02463221549987793, "step": 29930 }, { "epoch": 4.56695556640625e-05, "step": 29930, "training_step_time": 0.18257498741149902 }, { "epoch": 4.567108154296875e-05, "model_forward_time": 0.024220705032348633, "step": 29931 }, { "epoch": 4.567108154296875e-05, "step": 29931, "training_step_time": 0.1744384765625 }, { "epoch": 4.5672607421875e-05, "model_forward_time": 0.024404525756835938, "step": 29932 }, { "epoch": 4.5672607421875e-05, "step": 29932, "training_step_time": 0.16705536842346191 }, { "epoch": 4.567413330078125e-05, "model_forward_time": 0.024343490600585938, "step": 29933 }, { "epoch": 4.567413330078125e-05, "step": 29933, "training_step_time": 0.10091090202331543 }, { "epoch": 4.56756591796875e-05, "model_forward_time": 0.02545785903930664, "step": 29934 }, { "epoch": 4.56756591796875e-05, "step": 29934, "training_step_time": 0.10606265068054199 }, { "epoch": 4.567718505859375e-05, "model_forward_time": 0.025053977966308594, "step": 29935 }, { "epoch": 4.567718505859375e-05, "step": 29935, "training_step_time": 0.14166498184204102 }, { "epoch": 4.56787109375e-05, "model_forward_time": 0.024908065795898438, "step": 29936 }, { "epoch": 4.56787109375e-05, "step": 29936, "training_step_time": 0.16064214706420898 }, { "epoch": 4.568023681640625e-05, "model_forward_time": 0.024546146392822266, "step": 29937 }, { "epoch": 4.568023681640625e-05, "step": 29937, "training_step_time": 0.11368393898010254 }, { "epoch": 4.56817626953125e-05, "model_forward_time": 0.024523019790649414, "step": 29938 }, { "epoch": 4.56817626953125e-05, "step": 29938, "training_step_time": 0.13491010665893555 }, { "epoch": 4.568328857421875e-05, "model_forward_time": 0.02511906623840332, "step": 29939 }, { "epoch": 4.568328857421875e-05, "step": 29939, "training_step_time": 0.20428204536437988 }, { "epoch": 4.5684814453125e-05, "grad_norm": 0.05975399166345596, "learning_rate": 1.0935809887702154e-09, "loss": 0.004, "step": 29940 }, { "epoch": 4.5684814453125e-05, "model_forward_time": 0.02454686164855957, "step": 29940 }, { "epoch": 4.5684814453125e-05, "step": 29940, "training_step_time": 0.11071300506591797 }, { "epoch": 4.568634033203125e-05, "model_forward_time": 0.02504730224609375, "step": 29941 }, { "epoch": 4.568634033203125e-05, "step": 29941, "training_step_time": 0.10204768180847168 }, { "epoch": 4.56878662109375e-05, "model_forward_time": 0.025545120239257812, "step": 29942 }, { "epoch": 4.56878662109375e-05, "step": 29942, "training_step_time": 0.10422396659851074 }, { "epoch": 4.568939208984375e-05, "model_forward_time": 0.025277137756347656, "step": 29943 }, { "epoch": 4.568939208984375e-05, "step": 29943, "training_step_time": 0.10412430763244629 }, { "epoch": 4.569091796875e-05, "model_forward_time": 0.025634050369262695, "step": 29944 }, { "epoch": 4.569091796875e-05, "step": 29944, "training_step_time": 0.10509872436523438 }, { "epoch": 4.569244384765625e-05, "model_forward_time": 0.02518010139465332, "step": 29945 }, { "epoch": 4.569244384765625e-05, "step": 29945, "training_step_time": 0.10468316078186035 }, { "epoch": 4.56939697265625e-05, "model_forward_time": 0.02538132667541504, "step": 29946 }, { "epoch": 4.56939697265625e-05, "step": 29946, "training_step_time": 0.10490942001342773 }, { "epoch": 4.569549560546875e-05, "model_forward_time": 0.02390432357788086, "step": 29947 }, { "epoch": 4.569549560546875e-05, "step": 29947, "training_step_time": 0.19042181968688965 }, { "epoch": 4.5697021484375e-05, "model_forward_time": 0.02464604377746582, "step": 29948 }, { "epoch": 4.5697021484375e-05, "step": 29948, "training_step_time": 0.13690543174743652 }, { "epoch": 4.569854736328125e-05, "model_forward_time": 0.024507999420166016, "step": 29949 }, { "epoch": 4.569854736328125e-05, "step": 29949, "training_step_time": 0.11126923561096191 }, { "epoch": 4.57000732421875e-05, "grad_norm": 0.0483989343047142, "learning_rate": 7.594320880821571e-10, "loss": 0.0038, "step": 29950 }, { "epoch": 4.57000732421875e-05, "model_forward_time": 0.024791479110717773, "step": 29950 }, { "epoch": 4.57000732421875e-05, "step": 29950, "training_step_time": 0.11907720565795898 }, { "epoch": 4.570159912109375e-05, "model_forward_time": 0.025133609771728516, "step": 29951 }, { "epoch": 4.570159912109375e-05, "step": 29951, "training_step_time": 0.1067807674407959 }, { "epoch": 4.5703125e-05, "model_forward_time": 0.02519369125366211, "step": 29952 }, { "epoch": 4.5703125e-05, "step": 29952, "training_step_time": 0.10656595230102539 }, { "epoch": 4.570465087890625e-05, "model_forward_time": 0.025478363037109375, "step": 29953 }, { "epoch": 4.570465087890625e-05, "step": 29953, "training_step_time": 0.12312483787536621 }, { "epoch": 4.57061767578125e-05, "model_forward_time": 0.02511763572692871, "step": 29954 }, { "epoch": 4.57061767578125e-05, "step": 29954, "training_step_time": 0.11763262748718262 }, { "epoch": 4.570770263671875e-05, "model_forward_time": 0.026611804962158203, "step": 29955 }, { "epoch": 4.570770263671875e-05, "step": 29955, "training_step_time": 0.11596274375915527 }, { "epoch": 4.5709228515625e-05, "model_forward_time": 0.026400089263916016, "step": 29956 }, { "epoch": 4.5709228515625e-05, "step": 29956, "training_step_time": 0.21816802024841309 }, { "epoch": 4.571075439453125e-05, "model_forward_time": 0.02498316764831543, "step": 29957 }, { "epoch": 4.571075439453125e-05, "step": 29957, "training_step_time": 0.20394253730773926 }, { "epoch": 4.57122802734375e-05, "model_forward_time": 0.024318456649780273, "step": 29958 }, { "epoch": 4.57122802734375e-05, "step": 29958, "training_step_time": 0.1201925277709961 }, { "epoch": 4.571380615234375e-05, "model_forward_time": 0.02431201934814453, "step": 29959 }, { "epoch": 4.571380615234375e-05, "step": 29959, "training_step_time": 0.11854720115661621 }, { "epoch": 4.571533203125e-05, "grad_norm": 0.06495802104473114, "learning_rate": 4.860369793080466e-10, "loss": 0.0062, "step": 29960 }, { "epoch": 4.571533203125e-05, "model_forward_time": 0.024856090545654297, "step": 29960 }, { "epoch": 4.571533203125e-05, "step": 29960, "training_step_time": 0.12964510917663574 }, { "epoch": 4.571685791015625e-05, "model_forward_time": 0.0247037410736084, "step": 29961 }, { "epoch": 4.571685791015625e-05, "step": 29961, "training_step_time": 0.10771536827087402 }, { "epoch": 4.57183837890625e-05, "model_forward_time": 0.025290727615356445, "step": 29962 }, { "epoch": 4.57183837890625e-05, "step": 29962, "training_step_time": 0.10528135299682617 }, { "epoch": 4.571990966796875e-05, "model_forward_time": 0.025136470794677734, "step": 29963 }, { "epoch": 4.571990966796875e-05, "step": 29963, "training_step_time": 0.1065676212310791 }, { "epoch": 4.5721435546875e-05, "model_forward_time": 0.025384187698364258, "step": 29964 }, { "epoch": 4.5721435546875e-05, "step": 29964, "training_step_time": 0.10642337799072266 }, { "epoch": 4.572296142578125e-05, "model_forward_time": 0.024346113204956055, "step": 29965 }, { "epoch": 4.572296142578125e-05, "step": 29965, "training_step_time": 0.1043846607208252 }, { "epoch": 4.57244873046875e-05, "model_forward_time": 0.024061918258666992, "step": 29966 }, { "epoch": 4.57244873046875e-05, "step": 29966, "training_step_time": 0.10479950904846191 }, { "epoch": 4.572601318359375e-05, "model_forward_time": 0.025409221649169922, "step": 29967 }, { "epoch": 4.572601318359375e-05, "step": 29967, "training_step_time": 0.10571026802062988 }, { "epoch": 4.57275390625e-05, "model_forward_time": 0.026334762573242188, "step": 29968 }, { "epoch": 4.57275390625e-05, "step": 29968, "training_step_time": 0.10742473602294922 }, { "epoch": 4.572906494140625e-05, "model_forward_time": 0.024933576583862305, "step": 29969 }, { "epoch": 4.572906494140625e-05, "step": 29969, "training_step_time": 0.14802932739257812 }, { "epoch": 4.57305908203125e-05, "grad_norm": 0.1793670952320099, "learning_rate": 2.7339599464326627e-10, "loss": 0.0034, "step": 29970 }, { "epoch": 4.57305908203125e-05, "model_forward_time": 0.025174617767333984, "step": 29970 }, { "epoch": 4.57305908203125e-05, "step": 29970, "training_step_time": 0.10462617874145508 }, { "epoch": 4.573211669921875e-05, "model_forward_time": 0.0256345272064209, "step": 29971 }, { "epoch": 4.573211669921875e-05, "step": 29971, "training_step_time": 0.12344837188720703 }, { "epoch": 4.5733642578125e-05, "model_forward_time": 0.02504563331604004, "step": 29972 }, { "epoch": 4.5733642578125e-05, "step": 29972, "training_step_time": 0.15032577514648438 }, { "epoch": 4.573516845703125e-05, "model_forward_time": 0.024487972259521484, "step": 29973 }, { "epoch": 4.573516845703125e-05, "step": 29973, "training_step_time": 0.10456323623657227 }, { "epoch": 4.57366943359375e-05, "model_forward_time": 0.025249719619750977, "step": 29974 }, { "epoch": 4.57366943359375e-05, "step": 29974, "training_step_time": 0.19520306587219238 }, { "epoch": 4.573822021484375e-05, "model_forward_time": 0.024792909622192383, "step": 29975 }, { "epoch": 4.573822021484375e-05, "step": 29975, "training_step_time": 0.20862936973571777 }, { "epoch": 4.573974609375e-05, "model_forward_time": 0.028766632080078125, "step": 29976 }, { "epoch": 4.573974609375e-05, "step": 29976, "training_step_time": 0.20551609992980957 }, { "epoch": 4.574127197265625e-05, "model_forward_time": 0.024842500686645508, "step": 29977 }, { "epoch": 4.574127197265625e-05, "step": 29977, "training_step_time": 0.12756824493408203 }, { "epoch": 4.57427978515625e-05, "model_forward_time": 0.024707555770874023, "step": 29978 }, { "epoch": 4.57427978515625e-05, "step": 29978, "training_step_time": 0.17250943183898926 }, { "epoch": 4.574432373046875e-05, "model_forward_time": 0.02458667755126953, "step": 29979 }, { "epoch": 4.574432373046875e-05, "step": 29979, "training_step_time": 0.11186718940734863 }, { "epoch": 4.5745849609375e-05, "grad_norm": 0.2396877557039261, "learning_rate": 1.2150939247002058e-10, "loss": 0.0053, "step": 29980 }, { "epoch": 4.5745849609375e-05, "model_forward_time": 0.025346040725708008, "step": 29980 }, { "epoch": 4.5745849609375e-05, "step": 29980, "training_step_time": 0.10394740104675293 }, { "epoch": 4.574737548828125e-05, "model_forward_time": 0.025341510772705078, "step": 29981 }, { "epoch": 4.574737548828125e-05, "step": 29981, "training_step_time": 0.1427762508392334 }, { "epoch": 4.57489013671875e-05, "model_forward_time": 0.025478124618530273, "step": 29982 }, { "epoch": 4.57489013671875e-05, "step": 29982, "training_step_time": 0.1967456340789795 }, { "epoch": 4.575042724609375e-05, "model_forward_time": 0.024437427520751953, "step": 29983 }, { "epoch": 4.575042724609375e-05, "step": 29983, "training_step_time": 0.10135197639465332 }, { "epoch": 4.5751953125e-05, "model_forward_time": 0.02461528778076172, "step": 29984 }, { "epoch": 4.5751953125e-05, "step": 29984, "training_step_time": 0.10251975059509277 }, { "epoch": 4.575347900390625e-05, "model_forward_time": 0.0253293514251709, "step": 29985 }, { "epoch": 4.575347900390625e-05, "step": 29985, "training_step_time": 0.1026310920715332 }, { "epoch": 4.57550048828125e-05, "model_forward_time": 0.025458097457885742, "step": 29986 }, { "epoch": 4.57550048828125e-05, "step": 29986, "training_step_time": 0.11490917205810547 }, { "epoch": 4.575653076171875e-05, "model_forward_time": 0.02565932273864746, "step": 29987 }, { "epoch": 4.575653076171875e-05, "step": 29987, "training_step_time": 0.10691595077514648 }, { "epoch": 4.5758056640625e-05, "model_forward_time": 0.02588939666748047, "step": 29988 }, { "epoch": 4.5758056640625e-05, "step": 29988, "training_step_time": 0.188765287399292 }, { "epoch": 4.575958251953125e-05, "model_forward_time": 0.02950453758239746, "step": 29989 }, { "epoch": 4.575958251953125e-05, "step": 29989, "training_step_time": 0.20821714401245117 }, { "epoch": 4.57611083984375e-05, "grad_norm": 0.030882326886057854, "learning_rate": 3.037735734623404e-11, "loss": 0.0043, "step": 29990 }, { "epoch": 4.57611083984375e-05, "model_forward_time": 0.026105403900146484, "step": 29990 }, { "epoch": 4.57611083984375e-05, "step": 29990, "training_step_time": 0.2144639492034912 }, { "epoch": 4.576263427734375e-05, "model_forward_time": 0.026969194412231445, "step": 29991 }, { "epoch": 4.576263427734375e-05, "step": 29991, "training_step_time": 0.23193597793579102 }, { "epoch": 4.576416015625e-05, "model_forward_time": 0.029546737670898438, "step": 29992 }, { "epoch": 4.576416015625e-05, "step": 29992, "training_step_time": 0.3120415210723877 }, { "epoch": 4.576568603515625e-05, "model_forward_time": 0.029010772705078125, "step": 29993 }, { "epoch": 4.576568603515625e-05, "step": 29993, "training_step_time": 0.27482128143310547 }, { "epoch": 4.57672119140625e-05, "model_forward_time": 0.030508756637573242, "step": 29994 }, { "epoch": 4.57672119140625e-05, "step": 29994, "training_step_time": 0.3284180164337158 }, { "epoch": 4.576873779296875e-05, "model_forward_time": 0.028658628463745117, "step": 29995 }, { "epoch": 4.576873779296875e-05, "step": 29995, "training_step_time": 0.36992478370666504 }, { "epoch": 4.5770263671875e-05, "model_forward_time": 0.028682947158813477, "step": 29996 }, { "epoch": 4.5770263671875e-05, "step": 29996, "training_step_time": 0.3473165035247803 }, { "epoch": 4.577178955078125e-05, "model_forward_time": 0.030662059783935547, "step": 29997 }, { "epoch": 4.577178955078125e-05, "step": 29997, "training_step_time": 0.31621313095092773 }, { "epoch": 4.57733154296875e-05, "model_forward_time": 0.04142260551452637, "step": 29998 }, { "epoch": 4.57733154296875e-05, "step": 29998, "training_step_time": 0.28385281562805176 }, { "epoch": 4.577484130859375e-05, "model_forward_time": 0.03106999397277832, "step": 29999 }, { "epoch": 4.577484130859375e-05, "step": 29999, "training_step_time": 0.3634684085845947 }, { "epoch": 4.57763671875e-05, "grad_norm": 0.07076636701822281, "learning_rate": 0.0, "loss": 0.0029, "step": 30000 } ], "logging_steps": 10, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }