|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.795497185741088, |
|
"eval_steps": 66, |
|
"global_step": 1320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0150093808630394, |
|
"grad_norm": 0.45099504621179437, |
|
"learning_rate": 3.0303030303030305e-08, |
|
"loss": 0.8363, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0150093808630394, |
|
"eval_loss": 0.8522398471832275, |
|
"eval_runtime": 13.8139, |
|
"eval_samples_per_second": 32.359, |
|
"eval_steps_per_second": 2.027, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0300187617260788, |
|
"grad_norm": 0.441134394511529, |
|
"learning_rate": 6.060606060606061e-08, |
|
"loss": 0.8152, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0450281425891182, |
|
"grad_norm": 0.44058980813366744, |
|
"learning_rate": 9.09090909090909e-08, |
|
"loss": 0.8263, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0600375234521576, |
|
"grad_norm": 0.4412989069973729, |
|
"learning_rate": 1.2121212121212122e-07, |
|
"loss": 0.8285, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.075046904315197, |
|
"grad_norm": 0.4411021457996664, |
|
"learning_rate": 1.5151515151515152e-07, |
|
"loss": 0.8294, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0900562851782364, |
|
"grad_norm": 0.4512982125032984, |
|
"learning_rate": 1.818181818181818e-07, |
|
"loss": 0.827, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1050656660412758, |
|
"grad_norm": 0.4487759970382494, |
|
"learning_rate": 2.121212121212121e-07, |
|
"loss": 0.831, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1200750469043152, |
|
"grad_norm": 0.45274790304085666, |
|
"learning_rate": 2.4242424242424244e-07, |
|
"loss": 0.8266, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.1350844277673546, |
|
"grad_norm": 0.4452059179334573, |
|
"learning_rate": 2.727272727272727e-07, |
|
"loss": 0.8278, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.150093808630394, |
|
"grad_norm": 0.4447347665120929, |
|
"learning_rate": 3.0303030303030305e-07, |
|
"loss": 0.828, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1651031894934334, |
|
"grad_norm": 0.44996814286667713, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.8321, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.1801125703564728, |
|
"grad_norm": 0.4400767238276578, |
|
"learning_rate": 3.636363636363636e-07, |
|
"loss": 0.8188, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 0.4714681881384513, |
|
"learning_rate": 3.939393939393939e-07, |
|
"loss": 0.8295, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.2101313320825516, |
|
"grad_norm": 0.444385872298255, |
|
"learning_rate": 4.242424242424242e-07, |
|
"loss": 0.8163, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.225140712945591, |
|
"grad_norm": 0.4403917468130588, |
|
"learning_rate": 4.545454545454545e-07, |
|
"loss": 0.829, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2401500938086304, |
|
"grad_norm": 0.4463075871861068, |
|
"learning_rate": 4.848484848484849e-07, |
|
"loss": 0.8301, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2551594746716698, |
|
"grad_norm": 0.4517876122481777, |
|
"learning_rate": 5.151515151515151e-07, |
|
"loss": 0.8237, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2701688555347092, |
|
"grad_norm": 0.4194271488424739, |
|
"learning_rate": 5.454545454545454e-07, |
|
"loss": 0.828, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2851782363977486, |
|
"grad_norm": 0.4385859199926406, |
|
"learning_rate": 5.757575757575758e-07, |
|
"loss": 0.8313, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.300187617260788, |
|
"grad_norm": 0.43935758099705285, |
|
"learning_rate": 6.060606060606061e-07, |
|
"loss": 0.8135, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3151969981238274, |
|
"grad_norm": 0.42349119651358025, |
|
"learning_rate": 6.363636363636363e-07, |
|
"loss": 0.814, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.3302063789868668, |
|
"grad_norm": 0.42862096475156763, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.8107, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.3452157598499062, |
|
"grad_norm": 0.41027437311847303, |
|
"learning_rate": 6.96969696969697e-07, |
|
"loss": 0.8093, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3602251407129456, |
|
"grad_norm": 0.41506365946047097, |
|
"learning_rate": 7.272727272727272e-07, |
|
"loss": 0.8007, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.37523452157598497, |
|
"grad_norm": 0.35818533786374307, |
|
"learning_rate": 7.575757575757575e-07, |
|
"loss": 0.7935, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3902439024390244, |
|
"grad_norm": 0.36820244566867855, |
|
"learning_rate": 7.878787878787878e-07, |
|
"loss": 0.7956, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.4052532833020638, |
|
"grad_norm": 0.3554347415386222, |
|
"learning_rate": 8.181818181818182e-07, |
|
"loss": 0.7965, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.4202626641651032, |
|
"grad_norm": 0.34816729354565595, |
|
"learning_rate": 8.484848484848484e-07, |
|
"loss": 0.7893, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4352720450281426, |
|
"grad_norm": 0.3492723930636243, |
|
"learning_rate": 8.787878787878787e-07, |
|
"loss": 0.7927, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.450281425891182, |
|
"grad_norm": 0.3524378456441126, |
|
"learning_rate": 9.09090909090909e-07, |
|
"loss": 0.7805, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4652908067542214, |
|
"grad_norm": 0.3364134835289249, |
|
"learning_rate": 9.393939393939395e-07, |
|
"loss": 0.7901, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.4803001876172608, |
|
"grad_norm": 0.34952134401579665, |
|
"learning_rate": 9.696969696969698e-07, |
|
"loss": 0.7848, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.49530956848030017, |
|
"grad_norm": 0.34379662697051444, |
|
"learning_rate": 1e-06, |
|
"loss": 0.7766, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.5103189493433395, |
|
"grad_norm": 0.25380437737254385, |
|
"learning_rate": 1.0303030303030302e-06, |
|
"loss": 0.7506, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.525328330206379, |
|
"grad_norm": 0.2160315736548007, |
|
"learning_rate": 1.0606060606060606e-06, |
|
"loss": 0.7296, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5403377110694184, |
|
"grad_norm": 0.21519653463861005, |
|
"learning_rate": 1.0909090909090908e-06, |
|
"loss": 0.7429, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.5553470919324578, |
|
"grad_norm": 0.2118091773645455, |
|
"learning_rate": 1.121212121212121e-06, |
|
"loss": 0.7341, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.5703564727954972, |
|
"grad_norm": 0.2133974139017253, |
|
"learning_rate": 1.1515151515151516e-06, |
|
"loss": 0.7336, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 0.21183205584010478, |
|
"learning_rate": 1.1818181818181818e-06, |
|
"loss": 0.7406, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.600375234521576, |
|
"grad_norm": 0.20612576338064367, |
|
"learning_rate": 1.2121212121212122e-06, |
|
"loss": 0.7172, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 0.20009218157286937, |
|
"learning_rate": 1.2424242424242424e-06, |
|
"loss": 0.7331, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.6303939962476548, |
|
"grad_norm": 0.20086489901884286, |
|
"learning_rate": 1.2727272727272726e-06, |
|
"loss": 0.7206, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6454033771106942, |
|
"grad_norm": 0.19327701765033134, |
|
"learning_rate": 1.303030303030303e-06, |
|
"loss": 0.7264, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.6604127579737336, |
|
"grad_norm": 0.18735374384890305, |
|
"learning_rate": 1.3333333333333332e-06, |
|
"loss": 0.7073, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.6754221388367729, |
|
"grad_norm": 0.17683867740993736, |
|
"learning_rate": 1.3636363636363634e-06, |
|
"loss": 0.6931, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6904315196998124, |
|
"grad_norm": 0.17198229254906564, |
|
"learning_rate": 1.393939393939394e-06, |
|
"loss": 0.698, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.7054409005628518, |
|
"grad_norm": 0.16380634624432175, |
|
"learning_rate": 1.4242424242424242e-06, |
|
"loss": 0.6903, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.7204502814258912, |
|
"grad_norm": 0.14953817712425876, |
|
"learning_rate": 1.4545454545454544e-06, |
|
"loss": 0.6771, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7354596622889306, |
|
"grad_norm": 0.14120367016713395, |
|
"learning_rate": 1.4848484848484848e-06, |
|
"loss": 0.6689, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.7504690431519699, |
|
"grad_norm": 0.13232673022559538, |
|
"learning_rate": 1.515151515151515e-06, |
|
"loss": 0.6748, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7654784240150094, |
|
"grad_norm": 0.12723197101176636, |
|
"learning_rate": 1.5454545454545454e-06, |
|
"loss": 0.6612, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.7804878048780488, |
|
"grad_norm": 0.12474022700537914, |
|
"learning_rate": 1.5757575757575756e-06, |
|
"loss": 0.6458, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.7954971857410882, |
|
"grad_norm": 0.12420274477384924, |
|
"learning_rate": 1.6060606060606058e-06, |
|
"loss": 0.6529, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.8105065666041276, |
|
"grad_norm": 0.12270466802134104, |
|
"learning_rate": 1.6363636363636365e-06, |
|
"loss": 0.6475, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.8255159474671669, |
|
"grad_norm": 0.12049286207469485, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.6359, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8405253283302064, |
|
"grad_norm": 0.11526479585742994, |
|
"learning_rate": 1.6969696969696969e-06, |
|
"loss": 0.6261, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.8555347091932458, |
|
"grad_norm": 0.11592626217416292, |
|
"learning_rate": 1.7272727272727273e-06, |
|
"loss": 0.627, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.8705440900562852, |
|
"grad_norm": 0.11594477634938592, |
|
"learning_rate": 1.7575757575757575e-06, |
|
"loss": 0.6244, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.8855534709193246, |
|
"grad_norm": 0.11313778567858399, |
|
"learning_rate": 1.7878787878787877e-06, |
|
"loss": 0.6317, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.900562851782364, |
|
"grad_norm": 0.11023173423057069, |
|
"learning_rate": 1.818181818181818e-06, |
|
"loss": 0.6248, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9155722326454033, |
|
"grad_norm": 0.10740667281307065, |
|
"learning_rate": 1.8484848484848483e-06, |
|
"loss": 0.621, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.9305816135084428, |
|
"grad_norm": 0.10061348969269865, |
|
"learning_rate": 1.878787878787879e-06, |
|
"loss": 0.6182, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.9455909943714822, |
|
"grad_norm": 0.09404279395367166, |
|
"learning_rate": 1.909090909090909e-06, |
|
"loss": 0.6068, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.9606003752345216, |
|
"grad_norm": 0.09335512170262361, |
|
"learning_rate": 1.9393939393939395e-06, |
|
"loss": 0.6114, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.08836932387148118, |
|
"learning_rate": 1.9696969696969695e-06, |
|
"loss": 0.5959, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9906191369606003, |
|
"grad_norm": 0.08549247801026265, |
|
"learning_rate": 2e-06, |
|
"loss": 0.6113, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.9906191369606003, |
|
"eval_loss": 0.5765168070793152, |
|
"eval_runtime": 13.7554, |
|
"eval_samples_per_second": 32.496, |
|
"eval_steps_per_second": 2.036, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.08549247801026265, |
|
"learning_rate": 1.999996861844573e-06, |
|
"loss": 0.4985, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0150093808630394, |
|
"grad_norm": 0.09984581949422254, |
|
"learning_rate": 1.999987447397988e-06, |
|
"loss": 0.6663, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.0300187617260788, |
|
"grad_norm": 0.07132682896345384, |
|
"learning_rate": 1.9999717567193325e-06, |
|
"loss": 0.5697, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.0450281425891181, |
|
"grad_norm": 0.06981020986612589, |
|
"learning_rate": 1.999949789907087e-06, |
|
"loss": 0.5768, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0600375234521575, |
|
"grad_norm": 0.06754090898889059, |
|
"learning_rate": 1.9999215470991215e-06, |
|
"loss": 0.5828, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.075046904315197, |
|
"grad_norm": 0.06243964561211776, |
|
"learning_rate": 1.9998870284726965e-06, |
|
"loss": 0.5694, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.0900562851782365, |
|
"grad_norm": 0.061009810010117634, |
|
"learning_rate": 1.999846234244462e-06, |
|
"loss": 0.5727, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.1050656660412759, |
|
"grad_norm": 0.056185784091092324, |
|
"learning_rate": 1.999799164670455e-06, |
|
"loss": 0.5607, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.1200750469043153, |
|
"grad_norm": 0.055842108965366594, |
|
"learning_rate": 1.9997458200460992e-06, |
|
"loss": 0.5521, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1350844277673546, |
|
"grad_norm": 0.05187749252022605, |
|
"learning_rate": 1.999686200706201e-06, |
|
"loss": 0.5724, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.150093808630394, |
|
"grad_norm": 0.051625347997863995, |
|
"learning_rate": 1.9996203070249514e-06, |
|
"loss": 0.5566, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.1651031894934334, |
|
"grad_norm": 0.04854987918067284, |
|
"learning_rate": 1.9995481394159185e-06, |
|
"loss": 0.5444, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.1801125703564728, |
|
"grad_norm": 0.046725939692484605, |
|
"learning_rate": 1.999469698332049e-06, |
|
"loss": 0.5452, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.1951219512195121, |
|
"grad_norm": 0.04651989075072082, |
|
"learning_rate": 1.9993849842656634e-06, |
|
"loss": 0.5533, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2101313320825515, |
|
"grad_norm": 0.04530133654545522, |
|
"learning_rate": 1.9992939977484538e-06, |
|
"loss": 0.5446, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.225140712945591, |
|
"grad_norm": 0.04348117767209829, |
|
"learning_rate": 1.99919673935148e-06, |
|
"loss": 0.5518, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.2401500938086305, |
|
"grad_norm": 0.041929135982438165, |
|
"learning_rate": 1.999093209685165e-06, |
|
"loss": 0.5669, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.2551594746716699, |
|
"grad_norm": 0.04245022581761594, |
|
"learning_rate": 1.9989834093992944e-06, |
|
"loss": 0.5217, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.2701688555347093, |
|
"grad_norm": 0.03980799680014109, |
|
"learning_rate": 1.998867339183008e-06, |
|
"loss": 0.5429, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.2851782363977486, |
|
"grad_norm": 0.04050731565010284, |
|
"learning_rate": 1.9987449997647986e-06, |
|
"loss": 0.5277, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.300187617260788, |
|
"grad_norm": 0.03868146463623856, |
|
"learning_rate": 1.9986163919125074e-06, |
|
"loss": 0.5471, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.3151969981238274, |
|
"grad_norm": 0.038497979471297274, |
|
"learning_rate": 1.998481516433316e-06, |
|
"loss": 0.5444, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.3302063789868668, |
|
"grad_norm": 0.03793331778602445, |
|
"learning_rate": 1.998340374173746e-06, |
|
"loss": 0.5443, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.3452157598499062, |
|
"grad_norm": 0.037330687993704544, |
|
"learning_rate": 1.998192966019649e-06, |
|
"loss": 0.5397, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3602251407129455, |
|
"grad_norm": 0.036291421015624784, |
|
"learning_rate": 1.998039292896205e-06, |
|
"loss": 0.5275, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.375234521575985, |
|
"grad_norm": 0.035032791533978855, |
|
"learning_rate": 1.9978793557679143e-06, |
|
"loss": 0.5219, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.3902439024390243, |
|
"grad_norm": 0.034627794718902295, |
|
"learning_rate": 1.9977131556385916e-06, |
|
"loss": 0.5383, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.4052532833020637, |
|
"grad_norm": 0.03451383360226329, |
|
"learning_rate": 1.9975406935513613e-06, |
|
"loss": 0.5301, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.4202626641651033, |
|
"grad_norm": 0.03375702739265765, |
|
"learning_rate": 1.9973619705886486e-06, |
|
"loss": 0.5358, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.4352720450281427, |
|
"grad_norm": 0.03420334157092594, |
|
"learning_rate": 1.9971769878721743e-06, |
|
"loss": 0.5308, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.450281425891182, |
|
"grad_norm": 0.032291163299197616, |
|
"learning_rate": 1.9969857465629473e-06, |
|
"loss": 0.5318, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.4652908067542214, |
|
"grad_norm": 0.03307719877411306, |
|
"learning_rate": 1.996788247861258e-06, |
|
"loss": 0.5304, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.4803001876172608, |
|
"grad_norm": 0.032040033737229634, |
|
"learning_rate": 1.9965844930066696e-06, |
|
"loss": 0.5132, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.4953095684803002, |
|
"grad_norm": 0.030182728005005464, |
|
"learning_rate": 1.9963744832780105e-06, |
|
"loss": 0.5148, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5103189493433395, |
|
"grad_norm": 0.029981506115315602, |
|
"learning_rate": 1.996158219993368e-06, |
|
"loss": 0.5229, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.5253283302063791, |
|
"grad_norm": 0.030487016777282053, |
|
"learning_rate": 1.995935704510076e-06, |
|
"loss": 0.5105, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.5403377110694185, |
|
"grad_norm": 0.029784594581146837, |
|
"learning_rate": 1.995706938224712e-06, |
|
"loss": 0.5204, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.555347091932458, |
|
"grad_norm": 0.029409093062428185, |
|
"learning_rate": 1.9954719225730845e-06, |
|
"loss": 0.5192, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.5703564727954973, |
|
"grad_norm": 0.028177419957318213, |
|
"learning_rate": 1.995230659030224e-06, |
|
"loss": 0.5163, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.5853658536585367, |
|
"grad_norm": 0.02727561948703477, |
|
"learning_rate": 1.994983149110376e-06, |
|
"loss": 0.5088, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.600375234521576, |
|
"grad_norm": 0.027757151756443717, |
|
"learning_rate": 1.99472939436699e-06, |
|
"loss": 0.5239, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.6153846153846154, |
|
"grad_norm": 0.02681869602530872, |
|
"learning_rate": 1.994469396392709e-06, |
|
"loss": 0.5193, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.6303939962476548, |
|
"grad_norm": 0.02805316201412264, |
|
"learning_rate": 1.9942031568193616e-06, |
|
"loss": 0.508, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.6454033771106942, |
|
"grad_norm": 0.026206675896448955, |
|
"learning_rate": 1.9939306773179494e-06, |
|
"loss": 0.5161, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.6604127579737336, |
|
"grad_norm": 0.02633552856537573, |
|
"learning_rate": 1.9936519595986392e-06, |
|
"loss": 0.513, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.675422138836773, |
|
"grad_norm": 0.02671752302721344, |
|
"learning_rate": 1.9933670054107495e-06, |
|
"loss": 0.5059, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.6904315196998123, |
|
"grad_norm": 0.02608289502544131, |
|
"learning_rate": 1.993075816542742e-06, |
|
"loss": 0.5155, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.7054409005628517, |
|
"grad_norm": 0.025066926457861842, |
|
"learning_rate": 1.992778394822208e-06, |
|
"loss": 0.5159, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.720450281425891, |
|
"grad_norm": 0.025549312802280857, |
|
"learning_rate": 1.992474742115859e-06, |
|
"loss": 0.5069, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.7354596622889304, |
|
"grad_norm": 0.024567185837752966, |
|
"learning_rate": 1.9921648603295138e-06, |
|
"loss": 0.5088, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.7504690431519698, |
|
"grad_norm": 0.024409380976663946, |
|
"learning_rate": 1.9918487514080866e-06, |
|
"loss": 0.5065, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.7654784240150094, |
|
"grad_norm": 0.024237188144454417, |
|
"learning_rate": 1.991526417335575e-06, |
|
"loss": 0.5185, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.7804878048780488, |
|
"grad_norm": 0.024430013424038537, |
|
"learning_rate": 1.9911978601350483e-06, |
|
"loss": 0.4929, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.7954971857410882, |
|
"grad_norm": 0.024004085443342052, |
|
"learning_rate": 1.9908630818686336e-06, |
|
"loss": 0.4931, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.8105065666041276, |
|
"grad_norm": 0.023061733754567375, |
|
"learning_rate": 1.990522084637503e-06, |
|
"loss": 0.497, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.825515947467167, |
|
"grad_norm": 0.02280013753274718, |
|
"learning_rate": 1.990174870581862e-06, |
|
"loss": 0.5013, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.8405253283302065, |
|
"grad_norm": 0.022535127183177055, |
|
"learning_rate": 1.9898214418809326e-06, |
|
"loss": 0.4992, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.855534709193246, |
|
"grad_norm": 0.02279479434548679, |
|
"learning_rate": 1.989461800752944e-06, |
|
"loss": 0.5034, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.8705440900562853, |
|
"grad_norm": 0.022737894727696523, |
|
"learning_rate": 1.989095949455116e-06, |
|
"loss": 0.5041, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.8855534709193247, |
|
"grad_norm": 0.02213733247427209, |
|
"learning_rate": 1.988723890283645e-06, |
|
"loss": 0.5064, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.900562851782364, |
|
"grad_norm": 0.022001719966808008, |
|
"learning_rate": 1.988345625573689e-06, |
|
"loss": 0.4938, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.9155722326454034, |
|
"grad_norm": 0.021997816422623415, |
|
"learning_rate": 1.9879611576993556e-06, |
|
"loss": 0.4975, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.9305816135084428, |
|
"grad_norm": 0.021825994539949378, |
|
"learning_rate": 1.987570489073685e-06, |
|
"loss": 0.4953, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.9455909943714822, |
|
"grad_norm": 0.021569132172669345, |
|
"learning_rate": 1.9871736221486344e-06, |
|
"loss": 0.4866, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9606003752345216, |
|
"grad_norm": 0.021163833610375136, |
|
"learning_rate": 1.9867705594150646e-06, |
|
"loss": 0.489, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.975609756097561, |
|
"grad_norm": 0.020950382903110992, |
|
"learning_rate": 1.9863613034027223e-06, |
|
"loss": 0.4911, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.975609756097561, |
|
"eval_loss": 0.47668132185935974, |
|
"eval_runtime": 13.9051, |
|
"eval_samples_per_second": 32.146, |
|
"eval_steps_per_second": 2.014, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.9906191369606003, |
|
"grad_norm": 0.020557758708440663, |
|
"learning_rate": 1.9859458566802253e-06, |
|
"loss": 0.4948, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.023846854436759164, |
|
"learning_rate": 1.9855242218550463e-06, |
|
"loss": 0.479, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.0150093808630394, |
|
"grad_norm": 0.02517578701828216, |
|
"learning_rate": 1.9850964015734966e-06, |
|
"loss": 0.5028, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.0300187617260788, |
|
"grad_norm": 0.02174554797483234, |
|
"learning_rate": 1.9846623985207097e-06, |
|
"loss": 0.5053, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.045028142589118, |
|
"grad_norm": 0.02084416259303054, |
|
"learning_rate": 1.9842222154206232e-06, |
|
"loss": 0.4962, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 2.0600375234521575, |
|
"grad_norm": 0.019786665839116563, |
|
"learning_rate": 1.9837758550359635e-06, |
|
"loss": 0.4891, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.075046904315197, |
|
"grad_norm": 0.02046650546146805, |
|
"learning_rate": 1.9833233201682263e-06, |
|
"loss": 0.4989, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.0900562851782363, |
|
"grad_norm": 0.019907238202250363, |
|
"learning_rate": 1.982864613657662e-06, |
|
"loss": 0.4775, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.1050656660412757, |
|
"grad_norm": 0.019815490776120104, |
|
"learning_rate": 1.982399738383255e-06, |
|
"loss": 0.4897, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.120075046904315, |
|
"grad_norm": 0.01918425758934218, |
|
"learning_rate": 1.9819286972627067e-06, |
|
"loss": 0.4972, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.1350844277673544, |
|
"grad_norm": 0.019994680552468825, |
|
"learning_rate": 1.9814514932524176e-06, |
|
"loss": 0.4951, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.150093808630394, |
|
"grad_norm": 0.019278973829129135, |
|
"learning_rate": 1.980968129347469e-06, |
|
"loss": 0.4809, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.1651031894934336, |
|
"grad_norm": 0.0192577035975134, |
|
"learning_rate": 1.9804786085816027e-06, |
|
"loss": 0.4909, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.180112570356473, |
|
"grad_norm": 0.018971731946795373, |
|
"learning_rate": 1.979982934027203e-06, |
|
"loss": 0.4804, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.1951219512195124, |
|
"grad_norm": 0.019218520844467814, |
|
"learning_rate": 1.979481108795278e-06, |
|
"loss": 0.4886, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.2101313320825517, |
|
"grad_norm": 0.018888083378158605, |
|
"learning_rate": 1.9789731360354377e-06, |
|
"loss": 0.4884, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.225140712945591, |
|
"grad_norm": 0.019795240765748786, |
|
"learning_rate": 1.9784590189358786e-06, |
|
"loss": 0.4918, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.2401500938086305, |
|
"grad_norm": 0.018701780342971427, |
|
"learning_rate": 1.9779387607233582e-06, |
|
"loss": 0.4837, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.25515947467167, |
|
"grad_norm": 0.01883414126738354, |
|
"learning_rate": 1.9774123646631797e-06, |
|
"loss": 0.4856, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.2701688555347093, |
|
"grad_norm": 0.018749906068093083, |
|
"learning_rate": 1.9768798340591678e-06, |
|
"loss": 0.4765, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.2851782363977486, |
|
"grad_norm": 0.018730040963488046, |
|
"learning_rate": 1.9763411722536503e-06, |
|
"loss": 0.4845, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.300187617260788, |
|
"grad_norm": 0.018684796471871466, |
|
"learning_rate": 1.9757963826274354e-06, |
|
"loss": 0.4822, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.3151969981238274, |
|
"grad_norm": 0.018465641278047874, |
|
"learning_rate": 1.9752454685997933e-06, |
|
"loss": 0.4828, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.3302063789868668, |
|
"grad_norm": 0.018220085507859175, |
|
"learning_rate": 1.9746884336284313e-06, |
|
"loss": 0.4838, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.345215759849906, |
|
"grad_norm": 0.01849417235430084, |
|
"learning_rate": 1.974125281209474e-06, |
|
"loss": 0.4953, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.3602251407129455, |
|
"grad_norm": 0.018786012040010656, |
|
"learning_rate": 1.973556014877441e-06, |
|
"loss": 0.4928, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.375234521575985, |
|
"grad_norm": 0.017733412017590936, |
|
"learning_rate": 1.972980638205225e-06, |
|
"loss": 0.4798, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.3902439024390243, |
|
"grad_norm": 0.017824181403243117, |
|
"learning_rate": 1.972399154804068e-06, |
|
"loss": 0.4844, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.4052532833020637, |
|
"grad_norm": 0.018065420440970443, |
|
"learning_rate": 1.9718115683235415e-06, |
|
"loss": 0.4666, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.420262664165103, |
|
"grad_norm": 0.017368916708408812, |
|
"learning_rate": 1.971217882451521e-06, |
|
"loss": 0.4875, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.4352720450281424, |
|
"grad_norm": 0.018070325703733577, |
|
"learning_rate": 1.9706181009141627e-06, |
|
"loss": 0.474, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.450281425891182, |
|
"grad_norm": 0.017332167220279988, |
|
"learning_rate": 1.9700122274758824e-06, |
|
"loss": 0.48, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.465290806754221, |
|
"grad_norm": 0.017274769746170343, |
|
"learning_rate": 1.9694002659393305e-06, |
|
"loss": 0.4771, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.480300187617261, |
|
"grad_norm": 0.01814722046626225, |
|
"learning_rate": 1.9687822201453674e-06, |
|
"loss": 0.4848, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.4953095684803, |
|
"grad_norm": 0.017408741457802464, |
|
"learning_rate": 1.9681580939730405e-06, |
|
"loss": 0.4827, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.5103189493433398, |
|
"grad_norm": 0.01768240964560969, |
|
"learning_rate": 1.96752789133956e-06, |
|
"loss": 0.4794, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.525328330206379, |
|
"grad_norm": 0.017154935951961867, |
|
"learning_rate": 1.9668916162002736e-06, |
|
"loss": 0.4693, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.5403377110694185, |
|
"grad_norm": 0.01802153159416841, |
|
"learning_rate": 1.966249272548642e-06, |
|
"loss": 0.4777, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.555347091932458, |
|
"grad_norm": 0.01695693908189586, |
|
"learning_rate": 1.965600864416213e-06, |
|
"loss": 0.4759, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.5703564727954973, |
|
"grad_norm": 0.017778123189151847, |
|
"learning_rate": 1.964946395872598e-06, |
|
"loss": 0.4741, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.5853658536585367, |
|
"grad_norm": 0.01689429975438397, |
|
"learning_rate": 1.964285871025445e-06, |
|
"loss": 0.478, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.600375234521576, |
|
"grad_norm": 0.01649235549328338, |
|
"learning_rate": 1.963619294020413e-06, |
|
"loss": 0.4813, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.6153846153846154, |
|
"grad_norm": 0.017089151903788, |
|
"learning_rate": 1.9629466690411472e-06, |
|
"loss": 0.4655, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.630393996247655, |
|
"grad_norm": 0.01640933754335135, |
|
"learning_rate": 1.9622680003092503e-06, |
|
"loss": 0.4707, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.645403377110694, |
|
"grad_norm": 0.016910827126253334, |
|
"learning_rate": 1.9615832920842585e-06, |
|
"loss": 0.4746, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.6604127579737336, |
|
"grad_norm": 0.016866233985154282, |
|
"learning_rate": 1.9608925486636137e-06, |
|
"loss": 0.4779, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 2.675422138836773, |
|
"grad_norm": 0.016909921106817608, |
|
"learning_rate": 1.9601957743826357e-06, |
|
"loss": 0.4746, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 2.6904315196998123, |
|
"grad_norm": 0.0168852778181624, |
|
"learning_rate": 1.9594929736144973e-06, |
|
"loss": 0.4689, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.7054409005628517, |
|
"grad_norm": 0.016401176940009726, |
|
"learning_rate": 1.958784150770194e-06, |
|
"loss": 0.4797, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.720450281425891, |
|
"grad_norm": 0.016798403098782076, |
|
"learning_rate": 1.9580693102985183e-06, |
|
"loss": 0.4857, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.7354596622889304, |
|
"grad_norm": 0.016774376394434458, |
|
"learning_rate": 1.9573484566860318e-06, |
|
"loss": 0.4778, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.75046904315197, |
|
"grad_norm": 0.01629889660789357, |
|
"learning_rate": 1.956621594457035e-06, |
|
"loss": 0.4732, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.7654784240150097, |
|
"grad_norm": 0.016664693825275887, |
|
"learning_rate": 1.955888728173542e-06, |
|
"loss": 0.4748, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.7804878048780486, |
|
"grad_norm": 0.016438663495088306, |
|
"learning_rate": 1.9551498624352495e-06, |
|
"loss": 0.4692, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.7954971857410884, |
|
"grad_norm": 0.01622929505747816, |
|
"learning_rate": 1.9544050018795075e-06, |
|
"loss": 0.4596, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 2.8105065666041273, |
|
"grad_norm": 0.01617655541567404, |
|
"learning_rate": 1.953654151181293e-06, |
|
"loss": 0.4692, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.825515947467167, |
|
"grad_norm": 0.016422497381573444, |
|
"learning_rate": 1.9528973150531785e-06, |
|
"loss": 0.4816, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.8405253283302065, |
|
"grad_norm": 0.016242133515846205, |
|
"learning_rate": 1.9521344982453028e-06, |
|
"loss": 0.461, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.855534709193246, |
|
"grad_norm": 0.01614541338103813, |
|
"learning_rate": 1.951365705545341e-06, |
|
"loss": 0.4648, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 2.8705440900562853, |
|
"grad_norm": 0.016610969075841926, |
|
"learning_rate": 1.9505909417784754e-06, |
|
"loss": 0.4821, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.8855534709193247, |
|
"grad_norm": 0.01603749843836089, |
|
"learning_rate": 1.949810211807364e-06, |
|
"loss": 0.4683, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 2.900562851782364, |
|
"grad_norm": 0.016182649712645568, |
|
"learning_rate": 1.9490235205321113e-06, |
|
"loss": 0.4711, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 2.9155722326454034, |
|
"grad_norm": 0.015691775043731006, |
|
"learning_rate": 1.9482308728902354e-06, |
|
"loss": 0.4679, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.930581613508443, |
|
"grad_norm": 0.01562069940201586, |
|
"learning_rate": 1.94743227385664e-06, |
|
"loss": 0.4635, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 2.945590994371482, |
|
"grad_norm": 0.016502000964567342, |
|
"learning_rate": 1.946627728443581e-06, |
|
"loss": 0.4846, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 2.9606003752345216, |
|
"grad_norm": 0.016314776580001914, |
|
"learning_rate": 1.9458172417006346e-06, |
|
"loss": 0.4687, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 2.9606003752345216, |
|
"eval_loss": 0.45064201951026917, |
|
"eval_runtime": 13.829, |
|
"eval_samples_per_second": 32.323, |
|
"eval_steps_per_second": 2.025, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 2.975609756097561, |
|
"grad_norm": 0.016032669050809613, |
|
"learning_rate": 1.945000818714668e-06, |
|
"loss": 0.4671, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 2.9906191369606003, |
|
"grad_norm": 0.015653715448159893, |
|
"learning_rate": 1.9441784646098063e-06, |
|
"loss": 0.4711, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.02223419252139366, |
|
"learning_rate": 1.9433501845473993e-06, |
|
"loss": 0.4737, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.0150093808630394, |
|
"grad_norm": 0.01514004662536423, |
|
"learning_rate": 1.942515983725989e-06, |
|
"loss": 0.4623, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 3.0300187617260788, |
|
"grad_norm": 0.015881834319376502, |
|
"learning_rate": 1.9416758673812807e-06, |
|
"loss": 0.4644, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 3.045028142589118, |
|
"grad_norm": 0.015532270172868397, |
|
"learning_rate": 1.940829840786104e-06, |
|
"loss": 0.4661, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 3.0600375234521575, |
|
"grad_norm": 0.015406068622430446, |
|
"learning_rate": 1.9399779092503866e-06, |
|
"loss": 0.4739, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.075046904315197, |
|
"grad_norm": 0.015806639463125247, |
|
"learning_rate": 1.9391200781211143e-06, |
|
"loss": 0.4663, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 3.0900562851782363, |
|
"grad_norm": 0.015442763548195685, |
|
"learning_rate": 1.9382563527823025e-06, |
|
"loss": 0.4618, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.1050656660412757, |
|
"grad_norm": 0.016087586162934313, |
|
"learning_rate": 1.93738673865496e-06, |
|
"loss": 0.4768, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 3.120075046904315, |
|
"grad_norm": 0.015086642867178588, |
|
"learning_rate": 1.9365112411970546e-06, |
|
"loss": 0.4527, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 3.1350844277673544, |
|
"grad_norm": 0.015544622423445547, |
|
"learning_rate": 1.9356298659034817e-06, |
|
"loss": 0.4633, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.150093808630394, |
|
"grad_norm": 0.015639897610521446, |
|
"learning_rate": 1.934742618306026e-06, |
|
"loss": 0.4647, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 3.1651031894934336, |
|
"grad_norm": 0.015476405998347507, |
|
"learning_rate": 1.9338495039733286e-06, |
|
"loss": 0.4758, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.180112570356473, |
|
"grad_norm": 0.015338735369002602, |
|
"learning_rate": 1.932950528510854e-06, |
|
"loss": 0.4713, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.1951219512195124, |
|
"grad_norm": 0.015887835721317498, |
|
"learning_rate": 1.932045697560851e-06, |
|
"loss": 0.488, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 3.2101313320825517, |
|
"grad_norm": 0.015551083745535117, |
|
"learning_rate": 1.9311350168023193e-06, |
|
"loss": 0.4712, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.225140712945591, |
|
"grad_norm": 0.015325879160020314, |
|
"learning_rate": 1.9302184919509753e-06, |
|
"loss": 0.4608, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.2401500938086305, |
|
"grad_norm": 0.014851862570601724, |
|
"learning_rate": 1.9292961287592137e-06, |
|
"loss": 0.4584, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 3.25515947467167, |
|
"grad_norm": 0.01524320818707719, |
|
"learning_rate": 1.9283679330160725e-06, |
|
"loss": 0.4563, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.2701688555347093, |
|
"grad_norm": 0.01574419013401929, |
|
"learning_rate": 1.9274339105471968e-06, |
|
"loss": 0.4637, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.2851782363977486, |
|
"grad_norm": 0.014929249445401652, |
|
"learning_rate": 1.9264940672148015e-06, |
|
"loss": 0.4536, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.300187617260788, |
|
"grad_norm": 0.015275176183210167, |
|
"learning_rate": 1.9255484089176364e-06, |
|
"loss": 0.477, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 3.3151969981238274, |
|
"grad_norm": 0.014832301389772685, |
|
"learning_rate": 1.924596941590946e-06, |
|
"loss": 0.4545, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.3302063789868668, |
|
"grad_norm": 0.014806138439426077, |
|
"learning_rate": 1.9236396712064356e-06, |
|
"loss": 0.4564, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 3.345215759849906, |
|
"grad_norm": 0.015216973896443366, |
|
"learning_rate": 1.9226766037722316e-06, |
|
"loss": 0.4775, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.3602251407129455, |
|
"grad_norm": 0.015830800852046037, |
|
"learning_rate": 1.9217077453328448e-06, |
|
"loss": 0.4655, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.375234521575985, |
|
"grad_norm": 0.014954797566724949, |
|
"learning_rate": 1.9207331019691313e-06, |
|
"loss": 0.4683, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 3.3902439024390243, |
|
"grad_norm": 0.014824714915179381, |
|
"learning_rate": 1.9197526797982563e-06, |
|
"loss": 0.468, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 3.4052532833020637, |
|
"grad_norm": 0.015070859630471943, |
|
"learning_rate": 1.918766484973654e-06, |
|
"loss": 0.4508, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.420262664165103, |
|
"grad_norm": 0.01499772386383127, |
|
"learning_rate": 1.9177745236849897e-06, |
|
"loss": 0.4607, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 3.4352720450281424, |
|
"grad_norm": 0.01470349681548681, |
|
"learning_rate": 1.9167768021581207e-06, |
|
"loss": 0.4545, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.450281425891182, |
|
"grad_norm": 0.014758480658159613, |
|
"learning_rate": 1.915773326655057e-06, |
|
"loss": 0.453, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.465290806754221, |
|
"grad_norm": 0.01473967663449321, |
|
"learning_rate": 1.9147641034739244e-06, |
|
"loss": 0.4561, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.480300187617261, |
|
"grad_norm": 0.01500366500390119, |
|
"learning_rate": 1.9137491389489197e-06, |
|
"loss": 0.468, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 3.4953095684803, |
|
"grad_norm": 0.014848085370275077, |
|
"learning_rate": 1.912728439450276e-06, |
|
"loss": 0.4578, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.5103189493433398, |
|
"grad_norm": 0.014381703396358053, |
|
"learning_rate": 1.9117020113842214e-06, |
|
"loss": 0.454, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.525328330206379, |
|
"grad_norm": 0.015112883990438414, |
|
"learning_rate": 1.910669861192937e-06, |
|
"loss": 0.4568, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.5403377110694185, |
|
"grad_norm": 0.014957316535602687, |
|
"learning_rate": 1.9096319953545185e-06, |
|
"loss": 0.4587, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 3.555347091932458, |
|
"grad_norm": 0.014931146009771568, |
|
"learning_rate": 1.908588420382934e-06, |
|
"loss": 0.4611, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 3.5703564727954973, |
|
"grad_norm": 0.014775742620555714, |
|
"learning_rate": 1.9075391428279847e-06, |
|
"loss": 0.4639, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 3.5853658536585367, |
|
"grad_norm": 0.014136489795191264, |
|
"learning_rate": 1.906484169275263e-06, |
|
"loss": 0.4479, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.600375234521576, |
|
"grad_norm": 0.014789494140792015, |
|
"learning_rate": 1.9054235063461103e-06, |
|
"loss": 0.4695, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 3.6153846153846154, |
|
"grad_norm": 0.014464423814392216, |
|
"learning_rate": 1.9043571606975775e-06, |
|
"loss": 0.4527, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 3.630393996247655, |
|
"grad_norm": 0.014608804721408216, |
|
"learning_rate": 1.903285139022381e-06, |
|
"loss": 0.464, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.645403377110694, |
|
"grad_norm": 0.014617662641472032, |
|
"learning_rate": 1.9022074480488616e-06, |
|
"loss": 0.4605, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.6604127579737336, |
|
"grad_norm": 0.01474231509935561, |
|
"learning_rate": 1.901124094540944e-06, |
|
"loss": 0.4494, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.675422138836773, |
|
"grad_norm": 0.014438782822529975, |
|
"learning_rate": 1.9000350852980907e-06, |
|
"loss": 0.4501, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 3.6904315196998123, |
|
"grad_norm": 0.015011699255171157, |
|
"learning_rate": 1.8989404271552628e-06, |
|
"loss": 0.474, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 3.7054409005628517, |
|
"grad_norm": 0.014342680013044425, |
|
"learning_rate": 1.8978401269828743e-06, |
|
"loss": 0.4448, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 3.720450281425891, |
|
"grad_norm": 0.014862003016937061, |
|
"learning_rate": 1.8967341916867517e-06, |
|
"loss": 0.4627, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 3.7354596622889304, |
|
"grad_norm": 0.014657009740811595, |
|
"learning_rate": 1.8956226282080887e-06, |
|
"loss": 0.4695, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.75046904315197, |
|
"grad_norm": 0.01399652480743858, |
|
"learning_rate": 1.8945054435234032e-06, |
|
"loss": 0.4485, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 3.7654784240150097, |
|
"grad_norm": 0.01482138592733111, |
|
"learning_rate": 1.893382644644493e-06, |
|
"loss": 0.4541, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 3.7804878048780486, |
|
"grad_norm": 0.014805529769546168, |
|
"learning_rate": 1.8922542386183939e-06, |
|
"loss": 0.4574, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 3.7954971857410884, |
|
"grad_norm": 0.014571190288480682, |
|
"learning_rate": 1.8911202325273323e-06, |
|
"loss": 0.4494, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 3.8105065666041273, |
|
"grad_norm": 0.0147815449251929, |
|
"learning_rate": 1.8899806334886828e-06, |
|
"loss": 0.4587, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 3.825515947467167, |
|
"grad_norm": 0.014250828964641306, |
|
"learning_rate": 1.8888354486549234e-06, |
|
"loss": 0.461, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 3.8405253283302065, |
|
"grad_norm": 0.014756398846796545, |
|
"learning_rate": 1.8876846852135901e-06, |
|
"loss": 0.4454, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 3.855534709193246, |
|
"grad_norm": 0.014425887354809732, |
|
"learning_rate": 1.8865283503872323e-06, |
|
"loss": 0.4514, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 3.8705440900562853, |
|
"grad_norm": 0.014515933982418967, |
|
"learning_rate": 1.8853664514333661e-06, |
|
"loss": 0.4674, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 3.8855534709193247, |
|
"grad_norm": 0.015102016577158591, |
|
"learning_rate": 1.8841989956444309e-06, |
|
"loss": 0.4681, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.900562851782364, |
|
"grad_norm": 0.01429047197261286, |
|
"learning_rate": 1.8830259903477424e-06, |
|
"loss": 0.4478, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 3.9155722326454034, |
|
"grad_norm": 0.014175055891186608, |
|
"learning_rate": 1.881847442905446e-06, |
|
"loss": 0.4466, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 3.930581613508443, |
|
"grad_norm": 0.014199706390703304, |
|
"learning_rate": 1.8806633607144724e-06, |
|
"loss": 0.4633, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 3.945590994371482, |
|
"grad_norm": 0.014134069587260203, |
|
"learning_rate": 1.8794737512064888e-06, |
|
"loss": 0.4622, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 3.945590994371482, |
|
"eval_loss": 0.4357408583164215, |
|
"eval_runtime": 13.9645, |
|
"eval_samples_per_second": 32.01, |
|
"eval_steps_per_second": 2.005, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 3.9606003752345216, |
|
"grad_norm": 0.014023531184218459, |
|
"learning_rate": 1.878278621847855e-06, |
|
"loss": 0.4515, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 3.975609756097561, |
|
"grad_norm": 0.014386733226076575, |
|
"learning_rate": 1.8770779801395738e-06, |
|
"loss": 0.4509, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 3.9906191369606003, |
|
"grad_norm": 0.014531318566438902, |
|
"learning_rate": 1.875871833617246e-06, |
|
"loss": 0.4532, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 4.01500938086304, |
|
"grad_norm": 0.022435033190231806, |
|
"learning_rate": 1.874660189851022e-06, |
|
"loss": 0.901, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 4.030018761726079, |
|
"grad_norm": 0.014231420288400026, |
|
"learning_rate": 1.8734430564455548e-06, |
|
"loss": 0.4498, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 4.045028142589119, |
|
"grad_norm": 0.01459887414261473, |
|
"learning_rate": 1.872220441039952e-06, |
|
"loss": 0.4623, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.0600375234521575, |
|
"grad_norm": 0.014035710638968229, |
|
"learning_rate": 1.870992351307728e-06, |
|
"loss": 0.4531, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 4.075046904315197, |
|
"grad_norm": 0.01398312713702392, |
|
"learning_rate": 1.8697587949567556e-06, |
|
"loss": 0.4583, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 4.090056285178236, |
|
"grad_norm": 0.014204886625475947, |
|
"learning_rate": 1.868519779729218e-06, |
|
"loss": 0.4564, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 4.105065666041276, |
|
"grad_norm": 0.013937605618481593, |
|
"learning_rate": 1.8672753134015595e-06, |
|
"loss": 0.45, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 4.120075046904315, |
|
"grad_norm": 0.014035968722853485, |
|
"learning_rate": 1.8660254037844386e-06, |
|
"loss": 0.4539, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.135084427767355, |
|
"grad_norm": 0.013953280133352524, |
|
"learning_rate": 1.8647700587226757e-06, |
|
"loss": 0.4355, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 4.150093808630394, |
|
"grad_norm": 0.014026635079346822, |
|
"learning_rate": 1.863509286095207e-06, |
|
"loss": 0.4597, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 4.165103189493434, |
|
"grad_norm": 0.013907777893968047, |
|
"learning_rate": 1.8622430938150336e-06, |
|
"loss": 0.4572, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 4.1801125703564725, |
|
"grad_norm": 0.014404008458403212, |
|
"learning_rate": 1.8609714898291714e-06, |
|
"loss": 0.4463, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.195121951219512, |
|
"grad_norm": 0.014119288813333237, |
|
"learning_rate": 1.8596944821186025e-06, |
|
"loss": 0.4559, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.210131332082551, |
|
"grad_norm": 0.014044205401061443, |
|
"learning_rate": 1.8584120786982243e-06, |
|
"loss": 0.4456, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 4.225140712945591, |
|
"grad_norm": 0.014403222487556044, |
|
"learning_rate": 1.8571242876167993e-06, |
|
"loss": 0.4574, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.24015009380863, |
|
"grad_norm": 0.014536903944634817, |
|
"learning_rate": 1.8558311169569046e-06, |
|
"loss": 0.4509, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 4.25515947467167, |
|
"grad_norm": 0.013763721532947899, |
|
"learning_rate": 1.8545325748348816e-06, |
|
"loss": 0.4461, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.270168855534709, |
|
"grad_norm": 0.013895967925956643, |
|
"learning_rate": 1.8532286694007836e-06, |
|
"loss": 0.4554, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.285178236397749, |
|
"grad_norm": 0.01393914459390315, |
|
"learning_rate": 1.851919408838327e-06, |
|
"loss": 0.4397, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 4.300187617260788, |
|
"grad_norm": 0.013537134276235397, |
|
"learning_rate": 1.850604801364838e-06, |
|
"loss": 0.4562, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 4.315196998123827, |
|
"grad_norm": 0.014143471820575505, |
|
"learning_rate": 1.8492848552312013e-06, |
|
"loss": 0.4535, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.330206378986867, |
|
"grad_norm": 0.013836631457896802, |
|
"learning_rate": 1.8479595787218098e-06, |
|
"loss": 0.4429, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 4.345215759849906, |
|
"grad_norm": 0.013594205543129362, |
|
"learning_rate": 1.8466289801545104e-06, |
|
"loss": 0.4403, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.360225140712946, |
|
"grad_norm": 0.013860702615794385, |
|
"learning_rate": 1.8452930678805533e-06, |
|
"loss": 0.4474, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.375234521575985, |
|
"grad_norm": 0.014225127650714606, |
|
"learning_rate": 1.8439518502845396e-06, |
|
"loss": 0.4477, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 4.390243902439025, |
|
"grad_norm": 0.014938943538672861, |
|
"learning_rate": 1.8426053357843677e-06, |
|
"loss": 0.449, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 4.405253283302064, |
|
"grad_norm": 0.014757581146971215, |
|
"learning_rate": 1.8412535328311812e-06, |
|
"loss": 0.4296, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 4.4202626641651035, |
|
"grad_norm": 0.013546168269445344, |
|
"learning_rate": 1.8398964499093152e-06, |
|
"loss": 0.4582, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 4.435272045028142, |
|
"grad_norm": 0.01407135906976384, |
|
"learning_rate": 1.8385340955362445e-06, |
|
"loss": 0.4526, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 4.450281425891182, |
|
"grad_norm": 0.014172740828230739, |
|
"learning_rate": 1.8371664782625285e-06, |
|
"loss": 0.4488, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 4.465290806754221, |
|
"grad_norm": 0.01412396469645932, |
|
"learning_rate": 1.8357936066717583e-06, |
|
"loss": 0.444, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 4.480300187617261, |
|
"grad_norm": 0.014088947324034075, |
|
"learning_rate": 1.8344154893805026e-06, |
|
"loss": 0.4381, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 4.4953095684803, |
|
"grad_norm": 0.014071918393412349, |
|
"learning_rate": 1.8330321350382542e-06, |
|
"loss": 0.4564, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.51031894934334, |
|
"grad_norm": 0.01403461284817204, |
|
"learning_rate": 1.831643552327375e-06, |
|
"loss": 0.4526, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 4.525328330206379, |
|
"grad_norm": 0.014031997266113018, |
|
"learning_rate": 1.8302497499630413e-06, |
|
"loss": 0.436, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 4.5403377110694185, |
|
"grad_norm": 0.013947785382431976, |
|
"learning_rate": 1.8288507366931904e-06, |
|
"loss": 0.4543, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 4.5553470919324575, |
|
"grad_norm": 0.013628442283807026, |
|
"learning_rate": 1.8274465212984645e-06, |
|
"loss": 0.4493, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 4.570356472795497, |
|
"grad_norm": 0.013780622075694634, |
|
"learning_rate": 1.8260371125921558e-06, |
|
"loss": 0.4541, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 4.585365853658536, |
|
"grad_norm": 0.01367940406253203, |
|
"learning_rate": 1.8246225194201513e-06, |
|
"loss": 0.4497, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 4.600375234521576, |
|
"grad_norm": 0.014142512306958784, |
|
"learning_rate": 1.8232027506608778e-06, |
|
"loss": 0.4499, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 0.013824356322542732, |
|
"learning_rate": 1.821777815225245e-06, |
|
"loss": 0.4451, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 4.630393996247655, |
|
"grad_norm": 0.013296623575056241, |
|
"learning_rate": 1.820347722056591e-06, |
|
"loss": 0.4512, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 4.645403377110695, |
|
"grad_norm": 0.01368390848987212, |
|
"learning_rate": 1.818912480130625e-06, |
|
"loss": 0.4451, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.6604127579737336, |
|
"grad_norm": 0.013358544096664208, |
|
"learning_rate": 1.8174720984553712e-06, |
|
"loss": 0.4454, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 4.6754221388367725, |
|
"grad_norm": 0.014400294321699255, |
|
"learning_rate": 1.8160265860711132e-06, |
|
"loss": 0.4425, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 4.690431519699812, |
|
"grad_norm": 0.013756109715941985, |
|
"learning_rate": 1.8145759520503357e-06, |
|
"loss": 0.4531, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 4.705440900562852, |
|
"grad_norm": 0.013628586470860909, |
|
"learning_rate": 1.8131202054976687e-06, |
|
"loss": 0.4425, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 4.720450281425891, |
|
"grad_norm": 0.013358205799936783, |
|
"learning_rate": 1.8116593555498305e-06, |
|
"loss": 0.4508, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.735459662288931, |
|
"grad_norm": 0.014217584611552704, |
|
"learning_rate": 1.810193411375569e-06, |
|
"loss": 0.4537, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 4.75046904315197, |
|
"grad_norm": 0.013645086599534274, |
|
"learning_rate": 1.808722382175606e-06, |
|
"loss": 0.4478, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 4.76547842401501, |
|
"grad_norm": 0.014072448637602094, |
|
"learning_rate": 1.8072462771825778e-06, |
|
"loss": 0.4518, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.780487804878049, |
|
"grad_norm": 0.013395441388107984, |
|
"learning_rate": 1.8057651056609782e-06, |
|
"loss": 0.4428, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 4.795497185741088, |
|
"grad_norm": 0.013975384875639913, |
|
"learning_rate": 1.8042788769070997e-06, |
|
"loss": 0.4451, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.810506566604127, |
|
"grad_norm": 0.013421942808358008, |
|
"learning_rate": 1.802787600248977e-06, |
|
"loss": 0.4375, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 4.825515947467167, |
|
"grad_norm": 0.014009319367795352, |
|
"learning_rate": 1.8012912850463247e-06, |
|
"loss": 0.454, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 4.840525328330206, |
|
"grad_norm": 0.013790459864287155, |
|
"learning_rate": 1.7997899406904833e-06, |
|
"loss": 0.4454, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 4.855534709193246, |
|
"grad_norm": 0.013608907293000358, |
|
"learning_rate": 1.7982835766043558e-06, |
|
"loss": 0.4428, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.870544090056285, |
|
"grad_norm": 0.013990463928312293, |
|
"learning_rate": 1.7967722022423519e-06, |
|
"loss": 0.4442, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.885553470919325, |
|
"grad_norm": 0.013540266851849116, |
|
"learning_rate": 1.795255827090327e-06, |
|
"loss": 0.4498, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 4.900562851782364, |
|
"grad_norm": 0.013504467127123212, |
|
"learning_rate": 1.7937344606655226e-06, |
|
"loss": 0.4484, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 4.915572232645403, |
|
"grad_norm": 0.013720590668768537, |
|
"learning_rate": 1.7922081125165075e-06, |
|
"loss": 0.4375, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 4.930581613508442, |
|
"grad_norm": 0.013786451177835153, |
|
"learning_rate": 1.7906767922231171e-06, |
|
"loss": 0.4366, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 4.945590994371482, |
|
"grad_norm": 0.013176942655191242, |
|
"learning_rate": 1.7891405093963937e-06, |
|
"loss": 0.4505, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.945590994371482, |
|
"eval_loss": 0.4254697263240814, |
|
"eval_runtime": 13.8629, |
|
"eval_samples_per_second": 32.244, |
|
"eval_steps_per_second": 2.02, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.960600375234522, |
|
"grad_norm": 0.013540995677048154, |
|
"learning_rate": 1.7875992736785255e-06, |
|
"loss": 0.4364, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 4.975609756097561, |
|
"grad_norm": 0.013148307270088234, |
|
"learning_rate": 1.7860530947427874e-06, |
|
"loss": 0.4234, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 4.9906191369606, |
|
"grad_norm": 0.013580201172669656, |
|
"learning_rate": 1.7845019822934787e-06, |
|
"loss": 0.4341, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.018296171270344976, |
|
"learning_rate": 1.7829459460658637e-06, |
|
"loss": 0.4486, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 5.01500938086304, |
|
"grad_norm": 0.014697785626980316, |
|
"learning_rate": 1.7813849958261094e-06, |
|
"loss": 0.4341, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 5.030018761726079, |
|
"grad_norm": 0.014043055899901864, |
|
"learning_rate": 1.7798191413712242e-06, |
|
"loss": 0.4479, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 5.045028142589119, |
|
"grad_norm": 0.013424222741471816, |
|
"learning_rate": 1.778248392528998e-06, |
|
"loss": 0.4489, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 5.0600375234521575, |
|
"grad_norm": 0.012943130776201624, |
|
"learning_rate": 1.7766727591579387e-06, |
|
"loss": 0.4288, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 5.075046904315197, |
|
"grad_norm": 0.013826788871450798, |
|
"learning_rate": 1.7750922511472108e-06, |
|
"loss": 0.4431, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 5.090056285178236, |
|
"grad_norm": 0.01434896451317387, |
|
"learning_rate": 1.7735068784165744e-06, |
|
"loss": 0.4298, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.105065666041276, |
|
"grad_norm": 0.013543345476736609, |
|
"learning_rate": 1.7719166509163208e-06, |
|
"loss": 0.4443, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 5.120075046904315, |
|
"grad_norm": 0.013654867873190223, |
|
"learning_rate": 1.7703215786272128e-06, |
|
"loss": 0.4471, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 5.135084427767355, |
|
"grad_norm": 0.013298753867087855, |
|
"learning_rate": 1.76872167156042e-06, |
|
"loss": 0.4344, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 5.150093808630394, |
|
"grad_norm": 0.013653938903598364, |
|
"learning_rate": 1.767116939757456e-06, |
|
"loss": 0.4365, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 5.165103189493434, |
|
"grad_norm": 0.013329024847758256, |
|
"learning_rate": 1.7655073932901165e-06, |
|
"loss": 0.4312, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 5.1801125703564725, |
|
"grad_norm": 0.013615866345533169, |
|
"learning_rate": 1.763893042260416e-06, |
|
"loss": 0.4402, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 5.195121951219512, |
|
"grad_norm": 0.013772143243209281, |
|
"learning_rate": 1.7622738968005226e-06, |
|
"loss": 0.4416, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 5.210131332082551, |
|
"grad_norm": 0.013480347503805534, |
|
"learning_rate": 1.7606499670726968e-06, |
|
"loss": 0.4472, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 5.225140712945591, |
|
"grad_norm": 0.013157923235962578, |
|
"learning_rate": 1.759021263269227e-06, |
|
"loss": 0.4317, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 5.24015009380863, |
|
"grad_norm": 0.013273852412143418, |
|
"learning_rate": 1.7573877956123637e-06, |
|
"loss": 0.4334, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.25515947467167, |
|
"grad_norm": 0.013606909673657986, |
|
"learning_rate": 1.7557495743542582e-06, |
|
"loss": 0.4371, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 5.270168855534709, |
|
"grad_norm": 0.013941507400326235, |
|
"learning_rate": 1.754106609776896e-06, |
|
"loss": 0.4431, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 5.285178236397749, |
|
"grad_norm": 0.013436310560981397, |
|
"learning_rate": 1.7524589121920342e-06, |
|
"loss": 0.442, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 5.300187617260788, |
|
"grad_norm": 0.01329009884820222, |
|
"learning_rate": 1.7508064919411343e-06, |
|
"loss": 0.4497, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 5.315196998123827, |
|
"grad_norm": 0.01367800689726424, |
|
"learning_rate": 1.7491493593952996e-06, |
|
"loss": 0.4393, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 5.330206378986867, |
|
"grad_norm": 0.01354483521655564, |
|
"learning_rate": 1.747487524955209e-06, |
|
"loss": 0.4364, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 5.345215759849906, |
|
"grad_norm": 0.01352972460550207, |
|
"learning_rate": 1.7458209990510527e-06, |
|
"loss": 0.4333, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 5.360225140712946, |
|
"grad_norm": 0.013745956104063744, |
|
"learning_rate": 1.7441497921424645e-06, |
|
"loss": 0.4328, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 5.375234521575985, |
|
"grad_norm": 0.01319303150078731, |
|
"learning_rate": 1.7424739147184591e-06, |
|
"loss": 0.4333, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 5.390243902439025, |
|
"grad_norm": 0.013764119164539975, |
|
"learning_rate": 1.7407933772973635e-06, |
|
"loss": 0.4518, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.405253283302064, |
|
"grad_norm": 0.013090437609724701, |
|
"learning_rate": 1.7391081904267537e-06, |
|
"loss": 0.4392, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 5.4202626641651035, |
|
"grad_norm": 0.013571191725851636, |
|
"learning_rate": 1.7374183646833858e-06, |
|
"loss": 0.442, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 5.435272045028142, |
|
"grad_norm": 0.013136763063661159, |
|
"learning_rate": 1.7357239106731317e-06, |
|
"loss": 0.4321, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 5.450281425891182, |
|
"grad_norm": 0.013174996076392734, |
|
"learning_rate": 1.734024839030911e-06, |
|
"loss": 0.4351, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 5.465290806754221, |
|
"grad_norm": 0.013584392079284755, |
|
"learning_rate": 1.7323211604206264e-06, |
|
"loss": 0.4336, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 5.480300187617261, |
|
"grad_norm": 0.013238150722927616, |
|
"learning_rate": 1.7306128855350938e-06, |
|
"loss": 0.4499, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 5.4953095684803, |
|
"grad_norm": 0.012966538449872765, |
|
"learning_rate": 1.728900025095978e-06, |
|
"loss": 0.439, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 5.51031894934334, |
|
"grad_norm": 0.013097706772820898, |
|
"learning_rate": 1.7271825898537226e-06, |
|
"loss": 0.4405, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 5.525328330206379, |
|
"grad_norm": 0.013139456397187858, |
|
"learning_rate": 1.725460590587486e-06, |
|
"loss": 0.4322, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 5.5403377110694185, |
|
"grad_norm": 0.01320832233373905, |
|
"learning_rate": 1.72373403810507e-06, |
|
"loss": 0.4453, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.5553470919324575, |
|
"grad_norm": 0.013313022883952532, |
|
"learning_rate": 1.7220029432428555e-06, |
|
"loss": 0.4369, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 5.570356472795497, |
|
"grad_norm": 0.013035779228992055, |
|
"learning_rate": 1.7202673168657315e-06, |
|
"loss": 0.43, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 5.585365853658536, |
|
"grad_norm": 0.013457833726421647, |
|
"learning_rate": 1.7185271698670292e-06, |
|
"loss": 0.4329, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 5.600375234521576, |
|
"grad_norm": 0.013116857431759213, |
|
"learning_rate": 1.7167825131684511e-06, |
|
"loss": 0.4313, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 5.615384615384615, |
|
"grad_norm": 0.013758827471993697, |
|
"learning_rate": 1.715033357720006e-06, |
|
"loss": 0.4476, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.630393996247655, |
|
"grad_norm": 0.013455290328633535, |
|
"learning_rate": 1.7132797144999367e-06, |
|
"loss": 0.4477, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 5.645403377110695, |
|
"grad_norm": 0.013424149696678331, |
|
"learning_rate": 1.7115215945146532e-06, |
|
"loss": 0.4382, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 5.6604127579737336, |
|
"grad_norm": 0.013961218093918704, |
|
"learning_rate": 1.709759008798663e-06, |
|
"loss": 0.4429, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 5.6754221388367725, |
|
"grad_norm": 0.013358780836199631, |
|
"learning_rate": 1.7079919684145026e-06, |
|
"loss": 0.4405, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 5.690431519699812, |
|
"grad_norm": 0.013336158263698005, |
|
"learning_rate": 1.7062204844526657e-06, |
|
"loss": 0.4289, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.705440900562852, |
|
"grad_norm": 0.013355419310096175, |
|
"learning_rate": 1.7044445680315372e-06, |
|
"loss": 0.44, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 5.720450281425891, |
|
"grad_norm": 0.013326753688393343, |
|
"learning_rate": 1.7026642302973203e-06, |
|
"loss": 0.4383, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 5.735459662288931, |
|
"grad_norm": 0.013248321811620164, |
|
"learning_rate": 1.7008794824239673e-06, |
|
"loss": 0.4385, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 5.75046904315197, |
|
"grad_norm": 0.013440251864610129, |
|
"learning_rate": 1.6990903356131123e-06, |
|
"loss": 0.4447, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.76547842401501, |
|
"grad_norm": 0.013356165325694516, |
|
"learning_rate": 1.6972968010939952e-06, |
|
"loss": 0.4395, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 5.780487804878049, |
|
"grad_norm": 0.013697957995084815, |
|
"learning_rate": 1.6954988901233974e-06, |
|
"loss": 0.4445, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 5.795497185741088, |
|
"grad_norm": 0.01352896933498215, |
|
"learning_rate": 1.6936966139855661e-06, |
|
"loss": 0.4497, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 5.810506566604127, |
|
"grad_norm": 0.013017644966079026, |
|
"learning_rate": 1.6918899839921473e-06, |
|
"loss": 0.4427, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 5.825515947467167, |
|
"grad_norm": 0.0130227730372479, |
|
"learning_rate": 1.690079011482112e-06, |
|
"loss": 0.4353, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 5.840525328330206, |
|
"grad_norm": 0.013588479837256142, |
|
"learning_rate": 1.6882637078216865e-06, |
|
"loss": 0.4309, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.855534709193246, |
|
"grad_norm": 0.013099801786314374, |
|
"learning_rate": 1.6864440844042815e-06, |
|
"loss": 0.4259, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 5.870544090056285, |
|
"grad_norm": 0.013097214237123593, |
|
"learning_rate": 1.6846201526504186e-06, |
|
"loss": 0.4302, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 5.885553470919325, |
|
"grad_norm": 0.013065212723171151, |
|
"learning_rate": 1.682791924007661e-06, |
|
"loss": 0.4398, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 5.900562851782364, |
|
"grad_norm": 0.014045965247674342, |
|
"learning_rate": 1.6809594099505392e-06, |
|
"loss": 0.434, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 5.915572232645403, |
|
"grad_norm": 0.013248206942276757, |
|
"learning_rate": 1.6791226219804819e-06, |
|
"loss": 0.4319, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 5.930581613508442, |
|
"grad_norm": 0.013171561529992387, |
|
"learning_rate": 1.6772815716257411e-06, |
|
"loss": 0.4477, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 5.930581613508442, |
|
"eval_loss": 0.41817715764045715, |
|
"eval_runtime": 14.0502, |
|
"eval_samples_per_second": 31.815, |
|
"eval_steps_per_second": 1.993, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 5.945590994371482, |
|
"grad_norm": 0.013426647961607079, |
|
"learning_rate": 1.6754362704413208e-06, |
|
"loss": 0.4338, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 5.960600375234522, |
|
"grad_norm": 0.01345417807854897, |
|
"learning_rate": 1.673586730008905e-06, |
|
"loss": 0.4439, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 5.975609756097561, |
|
"grad_norm": 0.013431711439659157, |
|
"learning_rate": 1.6717329619367848e-06, |
|
"loss": 0.4319, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 5.9906191369606, |
|
"grad_norm": 0.013005069733149226, |
|
"learning_rate": 1.6698749778597842e-06, |
|
"loss": 0.4455, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.01500938086304, |
|
"grad_norm": 0.020617486128169053, |
|
"learning_rate": 1.6680127894391894e-06, |
|
"loss": 0.869, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 6.030018761726079, |
|
"grad_norm": 0.013176861969478369, |
|
"learning_rate": 1.6661464083626733e-06, |
|
"loss": 0.4394, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 6.045028142589119, |
|
"grad_norm": 0.012886044431831887, |
|
"learning_rate": 1.6642758463442244e-06, |
|
"loss": 0.4352, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 6.0600375234521575, |
|
"grad_norm": 0.012593283091799695, |
|
"learning_rate": 1.6624011151240707e-06, |
|
"loss": 0.4352, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 6.075046904315197, |
|
"grad_norm": 0.013119523155248839, |
|
"learning_rate": 1.6605222264686082e-06, |
|
"loss": 0.4456, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.090056285178236, |
|
"grad_norm": 0.013135474034204285, |
|
"learning_rate": 1.6586391921703266e-06, |
|
"loss": 0.4372, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 6.105065666041276, |
|
"grad_norm": 0.013337941205936685, |
|
"learning_rate": 1.6567520240477343e-06, |
|
"loss": 0.4327, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 6.120075046904315, |
|
"grad_norm": 0.013170367591114004, |
|
"learning_rate": 1.6548607339452852e-06, |
|
"loss": 0.4368, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 6.135084427767355, |
|
"grad_norm": 0.013634531162252857, |
|
"learning_rate": 1.6529653337333031e-06, |
|
"loss": 0.4328, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 6.150093808630394, |
|
"grad_norm": 0.013706123614988762, |
|
"learning_rate": 1.65106583530791e-06, |
|
"loss": 0.4375, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.165103189493434, |
|
"grad_norm": 0.0135254442542636, |
|
"learning_rate": 1.649162250590948e-06, |
|
"loss": 0.4354, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 6.1801125703564725, |
|
"grad_norm": 0.013527120604353708, |
|
"learning_rate": 1.6472545915299066e-06, |
|
"loss": 0.4364, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 6.195121951219512, |
|
"grad_norm": 0.012809333866597397, |
|
"learning_rate": 1.645342870097847e-06, |
|
"loss": 0.424, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 6.210131332082551, |
|
"grad_norm": 0.013350184383574616, |
|
"learning_rate": 1.6434270982933271e-06, |
|
"loss": 0.4456, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 6.225140712945591, |
|
"grad_norm": 0.013238123537924208, |
|
"learning_rate": 1.6415072881403263e-06, |
|
"loss": 0.4277, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 6.24015009380863, |
|
"grad_norm": 0.01289362750174883, |
|
"learning_rate": 1.6395834516881702e-06, |
|
"loss": 0.4303, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 6.25515947467167, |
|
"grad_norm": 0.013557747567476165, |
|
"learning_rate": 1.637655601011454e-06, |
|
"loss": 0.4372, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 6.270168855534709, |
|
"grad_norm": 0.012808118264155776, |
|
"learning_rate": 1.6357237482099683e-06, |
|
"loss": 0.4288, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 6.285178236397749, |
|
"grad_norm": 0.013405549925460917, |
|
"learning_rate": 1.6337879054086208e-06, |
|
"loss": 0.4389, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 6.300187617260788, |
|
"grad_norm": 0.013505737436808054, |
|
"learning_rate": 1.6318480847573638e-06, |
|
"loss": 0.4328, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.315196998123827, |
|
"grad_norm": 0.013081956141683409, |
|
"learning_rate": 1.6299042984311143e-06, |
|
"loss": 0.4344, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 6.330206378986867, |
|
"grad_norm": 0.013113716625865569, |
|
"learning_rate": 1.6279565586296797e-06, |
|
"loss": 0.4367, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 6.345215759849906, |
|
"grad_norm": 0.013235475707455984, |
|
"learning_rate": 1.6260048775776803e-06, |
|
"loss": 0.4286, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 6.360225140712946, |
|
"grad_norm": 0.013512582434807428, |
|
"learning_rate": 1.6240492675244726e-06, |
|
"loss": 0.4428, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 6.375234521575985, |
|
"grad_norm": 0.013410143400651279, |
|
"learning_rate": 1.6220897407440741e-06, |
|
"loss": 0.4358, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.390243902439025, |
|
"grad_norm": 0.013327451309648946, |
|
"learning_rate": 1.6201263095350832e-06, |
|
"loss": 0.4301, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 6.405253283302064, |
|
"grad_norm": 0.013025559977297553, |
|
"learning_rate": 1.6181589862206052e-06, |
|
"loss": 0.4359, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 6.4202626641651035, |
|
"grad_norm": 0.013348709279455499, |
|
"learning_rate": 1.6161877831481722e-06, |
|
"loss": 0.434, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 6.435272045028142, |
|
"grad_norm": 0.01262545802365253, |
|
"learning_rate": 1.6142127126896679e-06, |
|
"loss": 0.4263, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 6.450281425891182, |
|
"grad_norm": 0.013172427598095178, |
|
"learning_rate": 1.612233787241248e-06, |
|
"loss": 0.4221, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.465290806754221, |
|
"grad_norm": 0.013481870181090533, |
|
"learning_rate": 1.610251019223264e-06, |
|
"loss": 0.435, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 6.480300187617261, |
|
"grad_norm": 0.013037104332200075, |
|
"learning_rate": 1.6082644210801843e-06, |
|
"loss": 0.4311, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 6.4953095684803, |
|
"grad_norm": 0.013514919736678848, |
|
"learning_rate": 1.6062740052805168e-06, |
|
"loss": 0.4406, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 6.51031894934334, |
|
"grad_norm": 0.013036697288445677, |
|
"learning_rate": 1.6042797843167289e-06, |
|
"loss": 0.4215, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 6.525328330206379, |
|
"grad_norm": 0.012753548020225313, |
|
"learning_rate": 1.6022817707051721e-06, |
|
"loss": 0.4393, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 6.5403377110694185, |
|
"grad_norm": 0.01288860891440036, |
|
"learning_rate": 1.6002799769860005e-06, |
|
"loss": 0.4248, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 6.5553470919324575, |
|
"grad_norm": 0.013672060679601777, |
|
"learning_rate": 1.5982744157230937e-06, |
|
"loss": 0.4385, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 6.570356472795497, |
|
"grad_norm": 0.013035441611515807, |
|
"learning_rate": 1.5962650995039782e-06, |
|
"loss": 0.4422, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 6.585365853658536, |
|
"grad_norm": 0.013122421954394383, |
|
"learning_rate": 1.5942520409397462e-06, |
|
"loss": 0.4365, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 6.600375234521576, |
|
"grad_norm": 0.01319951174301247, |
|
"learning_rate": 1.5922352526649801e-06, |
|
"loss": 0.4307, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.615384615384615, |
|
"grad_norm": 0.013085414297184095, |
|
"learning_rate": 1.5902147473376693e-06, |
|
"loss": 0.4312, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 6.630393996247655, |
|
"grad_norm": 0.012791438384207784, |
|
"learning_rate": 1.5881905376391336e-06, |
|
"loss": 0.4211, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 6.645403377110695, |
|
"grad_norm": 0.012892370389429274, |
|
"learning_rate": 1.5861626362739423e-06, |
|
"loss": 0.4238, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 6.6604127579737336, |
|
"grad_norm": 0.012426989025538739, |
|
"learning_rate": 1.5841310559698342e-06, |
|
"loss": 0.4274, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 6.6754221388367725, |
|
"grad_norm": 0.012707681199612911, |
|
"learning_rate": 1.5820958094776398e-06, |
|
"loss": 0.429, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 6.690431519699812, |
|
"grad_norm": 0.013157713008872291, |
|
"learning_rate": 1.5800569095711981e-06, |
|
"loss": 0.4215, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 6.705440900562852, |
|
"grad_norm": 0.013168651131606283, |
|
"learning_rate": 1.578014369047279e-06, |
|
"loss": 0.4385, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 6.720450281425891, |
|
"grad_norm": 0.013114532064645967, |
|
"learning_rate": 1.5759682007255016e-06, |
|
"loss": 0.4448, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 6.735459662288931, |
|
"grad_norm": 0.013082990504026984, |
|
"learning_rate": 1.573918417448254e-06, |
|
"loss": 0.4275, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 6.75046904315197, |
|
"grad_norm": 0.013151892866998883, |
|
"learning_rate": 1.5718650320806142e-06, |
|
"loss": 0.4337, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.76547842401501, |
|
"grad_norm": 0.013101096876612917, |
|
"learning_rate": 1.569808057510266e-06, |
|
"loss": 0.4293, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 6.780487804878049, |
|
"grad_norm": 0.012754754751734624, |
|
"learning_rate": 1.567747506647422e-06, |
|
"loss": 0.4257, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 6.795497185741088, |
|
"grad_norm": 0.012418281228569954, |
|
"learning_rate": 1.5656833924247396e-06, |
|
"loss": 0.4194, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 6.810506566604127, |
|
"grad_norm": 0.012677644542908185, |
|
"learning_rate": 1.5636157277972413e-06, |
|
"loss": 0.4203, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 6.825515947467167, |
|
"grad_norm": 0.012978451911838326, |
|
"learning_rate": 1.5615445257422332e-06, |
|
"loss": 0.4236, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 6.840525328330206, |
|
"grad_norm": 0.012837439507680473, |
|
"learning_rate": 1.5594697992592229e-06, |
|
"loss": 0.4331, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 6.855534709193246, |
|
"grad_norm": 0.01252259688640816, |
|
"learning_rate": 1.5573915613698393e-06, |
|
"loss": 0.4378, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 6.870544090056285, |
|
"grad_norm": 0.013197054542329851, |
|
"learning_rate": 1.5553098251177485e-06, |
|
"loss": 0.4206, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 6.885553470919325, |
|
"grad_norm": 0.012853395885087531, |
|
"learning_rate": 1.5532246035685755e-06, |
|
"loss": 0.4268, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 6.900562851782364, |
|
"grad_norm": 0.012990308489818332, |
|
"learning_rate": 1.5511359098098183e-06, |
|
"loss": 0.4291, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.915572232645403, |
|
"grad_norm": 0.012921940170193533, |
|
"learning_rate": 1.549043756950768e-06, |
|
"loss": 0.4339, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 6.930581613508442, |
|
"grad_norm": 0.013109887397593497, |
|
"learning_rate": 1.5469481581224271e-06, |
|
"loss": 0.4358, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 6.930581613508442, |
|
"eval_loss": 0.4126039445400238, |
|
"eval_runtime": 13.7392, |
|
"eval_samples_per_second": 32.535, |
|
"eval_steps_per_second": 2.038, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 6.945590994371482, |
|
"grad_norm": 0.012598723095405159, |
|
"learning_rate": 1.5448491264774241e-06, |
|
"loss": 0.4263, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 6.960600375234522, |
|
"grad_norm": 0.012861189060759321, |
|
"learning_rate": 1.5427466751899352e-06, |
|
"loss": 0.427, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 6.975609756097561, |
|
"grad_norm": 0.013200569881254022, |
|
"learning_rate": 1.5406408174555977e-06, |
|
"loss": 0.4259, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 6.9906191369606, |
|
"grad_norm": 0.012616437259491003, |
|
"learning_rate": 1.5385315664914292e-06, |
|
"loss": 0.436, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.016057681180086946, |
|
"learning_rate": 1.536418935535745e-06, |
|
"loss": 0.4215, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 7.01500938086304, |
|
"grad_norm": 0.015147696984116419, |
|
"learning_rate": 1.534302937848073e-06, |
|
"loss": 0.4299, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 7.030018761726079, |
|
"grad_norm": 0.012645368174521793, |
|
"learning_rate": 1.5321835867090732e-06, |
|
"loss": 0.4322, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 7.045028142589119, |
|
"grad_norm": 0.01320195840723717, |
|
"learning_rate": 1.5300608954204514e-06, |
|
"loss": 0.4202, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.0600375234521575, |
|
"grad_norm": 0.012876528475684408, |
|
"learning_rate": 1.5279348773048785e-06, |
|
"loss": 0.4234, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 7.075046904315197, |
|
"grad_norm": 0.012414131318572394, |
|
"learning_rate": 1.5258055457059052e-06, |
|
"loss": 0.4286, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 7.090056285178236, |
|
"grad_norm": 0.013424481910424807, |
|
"learning_rate": 1.5236729139878778e-06, |
|
"loss": 0.4363, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 7.105065666041276, |
|
"grad_norm": 0.013082970732005126, |
|
"learning_rate": 1.5215369955358566e-06, |
|
"loss": 0.4307, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 7.120075046904315, |
|
"grad_norm": 0.013012675401740906, |
|
"learning_rate": 1.5193978037555292e-06, |
|
"loss": 0.4281, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 7.135084427767355, |
|
"grad_norm": 0.01386296810810948, |
|
"learning_rate": 1.517255352073129e-06, |
|
"loss": 0.4359, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 7.150093808630394, |
|
"grad_norm": 0.012959933299681203, |
|
"learning_rate": 1.5151096539353479e-06, |
|
"loss": 0.4267, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 7.165103189493434, |
|
"grad_norm": 0.013273365230097464, |
|
"learning_rate": 1.5129607228092548e-06, |
|
"loss": 0.4225, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 7.1801125703564725, |
|
"grad_norm": 0.013069822188325222, |
|
"learning_rate": 1.5108085721822097e-06, |
|
"loss": 0.434, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 7.195121951219512, |
|
"grad_norm": 0.013059257995761383, |
|
"learning_rate": 1.5086532155617784e-06, |
|
"loss": 0.4337, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.210131332082551, |
|
"grad_norm": 0.012530361479891216, |
|
"learning_rate": 1.506494666475649e-06, |
|
"loss": 0.4288, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 7.225140712945591, |
|
"grad_norm": 0.01275554693816723, |
|
"learning_rate": 1.5043329384715473e-06, |
|
"loss": 0.4267, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 7.24015009380863, |
|
"grad_norm": 0.012727944808238934, |
|
"learning_rate": 1.5021680451171498e-06, |
|
"loss": 0.4227, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 7.25515947467167, |
|
"grad_norm": 0.012512871850066665, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.4347, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 7.270168855534709, |
|
"grad_norm": 0.012879847459317067, |
|
"learning_rate": 1.4978288167274232e-06, |
|
"loss": 0.4238, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 7.285178236397749, |
|
"grad_norm": 0.013267371965589561, |
|
"learning_rate": 1.4956545089264405e-06, |
|
"loss": 0.4258, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 7.300187617260788, |
|
"grad_norm": 0.012473072491095367, |
|
"learning_rate": 1.4934770902436834e-06, |
|
"loss": 0.4299, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 7.315196998123827, |
|
"grad_norm": 0.012782011764426037, |
|
"learning_rate": 1.4912965743453087e-06, |
|
"loss": 0.4182, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 7.330206378986867, |
|
"grad_norm": 0.01311348055027308, |
|
"learning_rate": 1.4891129749169118e-06, |
|
"loss": 0.4296, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 7.345215759849906, |
|
"grad_norm": 0.012765085462581965, |
|
"learning_rate": 1.4869263056634417e-06, |
|
"loss": 0.4289, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.360225140712946, |
|
"grad_norm": 0.012412381150431463, |
|
"learning_rate": 1.4847365803091144e-06, |
|
"loss": 0.4334, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 7.375234521575985, |
|
"grad_norm": 0.01271837862802656, |
|
"learning_rate": 1.4825438125973263e-06, |
|
"loss": 0.425, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 7.390243902439025, |
|
"grad_norm": 0.012352812019462122, |
|
"learning_rate": 1.4803480162905695e-06, |
|
"loss": 0.4207, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 7.405253283302064, |
|
"grad_norm": 0.012746296584207542, |
|
"learning_rate": 1.4781492051703448e-06, |
|
"loss": 0.4215, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 7.4202626641651035, |
|
"grad_norm": 0.012721837448907083, |
|
"learning_rate": 1.4759473930370736e-06, |
|
"loss": 0.4225, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 7.435272045028142, |
|
"grad_norm": 0.012889669681319507, |
|
"learning_rate": 1.4737425937100135e-06, |
|
"loss": 0.4261, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 7.450281425891182, |
|
"grad_norm": 0.012544026246862405, |
|
"learning_rate": 1.4715348210271703e-06, |
|
"loss": 0.4189, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 7.465290806754221, |
|
"grad_norm": 0.012601175719615424, |
|
"learning_rate": 1.4693240888452118e-06, |
|
"loss": 0.4188, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 7.480300187617261, |
|
"grad_norm": 0.012911814515041583, |
|
"learning_rate": 1.4671104110393808e-06, |
|
"loss": 0.445, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 7.4953095684803, |
|
"grad_norm": 0.012900962528470759, |
|
"learning_rate": 1.4648938015034067e-06, |
|
"loss": 0.4271, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.51031894934334, |
|
"grad_norm": 0.012640868695431564, |
|
"learning_rate": 1.4626742741494205e-06, |
|
"loss": 0.4345, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 7.525328330206379, |
|
"grad_norm": 0.012451005837885486, |
|
"learning_rate": 1.4604518429078652e-06, |
|
"loss": 0.429, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 7.5403377110694185, |
|
"grad_norm": 0.013230360834140219, |
|
"learning_rate": 1.4582265217274103e-06, |
|
"loss": 0.4161, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 7.5553470919324575, |
|
"grad_norm": 0.01283686066958734, |
|
"learning_rate": 1.4559983245748637e-06, |
|
"loss": 0.4251, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 7.570356472795497, |
|
"grad_norm": 0.012527190536013916, |
|
"learning_rate": 1.4537672654350832e-06, |
|
"loss": 0.4137, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 7.585365853658536, |
|
"grad_norm": 0.012828676017273635, |
|
"learning_rate": 1.4515333583108893e-06, |
|
"loss": 0.4373, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 7.600375234521576, |
|
"grad_norm": 0.013137450736113143, |
|
"learning_rate": 1.4492966172229778e-06, |
|
"loss": 0.4314, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 7.615384615384615, |
|
"grad_norm": 0.012681479750471514, |
|
"learning_rate": 1.4470570562098306e-06, |
|
"loss": 0.4191, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 7.630393996247655, |
|
"grad_norm": 0.01283115430539013, |
|
"learning_rate": 1.4448146893276295e-06, |
|
"loss": 0.4293, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 7.645403377110695, |
|
"grad_norm": 0.012923054058866432, |
|
"learning_rate": 1.4425695306501655e-06, |
|
"loss": 0.4202, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.6604127579737336, |
|
"grad_norm": 0.013121242158230989, |
|
"learning_rate": 1.4403215942687525e-06, |
|
"loss": 0.4373, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 7.6754221388367725, |
|
"grad_norm": 0.012479459230005806, |
|
"learning_rate": 1.4380708942921382e-06, |
|
"loss": 0.4242, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 7.690431519699812, |
|
"grad_norm": 0.012387034220238928, |
|
"learning_rate": 1.4358174448464153e-06, |
|
"loss": 0.414, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 7.705440900562852, |
|
"grad_norm": 0.012574345486950885, |
|
"learning_rate": 1.433561260074933e-06, |
|
"loss": 0.4272, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 7.720450281425891, |
|
"grad_norm": 0.01290052005130462, |
|
"learning_rate": 1.4313023541382079e-06, |
|
"loss": 0.4298, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 7.735459662288931, |
|
"grad_norm": 0.012668792696732748, |
|
"learning_rate": 1.4290407412138363e-06, |
|
"loss": 0.425, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 7.75046904315197, |
|
"grad_norm": 0.01225012968171437, |
|
"learning_rate": 1.4267764354964037e-06, |
|
"loss": 0.4233, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 7.76547842401501, |
|
"grad_norm": 0.012255924007790624, |
|
"learning_rate": 1.4245094511973967e-06, |
|
"loss": 0.4165, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 7.780487804878049, |
|
"grad_norm": 0.012942200297671827, |
|
"learning_rate": 1.4222398025451134e-06, |
|
"loss": 0.4179, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 7.795497185741088, |
|
"grad_norm": 0.012669086030337824, |
|
"learning_rate": 1.4199675037845743e-06, |
|
"loss": 0.4273, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.810506566604127, |
|
"grad_norm": 0.012438089901438005, |
|
"learning_rate": 1.4176925691774333e-06, |
|
"loss": 0.4229, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 7.825515947467167, |
|
"grad_norm": 0.013016106458461985, |
|
"learning_rate": 1.4154150130018865e-06, |
|
"loss": 0.4342, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 7.840525328330206, |
|
"grad_norm": 0.012676013909183275, |
|
"learning_rate": 1.4131348495525846e-06, |
|
"loss": 0.43, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 7.855534709193246, |
|
"grad_norm": 0.012244101315314318, |
|
"learning_rate": 1.4108520931405421e-06, |
|
"loss": 0.4124, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 7.870544090056285, |
|
"grad_norm": 0.012356566669634342, |
|
"learning_rate": 1.4085667580930481e-06, |
|
"loss": 0.4253, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.885553470919325, |
|
"grad_norm": 0.013545664104386693, |
|
"learning_rate": 1.4062788587535757e-06, |
|
"loss": 0.4336, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 7.900562851782364, |
|
"grad_norm": 0.012958461771861544, |
|
"learning_rate": 1.403988409481692e-06, |
|
"loss": 0.4256, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 7.915572232645403, |
|
"grad_norm": 0.012515973222970926, |
|
"learning_rate": 1.4016954246529694e-06, |
|
"loss": 0.4258, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 7.915572232645403, |
|
"eval_loss": 0.40825632214546204, |
|
"eval_runtime": 13.931, |
|
"eval_samples_per_second": 32.087, |
|
"eval_steps_per_second": 2.01, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 7.930581613508442, |
|
"grad_norm": 0.013163236106306738, |
|
"learning_rate": 1.399399918658893e-06, |
|
"loss": 0.4261, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 7.945590994371482, |
|
"grad_norm": 0.013156945923845514, |
|
"learning_rate": 1.3971019059067716e-06, |
|
"loss": 0.4282, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.960600375234522, |
|
"grad_norm": 0.012975296664314717, |
|
"learning_rate": 1.3948014008196485e-06, |
|
"loss": 0.4178, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 7.975609756097561, |
|
"grad_norm": 0.012620579592042935, |
|
"learning_rate": 1.3924984178362077e-06, |
|
"loss": 0.4315, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 7.9906191369606, |
|
"grad_norm": 0.012990444728543174, |
|
"learning_rate": 1.390192971410687e-06, |
|
"loss": 0.425, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.012990444728543174, |
|
"learning_rate": 1.3878850760127846e-06, |
|
"loss": 0.3523, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 8.01500938086304, |
|
"grad_norm": 0.012443192719573455, |
|
"learning_rate": 1.3855747461275697e-06, |
|
"loss": 0.4906, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 8.03001876172608, |
|
"grad_norm": 0.012368209559504574, |
|
"learning_rate": 1.3832619962553905e-06, |
|
"loss": 0.4227, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 8.045028142589118, |
|
"grad_norm": 0.012874491584688213, |
|
"learning_rate": 1.3809468409117844e-06, |
|
"loss": 0.423, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 8.060037523452158, |
|
"grad_norm": 0.012955402213050558, |
|
"learning_rate": 1.3786292946273859e-06, |
|
"loss": 0.4301, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 8.075046904315197, |
|
"grad_norm": 0.012397892892618072, |
|
"learning_rate": 1.3763093719478357e-06, |
|
"loss": 0.4213, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 8.090056285178237, |
|
"grad_norm": 0.012234259749931429, |
|
"learning_rate": 1.3739870874336897e-06, |
|
"loss": 0.4193, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.105065666041275, |
|
"grad_norm": 0.012209969001234834, |
|
"learning_rate": 1.3716624556603274e-06, |
|
"loss": 0.4234, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 8.120075046904315, |
|
"grad_norm": 0.012750032029844028, |
|
"learning_rate": 1.3693354912178607e-06, |
|
"loss": 0.4286, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 8.135084427767355, |
|
"grad_norm": 0.012498344856428782, |
|
"learning_rate": 1.367006208711042e-06, |
|
"loss": 0.4162, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 8.150093808630395, |
|
"grad_norm": 0.012551160489829018, |
|
"learning_rate": 1.3646746227591718e-06, |
|
"loss": 0.423, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 8.165103189493433, |
|
"grad_norm": 0.012836057883823809, |
|
"learning_rate": 1.3623407479960086e-06, |
|
"loss": 0.4183, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 8.180112570356473, |
|
"grad_norm": 0.012832136571966581, |
|
"learning_rate": 1.360004599069676e-06, |
|
"loss": 0.4255, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 8.195121951219512, |
|
"grad_norm": 0.012674899783215083, |
|
"learning_rate": 1.3576661906425705e-06, |
|
"loss": 0.4154, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 8.210131332082552, |
|
"grad_norm": 0.012904907272715635, |
|
"learning_rate": 1.3553255373912707e-06, |
|
"loss": 0.4221, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 8.22514071294559, |
|
"grad_norm": 0.012553161887151092, |
|
"learning_rate": 1.3529826540064438e-06, |
|
"loss": 0.4197, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 8.24015009380863, |
|
"grad_norm": 0.01258506653039211, |
|
"learning_rate": 1.3506375551927544e-06, |
|
"loss": 0.4323, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.25515947467167, |
|
"grad_norm": 0.013006243593294749, |
|
"learning_rate": 1.3482902556687715e-06, |
|
"loss": 0.4301, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 8.27016885553471, |
|
"grad_norm": 0.012621778538523186, |
|
"learning_rate": 1.345940770166876e-06, |
|
"loss": 0.4273, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 8.285178236397748, |
|
"grad_norm": 0.012547789104974505, |
|
"learning_rate": 1.3435891134331705e-06, |
|
"loss": 0.4255, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 8.300187617260788, |
|
"grad_norm": 0.012362863077543909, |
|
"learning_rate": 1.3412353002273827e-06, |
|
"loss": 0.4274, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 8.315196998123827, |
|
"grad_norm": 0.013130546014588162, |
|
"learning_rate": 1.3388793453227765e-06, |
|
"loss": 0.4245, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 8.330206378986867, |
|
"grad_norm": 0.012375388484720671, |
|
"learning_rate": 1.3365212635060569e-06, |
|
"loss": 0.4182, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 8.345215759849907, |
|
"grad_norm": 0.012314392169435896, |
|
"learning_rate": 1.3341610695772784e-06, |
|
"loss": 0.4128, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 8.360225140712945, |
|
"grad_norm": 0.012928110986283681, |
|
"learning_rate": 1.3317987783497519e-06, |
|
"loss": 0.4251, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 8.375234521575985, |
|
"grad_norm": 0.013081254426541622, |
|
"learning_rate": 1.3294344046499515e-06, |
|
"loss": 0.4288, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 8.390243902439025, |
|
"grad_norm": 0.012679568106310851, |
|
"learning_rate": 1.3270679633174217e-06, |
|
"loss": 0.4181, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.405253283302065, |
|
"grad_norm": 0.012837821286797968, |
|
"learning_rate": 1.3246994692046835e-06, |
|
"loss": 0.4221, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 8.420262664165103, |
|
"grad_norm": 0.012683953345995792, |
|
"learning_rate": 1.3223289371771424e-06, |
|
"loss": 0.4342, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 8.435272045028142, |
|
"grad_norm": 0.012324190260690752, |
|
"learning_rate": 1.3199563821129944e-06, |
|
"loss": 0.4143, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 8.450281425891182, |
|
"grad_norm": 0.012704009069056542, |
|
"learning_rate": 1.3175818189031326e-06, |
|
"loss": 0.4139, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 8.465290806754222, |
|
"grad_norm": 0.012664000146649987, |
|
"learning_rate": 1.3152052624510535e-06, |
|
"loss": 0.421, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 8.48030018761726, |
|
"grad_norm": 0.013174322443423665, |
|
"learning_rate": 1.3128267276727644e-06, |
|
"loss": 0.4172, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 8.4953095684803, |
|
"grad_norm": 0.012481267748429541, |
|
"learning_rate": 1.3104462294966894e-06, |
|
"loss": 0.4256, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.51031894934334, |
|
"grad_norm": 0.012926931305574265, |
|
"learning_rate": 1.3080637828635744e-06, |
|
"loss": 0.4236, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 8.52532833020638, |
|
"grad_norm": 0.012594343237208048, |
|
"learning_rate": 1.3056794027263948e-06, |
|
"loss": 0.424, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 8.540337711069418, |
|
"grad_norm": 0.013005429097167703, |
|
"learning_rate": 1.3032931040502626e-06, |
|
"loss": 0.4262, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.555347091932457, |
|
"grad_norm": 0.012700849998308944, |
|
"learning_rate": 1.300904901812329e-06, |
|
"loss": 0.4112, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 8.570356472795497, |
|
"grad_norm": 0.01234859544446316, |
|
"learning_rate": 1.2985148110016947e-06, |
|
"loss": 0.4234, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 8.585365853658537, |
|
"grad_norm": 0.012647777310344478, |
|
"learning_rate": 1.2961228466193116e-06, |
|
"loss": 0.4298, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 8.600375234521575, |
|
"grad_norm": 0.012976806863275401, |
|
"learning_rate": 1.293729023677892e-06, |
|
"loss": 0.4104, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 8.615384615384615, |
|
"grad_norm": 0.013316957653669519, |
|
"learning_rate": 1.2913333572018132e-06, |
|
"loss": 0.4277, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 8.630393996247655, |
|
"grad_norm": 0.012701811346435813, |
|
"learning_rate": 1.2889358622270223e-06, |
|
"loss": 0.4194, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 8.645403377110695, |
|
"grad_norm": 0.012852571501030714, |
|
"learning_rate": 1.2865365538009432e-06, |
|
"loss": 0.4225, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 8.660412757973734, |
|
"grad_norm": 0.012874594988687248, |
|
"learning_rate": 1.2841354469823814e-06, |
|
"loss": 0.4124, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 8.675422138836772, |
|
"grad_norm": 0.013235216009797502, |
|
"learning_rate": 1.2817325568414297e-06, |
|
"loss": 0.4319, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 8.690431519699812, |
|
"grad_norm": 0.012795063248840513, |
|
"learning_rate": 1.2793278984593734e-06, |
|
"loss": 0.4231, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.705440900562852, |
|
"grad_norm": 0.012789613479480306, |
|
"learning_rate": 1.2769214869285963e-06, |
|
"loss": 0.4174, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 8.720450281425892, |
|
"grad_norm": 0.012313758378033874, |
|
"learning_rate": 1.2745133373524852e-06, |
|
"loss": 0.4294, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.73545966228893, |
|
"grad_norm": 0.01301988888876861, |
|
"learning_rate": 1.272103464845335e-06, |
|
"loss": 0.4265, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 8.75046904315197, |
|
"grad_norm": 0.012794500405563685, |
|
"learning_rate": 1.269691884532255e-06, |
|
"loss": 0.4169, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 8.76547842401501, |
|
"grad_norm": 0.012906369892351301, |
|
"learning_rate": 1.2672786115490727e-06, |
|
"loss": 0.4235, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 8.78048780487805, |
|
"grad_norm": 0.012698038298790544, |
|
"learning_rate": 1.26486366104224e-06, |
|
"loss": 0.4198, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 8.795497185741088, |
|
"grad_norm": 0.012324390315305763, |
|
"learning_rate": 1.2624470481687368e-06, |
|
"loss": 0.4222, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 8.810506566604127, |
|
"grad_norm": 0.012471895547561243, |
|
"learning_rate": 1.260028788095976e-06, |
|
"loss": 0.4121, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 8.825515947467167, |
|
"grad_norm": 0.013053246144026396, |
|
"learning_rate": 1.2576088960017107e-06, |
|
"loss": 0.423, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 8.840525328330207, |
|
"grad_norm": 0.013057771093177609, |
|
"learning_rate": 1.255187387073935e-06, |
|
"loss": 0.4195, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.855534709193245, |
|
"grad_norm": 0.012992648432035044, |
|
"learning_rate": 1.2527642765107917e-06, |
|
"loss": 0.4148, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 8.870544090056285, |
|
"grad_norm": 0.0130532629815524, |
|
"learning_rate": 1.2503395795204766e-06, |
|
"loss": 0.4309, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 8.885553470919325, |
|
"grad_norm": 0.012368890664965363, |
|
"learning_rate": 1.2479133113211412e-06, |
|
"loss": 0.4158, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 8.900562851782365, |
|
"grad_norm": 0.012966982165331422, |
|
"learning_rate": 1.245485487140799e-06, |
|
"loss": 0.4207, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 8.900562851782365, |
|
"eval_loss": 0.4048081934452057, |
|
"eval_runtime": 13.9142, |
|
"eval_samples_per_second": 32.126, |
|
"eval_steps_per_second": 2.012, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 8.915572232645403, |
|
"grad_norm": 0.01282686305166402, |
|
"learning_rate": 1.2430561222172295e-06, |
|
"loss": 0.4342, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 8.930581613508442, |
|
"grad_norm": 0.012073531377116312, |
|
"learning_rate": 1.2406252317978821e-06, |
|
"loss": 0.4225, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 8.945590994371482, |
|
"grad_norm": 0.012136707320216203, |
|
"learning_rate": 1.2381928311397806e-06, |
|
"loss": 0.42, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 8.960600375234522, |
|
"grad_norm": 0.012992601219686593, |
|
"learning_rate": 1.2357589355094273e-06, |
|
"loss": 0.4294, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 8.975609756097562, |
|
"grad_norm": 0.012433755901529724, |
|
"learning_rate": 1.2333235601827084e-06, |
|
"loss": 0.4135, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 8.9906191369606, |
|
"grad_norm": 0.012871323298772467, |
|
"learning_rate": 1.2308867204447957e-06, |
|
"loss": 0.4227, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.012871323298772467, |
|
"learning_rate": 1.228448431590054e-06, |
|
"loss": 0.4203, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 9.01500938086304, |
|
"grad_norm": 0.017461692067906185, |
|
"learning_rate": 1.2260087089219414e-06, |
|
"loss": 0.4263, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 9.03001876172608, |
|
"grad_norm": 0.012676685293751521, |
|
"learning_rate": 1.2235675677529155e-06, |
|
"loss": 0.4206, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 9.045028142589118, |
|
"grad_norm": 0.012796622016032069, |
|
"learning_rate": 1.2211250234043382e-06, |
|
"loss": 0.4263, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 9.060037523452158, |
|
"grad_norm": 0.013183001174916187, |
|
"learning_rate": 1.2186810912063758e-06, |
|
"loss": 0.42, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 9.075046904315197, |
|
"grad_norm": 0.012633953173363561, |
|
"learning_rate": 1.216235786497907e-06, |
|
"loss": 0.4163, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 9.090056285178237, |
|
"grad_norm": 0.012022016291928495, |
|
"learning_rate": 1.213789124626425e-06, |
|
"loss": 0.4185, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 9.105065666041275, |
|
"grad_norm": 0.012893256202566969, |
|
"learning_rate": 1.211341120947939e-06, |
|
"loss": 0.4098, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 9.120075046904315, |
|
"grad_norm": 0.012317279779981451, |
|
"learning_rate": 1.208891790826882e-06, |
|
"loss": 0.4269, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 9.135084427767355, |
|
"grad_norm": 0.012580486012471572, |
|
"learning_rate": 1.2064411496360107e-06, |
|
"loss": 0.4144, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.150093808630395, |
|
"grad_norm": 0.012564068366617366, |
|
"learning_rate": 1.2039892127563116e-06, |
|
"loss": 0.4088, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 9.165103189493433, |
|
"grad_norm": 0.011856953897715697, |
|
"learning_rate": 1.201535995576902e-06, |
|
"loss": 0.4283, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 9.180112570356473, |
|
"grad_norm": 0.01293799333411114, |
|
"learning_rate": 1.199081513494936e-06, |
|
"loss": 0.4165, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 9.195121951219512, |
|
"grad_norm": 0.013093089279946351, |
|
"learning_rate": 1.1966257819155062e-06, |
|
"loss": 0.4164, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 9.210131332082552, |
|
"grad_norm": 0.012895898134142643, |
|
"learning_rate": 1.1941688162515467e-06, |
|
"loss": 0.4248, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 9.22514071294559, |
|
"grad_norm": 0.01250661926372622, |
|
"learning_rate": 1.1917106319237384e-06, |
|
"loss": 0.4303, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 9.24015009380863, |
|
"grad_norm": 0.013540155741539446, |
|
"learning_rate": 1.1892512443604101e-06, |
|
"loss": 0.4167, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 9.25515947467167, |
|
"grad_norm": 0.012245135490446384, |
|
"learning_rate": 1.1867906689974427e-06, |
|
"loss": 0.4234, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 9.27016885553471, |
|
"grad_norm": 0.012654858934040628, |
|
"learning_rate": 1.1843289212781722e-06, |
|
"loss": 0.4078, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 9.285178236397748, |
|
"grad_norm": 0.012581564643630807, |
|
"learning_rate": 1.1818660166532924e-06, |
|
"loss": 0.404, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.300187617260788, |
|
"grad_norm": 0.012446126751038225, |
|
"learning_rate": 1.1794019705807582e-06, |
|
"loss": 0.4256, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 9.315196998123827, |
|
"grad_norm": 0.012225067090946798, |
|
"learning_rate": 1.1769367985256885e-06, |
|
"loss": 0.4195, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 9.330206378986867, |
|
"grad_norm": 0.012493128506126407, |
|
"learning_rate": 1.1744705159602698e-06, |
|
"loss": 0.4219, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 9.345215759849907, |
|
"grad_norm": 0.012675214454940823, |
|
"learning_rate": 1.1720031383636585e-06, |
|
"loss": 0.4212, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 9.360225140712945, |
|
"grad_norm": 0.012438903850514627, |
|
"learning_rate": 1.1695346812218825e-06, |
|
"loss": 0.4168, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 9.375234521575985, |
|
"grad_norm": 0.01254096913931574, |
|
"learning_rate": 1.167065160027747e-06, |
|
"loss": 0.4149, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 9.390243902439025, |
|
"grad_norm": 0.012506400410392516, |
|
"learning_rate": 1.164594590280734e-06, |
|
"loss": 0.4169, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 9.405253283302065, |
|
"grad_norm": 0.013067474713690975, |
|
"learning_rate": 1.1621229874869075e-06, |
|
"loss": 0.4127, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 9.420262664165103, |
|
"grad_norm": 0.012626791768751198, |
|
"learning_rate": 1.159650367158815e-06, |
|
"loss": 0.4291, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 9.435272045028142, |
|
"grad_norm": 0.012395862427797645, |
|
"learning_rate": 1.15717674481539e-06, |
|
"loss": 0.4099, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.450281425891182, |
|
"grad_norm": 0.01277375372458372, |
|
"learning_rate": 1.1547021359818558e-06, |
|
"loss": 0.4123, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 9.465290806754222, |
|
"grad_norm": 0.0123342923006372, |
|
"learning_rate": 1.1522265561896263e-06, |
|
"loss": 0.4154, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 9.48030018761726, |
|
"grad_norm": 0.012429900682600912, |
|
"learning_rate": 1.14975002097621e-06, |
|
"loss": 0.4152, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 9.4953095684803, |
|
"grad_norm": 0.01280375207676722, |
|
"learning_rate": 1.1472725458851116e-06, |
|
"loss": 0.415, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 9.51031894934334, |
|
"grad_norm": 0.012687397063652189, |
|
"learning_rate": 1.144794146465735e-06, |
|
"loss": 0.4304, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 9.52532833020638, |
|
"grad_norm": 0.012179956092863778, |
|
"learning_rate": 1.1423148382732853e-06, |
|
"loss": 0.4093, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 9.540337711069418, |
|
"grad_norm": 0.012093011406295692, |
|
"learning_rate": 1.1398346368686714e-06, |
|
"loss": 0.418, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 9.555347091932457, |
|
"grad_norm": 0.013454735147744316, |
|
"learning_rate": 1.1373535578184082e-06, |
|
"loss": 0.4264, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 9.570356472795497, |
|
"grad_norm": 0.012555914733497363, |
|
"learning_rate": 1.1348716166945195e-06, |
|
"loss": 0.4212, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 9.585365853658537, |
|
"grad_norm": 0.01309842650785753, |
|
"learning_rate": 1.1323888290744385e-06, |
|
"loss": 0.4229, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.600375234521575, |
|
"grad_norm": 0.013049394375582246, |
|
"learning_rate": 1.1299052105409134e-06, |
|
"loss": 0.4235, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 9.615384615384615, |
|
"grad_norm": 0.012029514118628914, |
|
"learning_rate": 1.127420776681905e-06, |
|
"loss": 0.4132, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 9.630393996247655, |
|
"grad_norm": 0.012689737627309502, |
|
"learning_rate": 1.1249355430904929e-06, |
|
"loss": 0.4234, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 9.645403377110695, |
|
"grad_norm": 0.012003229452039505, |
|
"learning_rate": 1.1224495253647754e-06, |
|
"loss": 0.4166, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 9.660412757973734, |
|
"grad_norm": 0.012066969097037491, |
|
"learning_rate": 1.119962739107773e-06, |
|
"loss": 0.4092, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 9.675422138836772, |
|
"grad_norm": 0.012338956794216918, |
|
"learning_rate": 1.117475199927329e-06, |
|
"loss": 0.4282, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 9.690431519699812, |
|
"grad_norm": 0.012204286663618873, |
|
"learning_rate": 1.1149869234360126e-06, |
|
"loss": 0.4314, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 9.705440900562852, |
|
"grad_norm": 0.01259052031260883, |
|
"learning_rate": 1.1124979252510207e-06, |
|
"loss": 0.4305, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 9.720450281425892, |
|
"grad_norm": 0.013022893825570933, |
|
"learning_rate": 1.1100082209940793e-06, |
|
"loss": 0.4198, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 9.73545966228893, |
|
"grad_norm": 0.012690810978915846, |
|
"learning_rate": 1.1075178262913466e-06, |
|
"loss": 0.4109, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.75046904315197, |
|
"grad_norm": 0.012631087513351836, |
|
"learning_rate": 1.1050267567733138e-06, |
|
"loss": 0.4161, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 9.76547842401501, |
|
"grad_norm": 0.012804454368901522, |
|
"learning_rate": 1.1025350280747073e-06, |
|
"loss": 0.4196, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 9.78048780487805, |
|
"grad_norm": 0.0126217418321894, |
|
"learning_rate": 1.1000426558343909e-06, |
|
"loss": 0.421, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 9.795497185741088, |
|
"grad_norm": 0.012742956495133224, |
|
"learning_rate": 1.097549655695268e-06, |
|
"loss": 0.4175, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 9.810506566604127, |
|
"grad_norm": 0.012689525456554308, |
|
"learning_rate": 1.0950560433041825e-06, |
|
"loss": 0.4078, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 9.825515947467167, |
|
"grad_norm": 0.012319356429640735, |
|
"learning_rate": 1.0925618343118207e-06, |
|
"loss": 0.4249, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 9.840525328330207, |
|
"grad_norm": 0.012463815882183473, |
|
"learning_rate": 1.0900670443726134e-06, |
|
"loss": 0.416, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 9.855534709193245, |
|
"grad_norm": 0.012494008236093816, |
|
"learning_rate": 1.087571689144638e-06, |
|
"loss": 0.4094, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 9.870544090056285, |
|
"grad_norm": 0.012788296107781863, |
|
"learning_rate": 1.0850757842895193e-06, |
|
"loss": 0.4134, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 9.885553470919325, |
|
"grad_norm": 0.011949790784482477, |
|
"learning_rate": 1.0825793454723324e-06, |
|
"loss": 0.4123, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.885553470919325, |
|
"eval_loss": 0.40209999680519104, |
|
"eval_runtime": 13.8285, |
|
"eval_samples_per_second": 32.324, |
|
"eval_steps_per_second": 2.025, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.900562851782365, |
|
"grad_norm": 0.012120763384507233, |
|
"learning_rate": 1.0800823883615032e-06, |
|
"loss": 0.418, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 9.915572232645403, |
|
"grad_norm": 0.01272550451559427, |
|
"learning_rate": 1.0775849286287104e-06, |
|
"loss": 0.4255, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 9.930581613508442, |
|
"grad_norm": 0.012569887569723945, |
|
"learning_rate": 1.0750869819487883e-06, |
|
"loss": 0.4181, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 9.945590994371482, |
|
"grad_norm": 0.012900893185061444, |
|
"learning_rate": 1.0725885639996262e-06, |
|
"loss": 0.4256, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 9.960600375234522, |
|
"grad_norm": 0.012186922057616576, |
|
"learning_rate": 1.0700896904620722e-06, |
|
"loss": 0.4239, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 9.975609756097562, |
|
"grad_norm": 0.012546185280174902, |
|
"learning_rate": 1.0675903770198332e-06, |
|
"loss": 0.4096, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 9.9906191369606, |
|
"grad_norm": 0.012308385784068267, |
|
"learning_rate": 1.0650906393593768e-06, |
|
"loss": 0.417, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 10.01500938086304, |
|
"grad_norm": 0.014542424040829479, |
|
"learning_rate": 1.0625904931698345e-06, |
|
"loss": 0.8235, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 10.03001876172608, |
|
"grad_norm": 0.012357617955558025, |
|
"learning_rate": 1.0600899541429002e-06, |
|
"loss": 0.4132, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 10.045028142589118, |
|
"grad_norm": 0.01238382198584047, |
|
"learning_rate": 1.057589037972735e-06, |
|
"loss": 0.409, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 10.060037523452158, |
|
"grad_norm": 0.012545757227630114, |
|
"learning_rate": 1.0550877603558654e-06, |
|
"loss": 0.4202, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 10.075046904315197, |
|
"grad_norm": 0.012296729178528745, |
|
"learning_rate": 1.0525861369910876e-06, |
|
"loss": 0.4118, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 10.090056285178237, |
|
"grad_norm": 0.01233936400904742, |
|
"learning_rate": 1.0500841835793676e-06, |
|
"loss": 0.4186, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 10.105065666041275, |
|
"grad_norm": 0.01239644213592733, |
|
"learning_rate": 1.0475819158237424e-06, |
|
"loss": 0.4211, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 10.120075046904315, |
|
"grad_norm": 0.01238165563747052, |
|
"learning_rate": 1.0450793494292222e-06, |
|
"loss": 0.4192, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 10.135084427767355, |
|
"grad_norm": 0.01228638867675189, |
|
"learning_rate": 1.0425765001026922e-06, |
|
"loss": 0.4122, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 10.150093808630395, |
|
"grad_norm": 0.012503182702259674, |
|
"learning_rate": 1.0400733835528124e-06, |
|
"loss": 0.4257, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 10.165103189493433, |
|
"grad_norm": 0.012016184893121317, |
|
"learning_rate": 1.0375700154899207e-06, |
|
"loss": 0.3982, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 10.180112570356473, |
|
"grad_norm": 0.012647564056457507, |
|
"learning_rate": 1.0350664116259326e-06, |
|
"loss": 0.4247, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 10.195121951219512, |
|
"grad_norm": 0.012624563972519114, |
|
"learning_rate": 1.032562587674245e-06, |
|
"loss": 0.4226, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 10.210131332082552, |
|
"grad_norm": 0.01266422823244068, |
|
"learning_rate": 1.0300585593496347e-06, |
|
"loss": 0.4236, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 10.22514071294559, |
|
"grad_norm": 0.013120899008077981, |
|
"learning_rate": 1.0275543423681621e-06, |
|
"loss": 0.4267, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 10.24015009380863, |
|
"grad_norm": 0.012469850454087042, |
|
"learning_rate": 1.0250499524470713e-06, |
|
"loss": 0.4185, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 10.25515947467167, |
|
"grad_norm": 0.013032775148789266, |
|
"learning_rate": 1.022545405304692e-06, |
|
"loss": 0.4173, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 10.27016885553471, |
|
"grad_norm": 0.012130912167583757, |
|
"learning_rate": 1.020040716660341e-06, |
|
"loss": 0.4174, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 10.285178236397748, |
|
"grad_norm": 0.012694584356580994, |
|
"learning_rate": 1.0175359022342224e-06, |
|
"loss": 0.4201, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 10.300187617260788, |
|
"grad_norm": 0.012738648311210665, |
|
"learning_rate": 1.0150309777473304e-06, |
|
"loss": 0.4246, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 10.315196998123827, |
|
"grad_norm": 0.012541064779257812, |
|
"learning_rate": 1.0125259589213495e-06, |
|
"loss": 0.4237, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 10.330206378986867, |
|
"grad_norm": 0.012159573126019833, |
|
"learning_rate": 1.0100208614785565e-06, |
|
"loss": 0.4236, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 10.345215759849907, |
|
"grad_norm": 0.012655017443475823, |
|
"learning_rate": 1.007515701141722e-06, |
|
"loss": 0.414, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 10.360225140712945, |
|
"grad_norm": 0.01201000687630199, |
|
"learning_rate": 1.0050104936340107e-06, |
|
"loss": 0.4185, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 10.375234521575985, |
|
"grad_norm": 0.012362021002323115, |
|
"learning_rate": 1.002505254678884e-06, |
|
"loss": 0.4226, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 10.390243902439025, |
|
"grad_norm": 0.012831699499431226, |
|
"learning_rate": 1e-06, |
|
"loss": 0.4267, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 10.405253283302065, |
|
"grad_norm": 0.01274780819591858, |
|
"learning_rate": 9.97494745321116e-07, |
|
"loss": 0.4109, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 10.420262664165103, |
|
"grad_norm": 0.013774541103795192, |
|
"learning_rate": 9.949895063659892e-07, |
|
"loss": 0.4125, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 10.435272045028142, |
|
"grad_norm": 0.012506222777518427, |
|
"learning_rate": 9.924842988582782e-07, |
|
"loss": 0.4214, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 10.450281425891182, |
|
"grad_norm": 0.01244429874279497, |
|
"learning_rate": 9.899791385214436e-07, |
|
"loss": 0.4051, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 10.465290806754222, |
|
"grad_norm": 0.012191950069124378, |
|
"learning_rate": 9.874740410786506e-07, |
|
"loss": 0.4118, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 10.48030018761726, |
|
"grad_norm": 0.013197523206823903, |
|
"learning_rate": 9.849690222526697e-07, |
|
"loss": 0.416, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 10.4953095684803, |
|
"grad_norm": 0.012181682595547577, |
|
"learning_rate": 9.824640977657773e-07, |
|
"loss": 0.4105, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.51031894934334, |
|
"grad_norm": 0.01234029745826439, |
|
"learning_rate": 9.79959283339659e-07, |
|
"loss": 0.4198, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 10.52532833020638, |
|
"grad_norm": 0.012785453706742769, |
|
"learning_rate": 9.77454594695308e-07, |
|
"loss": 0.4143, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 10.540337711069418, |
|
"grad_norm": 0.012575428471071192, |
|
"learning_rate": 9.749500475529289e-07, |
|
"loss": 0.411, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 10.555347091932457, |
|
"grad_norm": 0.012318721406990746, |
|
"learning_rate": 9.72445657631838e-07, |
|
"loss": 0.3997, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 10.570356472795497, |
|
"grad_norm": 0.012789956349847692, |
|
"learning_rate": 9.699414406503652e-07, |
|
"loss": 0.4176, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 10.585365853658537, |
|
"grad_norm": 0.01328367201839251, |
|
"learning_rate": 9.674374123257553e-07, |
|
"loss": 0.4202, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 10.600375234521575, |
|
"grad_norm": 0.013024092725146954, |
|
"learning_rate": 9.649335883740673e-07, |
|
"loss": 0.4158, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 10.615384615384615, |
|
"grad_norm": 0.013090434740472322, |
|
"learning_rate": 9.624299845100794e-07, |
|
"loss": 0.4101, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 10.630393996247655, |
|
"grad_norm": 0.012176392973424509, |
|
"learning_rate": 9.599266164471873e-07, |
|
"loss": 0.4073, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 10.645403377110695, |
|
"grad_norm": 0.012247800521725064, |
|
"learning_rate": 9.574234998973075e-07, |
|
"loss": 0.4126, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.660412757973734, |
|
"grad_norm": 0.012220616675445717, |
|
"learning_rate": 9.549206505707777e-07, |
|
"loss": 0.4145, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 10.675422138836772, |
|
"grad_norm": 0.01252656468465896, |
|
"learning_rate": 9.524180841762576e-07, |
|
"loss": 0.4226, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 10.690431519699812, |
|
"grad_norm": 0.01239436211827446, |
|
"learning_rate": 9.499158164206324e-07, |
|
"loss": 0.4181, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 10.705440900562852, |
|
"grad_norm": 0.012506887073625421, |
|
"learning_rate": 9.474138630089123e-07, |
|
"loss": 0.4114, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 10.720450281425892, |
|
"grad_norm": 0.012310549268342016, |
|
"learning_rate": 9.449122396441343e-07, |
|
"loss": 0.4048, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 10.73545966228893, |
|
"grad_norm": 0.012239695816293078, |
|
"learning_rate": 9.424109620272652e-07, |
|
"loss": 0.4208, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 10.75046904315197, |
|
"grad_norm": 0.012441047937013722, |
|
"learning_rate": 9.399100458570996e-07, |
|
"loss": 0.4039, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 10.76547842401501, |
|
"grad_norm": 0.01206547884076615, |
|
"learning_rate": 9.374095068301656e-07, |
|
"loss": 0.4103, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 10.78048780487805, |
|
"grad_norm": 0.012386755042071106, |
|
"learning_rate": 9.349093606406231e-07, |
|
"loss": 0.4163, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 10.795497185741088, |
|
"grad_norm": 0.012498396538373382, |
|
"learning_rate": 9.324096229801673e-07, |
|
"loss": 0.4196, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.810506566604127, |
|
"grad_norm": 0.012178639675100663, |
|
"learning_rate": 9.299103095379281e-07, |
|
"loss": 0.4135, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 10.825515947467167, |
|
"grad_norm": 0.012463279304890761, |
|
"learning_rate": 9.274114360003737e-07, |
|
"loss": 0.421, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 10.840525328330207, |
|
"grad_norm": 0.012419698179239286, |
|
"learning_rate": 9.249130180512116e-07, |
|
"loss": 0.4138, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 10.855534709193245, |
|
"grad_norm": 0.012609451632788022, |
|
"learning_rate": 9.224150713712894e-07, |
|
"loss": 0.4111, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 10.870544090056285, |
|
"grad_norm": 0.01264688365836291, |
|
"learning_rate": 9.199176116384973e-07, |
|
"loss": 0.4101, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 10.885553470919325, |
|
"grad_norm": 0.012922477609598963, |
|
"learning_rate": 9.174206545276677e-07, |
|
"loss": 0.4103, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 10.885553470919325, |
|
"eval_loss": 0.3998468220233917, |
|
"eval_runtime": 13.6727, |
|
"eval_samples_per_second": 32.693, |
|
"eval_steps_per_second": 2.048, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 10.900562851782365, |
|
"grad_norm": 0.012222567401425159, |
|
"learning_rate": 9.149242157104806e-07, |
|
"loss": 0.4028, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 10.915572232645403, |
|
"grad_norm": 0.012431994303602674, |
|
"learning_rate": 9.12428310855362e-07, |
|
"loss": 0.4252, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 10.930581613508442, |
|
"grad_norm": 0.01224992043513614, |
|
"learning_rate": 9.099329556273865e-07, |
|
"loss": 0.4194, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 10.945590994371482, |
|
"grad_norm": 0.012591228897490484, |
|
"learning_rate": 9.074381656881796e-07, |
|
"loss": 0.4162, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.960600375234522, |
|
"grad_norm": 0.01281665111811267, |
|
"learning_rate": 9.049439566958175e-07, |
|
"loss": 0.4168, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 10.975609756097562, |
|
"grad_norm": 0.012052834238919263, |
|
"learning_rate": 9.024503443047318e-07, |
|
"loss": 0.4195, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 10.9906191369606, |
|
"grad_norm": 0.012847179302409485, |
|
"learning_rate": 8.999573441656089e-07, |
|
"loss": 0.4158, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.012847179302409485, |
|
"learning_rate": 8.974649719252928e-07, |
|
"loss": 0.4147, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 11.01500938086304, |
|
"grad_norm": 0.016922785752928025, |
|
"learning_rate": 8.949732432266866e-07, |
|
"loss": 0.4058, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 11.03001876172608, |
|
"grad_norm": 0.012040148173995293, |
|
"learning_rate": 8.924821737086535e-07, |
|
"loss": 0.4197, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 11.045028142589118, |
|
"grad_norm": 0.01243791973891847, |
|
"learning_rate": 8.899917790059207e-07, |
|
"loss": 0.4225, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 11.060037523452158, |
|
"grad_norm": 0.01195989075560471, |
|
"learning_rate": 8.875020747489793e-07, |
|
"loss": 0.4163, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 11.075046904315197, |
|
"grad_norm": 0.012609607466383803, |
|
"learning_rate": 8.850130765639872e-07, |
|
"loss": 0.4229, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 11.090056285178237, |
|
"grad_norm": 0.012478853178643596, |
|
"learning_rate": 8.825248000726713e-07, |
|
"loss": 0.4203, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 11.105065666041275, |
|
"grad_norm": 0.012533296902894696, |
|
"learning_rate": 8.80037260892227e-07, |
|
"loss": 0.4097, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 11.120075046904315, |
|
"grad_norm": 0.012463619425894766, |
|
"learning_rate": 8.775504746352246e-07, |
|
"loss": 0.4099, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 11.135084427767355, |
|
"grad_norm": 0.012572037460674756, |
|
"learning_rate": 8.750644569095072e-07, |
|
"loss": 0.4207, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 11.150093808630395, |
|
"grad_norm": 0.012817195911965876, |
|
"learning_rate": 8.72579223318095e-07, |
|
"loss": 0.4139, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 11.165103189493433, |
|
"grad_norm": 0.013023585144428015, |
|
"learning_rate": 8.70094789459087e-07, |
|
"loss": 0.4181, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 11.180112570356473, |
|
"grad_norm": 0.012339182343402779, |
|
"learning_rate": 8.676111709255614e-07, |
|
"loss": 0.4186, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 11.195121951219512, |
|
"grad_norm": 0.012231541298556112, |
|
"learning_rate": 8.651283833054808e-07, |
|
"loss": 0.4087, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 11.210131332082552, |
|
"grad_norm": 0.012477160954476265, |
|
"learning_rate": 8.626464421815918e-07, |
|
"loss": 0.4223, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 11.22514071294559, |
|
"grad_norm": 0.01247166784817762, |
|
"learning_rate": 8.601653631313287e-07, |
|
"loss": 0.4218, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 11.24015009380863, |
|
"grad_norm": 0.012436312283189853, |
|
"learning_rate": 8.576851617267149e-07, |
|
"loss": 0.4148, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.25515947467167, |
|
"grad_norm": 0.012388511629756823, |
|
"learning_rate": 8.552058535342652e-07, |
|
"loss": 0.4127, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 11.27016885553471, |
|
"grad_norm": 0.012312475936173602, |
|
"learning_rate": 8.527274541148884e-07, |
|
"loss": 0.4085, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 11.285178236397748, |
|
"grad_norm": 0.011970201855727553, |
|
"learning_rate": 8.502499790237899e-07, |
|
"loss": 0.4007, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 11.300187617260788, |
|
"grad_norm": 0.012623949917184176, |
|
"learning_rate": 8.477734438103735e-07, |
|
"loss": 0.4119, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 11.315196998123827, |
|
"grad_norm": 0.012212246192249409, |
|
"learning_rate": 8.452978640181444e-07, |
|
"loss": 0.4018, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 11.330206378986867, |
|
"grad_norm": 0.012416638149246855, |
|
"learning_rate": 8.428232551846101e-07, |
|
"loss": 0.4088, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 11.345215759849907, |
|
"grad_norm": 0.012132111502099707, |
|
"learning_rate": 8.40349632841185e-07, |
|
"loss": 0.4114, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 11.360225140712945, |
|
"grad_norm": 0.012267161024372699, |
|
"learning_rate": 8.378770125130924e-07, |
|
"loss": 0.4111, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 11.375234521575985, |
|
"grad_norm": 0.01283364696949894, |
|
"learning_rate": 8.354054097192659e-07, |
|
"loss": 0.4191, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 11.390243902439025, |
|
"grad_norm": 0.012709567548725487, |
|
"learning_rate": 8.329348399722533e-07, |
|
"loss": 0.4128, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 11.405253283302065, |
|
"grad_norm": 0.012340710769494431, |
|
"learning_rate": 8.304653187781175e-07, |
|
"loss": 0.4011, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 11.420262664165103, |
|
"grad_norm": 0.012429569405680763, |
|
"learning_rate": 8.279968616363417e-07, |
|
"loss": 0.4074, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 11.435272045028142, |
|
"grad_norm": 0.011810395153086212, |
|
"learning_rate": 8.2552948403973e-07, |
|
"loss": 0.4134, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 11.450281425891182, |
|
"grad_norm": 0.012235707581225837, |
|
"learning_rate": 8.230632014743114e-07, |
|
"loss": 0.4209, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 11.465290806754222, |
|
"grad_norm": 0.012394322030979758, |
|
"learning_rate": 8.205980294192421e-07, |
|
"loss": 0.4156, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 11.48030018761726, |
|
"grad_norm": 0.01233224667407161, |
|
"learning_rate": 8.181339833467078e-07, |
|
"loss": 0.4129, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 11.4953095684803, |
|
"grad_norm": 0.012894820199788238, |
|
"learning_rate": 8.156710787218277e-07, |
|
"loss": 0.4022, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 11.51031894934334, |
|
"grad_norm": 0.012115396789355592, |
|
"learning_rate": 8.132093310025571e-07, |
|
"loss": 0.4227, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 11.52532833020638, |
|
"grad_norm": 0.01250946865021327, |
|
"learning_rate": 8.107487556395901e-07, |
|
"loss": 0.4167, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 11.540337711069418, |
|
"grad_norm": 0.01235958458093249, |
|
"learning_rate": 8.082893680762618e-07, |
|
"loss": 0.4159, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 11.555347091932457, |
|
"grad_norm": 0.01251555314637417, |
|
"learning_rate": 8.058311837484535e-07, |
|
"loss": 0.4179, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 11.570356472795497, |
|
"grad_norm": 0.012567617246515256, |
|
"learning_rate": 8.03374218084494e-07, |
|
"loss": 0.4139, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 11.585365853658537, |
|
"grad_norm": 0.012363452875523725, |
|
"learning_rate": 8.009184865050639e-07, |
|
"loss": 0.4125, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 11.600375234521575, |
|
"grad_norm": 0.012315468666145416, |
|
"learning_rate": 7.984640044230983e-07, |
|
"loss": 0.4125, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 11.615384615384615, |
|
"grad_norm": 0.012111170617275159, |
|
"learning_rate": 7.960107872436887e-07, |
|
"loss": 0.4082, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 11.630393996247655, |
|
"grad_norm": 0.012314813108895683, |
|
"learning_rate": 7.935588503639891e-07, |
|
"loss": 0.4205, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 11.645403377110695, |
|
"grad_norm": 0.012195711107959282, |
|
"learning_rate": 7.91108209173118e-07, |
|
"loss": 0.4175, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 11.660412757973734, |
|
"grad_norm": 0.012204558616190958, |
|
"learning_rate": 7.886588790520608e-07, |
|
"loss": 0.4176, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 11.675422138836772, |
|
"grad_norm": 0.012574502223338559, |
|
"learning_rate": 7.862108753735752e-07, |
|
"loss": 0.4141, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 11.690431519699812, |
|
"grad_norm": 0.012138755761975865, |
|
"learning_rate": 7.837642135020928e-07, |
|
"loss": 0.4144, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.705440900562852, |
|
"grad_norm": 0.012892259806495502, |
|
"learning_rate": 7.813189087936242e-07, |
|
"loss": 0.4165, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 11.720450281425892, |
|
"grad_norm": 0.012462979592728919, |
|
"learning_rate": 7.788749765956619e-07, |
|
"loss": 0.4018, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 11.73545966228893, |
|
"grad_norm": 0.012473208835669184, |
|
"learning_rate": 7.764324322470841e-07, |
|
"loss": 0.4136, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 11.75046904315197, |
|
"grad_norm": 0.01289776168996197, |
|
"learning_rate": 7.739912910780589e-07, |
|
"loss": 0.4199, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 11.76547842401501, |
|
"grad_norm": 0.01217754538151564, |
|
"learning_rate": 7.715515684099462e-07, |
|
"loss": 0.4151, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 11.78048780487805, |
|
"grad_norm": 0.012374436487471281, |
|
"learning_rate": 7.691132795552042e-07, |
|
"loss": 0.4076, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 11.795497185741088, |
|
"grad_norm": 0.012391305303445135, |
|
"learning_rate": 7.666764398172917e-07, |
|
"loss": 0.4241, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 11.810506566604127, |
|
"grad_norm": 0.012727898092571818, |
|
"learning_rate": 7.642410644905726e-07, |
|
"loss": 0.4066, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 11.825515947467167, |
|
"grad_norm": 0.012072572789924537, |
|
"learning_rate": 7.618071688602198e-07, |
|
"loss": 0.411, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 11.840525328330207, |
|
"grad_norm": 0.012477736514295582, |
|
"learning_rate": 7.593747682021181e-07, |
|
"loss": 0.4162, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.855534709193245, |
|
"grad_norm": 0.01223133703780588, |
|
"learning_rate": 7.569438777827705e-07, |
|
"loss": 0.4139, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 11.870544090056285, |
|
"grad_norm": 0.012134907953234304, |
|
"learning_rate": 7.545145128592008e-07, |
|
"loss": 0.4143, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 11.870544090056285, |
|
"eval_loss": 0.3981357216835022, |
|
"eval_runtime": 13.8587, |
|
"eval_samples_per_second": 32.254, |
|
"eval_steps_per_second": 2.02, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 11.885553470919325, |
|
"grad_norm": 0.012556981627557074, |
|
"learning_rate": 7.520866886788587e-07, |
|
"loss": 0.4137, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 11.900562851782365, |
|
"grad_norm": 0.012816523723577786, |
|
"learning_rate": 7.496604204795234e-07, |
|
"loss": 0.4035, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 11.915572232645403, |
|
"grad_norm": 0.012143845555033037, |
|
"learning_rate": 7.472357234892081e-07, |
|
"loss": 0.4006, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 11.930581613508442, |
|
"grad_norm": 0.012405667588160222, |
|
"learning_rate": 7.448126129260651e-07, |
|
"loss": 0.4086, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 11.945590994371482, |
|
"grad_norm": 0.012494666356775595, |
|
"learning_rate": 7.423911039982893e-07, |
|
"loss": 0.4188, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 11.960600375234522, |
|
"grad_norm": 0.012828825056010124, |
|
"learning_rate": 7.399712119040236e-07, |
|
"loss": 0.4253, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 11.975609756097562, |
|
"grad_norm": 0.01248045961928763, |
|
"learning_rate": 7.375529518312636e-07, |
|
"loss": 0.4094, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 11.9906191369606, |
|
"grad_norm": 0.011879777000295733, |
|
"learning_rate": 7.3513633895776e-07, |
|
"loss": 0.4089, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.01480589004238864, |
|
"learning_rate": 7.327213884509272e-07, |
|
"loss": 0.4039, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 12.01500938086304, |
|
"grad_norm": 0.015597993205706172, |
|
"learning_rate": 7.303081154677451e-07, |
|
"loss": 0.4125, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 12.03001876172608, |
|
"grad_norm": 0.01238599658294667, |
|
"learning_rate": 7.278965351546648e-07, |
|
"loss": 0.4199, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 12.045028142589118, |
|
"grad_norm": 0.012326415759353096, |
|
"learning_rate": 7.254866626475152e-07, |
|
"loss": 0.4065, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 12.060037523452158, |
|
"grad_norm": 0.01249682081721596, |
|
"learning_rate": 7.230785130714037e-07, |
|
"loss": 0.4188, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 12.075046904315197, |
|
"grad_norm": 0.012342465641942092, |
|
"learning_rate": 7.206721015406266e-07, |
|
"loss": 0.4051, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 12.090056285178237, |
|
"grad_norm": 0.012375842848816656, |
|
"learning_rate": 7.182674431585702e-07, |
|
"loss": 0.4144, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 12.105065666041275, |
|
"grad_norm": 0.01235742358239482, |
|
"learning_rate": 7.158645530176184e-07, |
|
"loss": 0.4153, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 12.120075046904315, |
|
"grad_norm": 0.01242364276675938, |
|
"learning_rate": 7.134634461990569e-07, |
|
"loss": 0.4198, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 12.135084427767355, |
|
"grad_norm": 0.011962847590866504, |
|
"learning_rate": 7.110641377729777e-07, |
|
"loss": 0.4115, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 12.150093808630395, |
|
"grad_norm": 0.012325487589974966, |
|
"learning_rate": 7.086666427981868e-07, |
|
"loss": 0.4125, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 12.165103189493433, |
|
"grad_norm": 0.012286797408210898, |
|
"learning_rate": 7.062709763221078e-07, |
|
"loss": 0.4087, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 12.180112570356473, |
|
"grad_norm": 0.012087334628339325, |
|
"learning_rate": 7.038771533806883e-07, |
|
"loss": 0.4183, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 12.195121951219512, |
|
"grad_norm": 0.01206278765338631, |
|
"learning_rate": 7.014851889983057e-07, |
|
"loss": 0.4171, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 12.210131332082552, |
|
"grad_norm": 0.01214504139796841, |
|
"learning_rate": 6.990950981876709e-07, |
|
"loss": 0.4016, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 12.22514071294559, |
|
"grad_norm": 0.012463474933901226, |
|
"learning_rate": 6.967068959497376e-07, |
|
"loss": 0.4138, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 12.24015009380863, |
|
"grad_norm": 0.012038537835947886, |
|
"learning_rate": 6.94320597273605e-07, |
|
"loss": 0.4072, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 12.25515947467167, |
|
"grad_norm": 0.012375869455876078, |
|
"learning_rate": 6.919362171364261e-07, |
|
"loss": 0.4187, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 12.27016885553471, |
|
"grad_norm": 0.012447453448819253, |
|
"learning_rate": 6.895537705033107e-07, |
|
"loss": 0.4072, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 12.285178236397748, |
|
"grad_norm": 0.012624813232774283, |
|
"learning_rate": 6.871732723272354e-07, |
|
"loss": 0.4084, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 12.300187617260788, |
|
"grad_norm": 0.012070464285206625, |
|
"learning_rate": 6.847947375489464e-07, |
|
"loss": 0.4091, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 12.315196998123827, |
|
"grad_norm": 0.012323123920528901, |
|
"learning_rate": 6.824181810968674e-07, |
|
"loss": 0.4077, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 12.330206378986867, |
|
"grad_norm": 0.012423328528856116, |
|
"learning_rate": 6.800436178870057e-07, |
|
"loss": 0.4176, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 12.345215759849907, |
|
"grad_norm": 0.011822785847084157, |
|
"learning_rate": 6.776710628228576e-07, |
|
"loss": 0.4072, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 12.360225140712945, |
|
"grad_norm": 0.01263193125503588, |
|
"learning_rate": 6.753005307953165e-07, |
|
"loss": 0.4125, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 12.375234521575985, |
|
"grad_norm": 0.011667379545734729, |
|
"learning_rate": 6.729320366825783e-07, |
|
"loss": 0.4113, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 12.390243902439025, |
|
"grad_norm": 0.012237311305074636, |
|
"learning_rate": 6.705655953500483e-07, |
|
"loss": 0.413, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 12.405253283302065, |
|
"grad_norm": 0.012427319712453859, |
|
"learning_rate": 6.682012216502483e-07, |
|
"loss": 0.4189, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 12.420262664165103, |
|
"grad_norm": 0.012117640495853378, |
|
"learning_rate": 6.658389304227219e-07, |
|
"loss": 0.4157, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 12.435272045028142, |
|
"grad_norm": 0.012064176015325582, |
|
"learning_rate": 6.634787364939434e-07, |
|
"loss": 0.4048, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 12.450281425891182, |
|
"grad_norm": 0.012531719531208345, |
|
"learning_rate": 6.611206546772237e-07, |
|
"loss": 0.426, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 12.465290806754222, |
|
"grad_norm": 0.012359334486955045, |
|
"learning_rate": 6.587646997726173e-07, |
|
"loss": 0.4065, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 12.48030018761726, |
|
"grad_norm": 0.011957899962627493, |
|
"learning_rate": 6.564108865668297e-07, |
|
"loss": 0.4048, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 12.4953095684803, |
|
"grad_norm": 0.01170019242610937, |
|
"learning_rate": 6.540592298331238e-07, |
|
"loss": 0.4126, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 12.51031894934334, |
|
"grad_norm": 0.012331576031846944, |
|
"learning_rate": 6.517097443312288e-07, |
|
"loss": 0.4049, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 12.52532833020638, |
|
"grad_norm": 0.012366827738055758, |
|
"learning_rate": 6.493624448072457e-07, |
|
"loss": 0.4127, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 12.540337711069418, |
|
"grad_norm": 0.011860523908193414, |
|
"learning_rate": 6.470173459935559e-07, |
|
"loss": 0.4172, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 12.555347091932457, |
|
"grad_norm": 0.012415007915885625, |
|
"learning_rate": 6.446744626087293e-07, |
|
"loss": 0.4137, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 12.570356472795497, |
|
"grad_norm": 0.012431394669758795, |
|
"learning_rate": 6.423338093574293e-07, |
|
"loss": 0.4136, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 12.585365853658537, |
|
"grad_norm": 0.012176504783944421, |
|
"learning_rate": 6.399954009303239e-07, |
|
"loss": 0.411, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 12.600375234521575, |
|
"grad_norm": 0.01197249057193435, |
|
"learning_rate": 6.376592520039912e-07, |
|
"loss": 0.4141, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 12.615384615384615, |
|
"grad_norm": 0.012336864192816516, |
|
"learning_rate": 6.35325377240828e-07, |
|
"loss": 0.4138, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 12.630393996247655, |
|
"grad_norm": 0.011861502798104129, |
|
"learning_rate": 6.329937912889581e-07, |
|
"loss": 0.3931, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 12.645403377110695, |
|
"grad_norm": 0.012391892541325432, |
|
"learning_rate": 6.306645087821392e-07, |
|
"loss": 0.4241, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 12.660412757973734, |
|
"grad_norm": 0.012478980233980006, |
|
"learning_rate": 6.283375443396726e-07, |
|
"loss": 0.4161, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 12.675422138836772, |
|
"grad_norm": 0.012527299390245597, |
|
"learning_rate": 6.260129125663105e-07, |
|
"loss": 0.41, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 12.690431519699812, |
|
"grad_norm": 0.011927959930610983, |
|
"learning_rate": 6.236906280521646e-07, |
|
"loss": 0.4055, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 12.705440900562852, |
|
"grad_norm": 0.012720010931915432, |
|
"learning_rate": 6.213707053726145e-07, |
|
"loss": 0.4173, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 12.720450281425892, |
|
"grad_norm": 0.012556965039986982, |
|
"learning_rate": 6.190531590882158e-07, |
|
"loss": 0.4176, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 12.73545966228893, |
|
"grad_norm": 0.012646171657061266, |
|
"learning_rate": 6.167380037446094e-07, |
|
"loss": 0.4037, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.75046904315197, |
|
"grad_norm": 0.012363455660222528, |
|
"learning_rate": 6.144252538724302e-07, |
|
"loss": 0.4069, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 12.76547842401501, |
|
"grad_norm": 0.012481896036275676, |
|
"learning_rate": 6.12114923987215e-07, |
|
"loss": 0.4098, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 12.78048780487805, |
|
"grad_norm": 0.01219025499093808, |
|
"learning_rate": 6.098070285893128e-07, |
|
"loss": 0.4128, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 12.795497185741088, |
|
"grad_norm": 0.012221753177996873, |
|
"learning_rate": 6.075015821637922e-07, |
|
"loss": 0.4124, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 12.810506566604127, |
|
"grad_norm": 0.01242451231763906, |
|
"learning_rate": 6.051985991803517e-07, |
|
"loss": 0.4055, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 12.825515947467167, |
|
"grad_norm": 0.012105488715900472, |
|
"learning_rate": 6.028980940932282e-07, |
|
"loss": 0.398, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 12.840525328330207, |
|
"grad_norm": 0.012102745982651956, |
|
"learning_rate": 6.006000813411069e-07, |
|
"loss": 0.4152, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 12.855534709193245, |
|
"grad_norm": 0.012200334931675806, |
|
"learning_rate": 5.983045753470307e-07, |
|
"loss": 0.4146, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 12.855534709193245, |
|
"eval_loss": 0.39677873253822327, |
|
"eval_runtime": 13.689, |
|
"eval_samples_per_second": 32.654, |
|
"eval_steps_per_second": 2.045, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 12.870544090056285, |
|
"grad_norm": 0.012649382929243192, |
|
"learning_rate": 5.960115905183078e-07, |
|
"loss": 0.4081, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 12.885553470919325, |
|
"grad_norm": 0.011929152112251899, |
|
"learning_rate": 5.937211412464245e-07, |
|
"loss": 0.4031, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 12.900562851782365, |
|
"grad_norm": 0.011962658774633357, |
|
"learning_rate": 5.914332419069519e-07, |
|
"loss": 0.4111, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 12.915572232645403, |
|
"grad_norm": 0.012120445434134606, |
|
"learning_rate": 5.89147906859458e-07, |
|
"loss": 0.4057, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 12.930581613508442, |
|
"grad_norm": 0.012223863205227052, |
|
"learning_rate": 5.868651504474156e-07, |
|
"loss": 0.422, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 12.945590994371482, |
|
"grad_norm": 0.012093907000714669, |
|
"learning_rate": 5.845849869981136e-07, |
|
"loss": 0.4063, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 12.960600375234522, |
|
"grad_norm": 0.011744441936770404, |
|
"learning_rate": 5.823074308225668e-07, |
|
"loss": 0.4214, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 12.975609756097562, |
|
"grad_norm": 0.012588359080999381, |
|
"learning_rate": 5.800324962154251e-07, |
|
"loss": 0.4104, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 12.9906191369606, |
|
"grad_norm": 0.012041813356469647, |
|
"learning_rate": 5.777601974548866e-07, |
|
"loss": 0.409, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.017454775927059043, |
|
"learning_rate": 5.754905488026034e-07, |
|
"loss": 0.4009, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 13.01500938086304, |
|
"grad_norm": 0.012590573823027078, |
|
"learning_rate": 5.732235645035963e-07, |
|
"loss": 0.4077, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 13.03001876172608, |
|
"grad_norm": 0.012476473937542754, |
|
"learning_rate": 5.709592587861637e-07, |
|
"loss": 0.409, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 13.045028142589118, |
|
"grad_norm": 0.01272863688069376, |
|
"learning_rate": 5.686976458617921e-07, |
|
"loss": 0.4203, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 13.060037523452158, |
|
"grad_norm": 0.012212260852439769, |
|
"learning_rate": 5.664387399250672e-07, |
|
"loss": 0.4052, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 13.075046904315197, |
|
"grad_norm": 0.012019289466009251, |
|
"learning_rate": 5.641825551535848e-07, |
|
"loss": 0.4038, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 13.090056285178237, |
|
"grad_norm": 0.011953777292384806, |
|
"learning_rate": 5.619291057078618e-07, |
|
"loss": 0.3931, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 13.105065666041275, |
|
"grad_norm": 0.01214865490238678, |
|
"learning_rate": 5.596784057312474e-07, |
|
"loss": 0.4007, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 13.120075046904315, |
|
"grad_norm": 0.011912792450191237, |
|
"learning_rate": 5.574304693498345e-07, |
|
"loss": 0.4192, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 13.135084427767355, |
|
"grad_norm": 0.012090316577047174, |
|
"learning_rate": 5.551853106723709e-07, |
|
"loss": 0.4073, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 13.150093808630395, |
|
"grad_norm": 0.013010062600398912, |
|
"learning_rate": 5.529429437901696e-07, |
|
"loss": 0.4227, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 13.165103189493433, |
|
"grad_norm": 0.01280067493337377, |
|
"learning_rate": 5.507033827770225e-07, |
|
"loss": 0.4126, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 13.180112570356473, |
|
"grad_norm": 0.012398125964523131, |
|
"learning_rate": 5.484666416891108e-07, |
|
"loss": 0.4023, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 13.195121951219512, |
|
"grad_norm": 0.012420941719772401, |
|
"learning_rate": 5.462327345649165e-07, |
|
"loss": 0.4044, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 13.210131332082552, |
|
"grad_norm": 0.012049668556132256, |
|
"learning_rate": 5.440016754251364e-07, |
|
"loss": 0.4175, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 13.22514071294559, |
|
"grad_norm": 0.012258701638030365, |
|
"learning_rate": 5.417734782725896e-07, |
|
"loss": 0.416, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 13.24015009380863, |
|
"grad_norm": 0.01192198170753603, |
|
"learning_rate": 5.395481570921349e-07, |
|
"loss": 0.4039, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 13.25515947467167, |
|
"grad_norm": 0.012554184751834683, |
|
"learning_rate": 5.373257258505796e-07, |
|
"loss": 0.4156, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 13.27016885553471, |
|
"grad_norm": 0.012851533427761994, |
|
"learning_rate": 5.351061984965931e-07, |
|
"loss": 0.4197, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 13.285178236397748, |
|
"grad_norm": 0.012316726606129447, |
|
"learning_rate": 5.328895889606193e-07, |
|
"loss": 0.4236, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 13.300187617260788, |
|
"grad_norm": 0.013029382216239293, |
|
"learning_rate": 5.306759111547881e-07, |
|
"loss": 0.427, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 13.315196998123827, |
|
"grad_norm": 0.012480929561426931, |
|
"learning_rate": 5.284651789728296e-07, |
|
"loss": 0.4107, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 13.330206378986867, |
|
"grad_norm": 0.01199215605719895, |
|
"learning_rate": 5.262574062899866e-07, |
|
"loss": 0.3977, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 13.345215759849907, |
|
"grad_norm": 0.011780966276621337, |
|
"learning_rate": 5.240526069629264e-07, |
|
"loss": 0.4078, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 13.360225140712945, |
|
"grad_norm": 0.012128143743191527, |
|
"learning_rate": 5.218507948296556e-07, |
|
"loss": 0.4143, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 13.375234521575985, |
|
"grad_norm": 0.011698499209161491, |
|
"learning_rate": 5.196519837094306e-07, |
|
"loss": 0.3999, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 13.390243902439025, |
|
"grad_norm": 0.011777330802458462, |
|
"learning_rate": 5.174561874026741e-07, |
|
"loss": 0.4202, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 13.405253283302065, |
|
"grad_norm": 0.012154703240758565, |
|
"learning_rate": 5.152634196908861e-07, |
|
"loss": 0.411, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 13.420262664165103, |
|
"grad_norm": 0.012477581253436483, |
|
"learning_rate": 5.13073694336558e-07, |
|
"loss": 0.3935, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 13.435272045028142, |
|
"grad_norm": 0.012300889865186484, |
|
"learning_rate": 5.108870250830881e-07, |
|
"loss": 0.4173, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 13.450281425891182, |
|
"grad_norm": 0.01236137621122289, |
|
"learning_rate": 5.087034256546912e-07, |
|
"loss": 0.4063, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 13.465290806754222, |
|
"grad_norm": 0.01178006897188101, |
|
"learning_rate": 5.065229097563164e-07, |
|
"loss": 0.4027, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 13.48030018761726, |
|
"grad_norm": 0.012622903840672843, |
|
"learning_rate": 5.043454910735593e-07, |
|
"loss": 0.4007, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.4953095684803, |
|
"grad_norm": 0.01232943004197537, |
|
"learning_rate": 5.021711832725767e-07, |
|
"loss": 0.4101, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 13.51031894934334, |
|
"grad_norm": 0.012650338163967687, |
|
"learning_rate": 5.000000000000002e-07, |
|
"loss": 0.4109, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 13.52532833020638, |
|
"grad_norm": 0.012308627141887482, |
|
"learning_rate": 4.978319548828504e-07, |
|
"loss": 0.4167, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 13.540337711069418, |
|
"grad_norm": 0.01247431058858993, |
|
"learning_rate": 4.956670615284528e-07, |
|
"loss": 0.4083, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 13.555347091932457, |
|
"grad_norm": 0.012521896733928029, |
|
"learning_rate": 4.935053335243508e-07, |
|
"loss": 0.41, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 13.570356472795497, |
|
"grad_norm": 0.01224071942990429, |
|
"learning_rate": 4.913467844382217e-07, |
|
"loss": 0.411, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 13.585365853658537, |
|
"grad_norm": 0.011911018144128583, |
|
"learning_rate": 4.891914278177907e-07, |
|
"loss": 0.4131, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 13.600375234521575, |
|
"grad_norm": 0.012173795572423684, |
|
"learning_rate": 4.870392771907454e-07, |
|
"loss": 0.4172, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 13.615384615384615, |
|
"grad_norm": 0.012787639508096029, |
|
"learning_rate": 4.848903460646522e-07, |
|
"loss": 0.4082, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 13.630393996247655, |
|
"grad_norm": 0.012810162920205651, |
|
"learning_rate": 4.827446479268712e-07, |
|
"loss": 0.4156, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 13.645403377110695, |
|
"grad_norm": 0.012158067835970099, |
|
"learning_rate": 4.806021962444707e-07, |
|
"loss": 0.4066, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 13.660412757973734, |
|
"grad_norm": 0.012243835560067891, |
|
"learning_rate": 4.784630044641435e-07, |
|
"loss": 0.4141, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 13.675422138836772, |
|
"grad_norm": 0.012182441628748585, |
|
"learning_rate": 4.7632708601212215e-07, |
|
"loss": 0.4132, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 13.690431519699812, |
|
"grad_norm": 0.012064707690036225, |
|
"learning_rate": 4.7419445429409487e-07, |
|
"loss": 0.4004, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 13.705440900562852, |
|
"grad_norm": 0.012058309173201142, |
|
"learning_rate": 4.7206512269512125e-07, |
|
"loss": 0.4065, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 13.720450281425892, |
|
"grad_norm": 0.011914352483260126, |
|
"learning_rate": 4.6993910457954864e-07, |
|
"loss": 0.4074, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 13.73545966228893, |
|
"grad_norm": 0.01218029087555133, |
|
"learning_rate": 4.6781641329092705e-07, |
|
"loss": 0.4167, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 13.75046904315197, |
|
"grad_norm": 0.013057166343761247, |
|
"learning_rate": 4.6569706215192693e-07, |
|
"loss": 0.4068, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 13.76547842401501, |
|
"grad_norm": 0.012604582774669597, |
|
"learning_rate": 4.635810644642552e-07, |
|
"loss": 0.4144, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 13.78048780487805, |
|
"grad_norm": 0.011935794848686154, |
|
"learning_rate": 4.614684335085708e-07, |
|
"loss": 0.4009, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 13.795497185741088, |
|
"grad_norm": 0.012666727346839646, |
|
"learning_rate": 4.5935918254440274e-07, |
|
"loss": 0.4102, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 13.810506566604127, |
|
"grad_norm": 0.01205052533335544, |
|
"learning_rate": 4.572533248100652e-07, |
|
"loss": 0.4107, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 13.825515947467167, |
|
"grad_norm": 0.012515947617199258, |
|
"learning_rate": 4.5515087352257606e-07, |
|
"loss": 0.4058, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 13.840525328330207, |
|
"grad_norm": 0.01178275512252074, |
|
"learning_rate": 4.530518418775733e-07, |
|
"loss": 0.3981, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 13.840525328330207, |
|
"eval_loss": 0.39569905400276184, |
|
"eval_runtime": 13.9015, |
|
"eval_samples_per_second": 32.155, |
|
"eval_steps_per_second": 2.014, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 13.855534709193245, |
|
"grad_norm": 0.012172425812137781, |
|
"learning_rate": 4.50956243049232e-07, |
|
"loss": 0.41, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 13.870544090056285, |
|
"grad_norm": 0.01225111633980482, |
|
"learning_rate": 4.488640901901818e-07, |
|
"loss": 0.4132, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 13.885553470919325, |
|
"grad_norm": 0.012388855766636281, |
|
"learning_rate": 4.467753964314245e-07, |
|
"loss": 0.4108, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 13.900562851782365, |
|
"grad_norm": 0.012447467722629292, |
|
"learning_rate": 4.4469017488225124e-07, |
|
"loss": 0.4181, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 13.915572232645403, |
|
"grad_norm": 0.0118810129924204, |
|
"learning_rate": 4.426084386301607e-07, |
|
"loss": 0.4139, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 13.930581613508442, |
|
"grad_norm": 0.0118987273245147, |
|
"learning_rate": 4.40530200740777e-07, |
|
"loss": 0.4192, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.945590994371482, |
|
"grad_norm": 0.012530895292621011, |
|
"learning_rate": 4.3845547425776707e-07, |
|
"loss": 0.4098, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 13.960600375234522, |
|
"grad_norm": 0.011523586359779753, |
|
"learning_rate": 4.3638427220275876e-07, |
|
"loss": 0.4048, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 13.975609756097562, |
|
"grad_norm": 0.012130640942285414, |
|
"learning_rate": 4.3431660757526043e-07, |
|
"loss": 0.4003, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 13.9906191369606, |
|
"grad_norm": 0.01306375560358271, |
|
"learning_rate": 4.3225249335257795e-07, |
|
"loss": 0.419, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.01306375560358271, |
|
"learning_rate": 4.3019194248973377e-07, |
|
"loss": 0.4085, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 14.01500938086304, |
|
"grad_norm": 0.01702987966177615, |
|
"learning_rate": 4.281349679193861e-07, |
|
"loss": 0.4086, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 14.03001876172608, |
|
"grad_norm": 0.01210923339196911, |
|
"learning_rate": 4.2608158255174597e-07, |
|
"loss": 0.4112, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 14.045028142589118, |
|
"grad_norm": 0.011866596152432489, |
|
"learning_rate": 4.2403179927449864e-07, |
|
"loss": 0.4109, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 14.060037523452158, |
|
"grad_norm": 0.012607746156689865, |
|
"learning_rate": 4.219856309527211e-07, |
|
"loss": 0.4221, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 14.075046904315197, |
|
"grad_norm": 0.012486558528938796, |
|
"learning_rate": 4.1994309042880193e-07, |
|
"loss": 0.4103, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 14.090056285178237, |
|
"grad_norm": 0.012303061756963003, |
|
"learning_rate": 4.1790419052236025e-07, |
|
"loss": 0.4104, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 14.105065666041275, |
|
"grad_norm": 0.012176253504823318, |
|
"learning_rate": 4.158689440301657e-07, |
|
"loss": 0.4156, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 14.120075046904315, |
|
"grad_norm": 0.012798739423948427, |
|
"learning_rate": 4.138373637260579e-07, |
|
"loss": 0.4094, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 14.135084427767355, |
|
"grad_norm": 0.012149574296456156, |
|
"learning_rate": 4.1180946236086646e-07, |
|
"loss": 0.4153, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 14.150093808630395, |
|
"grad_norm": 0.012028612599444181, |
|
"learning_rate": 4.0978525266233064e-07, |
|
"loss": 0.4054, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 14.165103189493433, |
|
"grad_norm": 0.012231549955693293, |
|
"learning_rate": 4.0776474733502007e-07, |
|
"loss": 0.416, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 14.180112570356473, |
|
"grad_norm": 0.012421110011924136, |
|
"learning_rate": 4.0574795906025374e-07, |
|
"loss": 0.4016, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 14.195121951219512, |
|
"grad_norm": 0.011730159482603666, |
|
"learning_rate": 4.03734900496022e-07, |
|
"loss": 0.4013, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 14.210131332082552, |
|
"grad_norm": 0.012688603636352387, |
|
"learning_rate": 4.017255842769062e-07, |
|
"loss": 0.415, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 14.22514071294559, |
|
"grad_norm": 0.012404842948789518, |
|
"learning_rate": 3.9972002301399956e-07, |
|
"loss": 0.4169, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 14.24015009380863, |
|
"grad_norm": 0.012149123886554853, |
|
"learning_rate": 3.977182292948282e-07, |
|
"loss": 0.3949, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 14.25515947467167, |
|
"grad_norm": 0.012045312561245865, |
|
"learning_rate": 3.957202156832713e-07, |
|
"loss": 0.4134, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 14.27016885553471, |
|
"grad_norm": 0.01209269757907142, |
|
"learning_rate": 3.9372599471948354e-07, |
|
"loss": 0.414, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 14.285178236397748, |
|
"grad_norm": 0.011918842634659655, |
|
"learning_rate": 3.9173557891981567e-07, |
|
"loss": 0.4014, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 14.300187617260788, |
|
"grad_norm": 0.01220439396332339, |
|
"learning_rate": 3.89748980776736e-07, |
|
"loss": 0.4018, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 14.315196998123827, |
|
"grad_norm": 0.011998622448288693, |
|
"learning_rate": 3.877662127587521e-07, |
|
"loss": 0.4174, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 14.330206378986867, |
|
"grad_norm": 0.012295617446559496, |
|
"learning_rate": 3.8578728731033214e-07, |
|
"loss": 0.4102, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 14.345215759849907, |
|
"grad_norm": 0.011980609920463275, |
|
"learning_rate": 3.838122168518276e-07, |
|
"loss": 0.4006, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 14.360225140712945, |
|
"grad_norm": 0.01234999901307482, |
|
"learning_rate": 3.818410137793947e-07, |
|
"loss": 0.4083, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 14.375234521575985, |
|
"grad_norm": 0.01204318922158648, |
|
"learning_rate": 3.798736904649168e-07, |
|
"loss": 0.416, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 14.390243902439025, |
|
"grad_norm": 0.012342974076721963, |
|
"learning_rate": 3.77910259255926e-07, |
|
"loss": 0.4042, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 14.405253283302065, |
|
"grad_norm": 0.012080518503284057, |
|
"learning_rate": 3.7595073247552735e-07, |
|
"loss": 0.4148, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 14.420262664165103, |
|
"grad_norm": 0.012401425685947038, |
|
"learning_rate": 3.739951224223199e-07, |
|
"loss": 0.4166, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 14.435272045028142, |
|
"grad_norm": 0.012113429871714052, |
|
"learning_rate": 3.720434413703202e-07, |
|
"loss": 0.4031, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 14.450281425891182, |
|
"grad_norm": 0.012196068177764108, |
|
"learning_rate": 3.700957015688858e-07, |
|
"loss": 0.4115, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 14.465290806754222, |
|
"grad_norm": 0.011860016831751172, |
|
"learning_rate": 3.681519152426362e-07, |
|
"loss": 0.4212, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 14.48030018761726, |
|
"grad_norm": 0.012149681720096986, |
|
"learning_rate": 3.6621209459137926e-07, |
|
"loss": 0.4126, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 14.4953095684803, |
|
"grad_norm": 0.011922991193016708, |
|
"learning_rate": 3.6427625179003217e-07, |
|
"loss": 0.404, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 14.51031894934334, |
|
"grad_norm": 0.012235166739834057, |
|
"learning_rate": 3.623443989885462e-07, |
|
"loss": 0.4008, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 14.52532833020638, |
|
"grad_norm": 0.01261366907306786, |
|
"learning_rate": 3.604165483118299e-07, |
|
"loss": 0.4157, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 14.540337711069418, |
|
"grad_norm": 0.011959411543845642, |
|
"learning_rate": 3.5849271185967366e-07, |
|
"loss": 0.4087, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 14.555347091932457, |
|
"grad_norm": 0.011886511893275631, |
|
"learning_rate": 3.565729017066729e-07, |
|
"loss": 0.4073, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 14.570356472795497, |
|
"grad_norm": 0.012266040637982558, |
|
"learning_rate": 3.546571299021529e-07, |
|
"loss": 0.4002, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 14.585365853658537, |
|
"grad_norm": 0.011946836948162617, |
|
"learning_rate": 3.527454084700933e-07, |
|
"loss": 0.4113, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 14.600375234521575, |
|
"grad_norm": 0.012884422251256487, |
|
"learning_rate": 3.508377494090521e-07, |
|
"loss": 0.411, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 14.615384615384615, |
|
"grad_norm": 0.012401104679477528, |
|
"learning_rate": 3.4893416469208993e-07, |
|
"loss": 0.405, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 14.630393996247655, |
|
"grad_norm": 0.012397424222628246, |
|
"learning_rate": 3.4703466626669673e-07, |
|
"loss": 0.4009, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 14.645403377110695, |
|
"grad_norm": 0.01211105873440178, |
|
"learning_rate": 3.45139266054715e-07, |
|
"loss": 0.4211, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 14.660412757973734, |
|
"grad_norm": 0.012745867091250343, |
|
"learning_rate": 3.4324797595226564e-07, |
|
"loss": 0.4133, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 14.675422138836772, |
|
"grad_norm": 0.012089095003607657, |
|
"learning_rate": 3.413608078296735e-07, |
|
"loss": 0.4052, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 14.690431519699812, |
|
"grad_norm": 0.012182824041581471, |
|
"learning_rate": 3.394777735313918e-07, |
|
"loss": 0.4043, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 14.705440900562852, |
|
"grad_norm": 0.012109620293361477, |
|
"learning_rate": 3.3759888487592946e-07, |
|
"loss": 0.4059, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 14.720450281425892, |
|
"grad_norm": 0.012382313496190551, |
|
"learning_rate": 3.357241536557758e-07, |
|
"loss": 0.4086, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 14.73545966228893, |
|
"grad_norm": 0.012198508778120983, |
|
"learning_rate": 3.3385359163732664e-07, |
|
"loss": 0.4136, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 14.75046904315197, |
|
"grad_norm": 0.01280406441595411, |
|
"learning_rate": 3.319872105608107e-07, |
|
"loss": 0.4068, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 14.76547842401501, |
|
"grad_norm": 0.012130187940707679, |
|
"learning_rate": 3.3012502214021577e-07, |
|
"loss": 0.4145, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 14.78048780487805, |
|
"grad_norm": 0.011810045886264997, |
|
"learning_rate": 3.282670380632152e-07, |
|
"loss": 0.4003, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 14.795497185741088, |
|
"grad_norm": 0.01226997027635051, |
|
"learning_rate": 3.2641326999109474e-07, |
|
"loss": 0.4181, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 14.810506566604127, |
|
"grad_norm": 0.012662471100966417, |
|
"learning_rate": 3.2456372955867907e-07, |
|
"loss": 0.4058, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 14.825515947467167, |
|
"grad_norm": 0.012554555241937682, |
|
"learning_rate": 3.227184283742591e-07, |
|
"loss": 0.4011, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.825515947467167, |
|
"eval_loss": 0.3950127065181732, |
|
"eval_runtime": 13.8589, |
|
"eval_samples_per_second": 32.254, |
|
"eval_steps_per_second": 2.02, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.840525328330207, |
|
"grad_norm": 0.01206293666136027, |
|
"learning_rate": 3.20877378019518e-07, |
|
"loss": 0.4098, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 14.855534709193245, |
|
"grad_norm": 0.012160543935788812, |
|
"learning_rate": 3.190405900494606e-07, |
|
"loss": 0.4022, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 14.870544090056285, |
|
"grad_norm": 0.011862384188573029, |
|
"learning_rate": 3.17208075992339e-07, |
|
"loss": 0.4132, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 14.885553470919325, |
|
"grad_norm": 0.012348297087273847, |
|
"learning_rate": 3.153798473495811e-07, |
|
"loss": 0.4063, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 14.900562851782365, |
|
"grad_norm": 0.011971438983032082, |
|
"learning_rate": 3.135559155957186e-07, |
|
"loss": 0.4189, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 14.915572232645403, |
|
"grad_norm": 0.012225913697186104, |
|
"learning_rate": 3.117362921783134e-07, |
|
"loss": 0.4078, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 14.930581613508442, |
|
"grad_norm": 0.012483752970869568, |
|
"learning_rate": 3.0992098851788817e-07, |
|
"loss": 0.4027, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 14.945590994371482, |
|
"grad_norm": 0.012312435583901655, |
|
"learning_rate": 3.081100160078528e-07, |
|
"loss": 0.3964, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 14.960600375234522, |
|
"grad_norm": 0.011733287392883436, |
|
"learning_rate": 3.0630338601443385e-07, |
|
"loss": 0.4077, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 14.975609756097562, |
|
"grad_norm": 0.012352553640401984, |
|
"learning_rate": 3.045011098766026e-07, |
|
"loss": 0.4097, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.9906191369606, |
|
"grad_norm": 0.012691085307760875, |
|
"learning_rate": 3.027031989060046e-07, |
|
"loss": 0.4014, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 15.01500938086304, |
|
"grad_norm": 0.01450895061621053, |
|
"learning_rate": 3.009096643868877e-07, |
|
"loss": 0.8212, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 15.03001876172608, |
|
"grad_norm": 0.012442122708447712, |
|
"learning_rate": 2.991205175760322e-07, |
|
"loss": 0.4064, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 15.045028142589118, |
|
"grad_norm": 0.01177546286369461, |
|
"learning_rate": 2.9733576970267973e-07, |
|
"loss": 0.395, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 15.060037523452158, |
|
"grad_norm": 0.012583125025593713, |
|
"learning_rate": 2.955554319684629e-07, |
|
"loss": 0.404, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 15.075046904315197, |
|
"grad_norm": 0.012181333733193132, |
|
"learning_rate": 2.937795155473343e-07, |
|
"loss": 0.4163, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 15.090056285178237, |
|
"grad_norm": 0.011929578865847229, |
|
"learning_rate": 2.920080315854975e-07, |
|
"loss": 0.4091, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 15.105065666041275, |
|
"grad_norm": 0.012219751268944107, |
|
"learning_rate": 2.902409912013367e-07, |
|
"loss": 0.4087, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 15.120075046904315, |
|
"grad_norm": 0.012185881807267762, |
|
"learning_rate": 2.8847840548534695e-07, |
|
"loss": 0.3959, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 15.135084427767355, |
|
"grad_norm": 0.012095974327974867, |
|
"learning_rate": 2.8672028550006357e-07, |
|
"loss": 0.4142, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 15.150093808630395, |
|
"grad_norm": 0.012004403180987345, |
|
"learning_rate": 2.8496664227999414e-07, |
|
"loss": 0.4095, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 15.165103189493433, |
|
"grad_norm": 0.012068360546402585, |
|
"learning_rate": 2.8321748683154887e-07, |
|
"loss": 0.412, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 15.180112570356473, |
|
"grad_norm": 0.011604065137702905, |
|
"learning_rate": 2.814728301329711e-07, |
|
"loss": 0.4037, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 15.195121951219512, |
|
"grad_norm": 0.012246442251214433, |
|
"learning_rate": 2.7973268313426835e-07, |
|
"loss": 0.4176, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 15.210131332082552, |
|
"grad_norm": 0.012835581733332583, |
|
"learning_rate": 2.7799705675714437e-07, |
|
"loss": 0.4142, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 15.22514071294559, |
|
"grad_norm": 0.012179697043205245, |
|
"learning_rate": 2.762659618949298e-07, |
|
"loss": 0.4074, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 15.24015009380863, |
|
"grad_norm": 0.012369178745362223, |
|
"learning_rate": 2.745394094125141e-07, |
|
"loss": 0.3992, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 15.25515947467167, |
|
"grad_norm": 0.012579361492169629, |
|
"learning_rate": 2.7281741014627714e-07, |
|
"loss": 0.4104, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 15.27016885553471, |
|
"grad_norm": 0.01162764738709065, |
|
"learning_rate": 2.710999749040223e-07, |
|
"loss": 0.4068, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 15.285178236397748, |
|
"grad_norm": 0.01265468112430842, |
|
"learning_rate": 2.69387114464906e-07, |
|
"loss": 0.4088, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 15.300187617260788, |
|
"grad_norm": 0.011940035373381468, |
|
"learning_rate": 2.6767883957937344e-07, |
|
"loss": 0.4063, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 15.315196998123827, |
|
"grad_norm": 0.012255329102484067, |
|
"learning_rate": 2.6597516096908867e-07, |
|
"loss": 0.4069, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 15.330206378986867, |
|
"grad_norm": 0.012355083411473719, |
|
"learning_rate": 2.642760893268684e-07, |
|
"loss": 0.405, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 15.345215759849907, |
|
"grad_norm": 0.012128588311350583, |
|
"learning_rate": 2.6258163531661447e-07, |
|
"loss": 0.4085, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 15.360225140712945, |
|
"grad_norm": 0.012015480810663814, |
|
"learning_rate": 2.6089180957324654e-07, |
|
"loss": 0.4099, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 15.375234521575985, |
|
"grad_norm": 0.01236965765692212, |
|
"learning_rate": 2.5920662270263647e-07, |
|
"loss": 0.3968, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 15.390243902439025, |
|
"grad_norm": 0.012236517064876534, |
|
"learning_rate": 2.575260852815411e-07, |
|
"loss": 0.4087, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 15.405253283302065, |
|
"grad_norm": 0.011933985911797585, |
|
"learning_rate": 2.5585020785753553e-07, |
|
"loss": 0.4057, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 15.420262664165103, |
|
"grad_norm": 0.012075593886736229, |
|
"learning_rate": 2.541790009489474e-07, |
|
"loss": 0.4015, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 15.435272045028142, |
|
"grad_norm": 0.012164890137346288, |
|
"learning_rate": 2.525124750447908e-07, |
|
"loss": 0.4168, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 15.450281425891182, |
|
"grad_norm": 0.012126106786702912, |
|
"learning_rate": 2.508506406047004e-07, |
|
"loss": 0.4058, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 15.465290806754222, |
|
"grad_norm": 0.012368399052280451, |
|
"learning_rate": 2.4919350805886576e-07, |
|
"loss": 0.406, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 15.48030018761726, |
|
"grad_norm": 0.012432763679806415, |
|
"learning_rate": 2.475410878079657e-07, |
|
"loss": 0.4056, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 15.4953095684803, |
|
"grad_norm": 0.012881685474676679, |
|
"learning_rate": 2.458933902231038e-07, |
|
"loss": 0.4085, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 15.51031894934334, |
|
"grad_norm": 0.01228631484433566, |
|
"learning_rate": 2.4425042564574185e-07, |
|
"loss": 0.4017, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 15.52532833020638, |
|
"grad_norm": 0.012042020829137407, |
|
"learning_rate": 2.426122043876362e-07, |
|
"loss": 0.4046, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 15.540337711069418, |
|
"grad_norm": 0.012587414690850632, |
|
"learning_rate": 2.4097873673077296e-07, |
|
"loss": 0.408, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 15.555347091932457, |
|
"grad_norm": 0.012190544175375687, |
|
"learning_rate": 2.393500329273029e-07, |
|
"loss": 0.4097, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 15.570356472795497, |
|
"grad_norm": 0.01206814550432668, |
|
"learning_rate": 2.377261031994776e-07, |
|
"loss": 0.4064, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 15.585365853658537, |
|
"grad_norm": 0.012307556354982051, |
|
"learning_rate": 2.3610695773958434e-07, |
|
"loss": 0.4168, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 15.600375234521575, |
|
"grad_norm": 0.01222641572619058, |
|
"learning_rate": 2.3449260670988358e-07, |
|
"loss": 0.4022, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 15.615384615384615, |
|
"grad_norm": 0.012220527683488478, |
|
"learning_rate": 2.3288306024254411e-07, |
|
"loss": 0.3987, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 15.630393996247655, |
|
"grad_norm": 0.012424430033867473, |
|
"learning_rate": 2.3127832843958007e-07, |
|
"loss": 0.4166, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 15.645403377110695, |
|
"grad_norm": 0.012366507275888419, |
|
"learning_rate": 2.2967842137278703e-07, |
|
"loss": 0.4115, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 15.660412757973734, |
|
"grad_norm": 0.012360880870716443, |
|
"learning_rate": 2.2808334908367909e-07, |
|
"loss": 0.4161, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 15.675422138836772, |
|
"grad_norm": 0.012736823965131592, |
|
"learning_rate": 2.264931215834257e-07, |
|
"loss": 0.4066, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 15.690431519699812, |
|
"grad_norm": 0.012155204376594498, |
|
"learning_rate": 2.2490774885278907e-07, |
|
"loss": 0.4049, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 15.705440900562852, |
|
"grad_norm": 0.012646948905131343, |
|
"learning_rate": 2.2332724084206112e-07, |
|
"loss": 0.4102, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 15.720450281425892, |
|
"grad_norm": 0.012200685840136094, |
|
"learning_rate": 2.2175160747100198e-07, |
|
"loss": 0.4049, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 15.73545966228893, |
|
"grad_norm": 0.012245324836010893, |
|
"learning_rate": 2.2018085862877566e-07, |
|
"loss": 0.411, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 15.75046904315197, |
|
"grad_norm": 0.012513647467032053, |
|
"learning_rate": 2.1861500417389056e-07, |
|
"loss": 0.4048, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 15.76547842401501, |
|
"grad_norm": 0.011903061421256312, |
|
"learning_rate": 2.170540539341361e-07, |
|
"loss": 0.4188, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 15.78048780487805, |
|
"grad_norm": 0.012394997852371569, |
|
"learning_rate": 2.1549801770652098e-07, |
|
"loss": 0.3948, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 15.795497185741088, |
|
"grad_norm": 0.012059547174933096, |
|
"learning_rate": 2.139469052572127e-07, |
|
"loss": 0.4074, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 15.810506566604127, |
|
"grad_norm": 0.012205668055297809, |
|
"learning_rate": 2.1240072632147456e-07, |
|
"loss": 0.421, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 15.825515947467167, |
|
"grad_norm": 0.0120693595501382, |
|
"learning_rate": 2.1085949060360653e-07, |
|
"loss": 0.4148, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 15.825515947467167, |
|
"eval_loss": 0.3944862484931946, |
|
"eval_runtime": 14.0894, |
|
"eval_samples_per_second": 31.726, |
|
"eval_steps_per_second": 1.987, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 15.840525328330207, |
|
"grad_norm": 0.012103676359132986, |
|
"learning_rate": 2.0932320777688296e-07, |
|
"loss": 0.405, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 15.855534709193245, |
|
"grad_norm": 0.012592718645086199, |
|
"learning_rate": 2.0779188748349252e-07, |
|
"loss": 0.4153, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 15.870544090056285, |
|
"grad_norm": 0.012256564326088341, |
|
"learning_rate": 2.0626553933447732e-07, |
|
"loss": 0.4097, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 15.885553470919325, |
|
"grad_norm": 0.011906039730046884, |
|
"learning_rate": 2.0474417290967295e-07, |
|
"loss": 0.3977, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 15.900562851782365, |
|
"grad_norm": 0.01245313620942531, |
|
"learning_rate": 2.0322779775764787e-07, |
|
"loss": 0.421, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 15.915572232645403, |
|
"grad_norm": 0.012327925900428386, |
|
"learning_rate": 2.0171642339564398e-07, |
|
"loss": 0.4174, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 15.930581613508442, |
|
"grad_norm": 0.012067615556392703, |
|
"learning_rate": 2.0021005930951684e-07, |
|
"loss": 0.4047, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 15.945590994371482, |
|
"grad_norm": 0.011899607848417232, |
|
"learning_rate": 1.9870871495367514e-07, |
|
"loss": 0.4109, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 15.960600375234522, |
|
"grad_norm": 0.012062481535146134, |
|
"learning_rate": 1.972123997510231e-07, |
|
"loss": 0.4121, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 15.975609756097562, |
|
"grad_norm": 0.012510881372292592, |
|
"learning_rate": 1.957211230929e-07, |
|
"loss": 0.409, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 15.9906191369606, |
|
"grad_norm": 0.01227375801391932, |
|
"learning_rate": 1.9423489433902184e-07, |
|
"loss": 0.4076, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.01227375801391932, |
|
"learning_rate": 1.9275372281742243e-07, |
|
"loss": 0.4065, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 16.015009380863038, |
|
"grad_norm": 0.016887925794017364, |
|
"learning_rate": 1.91277617824394e-07, |
|
"loss": 0.4033, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 16.03001876172608, |
|
"grad_norm": 0.012065709393656405, |
|
"learning_rate": 1.8980658862443088e-07, |
|
"loss": 0.4139, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 16.045028142589118, |
|
"grad_norm": 0.012123882563609396, |
|
"learning_rate": 1.8834064445016951e-07, |
|
"loss": 0.4141, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 16.06003752345216, |
|
"grad_norm": 0.011970596541082451, |
|
"learning_rate": 1.8687979450233115e-07, |
|
"loss": 0.3953, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 16.075046904315197, |
|
"grad_norm": 0.0121109332002954, |
|
"learning_rate": 1.8542404794966427e-07, |
|
"loss": 0.4007, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 16.090056285178235, |
|
"grad_norm": 0.012317267567386788, |
|
"learning_rate": 1.8397341392888676e-07, |
|
"loss": 0.3968, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 16.105065666041277, |
|
"grad_norm": 0.011954379333899165, |
|
"learning_rate": 1.825279015446286e-07, |
|
"loss": 0.4098, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 16.120075046904315, |
|
"grad_norm": 0.01207522116296904, |
|
"learning_rate": 1.8108751986937486e-07, |
|
"loss": 0.4101, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 16.135084427767353, |
|
"grad_norm": 0.012742776596916777, |
|
"learning_rate": 1.7965227794340875e-07, |
|
"loss": 0.4127, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 16.150093808630395, |
|
"grad_norm": 0.01227951952136753, |
|
"learning_rate": 1.7822218477475494e-07, |
|
"loss": 0.4146, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 16.165103189493433, |
|
"grad_norm": 0.012201981579291116, |
|
"learning_rate": 1.767972493391222e-07, |
|
"loss": 0.4127, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 16.180112570356474, |
|
"grad_norm": 0.012271422051618314, |
|
"learning_rate": 1.7537748057984857e-07, |
|
"loss": 0.4039, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 16.195121951219512, |
|
"grad_norm": 0.012025618790831537, |
|
"learning_rate": 1.7396288740784416e-07, |
|
"loss": 0.402, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 16.21013133208255, |
|
"grad_norm": 0.012207501752001196, |
|
"learning_rate": 1.7255347870153536e-07, |
|
"loss": 0.4167, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 16.225140712945592, |
|
"grad_norm": 0.01276307209079064, |
|
"learning_rate": 1.7114926330680957e-07, |
|
"loss": 0.4067, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 16.24015009380863, |
|
"grad_norm": 0.011866760953989293, |
|
"learning_rate": 1.6975025003695864e-07, |
|
"loss": 0.4025, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 16.255159474671668, |
|
"grad_norm": 0.012251478648853141, |
|
"learning_rate": 1.6835644767262514e-07, |
|
"loss": 0.3965, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 16.27016885553471, |
|
"grad_norm": 0.012206207332206386, |
|
"learning_rate": 1.6696786496174575e-07, |
|
"loss": 0.409, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 16.285178236397748, |
|
"grad_norm": 0.012182631218416822, |
|
"learning_rate": 1.655845106194973e-07, |
|
"loss": 0.4113, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 16.30018761726079, |
|
"grad_norm": 0.01226161680696391, |
|
"learning_rate": 1.642063933282417e-07, |
|
"loss": 0.4032, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 16.315196998123827, |
|
"grad_norm": 0.011802893119126617, |
|
"learning_rate": 1.6283352173747146e-07, |
|
"loss": 0.4121, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 16.330206378986865, |
|
"grad_norm": 0.01205076600250808, |
|
"learning_rate": 1.614659044637553e-07, |
|
"loss": 0.4146, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 16.345215759849907, |
|
"grad_norm": 0.012403739116953917, |
|
"learning_rate": 1.6010355009068454e-07, |
|
"loss": 0.4127, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 16.360225140712945, |
|
"grad_norm": 0.011974952494326613, |
|
"learning_rate": 1.5874646716881868e-07, |
|
"loss": 0.4056, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 16.375234521575987, |
|
"grad_norm": 0.012012554552280001, |
|
"learning_rate": 1.5739466421563218e-07, |
|
"loss": 0.3993, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 16.390243902439025, |
|
"grad_norm": 0.012068435859544514, |
|
"learning_rate": 1.560481497154602e-07, |
|
"loss": 0.4067, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 16.405253283302063, |
|
"grad_norm": 0.01241486073176841, |
|
"learning_rate": 1.5470693211944642e-07, |
|
"loss": 0.4168, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 16.420262664165104, |
|
"grad_norm": 0.012421971155925233, |
|
"learning_rate": 1.5337101984548951e-07, |
|
"loss": 0.4036, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 16.435272045028142, |
|
"grad_norm": 0.012148607504961384, |
|
"learning_rate": 1.5204042127819018e-07, |
|
"loss": 0.3997, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 16.45028142589118, |
|
"grad_norm": 0.011893274496745576, |
|
"learning_rate": 1.5071514476879876e-07, |
|
"loss": 0.4075, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 16.465290806754222, |
|
"grad_norm": 0.012445240319998217, |
|
"learning_rate": 1.4939519863516213e-07, |
|
"loss": 0.4038, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 16.48030018761726, |
|
"grad_norm": 0.012537068527918893, |
|
"learning_rate": 1.4808059116167303e-07, |
|
"loss": 0.4111, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 16.4953095684803, |
|
"grad_norm": 0.012304138189345946, |
|
"learning_rate": 1.4677133059921632e-07, |
|
"loss": 0.4151, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 16.51031894934334, |
|
"grad_norm": 0.012061421440472614, |
|
"learning_rate": 1.4546742516511845e-07, |
|
"loss": 0.3969, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 16.525328330206378, |
|
"grad_norm": 0.011836675742582285, |
|
"learning_rate": 1.4416888304309515e-07, |
|
"loss": 0.4047, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 16.54033771106942, |
|
"grad_norm": 0.011817532065112164, |
|
"learning_rate": 1.4287571238320051e-07, |
|
"loss": 0.4107, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 16.555347091932457, |
|
"grad_norm": 0.011743671409316899, |
|
"learning_rate": 1.4158792130177543e-07, |
|
"loss": 0.4004, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 16.570356472795496, |
|
"grad_norm": 0.012116572838494573, |
|
"learning_rate": 1.4030551788139721e-07, |
|
"loss": 0.4141, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 16.585365853658537, |
|
"grad_norm": 0.012425598089451213, |
|
"learning_rate": 1.3902851017082862e-07, |
|
"loss": 0.4118, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 16.600375234521575, |
|
"grad_norm": 0.011753586649073179, |
|
"learning_rate": 1.377569061849665e-07, |
|
"loss": 0.4082, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 16.615384615384617, |
|
"grad_norm": 0.012375977275680615, |
|
"learning_rate": 1.3649071390479283e-07, |
|
"loss": 0.4146, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 16.630393996247655, |
|
"grad_norm": 0.012295332163973069, |
|
"learning_rate": 1.3522994127732412e-07, |
|
"loss": 0.4151, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 16.645403377110693, |
|
"grad_norm": 0.011963182926471421, |
|
"learning_rate": 1.3397459621556128e-07, |
|
"loss": 0.4151, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 16.660412757973734, |
|
"grad_norm": 0.011814638769608714, |
|
"learning_rate": 1.327246865984404e-07, |
|
"loss": 0.406, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 16.675422138836772, |
|
"grad_norm": 0.012205572263242072, |
|
"learning_rate": 1.314802202707822e-07, |
|
"loss": 0.41, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 16.690431519699814, |
|
"grad_norm": 0.011956127437528156, |
|
"learning_rate": 1.3024120504324454e-07, |
|
"loss": 0.4042, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 16.705440900562852, |
|
"grad_norm": 0.012425936619786686, |
|
"learning_rate": 1.290076486922722e-07, |
|
"loss": 0.4098, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 16.72045028142589, |
|
"grad_norm": 0.012195755792282742, |
|
"learning_rate": 1.2777955896004811e-07, |
|
"loss": 0.4123, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 16.735459662288932, |
|
"grad_norm": 0.012001955272470884, |
|
"learning_rate": 1.2655694355444547e-07, |
|
"loss": 0.4058, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 16.75046904315197, |
|
"grad_norm": 0.012612779412664959, |
|
"learning_rate": 1.25339810148978e-07, |
|
"loss": 0.4018, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 16.765478424015008, |
|
"grad_norm": 0.011731053965490292, |
|
"learning_rate": 1.2412816638275402e-07, |
|
"loss": 0.4099, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 16.78048780487805, |
|
"grad_norm": 0.011942110649578081, |
|
"learning_rate": 1.2292201986042616e-07, |
|
"loss": 0.4159, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 16.795497185741088, |
|
"grad_norm": 0.01228838930130753, |
|
"learning_rate": 1.2172137815214488e-07, |
|
"loss": 0.4177, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 16.81050656660413, |
|
"grad_norm": 0.012201958299201607, |
|
"learning_rate": 1.2052624879351103e-07, |
|
"loss": 0.4064, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 16.81050656660413, |
|
"eval_loss": 0.3941747844219208, |
|
"eval_runtime": 13.7699, |
|
"eval_samples_per_second": 32.462, |
|
"eval_steps_per_second": 2.033, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 16.825515947467167, |
|
"grad_norm": 0.011987590697607683, |
|
"learning_rate": 1.1933663928552752e-07, |
|
"loss": 0.3976, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 16.840525328330205, |
|
"grad_norm": 0.012138943362389928, |
|
"learning_rate": 1.1815255709455374e-07, |
|
"loss": 0.4153, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 16.855534709193247, |
|
"grad_norm": 0.012373458329033428, |
|
"learning_rate": 1.1697400965225745e-07, |
|
"loss": 0.4146, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 16.870544090056285, |
|
"grad_norm": 0.012389090392038503, |
|
"learning_rate": 1.1580100435556883e-07, |
|
"loss": 0.3946, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 16.885553470919323, |
|
"grad_norm": 0.011834382886208047, |
|
"learning_rate": 1.1463354856663399e-07, |
|
"loss": 0.4016, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 16.900562851782365, |
|
"grad_norm": 0.012114418651751394, |
|
"learning_rate": 1.1347164961276789e-07, |
|
"loss": 0.396, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 16.915572232645403, |
|
"grad_norm": 0.012269351272196211, |
|
"learning_rate": 1.1231531478640987e-07, |
|
"loss": 0.4098, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 16.930581613508444, |
|
"grad_norm": 0.011614991226426072, |
|
"learning_rate": 1.1116455134507663e-07, |
|
"loss": 0.4122, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 16.945590994371482, |
|
"grad_norm": 0.012827165860445595, |
|
"learning_rate": 1.1001936651131716e-07, |
|
"loss": 0.4074, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 16.96060037523452, |
|
"grad_norm": 0.01202261694187779, |
|
"learning_rate": 1.0887976747266791e-07, |
|
"loss": 0.4017, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 16.975609756097562, |
|
"grad_norm": 0.012092296382532815, |
|
"learning_rate": 1.0774576138160596e-07, |
|
"loss": 0.4114, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 16.9906191369606, |
|
"grad_norm": 0.012388455511771996, |
|
"learning_rate": 1.0661735535550665e-07, |
|
"loss": 0.4104, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 17.015009380863038, |
|
"grad_norm": 0.01607381588237361, |
|
"learning_rate": 1.0549455647659677e-07, |
|
"loss": 0.7922, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 17.03001876172608, |
|
"grad_norm": 0.01182074740531476, |
|
"learning_rate": 1.0437737179191108e-07, |
|
"loss": 0.4057, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 17.045028142589118, |
|
"grad_norm": 0.012095915652515062, |
|
"learning_rate": 1.0326580831324816e-07, |
|
"loss": 0.4012, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 17.06003752345216, |
|
"grad_norm": 0.011903831536743867, |
|
"learning_rate": 1.021598730171257e-07, |
|
"loss": 0.4091, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 17.075046904315197, |
|
"grad_norm": 0.012259225232080817, |
|
"learning_rate": 1.0105957284473732e-07, |
|
"loss": 0.4093, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 17.090056285178235, |
|
"grad_norm": 0.012083353911997635, |
|
"learning_rate": 9.996491470190915e-08, |
|
"loss": 0.4137, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 17.105065666041277, |
|
"grad_norm": 0.012224092783758394, |
|
"learning_rate": 9.887590545905589e-08, |
|
"loss": 0.4254, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 17.120075046904315, |
|
"grad_norm": 0.011652986549382856, |
|
"learning_rate": 9.779255195113823e-08, |
|
"loss": 0.4037, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 17.135084427767353, |
|
"grad_norm": 0.012465106664410062, |
|
"learning_rate": 9.671486097761917e-08, |
|
"loss": 0.401, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 17.150093808630395, |
|
"grad_norm": 0.012292307923531054, |
|
"learning_rate": 9.564283930242257e-08, |
|
"loss": 0.4078, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 17.165103189493433, |
|
"grad_norm": 0.012083496838805925, |
|
"learning_rate": 9.457649365388965e-08, |
|
"loss": 0.4061, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 17.180112570356474, |
|
"grad_norm": 0.012058613681905764, |
|
"learning_rate": 9.351583072473712e-08, |
|
"loss": 0.4011, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 17.195121951219512, |
|
"grad_norm": 0.011786553163980374, |
|
"learning_rate": 9.246085717201546e-08, |
|
"loss": 0.4148, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 17.21013133208255, |
|
"grad_norm": 0.012314917316211435, |
|
"learning_rate": 9.141157961706602e-08, |
|
"loss": 0.4102, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 17.225140712945592, |
|
"grad_norm": 0.012107777455723816, |
|
"learning_rate": 9.036800464548156e-08, |
|
"loss": 0.3958, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 17.24015009380863, |
|
"grad_norm": 0.011964321058374836, |
|
"learning_rate": 8.933013880706275e-08, |
|
"loss": 0.4023, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 17.255159474671668, |
|
"grad_norm": 0.012067958170577502, |
|
"learning_rate": 8.829798861577831e-08, |
|
"loss": 0.4134, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 17.27016885553471, |
|
"grad_norm": 0.01209478822335003, |
|
"learning_rate": 8.727156054972373e-08, |
|
"loss": 0.416, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 17.285178236397748, |
|
"grad_norm": 0.012244663184378318, |
|
"learning_rate": 8.625086105108037e-08, |
|
"loss": 0.4005, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 17.30018761726079, |
|
"grad_norm": 0.011573669129444004, |
|
"learning_rate": 8.523589652607566e-08, |
|
"loss": 0.4041, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 17.315196998123827, |
|
"grad_norm": 0.01184555083569059, |
|
"learning_rate": 8.422667334494249e-08, |
|
"loss": 0.3999, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 17.330206378986865, |
|
"grad_norm": 0.01271028447647729, |
|
"learning_rate": 8.322319784187959e-08, |
|
"loss": 0.4113, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 17.345215759849907, |
|
"grad_norm": 0.012424858123698831, |
|
"learning_rate": 8.222547631501054e-08, |
|
"loss": 0.4073, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 17.360225140712945, |
|
"grad_norm": 0.012569921108406372, |
|
"learning_rate": 8.123351502634623e-08, |
|
"loss": 0.4176, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 17.375234521575987, |
|
"grad_norm": 0.012212460256096685, |
|
"learning_rate": 8.024732020174385e-08, |
|
"loss": 0.4163, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 17.390243902439025, |
|
"grad_norm": 0.012876844421681181, |
|
"learning_rate": 7.926689803086872e-08, |
|
"loss": 0.4137, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 17.405253283302063, |
|
"grad_norm": 0.012127902236964899, |
|
"learning_rate": 7.82922546671555e-08, |
|
"loss": 0.4108, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 17.420262664165104, |
|
"grad_norm": 0.012475847191274562, |
|
"learning_rate": 7.732339622776829e-08, |
|
"loss": 0.4119, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 17.435272045028142, |
|
"grad_norm": 0.012323895374580014, |
|
"learning_rate": 7.636032879356425e-08, |
|
"loss": 0.4064, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 17.45028142589118, |
|
"grad_norm": 0.01162521749026464, |
|
"learning_rate": 7.540305840905369e-08, |
|
"loss": 0.4099, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 17.465290806754222, |
|
"grad_norm": 0.012085289434805383, |
|
"learning_rate": 7.445159108236343e-08, |
|
"loss": 0.4014, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 17.48030018761726, |
|
"grad_norm": 0.011680985710547098, |
|
"learning_rate": 7.350593278519823e-08, |
|
"loss": 0.4118, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 17.4953095684803, |
|
"grad_norm": 0.01202917040251438, |
|
"learning_rate": 7.256608945280318e-08, |
|
"loss": 0.4085, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 17.51031894934334, |
|
"grad_norm": 0.011995016219742638, |
|
"learning_rate": 7.163206698392742e-08, |
|
"loss": 0.4121, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 17.525328330206378, |
|
"grad_norm": 0.012587588608999197, |
|
"learning_rate": 7.070387124078614e-08, |
|
"loss": 0.4134, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 17.54033771106942, |
|
"grad_norm": 0.011907862956892574, |
|
"learning_rate": 6.978150804902449e-08, |
|
"loss": 0.4149, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 17.555347091932457, |
|
"grad_norm": 0.012367732761458044, |
|
"learning_rate": 6.886498319768075e-08, |
|
"loss": 0.4123, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 17.570356472795496, |
|
"grad_norm": 0.012413142793066507, |
|
"learning_rate": 6.795430243914935e-08, |
|
"loss": 0.4099, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 17.585365853658537, |
|
"grad_norm": 0.01198028253713889, |
|
"learning_rate": 6.704947148914608e-08, |
|
"loss": 0.4053, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 17.600375234521575, |
|
"grad_norm": 0.01218802781184607, |
|
"learning_rate": 6.615049602667122e-08, |
|
"loss": 0.4116, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 17.615384615384617, |
|
"grad_norm": 0.012525942652443793, |
|
"learning_rate": 6.52573816939742e-08, |
|
"loss": 0.4123, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 17.630393996247655, |
|
"grad_norm": 0.012661673801348647, |
|
"learning_rate": 6.437013409651847e-08, |
|
"loss": 0.4108, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 17.645403377110693, |
|
"grad_norm": 0.012526249345482146, |
|
"learning_rate": 6.348875880294535e-08, |
|
"loss": 0.4129, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 17.660412757973734, |
|
"grad_norm": 0.012160064393132261, |
|
"learning_rate": 6.26132613450403e-08, |
|
"loss": 0.405, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 17.675422138836772, |
|
"grad_norm": 0.011856736199021006, |
|
"learning_rate": 6.174364721769742e-08, |
|
"loss": 0.3979, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 17.690431519699814, |
|
"grad_norm": 0.012209332212864363, |
|
"learning_rate": 6.087992187888557e-08, |
|
"loss": 0.4094, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 17.705440900562852, |
|
"grad_norm": 0.012164727576505307, |
|
"learning_rate": 6.00220907496135e-08, |
|
"loss": 0.4064, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 17.72045028142589, |
|
"grad_norm": 0.012094675237917947, |
|
"learning_rate": 5.917015921389568e-08, |
|
"loss": 0.4111, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 17.735459662288932, |
|
"grad_norm": 0.012301334949674643, |
|
"learning_rate": 5.832413261871938e-08, |
|
"loss": 0.4019, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 17.75046904315197, |
|
"grad_norm": 0.012225443758653235, |
|
"learning_rate": 5.748401627401067e-08, |
|
"loss": 0.3957, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 17.765478424015008, |
|
"grad_norm": 0.012458043664162905, |
|
"learning_rate": 5.6649815452600725e-08, |
|
"loss": 0.3975, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 17.78048780487805, |
|
"grad_norm": 0.011703394783094899, |
|
"learning_rate": 5.5821535390193406e-08, |
|
"loss": 0.4084, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 17.795497185741088, |
|
"grad_norm": 0.01193572234009368, |
|
"learning_rate": 5.499918128533154e-08, |
|
"loss": 0.4029, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 17.81050656660413, |
|
"grad_norm": 0.012490715952476177, |
|
"learning_rate": 5.4182758299365364e-08, |
|
"loss": 0.4066, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 17.81050656660413, |
|
"eval_loss": 0.3940185606479645, |
|
"eval_runtime": 14.0978, |
|
"eval_samples_per_second": 31.707, |
|
"eval_steps_per_second": 1.986, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 17.825515947467167, |
|
"grad_norm": 0.012227421280696971, |
|
"learning_rate": 5.337227155641921e-08, |
|
"loss": 0.4083, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 17.840525328330205, |
|
"grad_norm": 0.012367946788776257, |
|
"learning_rate": 5.256772614335991e-08, |
|
"loss": 0.4082, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 17.855534709193247, |
|
"grad_norm": 0.012324728938463323, |
|
"learning_rate": 5.1769127109764666e-08, |
|
"loss": 0.4009, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 17.870544090056285, |
|
"grad_norm": 0.011883776326167475, |
|
"learning_rate": 5.0976479467888966e-08, |
|
"loss": 0.3992, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 17.885553470919323, |
|
"grad_norm": 0.01191459640325878, |
|
"learning_rate": 5.018978819263597e-08, |
|
"loss": 0.4086, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 17.900562851782365, |
|
"grad_norm": 0.012409893635999196, |
|
"learning_rate": 4.940905822152452e-08, |
|
"loss": 0.3926, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 17.915572232645403, |
|
"grad_norm": 0.012283886910921115, |
|
"learning_rate": 4.863429445465883e-08, |
|
"loss": 0.4075, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 17.930581613508444, |
|
"grad_norm": 0.012064735027299723, |
|
"learning_rate": 4.786550175469728e-08, |
|
"loss": 0.3929, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 17.945590994371482, |
|
"grad_norm": 0.012077507797889245, |
|
"learning_rate": 4.7102684946821456e-08, |
|
"loss": 0.4086, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 17.96060037523452, |
|
"grad_norm": 0.011773580263745117, |
|
"learning_rate": 4.6345848818706956e-08, |
|
"loss": 0.4195, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 17.975609756097562, |
|
"grad_norm": 0.012158454333714083, |
|
"learning_rate": 4.55949981204925e-08, |
|
"loss": 0.4147, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 17.9906191369606, |
|
"grad_norm": 0.011705832981943262, |
|
"learning_rate": 4.4850137564750756e-08, |
|
"loss": 0.4133, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 18.015009380863038, |
|
"grad_norm": 0.01892041042339661, |
|
"learning_rate": 4.4111271826457684e-08, |
|
"loss": 0.8008, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 18.03001876172608, |
|
"grad_norm": 0.011875241891989333, |
|
"learning_rate": 4.337840554296468e-08, |
|
"loss": 0.3956, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 18.045028142589118, |
|
"grad_norm": 0.011518111425783685, |
|
"learning_rate": 4.265154331396814e-08, |
|
"loss": 0.4018, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 18.06003752345216, |
|
"grad_norm": 0.011921877135567948, |
|
"learning_rate": 4.193068970148139e-08, |
|
"loss": 0.4135, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 18.075046904315197, |
|
"grad_norm": 0.01196404394665153, |
|
"learning_rate": 4.121584922980603e-08, |
|
"loss": 0.4152, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 18.090056285178235, |
|
"grad_norm": 0.012254596451346144, |
|
"learning_rate": 4.050702638550274e-08, |
|
"loss": 0.4057, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 18.105065666041277, |
|
"grad_norm": 0.012184092362587513, |
|
"learning_rate": 3.9804225617364185e-08, |
|
"loss": 0.4042, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 18.120075046904315, |
|
"grad_norm": 0.012194493098812067, |
|
"learning_rate": 3.910745133638638e-08, |
|
"loss": 0.418, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 18.135084427767353, |
|
"grad_norm": 0.012555563095673745, |
|
"learning_rate": 3.841670791574136e-08, |
|
"loss": 0.4102, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 18.150093808630395, |
|
"grad_norm": 0.012169854152024248, |
|
"learning_rate": 3.7731999690749585e-08, |
|
"loss": 0.3893, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 18.165103189493433, |
|
"grad_norm": 0.01244474180194911, |
|
"learning_rate": 3.705333095885277e-08, |
|
"loss": 0.4044, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 18.180112570356474, |
|
"grad_norm": 0.012701077259060964, |
|
"learning_rate": 3.6380705979586644e-08, |
|
"loss": 0.4094, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 18.195121951219512, |
|
"grad_norm": 0.011824029437486382, |
|
"learning_rate": 3.571412897455495e-08, |
|
"loss": 0.4129, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 18.21013133208255, |
|
"grad_norm": 0.012828647907710029, |
|
"learning_rate": 3.505360412740188e-08, |
|
"loss": 0.4, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 18.225140712945592, |
|
"grad_norm": 0.01201395104905769, |
|
"learning_rate": 3.439913558378704e-08, |
|
"loss": 0.4207, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 18.24015009380863, |
|
"grad_norm": 0.012461422730290947, |
|
"learning_rate": 3.3750727451358094e-08, |
|
"loss": 0.3988, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 18.255159474671668, |
|
"grad_norm": 0.012047772292080035, |
|
"learning_rate": 3.310838379972614e-08, |
|
"loss": 0.4122, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 18.27016885553471, |
|
"grad_norm": 0.012451921378931235, |
|
"learning_rate": 3.24721086604397e-08, |
|
"loss": 0.4179, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 18.285178236397748, |
|
"grad_norm": 0.012038541117606516, |
|
"learning_rate": 3.1841906026959356e-08, |
|
"loss": 0.4033, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 18.30018761726079, |
|
"grad_norm": 0.011860346393252194, |
|
"learning_rate": 3.1217779854632806e-08, |
|
"loss": 0.3957, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 18.315196998123827, |
|
"grad_norm": 0.012122525135396791, |
|
"learning_rate": 3.0599734060669626e-08, |
|
"loss": 0.408, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 18.330206378986865, |
|
"grad_norm": 0.01207502168393969, |
|
"learning_rate": 2.998777252411766e-08, |
|
"loss": 0.4165, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 18.345215759849907, |
|
"grad_norm": 0.012521513750259158, |
|
"learning_rate": 2.9381899085837438e-08, |
|
"loss": 0.4122, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 18.360225140712945, |
|
"grad_norm": 0.012634009105925964, |
|
"learning_rate": 2.8782117548479258e-08, |
|
"loss": 0.4151, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 18.375234521575987, |
|
"grad_norm": 0.011959711219096641, |
|
"learning_rate": 2.8188431676458345e-08, |
|
"loss": 0.4078, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 18.390243902439025, |
|
"grad_norm": 0.011988140892609642, |
|
"learning_rate": 2.7600845195931867e-08, |
|
"loss": 0.4058, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 18.405253283302063, |
|
"grad_norm": 0.012178000803191502, |
|
"learning_rate": 2.701936179477515e-08, |
|
"loss": 0.4144, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 18.420262664165104, |
|
"grad_norm": 0.0120582199067019, |
|
"learning_rate": 2.6443985122558855e-08, |
|
"loss": 0.4048, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 18.435272045028142, |
|
"grad_norm": 0.012256955326780608, |
|
"learning_rate": 2.587471879052572e-08, |
|
"loss": 0.4053, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 18.45028142589118, |
|
"grad_norm": 0.012220393687074404, |
|
"learning_rate": 2.5311566371568505e-08, |
|
"loss": 0.4166, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 18.465290806754222, |
|
"grad_norm": 0.012318315655887042, |
|
"learning_rate": 2.4754531400206446e-08, |
|
"loss": 0.4086, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 18.48030018761726, |
|
"grad_norm": 0.01208759598395148, |
|
"learning_rate": 2.4203617372564378e-08, |
|
"loss": 0.403, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 18.4953095684803, |
|
"grad_norm": 0.012045700658904396, |
|
"learning_rate": 2.3658827746349974e-08, |
|
"loss": 0.4016, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 18.51031894934334, |
|
"grad_norm": 0.011950579146901357, |
|
"learning_rate": 2.3120165940832325e-08, |
|
"loss": 0.4111, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 18.525328330206378, |
|
"grad_norm": 0.012254984423373173, |
|
"learning_rate": 2.2587635336820398e-08, |
|
"loss": 0.4163, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 18.54033771106942, |
|
"grad_norm": 0.012241032174288546, |
|
"learning_rate": 2.2061239276641607e-08, |
|
"loss": 0.4067, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 18.555347091932457, |
|
"grad_norm": 0.01207555807762867, |
|
"learning_rate": 2.1540981064121388e-08, |
|
"loss": 0.4155, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 18.570356472795496, |
|
"grad_norm": 0.012558998024423071, |
|
"learning_rate": 2.102686396456199e-08, |
|
"loss": 0.4131, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 18.585365853658537, |
|
"grad_norm": 0.012060644512723748, |
|
"learning_rate": 2.0518891204722167e-08, |
|
"loss": 0.4069, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 18.600375234521575, |
|
"grad_norm": 0.012285463532045451, |
|
"learning_rate": 2.0017065972796843e-08, |
|
"loss": 0.3989, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 18.615384615384617, |
|
"grad_norm": 0.012025225107120854, |
|
"learning_rate": 1.9521391418397148e-08, |
|
"loss": 0.4034, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 18.630393996247655, |
|
"grad_norm": 0.012338195232809763, |
|
"learning_rate": 1.9031870652530756e-08, |
|
"loss": 0.4113, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 18.645403377110693, |
|
"grad_norm": 0.01213275063867394, |
|
"learning_rate": 1.8548506747582128e-08, |
|
"loss": 0.4115, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 18.660412757973734, |
|
"grad_norm": 0.012004565010333276, |
|
"learning_rate": 1.807130273729329e-08, |
|
"loss": 0.3981, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 18.675422138836772, |
|
"grad_norm": 0.012366830778200107, |
|
"learning_rate": 1.7600261616745103e-08, |
|
"loss": 0.4113, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 18.690431519699814, |
|
"grad_norm": 0.012029863257280412, |
|
"learning_rate": 1.713538634233791e-08, |
|
"loss": 0.4086, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 18.705440900562852, |
|
"grad_norm": 0.011996240644873084, |
|
"learning_rate": 1.6676679831773567e-08, |
|
"loss": 0.4019, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 18.72045028142589, |
|
"grad_norm": 0.012493637507283401, |
|
"learning_rate": 1.622414496403668e-08, |
|
"loss": 0.4007, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 18.735459662288932, |
|
"grad_norm": 0.011864242959638414, |
|
"learning_rate": 1.5777784579376728e-08, |
|
"loss": 0.4002, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 18.75046904315197, |
|
"grad_norm": 0.012004702120412627, |
|
"learning_rate": 1.5337601479290195e-08, |
|
"loss": 0.3977, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 18.765478424015008, |
|
"grad_norm": 0.012055148241702568, |
|
"learning_rate": 1.4903598426503237e-08, |
|
"loss": 0.4056, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 18.78048780487805, |
|
"grad_norm": 0.011971783702517814, |
|
"learning_rate": 1.447577814495371e-08, |
|
"loss": 0.404, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 18.795497185741088, |
|
"grad_norm": 0.011875158436815665, |
|
"learning_rate": 1.4054143319774724e-08, |
|
"loss": 0.4023, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 18.81050656660413, |
|
"grad_norm": 0.012294967191521976, |
|
"learning_rate": 1.3638696597277677e-08, |
|
"loss": 0.4137, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 18.81050656660413, |
|
"eval_loss": 0.39393627643585205, |
|
"eval_runtime": 13.7085, |
|
"eval_samples_per_second": 32.607, |
|
"eval_steps_per_second": 2.043, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 18.825515947467167, |
|
"grad_norm": 0.012150083576670793, |
|
"learning_rate": 1.3229440584935137e-08, |
|
"loss": 0.4042, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 18.840525328330205, |
|
"grad_norm": 0.012191506353413198, |
|
"learning_rate": 1.28263778513652e-08, |
|
"loss": 0.4135, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 18.855534709193247, |
|
"grad_norm": 0.012120685160381235, |
|
"learning_rate": 1.2429510926314835e-08, |
|
"loss": 0.4119, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 18.870544090056285, |
|
"grad_norm": 0.011999394179816762, |
|
"learning_rate": 1.2038842300644225e-08, |
|
"loss": 0.4091, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 18.885553470919323, |
|
"grad_norm": 0.012229137988916727, |
|
"learning_rate": 1.165437442631112e-08, |
|
"loss": 0.4026, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 18.900562851782365, |
|
"grad_norm": 0.011773188892312082, |
|
"learning_rate": 1.1276109716355286e-08, |
|
"loss": 0.3999, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 18.915572232645403, |
|
"grad_norm": 0.011987595281597069, |
|
"learning_rate": 1.0904050544883858e-08, |
|
"loss": 0.4038, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 18.930581613508444, |
|
"grad_norm": 0.011884116567775816, |
|
"learning_rate": 1.0538199247055678e-08, |
|
"loss": 0.4053, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 18.945590994371482, |
|
"grad_norm": 0.011817654569350527, |
|
"learning_rate": 1.0178558119067315e-08, |
|
"loss": 0.4057, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 18.96060037523452, |
|
"grad_norm": 0.012055678102132312, |
|
"learning_rate": 9.825129418138178e-09, |
|
"loss": 0.4078, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 18.975609756097562, |
|
"grad_norm": 0.012071982250938978, |
|
"learning_rate": 9.477915362496758e-09, |
|
"loss": 0.4203, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 18.9906191369606, |
|
"grad_norm": 0.012189144844470683, |
|
"learning_rate": 9.13691813136641e-09, |
|
"loss": 0.4162, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.013032683738861633, |
|
"learning_rate": 8.802139864951596e-09, |
|
"loss": 0.3283, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 19.015009380863038, |
|
"grad_norm": 0.014335853539907665, |
|
"learning_rate": 8.473582664424995e-09, |
|
"loss": 0.4792, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 19.03001876172608, |
|
"grad_norm": 0.012484679040138053, |
|
"learning_rate": 8.151248591913518e-09, |
|
"loss": 0.4106, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 19.045028142589118, |
|
"grad_norm": 0.012105246321067362, |
|
"learning_rate": 7.835139670486212e-09, |
|
"loss": 0.4001, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 19.06003752345216, |
|
"grad_norm": 0.011751825236754727, |
|
"learning_rate": 7.525257884140823e-09, |
|
"loss": 0.4008, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 19.075046904315197, |
|
"grad_norm": 0.013175703813983093, |
|
"learning_rate": 7.2216051777916894e-09, |
|
"loss": 0.4093, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 19.090056285178235, |
|
"grad_norm": 0.011978352610379423, |
|
"learning_rate": 6.924183457257871e-09, |
|
"loss": 0.4121, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 19.105065666041277, |
|
"grad_norm": 0.011787239977337143, |
|
"learning_rate": 6.632994589250262e-09, |
|
"loss": 0.4121, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 19.120075046904315, |
|
"grad_norm": 0.012318814698430361, |
|
"learning_rate": 6.3480404013608325e-09, |
|
"loss": 0.4043, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 19.135084427767353, |
|
"grad_norm": 0.012037273362049511, |
|
"learning_rate": 6.069322682050515e-09, |
|
"loss": 0.4066, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 19.150093808630395, |
|
"grad_norm": 0.011959073152947426, |
|
"learning_rate": 5.796843180638555e-09, |
|
"loss": 0.4095, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 19.165103189493433, |
|
"grad_norm": 0.011966460370732191, |
|
"learning_rate": 5.530603607290851e-09, |
|
"loss": 0.4224, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 19.180112570356474, |
|
"grad_norm": 0.012353059701364673, |
|
"learning_rate": 5.2706056330098505e-09, |
|
"loss": 0.4063, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 19.195121951219512, |
|
"grad_norm": 0.011930396547934605, |
|
"learning_rate": 5.0168508896235585e-09, |
|
"loss": 0.4033, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 19.21013133208255, |
|
"grad_norm": 0.011731376690623204, |
|
"learning_rate": 4.769340969775659e-09, |
|
"loss": 0.405, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 19.225140712945592, |
|
"grad_norm": 0.012283286872202488, |
|
"learning_rate": 4.528077426915411e-09, |
|
"loss": 0.4106, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 19.24015009380863, |
|
"grad_norm": 0.012574046336957924, |
|
"learning_rate": 4.293061775287654e-09, |
|
"loss": 0.4065, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 19.255159474671668, |
|
"grad_norm": 0.011951354592048286, |
|
"learning_rate": 4.064295489923819e-09, |
|
"loss": 0.4023, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 19.27016885553471, |
|
"grad_norm": 0.012132271788831939, |
|
"learning_rate": 3.841780006632267e-09, |
|
"loss": 0.4075, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 19.285178236397748, |
|
"grad_norm": 0.01221857164582425, |
|
"learning_rate": 3.625516721989075e-09, |
|
"loss": 0.4077, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 19.30018761726079, |
|
"grad_norm": 0.011863748308289977, |
|
"learning_rate": 3.415506993330153e-09, |
|
"loss": 0.4045, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 19.315196998123827, |
|
"grad_norm": 0.011741159713343184, |
|
"learning_rate": 3.211752138741697e-09, |
|
"loss": 0.4143, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 19.330206378986865, |
|
"grad_norm": 0.01191775807178149, |
|
"learning_rate": 3.0142534370524164e-09, |
|
"loss": 0.4062, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 19.345215759849907, |
|
"grad_norm": 0.012215595050070076, |
|
"learning_rate": 2.8230121278257635e-09, |
|
"loss": 0.4093, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 19.360225140712945, |
|
"grad_norm": 0.012564655988299225, |
|
"learning_rate": 2.6380294113514943e-09, |
|
"loss": 0.4139, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 19.375234521575987, |
|
"grad_norm": 0.012477001425241382, |
|
"learning_rate": 2.459306448638676e-09, |
|
"loss": 0.405, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 19.390243902439025, |
|
"grad_norm": 0.01206388735700181, |
|
"learning_rate": 2.2868443614082468e-09, |
|
"loss": 0.4231, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 19.405253283302063, |
|
"grad_norm": 0.012208171675223407, |
|
"learning_rate": 2.1206442320858e-09, |
|
"loss": 0.4005, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 19.420262664165104, |
|
"grad_norm": 0.012153451875566665, |
|
"learning_rate": 1.960707103795034e-09, |
|
"loss": 0.4028, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 19.435272045028142, |
|
"grad_norm": 0.012279486741819839, |
|
"learning_rate": 1.8070339803509804e-09, |
|
"loss": 0.4057, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 19.45028142589118, |
|
"grad_norm": 0.012000294004781479, |
|
"learning_rate": 1.6596258262541184e-09, |
|
"loss": 0.4097, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 19.465290806754222, |
|
"grad_norm": 0.012492834461960383, |
|
"learning_rate": 1.5184835666838258e-09, |
|
"loss": 0.4173, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 19.48030018761726, |
|
"grad_norm": 0.012299822720232542, |
|
"learning_rate": 1.3836080874926047e-09, |
|
"loss": 0.4092, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 19.4953095684803, |
|
"grad_norm": 0.01218733011356003, |
|
"learning_rate": 1.2550002352010868e-09, |
|
"loss": 0.4218, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 19.51031894934334, |
|
"grad_norm": 0.012042960770651666, |
|
"learning_rate": 1.1326608169920371e-09, |
|
"loss": 0.3959, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 19.525328330206378, |
|
"grad_norm": 0.011997398364275975, |
|
"learning_rate": 1.0165906007056912e-09, |
|
"loss": 0.4166, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 19.54033771106942, |
|
"grad_norm": 0.012305321109405035, |
|
"learning_rate": 9.067903148348711e-10, |
|
"loss": 0.4087, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 19.555347091932457, |
|
"grad_norm": 0.012014818503486962, |
|
"learning_rate": 8.032606485200988e-10, |
|
"loss": 0.4154, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 19.570356472795496, |
|
"grad_norm": 0.01251044844330409, |
|
"learning_rate": 7.060022515460451e-10, |
|
"loss": 0.4102, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 19.585365853658537, |
|
"grad_norm": 0.012008572784907577, |
|
"learning_rate": 6.150157343364215e-10, |
|
"loss": 0.4152, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 19.600375234521575, |
|
"grad_norm": 0.012287338325467436, |
|
"learning_rate": 5.303016679509831e-10, |
|
"loss": 0.3981, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 19.615384615384617, |
|
"grad_norm": 0.012267724291012534, |
|
"learning_rate": 4.518605840815315e-10, |
|
"loss": 0.4135, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 19.630393996247655, |
|
"grad_norm": 0.011809176076779143, |
|
"learning_rate": 3.7969297504858443e-10, |
|
"loss": 0.3975, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 19.645403377110693, |
|
"grad_norm": 0.012294547435049158, |
|
"learning_rate": 3.1379929379871104e-10, |
|
"loss": 0.4003, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 19.660412757973734, |
|
"grad_norm": 0.011704232467770586, |
|
"learning_rate": 2.541799539008682e-10, |
|
"loss": 0.4041, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 19.675422138836772, |
|
"grad_norm": 0.011850331991569664, |
|
"learning_rate": 2.0083532954484618e-10, |
|
"loss": 0.404, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 19.690431519699814, |
|
"grad_norm": 0.011944977782669314, |
|
"learning_rate": 1.5376575553793793e-10, |
|
"loss": 0.3907, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 19.705440900562852, |
|
"grad_norm": 0.012277893048513143, |
|
"learning_rate": 1.1297152730338489e-10, |
|
"loss": 0.4192, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 19.72045028142589, |
|
"grad_norm": 0.012179141361100891, |
|
"learning_rate": 7.845290087848954e-11, |
|
"loss": 0.4054, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 19.735459662288932, |
|
"grad_norm": 0.012034350217278049, |
|
"learning_rate": 5.0210092912950087e-11, |
|
"loss": 0.4143, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 19.75046904315197, |
|
"grad_norm": 0.012006277883402618, |
|
"learning_rate": 2.824328066730608e-11, |
|
"loss": 0.3977, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 19.765478424015008, |
|
"grad_norm": 0.011954480700150845, |
|
"learning_rate": 1.255260201216135e-11, |
|
"loss": 0.3968, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 19.78048780487805, |
|
"grad_norm": 0.01235198314980799, |
|
"learning_rate": 3.138155427073741e-12, |
|
"loss": 0.4071, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 19.795497185741088, |
|
"grad_norm": 0.012063710602907587, |
|
"learning_rate": 0.0, |
|
"loss": 0.4101, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 19.795497185741088, |
|
"eval_loss": 0.3939184844493866, |
|
"eval_runtime": 14.1078, |
|
"eval_samples_per_second": 31.685, |
|
"eval_steps_per_second": 1.985, |
|
"step": 1320 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1320, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 66, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1201839626256384.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|