{
  "best_metric": 10.745776176452637,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.043215211754537596,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000216076058772688,
      "grad_norm": 0.200728178024292,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 10.8463,
      "step": 1
    },
    {
      "epoch": 0.000216076058772688,
      "eval_loss": 10.84037971496582,
      "eval_runtime": 27.2248,
      "eval_samples_per_second": 286.32,
      "eval_steps_per_second": 143.178,
      "step": 1
    },
    {
      "epoch": 0.000432152117545376,
      "grad_norm": 0.23658832907676697,
      "learning_rate": 6.666666666666667e-06,
      "loss": 10.8472,
      "step": 2
    },
    {
      "epoch": 0.0006482281763180639,
      "grad_norm": 0.20040327310562134,
      "learning_rate": 1e-05,
      "loss": 10.8415,
      "step": 3
    },
    {
      "epoch": 0.000864304235090752,
      "grad_norm": 0.2336120903491974,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 10.828,
      "step": 4
    },
    {
      "epoch": 0.00108038029386344,
      "grad_norm": 0.20422425866127014,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 10.8437,
      "step": 5
    },
    {
      "epoch": 0.0012964563526361278,
      "grad_norm": 0.2417161911725998,
      "learning_rate": 2e-05,
      "loss": 10.8422,
      "step": 6
    },
    {
      "epoch": 0.001512532411408816,
      "grad_norm": 0.22463224828243256,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 10.8167,
      "step": 7
    },
    {
      "epoch": 0.001728608470181504,
      "grad_norm": 0.22320792078971863,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 10.8394,
      "step": 8
    },
    {
      "epoch": 0.0019446845289541918,
      "grad_norm": 0.22033357620239258,
      "learning_rate": 3e-05,
      "loss": 10.8351,
      "step": 9
    },
    {
      "epoch": 0.00216076058772688,
      "grad_norm": 0.20691785216331482,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 10.8433,
      "step": 10
    },
    {
      "epoch": 0.0023768366464995676,
      "grad_norm": 0.21809300780296326,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 10.8364,
      "step": 11
    },
    {
      "epoch": 0.0025929127052722557,
      "grad_norm": 0.2736707031726837,
      "learning_rate": 4e-05,
      "loss": 10.8364,
      "step": 12
    },
    {
      "epoch": 0.0028089887640449437,
      "grad_norm": 0.24762022495269775,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 10.8266,
      "step": 13
    },
    {
      "epoch": 0.003025064822817632,
      "grad_norm": 0.24276190996170044,
      "learning_rate": 4.666666666666667e-05,
      "loss": 10.8375,
      "step": 14
    },
    {
      "epoch": 0.00324114088159032,
      "grad_norm": 0.23069171607494354,
      "learning_rate": 5e-05,
      "loss": 10.8393,
      "step": 15
    },
    {
      "epoch": 0.003457216940363008,
      "grad_norm": 0.24690602719783783,
      "learning_rate": 5.333333333333333e-05,
      "loss": 10.8348,
      "step": 16
    },
    {
      "epoch": 0.003673292999135696,
      "grad_norm": 0.23763756453990936,
      "learning_rate": 5.666666666666667e-05,
      "loss": 10.8294,
      "step": 17
    },
    {
      "epoch": 0.0038893690579083835,
      "grad_norm": 0.23151656985282898,
      "learning_rate": 6e-05,
      "loss": 10.8329,
      "step": 18
    },
    {
      "epoch": 0.004105445116681072,
      "grad_norm": 0.23155605792999268,
      "learning_rate": 6.333333333333333e-05,
      "loss": 10.8299,
      "step": 19
    },
    {
      "epoch": 0.00432152117545376,
      "grad_norm": 0.2375253140926361,
      "learning_rate": 6.666666666666667e-05,
      "loss": 10.8228,
      "step": 20
    },
    {
      "epoch": 0.004537597234226448,
      "grad_norm": 0.31918394565582275,
      "learning_rate": 7e-05,
      "loss": 10.8364,
      "step": 21
    },
    {
      "epoch": 0.004753673292999135,
      "grad_norm": 0.2590275704860687,
      "learning_rate": 7.333333333333333e-05,
      "loss": 10.8279,
      "step": 22
    },
    {
      "epoch": 0.004969749351771823,
      "grad_norm": 0.3064877986907959,
      "learning_rate": 7.666666666666667e-05,
      "loss": 10.8388,
      "step": 23
    },
    {
      "epoch": 0.005185825410544511,
      "grad_norm": 0.3065643310546875,
      "learning_rate": 8e-05,
      "loss": 10.8247,
      "step": 24
    },
    {
      "epoch": 0.0054019014693171994,
      "grad_norm": 0.2851259708404541,
      "learning_rate": 8.333333333333334e-05,
      "loss": 10.8244,
      "step": 25
    },
    {
      "epoch": 0.0056179775280898875,
      "grad_norm": 0.30654624104499817,
      "learning_rate": 8.666666666666667e-05,
      "loss": 10.8216,
      "step": 26
    },
    {
      "epoch": 0.0058340535868625755,
      "grad_norm": 0.2787514328956604,
      "learning_rate": 9e-05,
      "loss": 10.8265,
      "step": 27
    },
    {
      "epoch": 0.006050129645635264,
      "grad_norm": 0.314116895198822,
      "learning_rate": 9.333333333333334e-05,
      "loss": 10.8232,
      "step": 28
    },
    {
      "epoch": 0.006266205704407952,
      "grad_norm": 0.28672200441360474,
      "learning_rate": 9.666666666666667e-05,
      "loss": 10.8145,
      "step": 29
    },
    {
      "epoch": 0.00648228176318064,
      "grad_norm": 0.27016451954841614,
      "learning_rate": 0.0001,
      "loss": 10.8084,
      "step": 30
    },
    {
      "epoch": 0.006698357821953328,
      "grad_norm": 0.2747277617454529,
      "learning_rate": 9.999146252290264e-05,
      "loss": 10.81,
      "step": 31
    },
    {
      "epoch": 0.006914433880726016,
      "grad_norm": 0.29148223996162415,
      "learning_rate": 9.996585300715116e-05,
      "loss": 10.813,
      "step": 32
    },
    {
      "epoch": 0.007130509939498704,
      "grad_norm": 0.3096551299095154,
      "learning_rate": 9.99231801983717e-05,
      "loss": 10.802,
      "step": 33
    },
    {
      "epoch": 0.007346585998271392,
      "grad_norm": 0.3342970609664917,
      "learning_rate": 9.986345866928941e-05,
      "loss": 10.8117,
      "step": 34
    },
    {
      "epoch": 0.00756266205704408,
      "grad_norm": 0.32004502415657043,
      "learning_rate": 9.978670881475172e-05,
      "loss": 10.7982,
      "step": 35
    },
    {
      "epoch": 0.007778738115816767,
      "grad_norm": 0.3316425383090973,
      "learning_rate": 9.96929568447637e-05,
      "loss": 10.8037,
      "step": 36
    },
    {
      "epoch": 0.007994814174589455,
      "grad_norm": 0.32472503185272217,
      "learning_rate": 9.958223477553714e-05,
      "loss": 10.7961,
      "step": 37
    },
    {
      "epoch": 0.008210890233362144,
      "grad_norm": 0.3180193603038788,
      "learning_rate": 9.94545804185573e-05,
      "loss": 10.7925,
      "step": 38
    },
    {
      "epoch": 0.008426966292134831,
      "grad_norm": 0.34710487723350525,
      "learning_rate": 9.931003736767013e-05,
      "loss": 10.7994,
      "step": 39
    },
    {
      "epoch": 0.00864304235090752,
      "grad_norm": 0.34599560499191284,
      "learning_rate": 9.91486549841951e-05,
      "loss": 10.7997,
      "step": 40
    },
    {
      "epoch": 0.008859118409680207,
      "grad_norm": 0.4090355634689331,
      "learning_rate": 9.89704883800683e-05,
      "loss": 10.7918,
      "step": 41
    },
    {
      "epoch": 0.009075194468452896,
      "grad_norm": 0.38032764196395874,
      "learning_rate": 9.877559839902184e-05,
      "loss": 10.7877,
      "step": 42
    },
    {
      "epoch": 0.009291270527225583,
      "grad_norm": 0.3663756847381592,
      "learning_rate": 9.85640515958057e-05,
      "loss": 10.7845,
      "step": 43
    },
    {
      "epoch": 0.00950734658599827,
      "grad_norm": 0.38913989067077637,
      "learning_rate": 9.833592021345937e-05,
      "loss": 10.7775,
      "step": 44
    },
    {
      "epoch": 0.00972342264477096,
      "grad_norm": 0.4066934585571289,
      "learning_rate": 9.809128215864097e-05,
      "loss": 10.7805,
      "step": 45
    },
    {
      "epoch": 0.009939498703543647,
      "grad_norm": 0.40702107548713684,
      "learning_rate": 9.783022097502204e-05,
      "loss": 10.766,
      "step": 46
    },
    {
      "epoch": 0.010155574762316336,
      "grad_norm": 0.5095567107200623,
      "learning_rate": 9.755282581475769e-05,
      "loss": 10.7686,
      "step": 47
    },
    {
      "epoch": 0.010371650821089023,
      "grad_norm": 0.5398594737052917,
      "learning_rate": 9.725919140804099e-05,
      "loss": 10.7674,
      "step": 48
    },
    {
      "epoch": 0.010587726879861712,
      "grad_norm": 0.5338243246078491,
      "learning_rate": 9.694941803075283e-05,
      "loss": 10.7631,
      "step": 49
    },
    {
      "epoch": 0.010803802938634399,
      "grad_norm": 0.7691417932510376,
      "learning_rate": 9.662361147021779e-05,
      "loss": 10.7645,
      "step": 50
    },
    {
      "epoch": 0.010803802938634399,
      "eval_loss": 10.766061782836914,
      "eval_runtime": 27.2467,
      "eval_samples_per_second": 286.089,
      "eval_steps_per_second": 143.063,
      "step": 50
    },
    {
      "epoch": 0.011019878997407088,
      "grad_norm": 0.31955963373184204,
      "learning_rate": 9.628188298907782e-05,
      "loss": 10.7833,
      "step": 51
    },
    {
      "epoch": 0.011235955056179775,
      "grad_norm": 0.2600715458393097,
      "learning_rate": 9.592434928729616e-05,
      "loss": 10.7863,
      "step": 52
    },
    {
      "epoch": 0.011452031114952464,
      "grad_norm": 0.25826898217201233,
      "learning_rate": 9.555113246230442e-05,
      "loss": 10.7734,
      "step": 53
    },
    {
      "epoch": 0.011668107173725151,
      "grad_norm": 0.33920153975486755,
      "learning_rate": 9.516235996730645e-05,
      "loss": 10.7821,
      "step": 54
    },
    {
      "epoch": 0.01188418323249784,
      "grad_norm": 0.2760711908340454,
      "learning_rate": 9.475816456775313e-05,
      "loss": 10.7771,
      "step": 55
    },
    {
      "epoch": 0.012100259291270527,
      "grad_norm": 0.2651015818119049,
      "learning_rate": 9.43386842960031e-05,
      "loss": 10.7812,
      "step": 56
    },
    {
      "epoch": 0.012316335350043216,
      "grad_norm": 0.3452089726924896,
      "learning_rate": 9.39040624041849e-05,
      "loss": 10.7577,
      "step": 57
    },
    {
      "epoch": 0.012532411408815903,
      "grad_norm": 0.24794746935367584,
      "learning_rate": 9.345444731527642e-05,
      "loss": 10.7618,
      "step": 58
    },
    {
      "epoch": 0.01274848746758859,
      "grad_norm": 0.26710182428359985,
      "learning_rate": 9.298999257241863e-05,
      "loss": 10.7622,
      "step": 59
    },
    {
      "epoch": 0.01296456352636128,
      "grad_norm": 0.2514508068561554,
      "learning_rate": 9.251085678648072e-05,
      "loss": 10.7781,
      "step": 60
    },
    {
      "epoch": 0.013180639585133967,
      "grad_norm": 0.2650161385536194,
      "learning_rate": 9.201720358189464e-05,
      "loss": 10.7779,
      "step": 61
    },
    {
      "epoch": 0.013396715643906655,
      "grad_norm": 0.2767448127269745,
      "learning_rate": 9.150920154077754e-05,
      "loss": 10.7757,
      "step": 62
    },
    {
      "epoch": 0.013612791702679343,
      "grad_norm": 0.29482796788215637,
      "learning_rate": 9.098702414536107e-05,
      "loss": 10.771,
      "step": 63
    },
    {
      "epoch": 0.013828867761452032,
      "grad_norm": 0.274749755859375,
      "learning_rate": 9.045084971874738e-05,
      "loss": 10.7587,
      "step": 64
    },
    {
      "epoch": 0.014044943820224719,
      "grad_norm": 0.21438376605510712,
      "learning_rate": 8.9900861364012e-05,
      "loss": 10.773,
      "step": 65
    },
    {
      "epoch": 0.014261019878997408,
      "grad_norm": 0.2738189995288849,
      "learning_rate": 8.933724690167417e-05,
      "loss": 10.7707,
      "step": 66
    },
    {
      "epoch": 0.014477095937770095,
      "grad_norm": 0.2180059552192688,
      "learning_rate": 8.876019880555649e-05,
      "loss": 10.7649,
      "step": 67
    },
    {
      "epoch": 0.014693171996542784,
      "grad_norm": 0.2755647301673889,
      "learning_rate": 8.816991413705516e-05,
      "loss": 10.7556,
      "step": 68
    },
    {
      "epoch": 0.014909248055315471,
      "grad_norm": 0.2628243863582611,
      "learning_rate": 8.756659447784368e-05,
      "loss": 10.7485,
      "step": 69
    },
    {
      "epoch": 0.01512532411408816,
      "grad_norm": 0.27653780579566956,
      "learning_rate": 8.695044586103296e-05,
      "loss": 10.7742,
      "step": 70
    },
    {
      "epoch": 0.015341400172860847,
      "grad_norm": 0.292253315448761,
      "learning_rate": 8.632167870081121e-05,
      "loss": 10.7542,
      "step": 71
    },
    {
      "epoch": 0.015557476231633534,
      "grad_norm": 0.21869610249996185,
      "learning_rate": 8.568050772058762e-05,
      "loss": 10.7724,
      "step": 72
    },
    {
      "epoch": 0.015773552290406223,
      "grad_norm": 0.20132863521575928,
      "learning_rate": 8.502715187966455e-05,
      "loss": 10.7621,
      "step": 73
    },
    {
      "epoch": 0.01598962834917891,
      "grad_norm": 0.2435745745897293,
      "learning_rate": 8.436183429846313e-05,
      "loss": 10.7661,
      "step": 74
    },
    {
      "epoch": 0.016205704407951597,
      "grad_norm": 0.23909196257591248,
      "learning_rate": 8.368478218232787e-05,
      "loss": 10.7694,
      "step": 75
    },
    {
      "epoch": 0.016421780466724288,
      "grad_norm": 0.22482334077358246,
      "learning_rate": 8.299622674393614e-05,
      "loss": 10.7642,
      "step": 76
    },
    {
      "epoch": 0.016637856525496975,
      "grad_norm": 0.2314310371875763,
      "learning_rate": 8.229640312433937e-05,
      "loss": 10.7584,
      "step": 77
    },
    {
      "epoch": 0.016853932584269662,
      "grad_norm": 0.23524075746536255,
      "learning_rate": 8.158555031266254e-05,
      "loss": 10.7632,
      "step": 78
    },
    {
      "epoch": 0.01707000864304235,
      "grad_norm": 0.20763476192951202,
      "learning_rate": 8.086391106448965e-05,
      "loss": 10.7533,
      "step": 79
    },
    {
      "epoch": 0.01728608470181504,
      "grad_norm": 0.1953965127468109,
      "learning_rate": 8.013173181896283e-05,
      "loss": 10.7633,
      "step": 80
    },
    {
      "epoch": 0.017502160760587727,
      "grad_norm": 0.1644563525915146,
      "learning_rate": 7.938926261462366e-05,
      "loss": 10.764,
      "step": 81
    },
    {
      "epoch": 0.017718236819360415,
      "grad_norm": 0.2148687094449997,
      "learning_rate": 7.863675700402526e-05,
      "loss": 10.7566,
      "step": 82
    },
    {
      "epoch": 0.017934312878133102,
      "grad_norm": 0.2654571831226349,
      "learning_rate": 7.787447196714427e-05,
      "loss": 10.7444,
      "step": 83
    },
    {
      "epoch": 0.018150388936905792,
      "grad_norm": 0.231702521443367,
      "learning_rate": 7.710266782362247e-05,
      "loss": 10.753,
      "step": 84
    },
    {
      "epoch": 0.01836646499567848,
      "grad_norm": 0.26304373145103455,
      "learning_rate": 7.63216081438678e-05,
      "loss": 10.7559,
      "step": 85
    },
    {
      "epoch": 0.018582541054451167,
      "grad_norm": 0.19173169136047363,
      "learning_rate": 7.553155965904535e-05,
      "loss": 10.7675,
      "step": 86
    },
    {
      "epoch": 0.018798617113223854,
      "grad_norm": 0.2556131184101105,
      "learning_rate": 7.473279216998895e-05,
      "loss": 10.7589,
      "step": 87
    },
    {
      "epoch": 0.01901469317199654,
      "grad_norm": 0.22893981635570526,
      "learning_rate": 7.392557845506432e-05,
      "loss": 10.7599,
      "step": 88
    },
    {
      "epoch": 0.019230769230769232,
      "grad_norm": 0.21988166868686676,
      "learning_rate": 7.311019417701566e-05,
      "loss": 10.7564,
      "step": 89
    },
    {
      "epoch": 0.01944684528954192,
      "grad_norm": 0.24298734962940216,
      "learning_rate": 7.228691778882693e-05,
      "loss": 10.7532,
      "step": 90
    },
    {
      "epoch": 0.019662921348314606,
      "grad_norm": 0.21422868967056274,
      "learning_rate": 7.145603043863045e-05,
      "loss": 10.7588,
      "step": 91
    },
    {
      "epoch": 0.019878997407087293,
      "grad_norm": 0.18029530346393585,
      "learning_rate": 7.061781587369519e-05,
      "loss": 10.7508,
      "step": 92
    },
    {
      "epoch": 0.020095073465859984,
      "grad_norm": 0.29323095083236694,
      "learning_rate": 6.977256034352712e-05,
      "loss": 10.7534,
      "step": 93
    },
    {
      "epoch": 0.02031114952463267,
      "grad_norm": 0.24313682317733765,
      "learning_rate": 6.892055250211552e-05,
      "loss": 10.753,
      "step": 94
    },
    {
      "epoch": 0.02052722558340536,
      "grad_norm": 0.30919376015663147,
      "learning_rate": 6.806208330935766e-05,
      "loss": 10.7502,
      "step": 95
    },
    {
      "epoch": 0.020743301642178046,
      "grad_norm": 0.3091332018375397,
      "learning_rate": 6.719744593169641e-05,
      "loss": 10.7492,
      "step": 96
    },
    {
      "epoch": 0.020959377700950736,
      "grad_norm": 0.31597936153411865,
      "learning_rate": 6.632693564200416e-05,
      "loss": 10.7353,
      "step": 97
    },
    {
      "epoch": 0.021175453759723423,
      "grad_norm": 0.41740843653678894,
      "learning_rate": 6.545084971874738e-05,
      "loss": 10.753,
      "step": 98
    },
    {
      "epoch": 0.02139152981849611,
      "grad_norm": 0.3725014328956604,
      "learning_rate": 6.456948734446624e-05,
      "loss": 10.72,
      "step": 99
    },
    {
      "epoch": 0.021607605877268798,
      "grad_norm": 0.6240119934082031,
      "learning_rate": 6.368314950360415e-05,
      "loss": 10.7379,
      "step": 100
    },
    {
      "epoch": 0.021607605877268798,
      "eval_loss": 10.751439094543457,
      "eval_runtime": 27.1534,
      "eval_samples_per_second": 287.073,
      "eval_steps_per_second": 143.555,
      "step": 100
    },
    {
      "epoch": 0.021823681936041485,
      "grad_norm": 0.21080626547336578,
      "learning_rate": 6.279213887972179e-05,
      "loss": 10.7706,
      "step": 101
    },
    {
      "epoch": 0.022039757994814176,
      "grad_norm": 0.20775455236434937,
      "learning_rate": 6.189675975213094e-05,
      "loss": 10.7617,
      "step": 102
    },
    {
      "epoch": 0.022255834053586863,
      "grad_norm": 0.1883164793252945,
      "learning_rate": 6.099731789198344e-05,
      "loss": 10.7622,
      "step": 103
    },
    {
      "epoch": 0.02247191011235955,
      "grad_norm": 0.22989457845687866,
      "learning_rate": 6.009412045785051e-05,
      "loss": 10.767,
      "step": 104
    },
    {
      "epoch": 0.022687986171132237,
      "grad_norm": 0.21413500607013702,
      "learning_rate": 5.918747589082853e-05,
      "loss": 10.7754,
      "step": 105
    },
    {
      "epoch": 0.022904062229904928,
      "grad_norm": 0.20524542033672333,
      "learning_rate": 5.82776938092065e-05,
      "loss": 10.7728,
      "step": 106
    },
    {
      "epoch": 0.023120138288677615,
      "grad_norm": 0.276962012052536,
      "learning_rate": 5.736508490273188e-05,
      "loss": 10.7504,
      "step": 107
    },
    {
      "epoch": 0.023336214347450302,
      "grad_norm": 0.2452148199081421,
      "learning_rate": 5.644996082651017e-05,
      "loss": 10.7616,
      "step": 108
    },
    {
      "epoch": 0.02355229040622299,
      "grad_norm": 0.25284409523010254,
      "learning_rate": 5.553263409457504e-05,
      "loss": 10.7604,
      "step": 109
    },
    {
      "epoch": 0.02376836646499568,
      "grad_norm": 0.247440367937088,
      "learning_rate": 5.4613417973165106e-05,
      "loss": 10.7641,
      "step": 110
    },
    {
      "epoch": 0.023984442523768367,
      "grad_norm": 0.18067599833011627,
      "learning_rate": 5.3692626373743706e-05,
      "loss": 10.7746,
      "step": 111
    },
    {
      "epoch": 0.024200518582541054,
      "grad_norm": 0.19137042760849,
      "learning_rate": 5.27705737457985e-05,
      "loss": 10.7714,
      "step": 112
    },
    {
      "epoch": 0.02441659464131374,
      "grad_norm": 0.2149994820356369,
      "learning_rate": 5.184757496945726e-05,
      "loss": 10.7733,
      "step": 113
    },
    {
      "epoch": 0.024632670700086432,
      "grad_norm": 0.20906798541545868,
      "learning_rate": 5.092394524795649e-05,
      "loss": 10.7726,
      "step": 114
    },
    {
      "epoch": 0.02484874675885912,
      "grad_norm": 0.24666734039783478,
      "learning_rate": 5e-05,
      "loss": 10.7536,
      "step": 115
    },
    {
      "epoch": 0.025064822817631807,
      "grad_norm": 0.23656940460205078,
      "learning_rate": 4.907605475204352e-05,
      "loss": 10.7602,
      "step": 116
    },
    {
      "epoch": 0.025280898876404494,
      "grad_norm": 0.25157082080841064,
      "learning_rate": 4.8152425030542766e-05,
      "loss": 10.7619,
      "step": 117
    },
    {
      "epoch": 0.02549697493517718,
      "grad_norm": 0.2460283637046814,
      "learning_rate": 4.72294262542015e-05,
      "loss": 10.7613,
      "step": 118
    },
    {
      "epoch": 0.02571305099394987,
      "grad_norm": 0.1980646699666977,
      "learning_rate": 4.6307373626256306e-05,
      "loss": 10.7639,
      "step": 119
    },
    {
      "epoch": 0.02592912705272256,
      "grad_norm": 0.20079004764556885,
      "learning_rate": 4.5386582026834906e-05,
      "loss": 10.7587,
      "step": 120
    },
    {
      "epoch": 0.026145203111495246,
      "grad_norm": 0.22045651078224182,
      "learning_rate": 4.446736590542497e-05,
      "loss": 10.7678,
      "step": 121
    },
    {
      "epoch": 0.026361279170267933,
      "grad_norm": 0.22736740112304688,
      "learning_rate": 4.3550039173489845e-05,
      "loss": 10.7542,
      "step": 122
    },
    {
      "epoch": 0.026577355229040624,
      "grad_norm": 0.17214606702327728,
      "learning_rate": 4.2634915097268115e-05,
      "loss": 10.7608,
      "step": 123
    },
    {
      "epoch": 0.02679343128781331,
      "grad_norm": 0.18513911962509155,
      "learning_rate": 4.1722306190793495e-05,
      "loss": 10.7495,
      "step": 124
    },
    {
      "epoch": 0.027009507346585998,
      "grad_norm": 0.2161649465560913,
      "learning_rate": 4.0812524109171476e-05,
      "loss": 10.7578,
      "step": 125
    },
    {
      "epoch": 0.027225583405358685,
      "grad_norm": 0.2477547526359558,
      "learning_rate": 3.99058795421495e-05,
      "loss": 10.7566,
      "step": 126
    },
    {
      "epoch": 0.027441659464131376,
      "grad_norm": 0.19045643508434296,
      "learning_rate": 3.9002682108016585e-05,
      "loss": 10.753,
      "step": 127
    },
    {
      "epoch": 0.027657735522904063,
      "grad_norm": 0.2026197761297226,
      "learning_rate": 3.8103240247869075e-05,
      "loss": 10.7643,
      "step": 128
    },
    {
      "epoch": 0.02787381158167675,
      "grad_norm": 0.23443067073822021,
      "learning_rate": 3.720786112027822e-05,
      "loss": 10.7545,
      "step": 129
    },
    {
      "epoch": 0.028089887640449437,
      "grad_norm": 0.21837399899959564,
      "learning_rate": 3.631685049639586e-05,
      "loss": 10.7466,
      "step": 130
    },
    {
      "epoch": 0.028305963699222125,
      "grad_norm": 0.228903129696846,
      "learning_rate": 3.543051265553377e-05,
      "loss": 10.7553,
      "step": 131
    },
    {
      "epoch": 0.028522039757994815,
      "grad_norm": 0.22999686002731323,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 10.7365,
      "step": 132
    },
    {
      "epoch": 0.028738115816767502,
      "grad_norm": 0.2587832808494568,
      "learning_rate": 3.367306435799584e-05,
      "loss": 10.7495,
      "step": 133
    },
    {
      "epoch": 0.02895419187554019,
      "grad_norm": 0.2272672802209854,
      "learning_rate": 3.2802554068303596e-05,
      "loss": 10.7485,
      "step": 134
    },
    {
      "epoch": 0.029170267934312877,
      "grad_norm": 0.1977323740720749,
      "learning_rate": 3.1937916690642356e-05,
      "loss": 10.7478,
      "step": 135
    },
    {
      "epoch": 0.029386343993085567,
      "grad_norm": 0.20933398604393005,
      "learning_rate": 3.107944749788449e-05,
      "loss": 10.759,
      "step": 136
    },
    {
      "epoch": 0.029602420051858255,
      "grad_norm": 0.2519747316837311,
      "learning_rate": 3.0227439656472877e-05,
      "loss": 10.7503,
      "step": 137
    },
    {
      "epoch": 0.029818496110630942,
      "grad_norm": 0.2388688176870346,
      "learning_rate": 2.9382184126304834e-05,
      "loss": 10.7351,
      "step": 138
    },
    {
      "epoch": 0.03003457216940363,
      "grad_norm": 0.29849356412887573,
      "learning_rate": 2.8543969561369556e-05,
      "loss": 10.7364,
      "step": 139
    },
    {
      "epoch": 0.03025064822817632,
      "grad_norm": 0.2356027066707611,
      "learning_rate": 2.771308221117309e-05,
      "loss": 10.7399,
      "step": 140
    },
    {
      "epoch": 0.030466724286949007,
      "grad_norm": 0.25474485754966736,
      "learning_rate": 2.688980582298435e-05,
      "loss": 10.74,
      "step": 141
    },
    {
      "epoch": 0.030682800345721694,
      "grad_norm": 0.23734347522258759,
      "learning_rate": 2.607442154493568e-05,
      "loss": 10.7393,
      "step": 142
    },
    {
      "epoch": 0.03089887640449438,
      "grad_norm": 0.28821271657943726,
      "learning_rate": 2.5267207830011068e-05,
      "loss": 10.7336,
      "step": 143
    },
    {
      "epoch": 0.03111495246326707,
      "grad_norm": 0.2773192822933197,
      "learning_rate": 2.446844034095466e-05,
      "loss": 10.7414,
      "step": 144
    },
    {
      "epoch": 0.031331028522039756,
      "grad_norm": 0.3964485824108124,
      "learning_rate": 2.3678391856132204e-05,
      "loss": 10.742,
      "step": 145
    },
    {
      "epoch": 0.031547104580812446,
      "grad_norm": 0.33878329396247864,
      "learning_rate": 2.2897332176377528e-05,
      "loss": 10.7489,
      "step": 146
    },
    {
      "epoch": 0.03176318063958514,
      "grad_norm": 0.3486817479133606,
      "learning_rate": 2.2125528032855724e-05,
      "loss": 10.7379,
      "step": 147
    },
    {
      "epoch": 0.03197925669835782,
      "grad_norm": 0.38555988669395447,
      "learning_rate": 2.136324299597474e-05,
      "loss": 10.7374,
      "step": 148
    },
    {
      "epoch": 0.03219533275713051,
      "grad_norm": 0.521436333656311,
      "learning_rate": 2.061073738537635e-05,
      "loss": 10.7291,
      "step": 149
    },
    {
      "epoch": 0.032411408815903195,
      "grad_norm": 0.8618770241737366,
      "learning_rate": 1.9868268181037185e-05,
      "loss": 10.7151,
      "step": 150
    },
    {
      "epoch": 0.032411408815903195,
      "eval_loss": 10.746877670288086,
      "eval_runtime": 27.2327,
      "eval_samples_per_second": 286.237,
      "eval_steps_per_second": 143.137,
      "step": 150
    },
    {
      "epoch": 0.032627484874675886,
      "grad_norm": 0.2572970688343048,
      "learning_rate": 1.9136088935510362e-05,
      "loss": 10.7659,
      "step": 151
    },
    {
      "epoch": 0.032843560933448576,
      "grad_norm": 0.23141218721866608,
      "learning_rate": 1.8414449687337464e-05,
      "loss": 10.7649,
      "step": 152
    },
    {
      "epoch": 0.03305963699222126,
      "grad_norm": 0.2419702559709549,
      "learning_rate": 1.7703596875660645e-05,
      "loss": 10.7615,
      "step": 153
    },
    {
      "epoch": 0.03327571305099395,
      "grad_norm": 0.19218285381793976,
      "learning_rate": 1.700377325606388e-05,
      "loss": 10.7609,
      "step": 154
    },
    {
      "epoch": 0.03349178910976664,
      "grad_norm": 0.2367192953824997,
      "learning_rate": 1.631521781767214e-05,
      "loss": 10.7647,
      "step": 155
    },
    {
      "epoch": 0.033707865168539325,
      "grad_norm": 0.21695514023303986,
      "learning_rate": 1.5638165701536868e-05,
      "loss": 10.765,
      "step": 156
    },
    {
      "epoch": 0.033923941227312016,
      "grad_norm": 0.2253107875585556,
      "learning_rate": 1.4972848120335453e-05,
      "loss": 10.7699,
      "step": 157
    },
    {
      "epoch": 0.0341400172860847,
      "grad_norm": 0.19992785155773163,
      "learning_rate": 1.4319492279412388e-05,
      "loss": 10.7592,
      "step": 158
    },
    {
      "epoch": 0.03435609334485739,
      "grad_norm": 0.21194936335086823,
      "learning_rate": 1.3678321299188801e-05,
      "loss": 10.7629,
      "step": 159
    },
    {
      "epoch": 0.03457216940363008,
      "grad_norm": 0.26007580757141113,
      "learning_rate": 1.3049554138967051e-05,
      "loss": 10.7648,
      "step": 160
    },
    {
      "epoch": 0.034788245462402764,
      "grad_norm": 0.22751381993293762,
      "learning_rate": 1.2433405522156332e-05,
      "loss": 10.7603,
      "step": 161
    },
    {
      "epoch": 0.035004321521175455,
      "grad_norm": 0.2545979619026184,
      "learning_rate": 1.183008586294485e-05,
      "loss": 10.7617,
      "step": 162
    },
    {
      "epoch": 0.03522039757994814,
      "grad_norm": 0.20070402324199677,
      "learning_rate": 1.1239801194443506e-05,
      "loss": 10.7547,
      "step": 163
    },
    {
      "epoch": 0.03543647363872083,
      "grad_norm": 0.20730213820934296,
      "learning_rate": 1.066275309832584e-05,
      "loss": 10.761,
      "step": 164
    },
    {
      "epoch": 0.03565254969749352,
      "grad_norm": 0.2222391963005066,
      "learning_rate": 1.0099138635988026e-05,
      "loss": 10.7707,
      "step": 165
    },
    {
      "epoch": 0.035868625756266204,
      "grad_norm": 0.21017976105213165,
      "learning_rate": 9.549150281252633e-06,
      "loss": 10.7582,
      "step": 166
    },
    {
      "epoch": 0.036084701815038894,
      "grad_norm": 0.21349570155143738,
      "learning_rate": 9.012975854638949e-06,
      "loss": 10.7534,
      "step": 167
    },
    {
      "epoch": 0.036300777873811585,
      "grad_norm": 0.19311493635177612,
      "learning_rate": 8.490798459222476e-06,
      "loss": 10.7536,
      "step": 168
    },
    {
      "epoch": 0.03651685393258427,
      "grad_norm": 0.18329201638698578,
      "learning_rate": 7.982796418105371e-06,
      "loss": 10.7595,
      "step": 169
    },
    {
      "epoch": 0.03673292999135696,
      "grad_norm": 0.24617359042167664,
      "learning_rate": 7.489143213519301e-06,
      "loss": 10.7562,
      "step": 170
    },
    {
      "epoch": 0.03694900605012964,
      "grad_norm": 0.24574460089206696,
      "learning_rate": 7.010007427581378e-06,
      "loss": 10.7496,
      "step": 171
    },
    {
      "epoch": 0.037165082108902334,
      "grad_norm": 0.2094903290271759,
      "learning_rate": 6.5455526847235825e-06,
      "loss": 10.7454,
      "step": 172
    },
    {
      "epoch": 0.037381158167675024,
      "grad_norm": 0.23481670022010803,
      "learning_rate": 6.0959375958151045e-06,
      "loss": 10.7503,
      "step": 173
    },
    {
      "epoch": 0.03759723422644771,
      "grad_norm": 0.20529882609844208,
      "learning_rate": 5.6613157039969055e-06,
      "loss": 10.7551,
      "step": 174
    },
    {
      "epoch": 0.0378133102852204,
      "grad_norm": 0.2087268829345703,
      "learning_rate": 5.241835432246889e-06,
      "loss": 10.7512,
      "step": 175
    },
    {
      "epoch": 0.03802938634399308,
      "grad_norm": 0.24301747977733612,
      "learning_rate": 4.837640032693558e-06,
      "loss": 10.7587,
      "step": 176
    },
    {
      "epoch": 0.03824546240276577,
      "grad_norm": 0.24005451798439026,
      "learning_rate": 4.448867537695578e-06,
      "loss": 10.7615,
      "step": 177
    },
    {
      "epoch": 0.038461538461538464,
      "grad_norm": 0.2205115705728531,
      "learning_rate": 4.075650712703849e-06,
      "loss": 10.7405,
      "step": 178
    },
    {
      "epoch": 0.03867761452031115,
      "grad_norm": 0.22657622396945953,
      "learning_rate": 3.71811701092219e-06,
      "loss": 10.7544,
      "step": 179
    },
    {
      "epoch": 0.03889369057908384,
      "grad_norm": 0.24251677095890045,
      "learning_rate": 3.376388529782215e-06,
      "loss": 10.7535,
      "step": 180
    },
    {
      "epoch": 0.03910976663785653,
      "grad_norm": 0.22308377921581268,
      "learning_rate": 3.0505819692471792e-06,
      "loss": 10.7536,
      "step": 181
    },
    {
      "epoch": 0.03932584269662921,
      "grad_norm": 0.2320922166109085,
      "learning_rate": 2.7408085919590264e-06,
      "loss": 10.7482,
      "step": 182
    },
    {
      "epoch": 0.0395419187554019,
      "grad_norm": 0.23464159667491913,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 10.7496,
      "step": 183
    },
    {
      "epoch": 0.03975799481417459,
      "grad_norm": 0.21937261521816254,
      "learning_rate": 2.1697790249779636e-06,
      "loss": 10.7502,
      "step": 184
    },
    {
      "epoch": 0.03997407087294728,
      "grad_norm": 0.23580560088157654,
      "learning_rate": 1.908717841359048e-06,
      "loss": 10.7532,
      "step": 185
    },
    {
      "epoch": 0.04019014693171997,
      "grad_norm": 0.2424219399690628,
      "learning_rate": 1.6640797865406288e-06,
      "loss": 10.745,
      "step": 186
    },
    {
      "epoch": 0.04040622299049265,
      "grad_norm": 0.2548362612724304,
      "learning_rate": 1.4359484041943038e-06,
      "loss": 10.7458,
      "step": 187
    },
    {
      "epoch": 0.04062229904926534,
      "grad_norm": 0.31250232458114624,
      "learning_rate": 1.2244016009781701e-06,
      "loss": 10.744,
      "step": 188
    },
    {
      "epoch": 0.040838375108038026,
      "grad_norm": 0.2745141088962555,
      "learning_rate": 1.0295116199317057e-06,
      "loss": 10.7386,
      "step": 189
    },
    {
      "epoch": 0.04105445116681072,
      "grad_norm": 0.2804180383682251,
      "learning_rate": 8.513450158049108e-07,
      "loss": 10.7314,
      "step": 190
    },
    {
      "epoch": 0.04127052722558341,
      "grad_norm": 0.2679109573364258,
      "learning_rate": 6.899626323298713e-07,
      "loss": 10.7439,
      "step": 191
    },
    {
      "epoch": 0.04148660328435609,
      "grad_norm": 0.22815419733524323,
      "learning_rate": 5.454195814427021e-07,
      "loss": 10.7384,
      "step": 192
    },
    {
      "epoch": 0.04170267934312878,
      "grad_norm": 0.2827376127243042,
      "learning_rate": 4.177652244628627e-07,
      "loss": 10.7385,
      "step": 193
    },
    {
      "epoch": 0.04191875540190147,
      "grad_norm": 0.3146594762802124,
      "learning_rate": 3.0704315523631953e-07,
      "loss": 10.7451,
      "step": 194
    },
    {
      "epoch": 0.042134831460674156,
      "grad_norm": 0.30587294697761536,
      "learning_rate": 2.1329118524827662e-07,
      "loss": 10.7321,
      "step": 195
    },
    {
      "epoch": 0.04235090751944685,
      "grad_norm": 0.26626572012901306,
      "learning_rate": 1.3654133071059893e-07,
      "loss": 10.7262,
      "step": 196
    },
    {
      "epoch": 0.04256698357821953,
      "grad_norm": 0.3111208975315094,
      "learning_rate": 7.681980162830282e-08,
      "loss": 10.7239,
      "step": 197
    },
    {
      "epoch": 0.04278305963699222,
      "grad_norm": 0.4671022891998291,
      "learning_rate": 3.4146992848854695e-08,
      "loss": 10.7375,
      "step": 198
    },
    {
      "epoch": 0.04299913569576491,
      "grad_norm": 0.520592987537384,
      "learning_rate": 8.537477097364522e-09,
      "loss": 10.7301,
      "step": 199
    },
    {
      "epoch": 0.043215211754537596,
      "grad_norm": 0.5407875776290894,
      "learning_rate": 0.0,
      "loss": 10.6871,
      "step": 200
    },
    {
      "epoch": 0.043215211754537596,
      "eval_loss": 10.745776176452637,
      "eval_runtime": 27.2162,
      "eval_samples_per_second": 286.41,
      "eval_steps_per_second": 143.223,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 139523405119488.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}