{
  "best_metric": 0.15960238873958588,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.11961722488038277,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011961722488038277,
      "grad_norm": 1.9494132995605469,
      "learning_rate": 1e-05,
      "loss": 0.9221,
      "step": 1
    },
    {
      "epoch": 0.0011961722488038277,
      "eval_loss": 1.7471424341201782,
      "eval_runtime": 35.3181,
      "eval_samples_per_second": 19.933,
      "eval_steps_per_second": 4.983,
      "step": 1
    },
    {
      "epoch": 0.0023923444976076554,
      "grad_norm": 2.3642637729644775,
      "learning_rate": 2e-05,
      "loss": 1.1185,
      "step": 2
    },
    {
      "epoch": 0.0035885167464114833,
      "grad_norm": 2.282869815826416,
      "learning_rate": 3e-05,
      "loss": 1.1527,
      "step": 3
    },
    {
      "epoch": 0.004784688995215311,
      "grad_norm": 2.2901337146759033,
      "learning_rate": 4e-05,
      "loss": 1.1557,
      "step": 4
    },
    {
      "epoch": 0.005980861244019139,
      "grad_norm": 1.7862334251403809,
      "learning_rate": 5e-05,
      "loss": 1.0143,
      "step": 5
    },
    {
      "epoch": 0.007177033492822967,
      "grad_norm": 1.8213731050491333,
      "learning_rate": 6e-05,
      "loss": 1.0051,
      "step": 6
    },
    {
      "epoch": 0.008373205741626795,
      "grad_norm": 1.5596104860305786,
      "learning_rate": 7e-05,
      "loss": 0.831,
      "step": 7
    },
    {
      "epoch": 0.009569377990430622,
      "grad_norm": 1.7667557001113892,
      "learning_rate": 8e-05,
      "loss": 0.8318,
      "step": 8
    },
    {
      "epoch": 0.01076555023923445,
      "grad_norm": 1.9082083702087402,
      "learning_rate": 9e-05,
      "loss": 0.7814,
      "step": 9
    },
    {
      "epoch": 0.011961722488038277,
      "grad_norm": 1.699601411819458,
      "learning_rate": 0.0001,
      "loss": 0.7049,
      "step": 10
    },
    {
      "epoch": 0.013157894736842105,
      "grad_norm": 1.7066322565078735,
      "learning_rate": 9.99695413509548e-05,
      "loss": 0.5715,
      "step": 11
    },
    {
      "epoch": 0.014354066985645933,
      "grad_norm": 1.726962924003601,
      "learning_rate": 9.987820251299122e-05,
      "loss": 0.464,
      "step": 12
    },
    {
      "epoch": 0.01555023923444976,
      "grad_norm": 1.3154484033584595,
      "learning_rate": 9.972609476841367e-05,
      "loss": 0.4189,
      "step": 13
    },
    {
      "epoch": 0.01674641148325359,
      "grad_norm": 1.44478440284729,
      "learning_rate": 9.951340343707852e-05,
      "loss": 0.4308,
      "step": 14
    },
    {
      "epoch": 0.017942583732057416,
      "grad_norm": 1.3897327184677124,
      "learning_rate": 9.924038765061042e-05,
      "loss": 0.3837,
      "step": 15
    },
    {
      "epoch": 0.019138755980861243,
      "grad_norm": 1.588050127029419,
      "learning_rate": 9.890738003669029e-05,
      "loss": 0.3804,
      "step": 16
    },
    {
      "epoch": 0.02033492822966507,
      "grad_norm": 1.2375253438949585,
      "learning_rate": 9.851478631379982e-05,
      "loss": 0.3513,
      "step": 17
    },
    {
      "epoch": 0.0215311004784689,
      "grad_norm": 1.5607556104660034,
      "learning_rate": 9.806308479691595e-05,
      "loss": 0.382,
      "step": 18
    },
    {
      "epoch": 0.022727272727272728,
      "grad_norm": 1.2236876487731934,
      "learning_rate": 9.755282581475769e-05,
      "loss": 0.3218,
      "step": 19
    },
    {
      "epoch": 0.023923444976076555,
      "grad_norm": 1.2549458742141724,
      "learning_rate": 9.698463103929542e-05,
      "loss": 0.3437,
      "step": 20
    },
    {
      "epoch": 0.025119617224880382,
      "grad_norm": 1.242065668106079,
      "learning_rate": 9.635919272833938e-05,
      "loss": 0.3025,
      "step": 21
    },
    {
      "epoch": 0.02631578947368421,
      "grad_norm": 1.5587633848190308,
      "learning_rate": 9.567727288213005e-05,
      "loss": 0.3169,
      "step": 22
    },
    {
      "epoch": 0.02751196172248804,
      "grad_norm": 1.2049616575241089,
      "learning_rate": 9.493970231495835e-05,
      "loss": 0.2571,
      "step": 23
    },
    {
      "epoch": 0.028708133971291867,
      "grad_norm": 1.2683987617492676,
      "learning_rate": 9.414737964294636e-05,
      "loss": 0.2468,
      "step": 24
    },
    {
      "epoch": 0.029904306220095694,
      "grad_norm": 1.271965503692627,
      "learning_rate": 9.330127018922194e-05,
      "loss": 0.2941,
      "step": 25
    },
    {
      "epoch": 0.03110047846889952,
      "grad_norm": 1.4347857236862183,
      "learning_rate": 9.24024048078213e-05,
      "loss": 0.2639,
      "step": 26
    },
    {
      "epoch": 0.03229665071770335,
      "grad_norm": 1.281456708908081,
      "learning_rate": 9.145187862775209e-05,
      "loss": 0.2307,
      "step": 27
    },
    {
      "epoch": 0.03349282296650718,
      "grad_norm": 1.2285360097885132,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.2135,
      "step": 28
    },
    {
      "epoch": 0.034688995215311005,
      "grad_norm": 1.1313438415527344,
      "learning_rate": 8.940053768033609e-05,
      "loss": 0.2001,
      "step": 29
    },
    {
      "epoch": 0.03588516746411483,
      "grad_norm": 1.2885600328445435,
      "learning_rate": 8.83022221559489e-05,
      "loss": 0.2083,
      "step": 30
    },
    {
      "epoch": 0.03708133971291866,
      "grad_norm": 1.4596316814422607,
      "learning_rate": 8.715724127386972e-05,
      "loss": 0.2443,
      "step": 31
    },
    {
      "epoch": 0.03827751196172249,
      "grad_norm": 0.982135534286499,
      "learning_rate": 8.596699001693255e-05,
      "loss": 0.1612,
      "step": 32
    },
    {
      "epoch": 0.039473684210526314,
      "grad_norm": 1.1624139547348022,
      "learning_rate": 8.473291852294987e-05,
      "loss": 0.1982,
      "step": 33
    },
    {
      "epoch": 0.04066985645933014,
      "grad_norm": 1.4763832092285156,
      "learning_rate": 8.345653031794292e-05,
      "loss": 0.1981,
      "step": 34
    },
    {
      "epoch": 0.041866028708133975,
      "grad_norm": 1.56986403465271,
      "learning_rate": 8.213938048432697e-05,
      "loss": 0.1967,
      "step": 35
    },
    {
      "epoch": 0.0430622009569378,
      "grad_norm": 1.4756582975387573,
      "learning_rate": 8.07830737662829e-05,
      "loss": 0.2175,
      "step": 36
    },
    {
      "epoch": 0.04425837320574163,
      "grad_norm": 1.4440913200378418,
      "learning_rate": 7.938926261462366e-05,
      "loss": 0.2053,
      "step": 37
    },
    {
      "epoch": 0.045454545454545456,
      "grad_norm": 1.2837365865707397,
      "learning_rate": 7.795964517353735e-05,
      "loss": 0.2031,
      "step": 38
    },
    {
      "epoch": 0.04665071770334928,
      "grad_norm": 1.421876311302185,
      "learning_rate": 7.649596321166024e-05,
      "loss": 0.2062,
      "step": 39
    },
    {
      "epoch": 0.04784688995215311,
      "grad_norm": 1.4365227222442627,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.1868,
      "step": 40
    },
    {
      "epoch": 0.04904306220095694,
      "grad_norm": 1.5663189888000488,
      "learning_rate": 7.347357813929454e-05,
      "loss": 0.2521,
      "step": 41
    },
    {
      "epoch": 0.050239234449760764,
      "grad_norm": 1.4127620458602905,
      "learning_rate": 7.191855733945387e-05,
      "loss": 0.1688,
      "step": 42
    },
    {
      "epoch": 0.05143540669856459,
      "grad_norm": 1.5866608619689941,
      "learning_rate": 7.033683215379002e-05,
      "loss": 0.224,
      "step": 43
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 2.2985501289367676,
      "learning_rate": 6.873032967079561e-05,
      "loss": 0.3267,
      "step": 44
    },
    {
      "epoch": 0.05382775119617225,
      "grad_norm": 2.202700138092041,
      "learning_rate": 6.710100716628344e-05,
      "loss": 0.2247,
      "step": 45
    },
    {
      "epoch": 0.05502392344497608,
      "grad_norm": 1.6158735752105713,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.1816,
      "step": 46
    },
    {
      "epoch": 0.056220095693779906,
      "grad_norm": 2.0496480464935303,
      "learning_rate": 6.378186779084995e-05,
      "loss": 0.141,
      "step": 47
    },
    {
      "epoch": 0.05741626794258373,
      "grad_norm": 2.518812894821167,
      "learning_rate": 6.209609477998338e-05,
      "loss": 0.1988,
      "step": 48
    },
    {
      "epoch": 0.05861244019138756,
      "grad_norm": 3.8230884075164795,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 0.3154,
      "step": 49
    },
    {
      "epoch": 0.05980861244019139,
      "grad_norm": 3.400611400604248,
      "learning_rate": 5.868240888334653e-05,
      "loss": 0.2167,
      "step": 50
    },
    {
      "epoch": 0.05980861244019139,
      "eval_loss": 0.19923071563243866,
      "eval_runtime": 35.3983,
      "eval_samples_per_second": 19.888,
      "eval_steps_per_second": 4.972,
      "step": 50
    },
    {
      "epoch": 0.061004784688995214,
      "grad_norm": 0.7308669090270996,
      "learning_rate": 5.695865504800327e-05,
      "loss": 0.1903,
      "step": 51
    },
    {
      "epoch": 0.06220095693779904,
      "grad_norm": 0.8045475482940674,
      "learning_rate": 5.522642316338268e-05,
      "loss": 0.2067,
      "step": 52
    },
    {
      "epoch": 0.06339712918660287,
      "grad_norm": 0.8171185851097107,
      "learning_rate": 5.348782368720626e-05,
      "loss": 0.2032,
      "step": 53
    },
    {
      "epoch": 0.0645933014354067,
      "grad_norm": 0.6744611859321594,
      "learning_rate": 5.174497483512506e-05,
      "loss": 0.1581,
      "step": 54
    },
    {
      "epoch": 0.06578947368421052,
      "grad_norm": 0.7448108792304993,
      "learning_rate": 5e-05,
      "loss": 0.1815,
      "step": 55
    },
    {
      "epoch": 0.06698564593301436,
      "grad_norm": 0.7760705351829529,
      "learning_rate": 4.825502516487497e-05,
      "loss": 0.1738,
      "step": 56
    },
    {
      "epoch": 0.06818181818181818,
      "grad_norm": 0.8528043031692505,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 0.1841,
      "step": 57
    },
    {
      "epoch": 0.06937799043062201,
      "grad_norm": 0.905849277973175,
      "learning_rate": 4.477357683661734e-05,
      "loss": 0.1738,
      "step": 58
    },
    {
      "epoch": 0.07057416267942583,
      "grad_norm": 0.8059756755828857,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 0.1722,
      "step": 59
    },
    {
      "epoch": 0.07177033492822966,
      "grad_norm": 0.8063069581985474,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.1927,
      "step": 60
    },
    {
      "epoch": 0.0729665071770335,
      "grad_norm": 0.825333833694458,
      "learning_rate": 3.960441545911204e-05,
      "loss": 0.1445,
      "step": 61
    },
    {
      "epoch": 0.07416267942583732,
      "grad_norm": 0.8422712087631226,
      "learning_rate": 3.790390522001662e-05,
      "loss": 0.1558,
      "step": 62
    },
    {
      "epoch": 0.07535885167464115,
      "grad_norm": 0.9840994477272034,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 0.1733,
      "step": 63
    },
    {
      "epoch": 0.07655502392344497,
      "grad_norm": 0.8723501563072205,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 0.1491,
      "step": 64
    },
    {
      "epoch": 0.07775119617224881,
      "grad_norm": 0.8056702017784119,
      "learning_rate": 3.289899283371657e-05,
      "loss": 0.1573,
      "step": 65
    },
    {
      "epoch": 0.07894736842105263,
      "grad_norm": 0.8925252556800842,
      "learning_rate": 3.12696703292044e-05,
      "loss": 0.1795,
      "step": 66
    },
    {
      "epoch": 0.08014354066985646,
      "grad_norm": 0.9729580283164978,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 0.1791,
      "step": 67
    },
    {
      "epoch": 0.08133971291866028,
      "grad_norm": 1.0415399074554443,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 0.177,
      "step": 68
    },
    {
      "epoch": 0.08253588516746412,
      "grad_norm": 0.8020843863487244,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 0.1268,
      "step": 69
    },
    {
      "epoch": 0.08373205741626795,
      "grad_norm": 0.8391036987304688,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.1344,
      "step": 70
    },
    {
      "epoch": 0.08492822966507177,
      "grad_norm": 1.1039748191833496,
      "learning_rate": 2.350403678833976e-05,
      "loss": 0.1954,
      "step": 71
    },
    {
      "epoch": 0.0861244019138756,
      "grad_norm": 0.9647433757781982,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 0.1759,
      "step": 72
    },
    {
      "epoch": 0.08732057416267942,
      "grad_norm": 0.9805471301078796,
      "learning_rate": 2.061073738537635e-05,
      "loss": 0.1625,
      "step": 73
    },
    {
      "epoch": 0.08851674641148326,
      "grad_norm": 0.9431953430175781,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 0.1464,
      "step": 74
    },
    {
      "epoch": 0.08971291866028708,
      "grad_norm": 0.9271813631057739,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 0.1374,
      "step": 75
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 1.0403175354003906,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 0.1637,
      "step": 76
    },
    {
      "epoch": 0.09210526315789473,
      "grad_norm": 0.9993944764137268,
      "learning_rate": 1.526708147705013e-05,
      "loss": 0.1522,
      "step": 77
    },
    {
      "epoch": 0.09330143540669857,
      "grad_norm": 1.0364563465118408,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 0.1471,
      "step": 78
    },
    {
      "epoch": 0.09449760765550239,
      "grad_norm": 0.8725647926330566,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 0.1113,
      "step": 79
    },
    {
      "epoch": 0.09569377990430622,
      "grad_norm": 0.9386890530586243,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 0.1104,
      "step": 80
    },
    {
      "epoch": 0.09688995215311005,
      "grad_norm": 1.0741575956344604,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 0.1286,
      "step": 81
    },
    {
      "epoch": 0.09808612440191387,
      "grad_norm": 1.2276641130447388,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.1517,
      "step": 82
    },
    {
      "epoch": 0.09928229665071771,
      "grad_norm": 1.12937593460083,
      "learning_rate": 8.548121372247918e-06,
      "loss": 0.1482,
      "step": 83
    },
    {
      "epoch": 0.10047846889952153,
      "grad_norm": 1.1188244819641113,
      "learning_rate": 7.597595192178702e-06,
      "loss": 0.1201,
      "step": 84
    },
    {
      "epoch": 0.10167464114832536,
      "grad_norm": 1.406683325767517,
      "learning_rate": 6.698729810778065e-06,
      "loss": 0.2082,
      "step": 85
    },
    {
      "epoch": 0.10287081339712918,
      "grad_norm": 1.214578628540039,
      "learning_rate": 5.852620357053651e-06,
      "loss": 0.186,
      "step": 86
    },
    {
      "epoch": 0.10406698564593302,
      "grad_norm": 1.270246148109436,
      "learning_rate": 5.060297685041659e-06,
      "loss": 0.1308,
      "step": 87
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 1.2750657796859741,
      "learning_rate": 4.322727117869951e-06,
      "loss": 0.1296,
      "step": 88
    },
    {
      "epoch": 0.10645933014354067,
      "grad_norm": 1.4091739654541016,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 0.1985,
      "step": 89
    },
    {
      "epoch": 0.1076555023923445,
      "grad_norm": 1.3769253492355347,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 0.1777,
      "step": 90
    },
    {
      "epoch": 0.10885167464114832,
      "grad_norm": 1.5859569311141968,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 0.1514,
      "step": 91
    },
    {
      "epoch": 0.11004784688995216,
      "grad_norm": 1.3966237306594849,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 0.1825,
      "step": 92
    },
    {
      "epoch": 0.11124401913875598,
      "grad_norm": 1.6977550983428955,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 0.1799,
      "step": 93
    },
    {
      "epoch": 0.11244019138755981,
      "grad_norm": 1.6813634634017944,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 0.1539,
      "step": 94
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 2.3753628730773926,
      "learning_rate": 7.596123493895991e-07,
      "loss": 0.2349,
      "step": 95
    },
    {
      "epoch": 0.11483253588516747,
      "grad_norm": 2.1884944438934326,
      "learning_rate": 4.865965629214819e-07,
      "loss": 0.2109,
      "step": 96
    },
    {
      "epoch": 0.11602870813397129,
      "grad_norm": 1.6471924781799316,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 0.1618,
      "step": 97
    },
    {
      "epoch": 0.11722488038277512,
      "grad_norm": 1.5181317329406738,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 0.102,
      "step": 98
    },
    {
      "epoch": 0.11842105263157894,
      "grad_norm": 5.536009788513184,
      "learning_rate": 3.04586490452119e-08,
      "loss": 0.5176,
      "step": 99
    },
    {
      "epoch": 0.11961722488038277,
      "grad_norm": 5.12874174118042,
      "learning_rate": 0.0,
      "loss": 0.2482,
      "step": 100
    },
    {
      "epoch": 0.11961722488038277,
      "eval_loss": 0.15960238873958588,
      "eval_runtime": 35.4137,
      "eval_samples_per_second": 19.879,
      "eval_steps_per_second": 4.97,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.015623072500941e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}