|
{ |
|
"best_metric": 0.4131671190261841, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.005134458635517617, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.5672293177588086e-05, |
|
"grad_norm": 1.026159644126892, |
|
"learning_rate": 1.0060000000000002e-05, |
|
"loss": 1.2569, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.5672293177588086e-05, |
|
"eval_loss": 1.1921963691711426, |
|
"eval_runtime": 1183.8788, |
|
"eval_samples_per_second": 13.854, |
|
"eval_steps_per_second": 3.464, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 5.134458635517617e-05, |
|
"grad_norm": 1.255319595336914, |
|
"learning_rate": 2.0120000000000004e-05, |
|
"loss": 1.2297, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 7.701687953276427e-05, |
|
"grad_norm": 1.6948285102844238, |
|
"learning_rate": 3.018e-05, |
|
"loss": 1.6159, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00010268917271035235, |
|
"grad_norm": 1.570288062095642, |
|
"learning_rate": 4.024000000000001e-05, |
|
"loss": 1.5701, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00012836146588794044, |
|
"grad_norm": 1.4998438358306885, |
|
"learning_rate": 5.03e-05, |
|
"loss": 1.7115, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00015403375906552854, |
|
"grad_norm": 1.805379867553711, |
|
"learning_rate": 6.036e-05, |
|
"loss": 1.7774, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00017970605224311662, |
|
"grad_norm": 2.1143808364868164, |
|
"learning_rate": 7.042e-05, |
|
"loss": 2.1946, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0002053783454207047, |
|
"grad_norm": 2.083022356033325, |
|
"learning_rate": 8.048000000000002e-05, |
|
"loss": 1.9931, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0002310506385982928, |
|
"grad_norm": 2.2490415573120117, |
|
"learning_rate": 9.054000000000001e-05, |
|
"loss": 2.7258, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00025672293177588087, |
|
"grad_norm": 1.9612363576889038, |
|
"learning_rate": 0.0001006, |
|
"loss": 1.7879, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00028239522495346895, |
|
"grad_norm": 2.7282867431640625, |
|
"learning_rate": 0.00010007052631578948, |
|
"loss": 2.3426, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0003080675181310571, |
|
"grad_norm": 2.3359768390655518, |
|
"learning_rate": 9.954105263157895e-05, |
|
"loss": 1.8971, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00033373981130864515, |
|
"grad_norm": 2.0164449214935303, |
|
"learning_rate": 9.901157894736842e-05, |
|
"loss": 1.8824, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.00035941210448623323, |
|
"grad_norm": 2.7772645950317383, |
|
"learning_rate": 9.84821052631579e-05, |
|
"loss": 2.6385, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0003850843976638213, |
|
"grad_norm": 2.503121852874756, |
|
"learning_rate": 9.795263157894737e-05, |
|
"loss": 1.9396, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0004107566908414094, |
|
"grad_norm": 2.5371038913726807, |
|
"learning_rate": 9.742315789473686e-05, |
|
"loss": 2.1417, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0004364289840189975, |
|
"grad_norm": 3.1160120964050293, |
|
"learning_rate": 9.689368421052633e-05, |
|
"loss": 2.4972, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0004621012771965856, |
|
"grad_norm": 3.7234385013580322, |
|
"learning_rate": 9.63642105263158e-05, |
|
"loss": 2.569, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.00048777357037417367, |
|
"grad_norm": 3.364891290664673, |
|
"learning_rate": 9.583473684210527e-05, |
|
"loss": 2.4822, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0005134458635517617, |
|
"grad_norm": 9.361184120178223, |
|
"learning_rate": 9.530526315789474e-05, |
|
"loss": 1.3435, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0005391181567293499, |
|
"grad_norm": 5.888495445251465, |
|
"learning_rate": 9.477578947368422e-05, |
|
"loss": 0.8447, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0005647904499069379, |
|
"grad_norm": 4.862771511077881, |
|
"learning_rate": 9.424631578947369e-05, |
|
"loss": 0.4383, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.000590462743084526, |
|
"grad_norm": 1.6988191604614258, |
|
"learning_rate": 9.371684210526316e-05, |
|
"loss": 0.1793, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0006161350362621142, |
|
"grad_norm": 0.5420213341712952, |
|
"learning_rate": 9.318736842105263e-05, |
|
"loss": 0.0468, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0006418073294397022, |
|
"grad_norm": 0.26038220524787903, |
|
"learning_rate": 9.26578947368421e-05, |
|
"loss": 0.0137, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0006674796226172903, |
|
"grad_norm": 0.11139990389347076, |
|
"learning_rate": 9.212842105263159e-05, |
|
"loss": 0.004, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0006931519157948783, |
|
"grad_norm": 0.030027125030755997, |
|
"learning_rate": 9.159894736842107e-05, |
|
"loss": 0.0012, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0007188242089724665, |
|
"grad_norm": 0.015038474462926388, |
|
"learning_rate": 9.106947368421054e-05, |
|
"loss": 0.0005, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0007444965021500546, |
|
"grad_norm": 0.008915350772440434, |
|
"learning_rate": 9.054000000000001e-05, |
|
"loss": 0.0002, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0007701687953276426, |
|
"grad_norm": 0.006986498832702637, |
|
"learning_rate": 9.001052631578948e-05, |
|
"loss": 0.0002, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0007958410885052307, |
|
"grad_norm": 0.0063736108131706715, |
|
"learning_rate": 8.948105263157895e-05, |
|
"loss": 0.0001, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0008215133816828188, |
|
"grad_norm": 0.007324839010834694, |
|
"learning_rate": 8.895157894736842e-05, |
|
"loss": 0.0001, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0008471856748604069, |
|
"grad_norm": 0.010746892541646957, |
|
"learning_rate": 8.842210526315789e-05, |
|
"loss": 0.0002, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.000872857968037995, |
|
"grad_norm": 0.014329284429550171, |
|
"learning_rate": 8.789263157894738e-05, |
|
"loss": 0.0002, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.000898530261215583, |
|
"grad_norm": 0.012248692102730274, |
|
"learning_rate": 8.736315789473685e-05, |
|
"loss": 0.0002, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0009242025543931712, |
|
"grad_norm": 0.009668469429016113, |
|
"learning_rate": 8.683368421052632e-05, |
|
"loss": 0.0001, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0009498748475707593, |
|
"grad_norm": 0.007297456730157137, |
|
"learning_rate": 8.63042105263158e-05, |
|
"loss": 0.0001, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0009755471407483473, |
|
"grad_norm": 0.005695601459592581, |
|
"learning_rate": 8.577473684210527e-05, |
|
"loss": 0.0001, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0010012194339259354, |
|
"grad_norm": 0.004724627826362848, |
|
"learning_rate": 8.524526315789474e-05, |
|
"loss": 0.0001, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0010268917271035235, |
|
"grad_norm": 0.0041098096407949924, |
|
"learning_rate": 8.471578947368421e-05, |
|
"loss": 0.0001, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0010525640202811116, |
|
"grad_norm": 0.003558989381417632, |
|
"learning_rate": 8.41863157894737e-05, |
|
"loss": 0.0001, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0010782363134586997, |
|
"grad_norm": 0.00332538690418005, |
|
"learning_rate": 8.365684210526317e-05, |
|
"loss": 0.0001, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0011039086066362879, |
|
"grad_norm": 0.0027524020988494158, |
|
"learning_rate": 8.312736842105264e-05, |
|
"loss": 0.0001, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0011295808998138758, |
|
"grad_norm": 0.002535044215619564, |
|
"learning_rate": 8.259789473684211e-05, |
|
"loss": 0.0001, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.001155253192991464, |
|
"grad_norm": 0.00231631426140666, |
|
"learning_rate": 8.206842105263158e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.001180925486169052, |
|
"grad_norm": 0.0019382525933906436, |
|
"learning_rate": 8.153894736842105e-05, |
|
"loss": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0012065977793466402, |
|
"grad_norm": 0.022770335897803307, |
|
"learning_rate": 8.100947368421053e-05, |
|
"loss": 0.0001, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0012322700725242283, |
|
"grad_norm": 0.0018689745338633657, |
|
"learning_rate": 8.048000000000002e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0012579423657018162, |
|
"grad_norm": 20.690397262573242, |
|
"learning_rate": 7.995052631578949e-05, |
|
"loss": 4.2531, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0012836146588794044, |
|
"grad_norm": 18.80559730529785, |
|
"learning_rate": 7.942105263157896e-05, |
|
"loss": 3.8468, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0012836146588794044, |
|
"eval_loss": 0.5789145231246948, |
|
"eval_runtime": 1182.6905, |
|
"eval_samples_per_second": 13.868, |
|
"eval_steps_per_second": 3.468, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0013092869520569925, |
|
"grad_norm": 2.402365207672119, |
|
"learning_rate": 7.889157894736843e-05, |
|
"loss": 1.2955, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0013349592452345806, |
|
"grad_norm": 2.3085319995880127, |
|
"learning_rate": 7.83621052631579e-05, |
|
"loss": 1.9171, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0013606315384121687, |
|
"grad_norm": 1.7359583377838135, |
|
"learning_rate": 7.783263157894737e-05, |
|
"loss": 1.5831, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0013863038315897567, |
|
"grad_norm": 1.780971884727478, |
|
"learning_rate": 7.730315789473684e-05, |
|
"loss": 1.9551, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0014119761247673448, |
|
"grad_norm": 1.7606279850006104, |
|
"learning_rate": 7.677368421052632e-05, |
|
"loss": 1.7301, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.001437648417944933, |
|
"grad_norm": 1.6475731134414673, |
|
"learning_rate": 7.624421052631579e-05, |
|
"loss": 1.8897, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.001463320711122521, |
|
"grad_norm": 1.541284203529358, |
|
"learning_rate": 7.571473684210526e-05, |
|
"loss": 1.8631, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0014889930043001092, |
|
"grad_norm": 1.9083011150360107, |
|
"learning_rate": 7.518526315789475e-05, |
|
"loss": 1.9604, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.001514665297477697, |
|
"grad_norm": 1.855704665184021, |
|
"learning_rate": 7.465578947368422e-05, |
|
"loss": 2.3663, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0015403375906552852, |
|
"grad_norm": 1.8971003293991089, |
|
"learning_rate": 7.412631578947369e-05, |
|
"loss": 2.0379, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0015660098838328734, |
|
"grad_norm": 1.7835102081298828, |
|
"learning_rate": 7.359684210526317e-05, |
|
"loss": 2.1501, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0015916821770104615, |
|
"grad_norm": 1.6390843391418457, |
|
"learning_rate": 7.306736842105264e-05, |
|
"loss": 2.1538, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0016173544701880496, |
|
"grad_norm": 1.7119994163513184, |
|
"learning_rate": 7.253789473684211e-05, |
|
"loss": 1.7663, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0016430267633656375, |
|
"grad_norm": 1.6038694381713867, |
|
"learning_rate": 7.200842105263158e-05, |
|
"loss": 2.2391, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0016686990565432257, |
|
"grad_norm": 1.7166171073913574, |
|
"learning_rate": 7.147894736842105e-05, |
|
"loss": 2.1004, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0016943713497208138, |
|
"grad_norm": 1.7185120582580566, |
|
"learning_rate": 7.094947368421052e-05, |
|
"loss": 1.732, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.001720043642898402, |
|
"grad_norm": 1.7218092679977417, |
|
"learning_rate": 7.042e-05, |
|
"loss": 1.92, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.00174571593607599, |
|
"grad_norm": 1.944508671760559, |
|
"learning_rate": 6.989052631578948e-05, |
|
"loss": 1.8669, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.001771388229253578, |
|
"grad_norm": 2.296661615371704, |
|
"learning_rate": 6.936105263157896e-05, |
|
"loss": 1.5057, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.001797060522431166, |
|
"grad_norm": 2.606893539428711, |
|
"learning_rate": 6.883157894736843e-05, |
|
"loss": 2.3624, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0018227328156087542, |
|
"grad_norm": 5.274344444274902, |
|
"learning_rate": 6.83021052631579e-05, |
|
"loss": 2.6868, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0018484051087863424, |
|
"grad_norm": 2.480908155441284, |
|
"learning_rate": 6.777263157894737e-05, |
|
"loss": 1.9085, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0018740774019639305, |
|
"grad_norm": 1.2263275384902954, |
|
"learning_rate": 6.724315789473684e-05, |
|
"loss": 0.1152, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0018997496951415186, |
|
"grad_norm": 0.09067212790250778, |
|
"learning_rate": 6.671368421052631e-05, |
|
"loss": 0.0036, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0019254219883191065, |
|
"grad_norm": 0.04749782010912895, |
|
"learning_rate": 6.61842105263158e-05, |
|
"loss": 0.002, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0019510942814966947, |
|
"grad_norm": 0.03309526666998863, |
|
"learning_rate": 6.565473684210527e-05, |
|
"loss": 0.0014, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0019767665746742826, |
|
"grad_norm": 0.021400053054094315, |
|
"learning_rate": 6.512526315789474e-05, |
|
"loss": 0.0009, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0020024388678518707, |
|
"grad_norm": 0.017963755875825882, |
|
"learning_rate": 6.459578947368421e-05, |
|
"loss": 0.0008, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.002028111161029459, |
|
"grad_norm": 1.1963677406311035, |
|
"learning_rate": 6.406631578947369e-05, |
|
"loss": 0.2937, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.002053783454207047, |
|
"grad_norm": 0.010341464541852474, |
|
"learning_rate": 6.353684210526316e-05, |
|
"loss": 0.0005, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.002079455747384635, |
|
"grad_norm": 0.00791017897427082, |
|
"learning_rate": 6.300736842105263e-05, |
|
"loss": 0.0004, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0021051280405622232, |
|
"grad_norm": 0.00704535935074091, |
|
"learning_rate": 6.247789473684212e-05, |
|
"loss": 0.0003, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0021308003337398114, |
|
"grad_norm": 0.005926445592194796, |
|
"learning_rate": 6.194842105263159e-05, |
|
"loss": 0.0003, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0021564726269173995, |
|
"grad_norm": 0.005512417294085026, |
|
"learning_rate": 6.141894736842106e-05, |
|
"loss": 0.0003, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0021821449200949876, |
|
"grad_norm": 0.005944954231381416, |
|
"learning_rate": 6.088947368421053e-05, |
|
"loss": 0.0003, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0022078172132725758, |
|
"grad_norm": 0.005636818241328001, |
|
"learning_rate": 6.036e-05, |
|
"loss": 0.0003, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0022334895064501635, |
|
"grad_norm": 0.005355035420507193, |
|
"learning_rate": 5.9830526315789475e-05, |
|
"loss": 0.0003, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0022591617996277516, |
|
"grad_norm": 0.004906694870442152, |
|
"learning_rate": 5.9301052631578946e-05, |
|
"loss": 0.0002, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0022848340928053397, |
|
"grad_norm": 0.00455674109980464, |
|
"learning_rate": 5.877157894736843e-05, |
|
"loss": 0.0002, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.002310506385982928, |
|
"grad_norm": 0.004359242040663958, |
|
"learning_rate": 5.82421052631579e-05, |
|
"loss": 0.0002, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.002336178679160516, |
|
"grad_norm": 0.004265242256224155, |
|
"learning_rate": 5.771263157894737e-05, |
|
"loss": 0.0002, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.002361850972338104, |
|
"grad_norm": 0.00403448985889554, |
|
"learning_rate": 5.718315789473685e-05, |
|
"loss": 0.0002, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0023875232655156922, |
|
"grad_norm": 0.003738979110494256, |
|
"learning_rate": 5.665368421052632e-05, |
|
"loss": 0.0002, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0024131955586932804, |
|
"grad_norm": 0.0035066159907728434, |
|
"learning_rate": 5.612421052631579e-05, |
|
"loss": 0.0002, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0024388678518708685, |
|
"grad_norm": 0.0033452173229306936, |
|
"learning_rate": 5.559473684210527e-05, |
|
"loss": 0.0001, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0024645401450484566, |
|
"grad_norm": 0.0031449589878320694, |
|
"learning_rate": 5.506526315789474e-05, |
|
"loss": 0.0001, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0024902124382260448, |
|
"grad_norm": 0.8770474791526794, |
|
"learning_rate": 5.453578947368421e-05, |
|
"loss": 0.1372, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0025158847314036325, |
|
"grad_norm": 0.006977991200983524, |
|
"learning_rate": 5.400631578947369e-05, |
|
"loss": 0.0002, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0025415570245812206, |
|
"grad_norm": 2.6762304306030273, |
|
"learning_rate": 5.347684210526316e-05, |
|
"loss": 1.5861, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0025672293177588087, |
|
"grad_norm": 6.186676502227783, |
|
"learning_rate": 5.294736842105263e-05, |
|
"loss": 3.1351, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0025672293177588087, |
|
"eval_loss": 0.444609671831131, |
|
"eval_runtime": 1180.4892, |
|
"eval_samples_per_second": 13.894, |
|
"eval_steps_per_second": 3.474, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002592901610936397, |
|
"grad_norm": 1.0906559228897095, |
|
"learning_rate": 5.24178947368421e-05, |
|
"loss": 1.5745, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.002618573904113985, |
|
"grad_norm": 1.057900071144104, |
|
"learning_rate": 5.1888421052631585e-05, |
|
"loss": 1.359, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.002644246197291573, |
|
"grad_norm": 1.1936546564102173, |
|
"learning_rate": 5.135894736842106e-05, |
|
"loss": 1.8506, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0026699184904691612, |
|
"grad_norm": 1.1742337942123413, |
|
"learning_rate": 5.082947368421053e-05, |
|
"loss": 1.4235, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.0026955907836467494, |
|
"grad_norm": 1.4636642932891846, |
|
"learning_rate": 5.03e-05, |
|
"loss": 1.9341, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0027212630768243375, |
|
"grad_norm": 1.366550087928772, |
|
"learning_rate": 4.977052631578947e-05, |
|
"loss": 1.7735, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0027469353700019256, |
|
"grad_norm": 1.4252630472183228, |
|
"learning_rate": 4.924105263157895e-05, |
|
"loss": 1.8694, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0027726076631795133, |
|
"grad_norm": 1.5163335800170898, |
|
"learning_rate": 4.871157894736843e-05, |
|
"loss": 1.8035, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0027982799563571015, |
|
"grad_norm": 1.6696662902832031, |
|
"learning_rate": 4.81821052631579e-05, |
|
"loss": 2.4422, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0028239522495346896, |
|
"grad_norm": 1.823583960533142, |
|
"learning_rate": 4.765263157894737e-05, |
|
"loss": 2.0223, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0028496245427122777, |
|
"grad_norm": 1.7153486013412476, |
|
"learning_rate": 4.7123157894736845e-05, |
|
"loss": 2.3425, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.002875296835889866, |
|
"grad_norm": 1.597476601600647, |
|
"learning_rate": 4.6593684210526316e-05, |
|
"loss": 1.9461, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.002900969129067454, |
|
"grad_norm": 1.6382873058319092, |
|
"learning_rate": 4.606421052631579e-05, |
|
"loss": 1.7633, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.002926641422245042, |
|
"grad_norm": 1.7374053001403809, |
|
"learning_rate": 4.553473684210527e-05, |
|
"loss": 2.0122, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0029523137154226302, |
|
"grad_norm": 1.976195216178894, |
|
"learning_rate": 4.500526315789474e-05, |
|
"loss": 2.218, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0029779860086002184, |
|
"grad_norm": 2.0346622467041016, |
|
"learning_rate": 4.447578947368421e-05, |
|
"loss": 2.3013, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0030036583017778065, |
|
"grad_norm": 1.756709337234497, |
|
"learning_rate": 4.394631578947369e-05, |
|
"loss": 1.9596, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.003029330594955394, |
|
"grad_norm": 1.9882556200027466, |
|
"learning_rate": 4.341684210526316e-05, |
|
"loss": 2.2595, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0030550028881329823, |
|
"grad_norm": 2.60469388961792, |
|
"learning_rate": 4.2887368421052636e-05, |
|
"loss": 1.5153, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0030806751813105705, |
|
"grad_norm": 0.9778649806976318, |
|
"learning_rate": 4.2357894736842106e-05, |
|
"loss": 0.0113, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0031063474744881586, |
|
"grad_norm": 0.011567816138267517, |
|
"learning_rate": 4.182842105263158e-05, |
|
"loss": 0.0004, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0031320197676657467, |
|
"grad_norm": 0.010046404786407948, |
|
"learning_rate": 4.1298947368421053e-05, |
|
"loss": 0.0004, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.003157692060843335, |
|
"grad_norm": 0.009395002387464046, |
|
"learning_rate": 4.0769473684210524e-05, |
|
"loss": 0.0003, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.003183364354020923, |
|
"grad_norm": 0.008893662132322788, |
|
"learning_rate": 4.024000000000001e-05, |
|
"loss": 0.0003, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.003209036647198511, |
|
"grad_norm": 0.008500020019710064, |
|
"learning_rate": 3.971052631578948e-05, |
|
"loss": 0.0003, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0032347089403760992, |
|
"grad_norm": 0.0076544624753296375, |
|
"learning_rate": 3.918105263157895e-05, |
|
"loss": 0.0003, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.0032603812335536874, |
|
"grad_norm": 0.007298609241843224, |
|
"learning_rate": 3.865157894736842e-05, |
|
"loss": 0.0003, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.003286053526731275, |
|
"grad_norm": 0.006563248578459024, |
|
"learning_rate": 3.8122105263157896e-05, |
|
"loss": 0.0002, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.003311725819908863, |
|
"grad_norm": 0.006430802401155233, |
|
"learning_rate": 3.759263157894737e-05, |
|
"loss": 0.0002, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0033373981130864513, |
|
"grad_norm": 0.006386774126440287, |
|
"learning_rate": 3.7063157894736844e-05, |
|
"loss": 0.0002, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0033630704062640395, |
|
"grad_norm": 0.6408913731575012, |
|
"learning_rate": 3.653368421052632e-05, |
|
"loss": 0.0931, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.0033887426994416276, |
|
"grad_norm": 0.005842825397849083, |
|
"learning_rate": 3.600421052631579e-05, |
|
"loss": 0.0002, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0034144149926192157, |
|
"grad_norm": 0.005725574214011431, |
|
"learning_rate": 3.547473684210526e-05, |
|
"loss": 0.0002, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.003440087285796804, |
|
"grad_norm": 0.006068081129342318, |
|
"learning_rate": 3.494526315789474e-05, |
|
"loss": 0.0002, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.003465759578974392, |
|
"grad_norm": 0.00514700124040246, |
|
"learning_rate": 3.4415789473684216e-05, |
|
"loss": 0.0002, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.00349143187215198, |
|
"grad_norm": 0.005522563587874174, |
|
"learning_rate": 3.3886315789473686e-05, |
|
"loss": 0.0002, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.0035171041653295682, |
|
"grad_norm": 0.0057330005802214146, |
|
"learning_rate": 3.3356842105263156e-05, |
|
"loss": 0.0002, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.003542776458507156, |
|
"grad_norm": 0.0057954080402851105, |
|
"learning_rate": 3.2827368421052634e-05, |
|
"loss": 0.0002, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.003568448751684744, |
|
"grad_norm": 0.005006751511245966, |
|
"learning_rate": 3.2297894736842104e-05, |
|
"loss": 0.0002, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.003594121044862332, |
|
"grad_norm": 0.005032096989452839, |
|
"learning_rate": 3.176842105263158e-05, |
|
"loss": 0.0002, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0036197933380399203, |
|
"grad_norm": 0.004935144912451506, |
|
"learning_rate": 3.123894736842106e-05, |
|
"loss": 0.0002, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.0036454656312175085, |
|
"grad_norm": 0.00508884247392416, |
|
"learning_rate": 3.070947368421053e-05, |
|
"loss": 0.0002, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.0036711379243950966, |
|
"grad_norm": 0.004769055638462305, |
|
"learning_rate": 3.018e-05, |
|
"loss": 0.0002, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.0036968102175726847, |
|
"grad_norm": 0.004287198651582003, |
|
"learning_rate": 2.9650526315789473e-05, |
|
"loss": 0.0002, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.003722482510750273, |
|
"grad_norm": 0.004471136257052422, |
|
"learning_rate": 2.912105263157895e-05, |
|
"loss": 0.0002, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.003748154803927861, |
|
"grad_norm": 0.004509101156145334, |
|
"learning_rate": 2.8591578947368424e-05, |
|
"loss": 0.0002, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.003773827097105449, |
|
"grad_norm": 0.005288159940391779, |
|
"learning_rate": 2.8062105263157894e-05, |
|
"loss": 0.0002, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.0037994993902830372, |
|
"grad_norm": 1.4431769847869873, |
|
"learning_rate": 2.753263157894737e-05, |
|
"loss": 0.4431, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.003825171683460625, |
|
"grad_norm": 2.757993459701538, |
|
"learning_rate": 2.7003157894736845e-05, |
|
"loss": 1.9569, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.003850843976638213, |
|
"grad_norm": 6.777806758880615, |
|
"learning_rate": 2.6473684210526315e-05, |
|
"loss": 2.2696, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.003850843976638213, |
|
"eval_loss": 0.41922199726104736, |
|
"eval_runtime": 1184.8981, |
|
"eval_samples_per_second": 13.843, |
|
"eval_steps_per_second": 3.461, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.003876516269815801, |
|
"grad_norm": 0.7326381802558899, |
|
"learning_rate": 2.5944210526315793e-05, |
|
"loss": 1.3282, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.0039021885629933893, |
|
"grad_norm": 0.931328535079956, |
|
"learning_rate": 2.5414736842105266e-05, |
|
"loss": 1.2657, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.003927860856170978, |
|
"grad_norm": 0.7979090213775635, |
|
"learning_rate": 2.4885263157894737e-05, |
|
"loss": 1.0431, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.003953533149348565, |
|
"grad_norm": 0.8406013250350952, |
|
"learning_rate": 2.4355789473684214e-05, |
|
"loss": 1.5444, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.003979205442526153, |
|
"grad_norm": 1.172977328300476, |
|
"learning_rate": 2.3826315789473684e-05, |
|
"loss": 2.2357, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.004004877735703741, |
|
"grad_norm": 1.081085443496704, |
|
"learning_rate": 2.3296842105263158e-05, |
|
"loss": 1.9574, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.0040305500288813296, |
|
"grad_norm": 1.213394284248352, |
|
"learning_rate": 2.2767368421052635e-05, |
|
"loss": 1.7966, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.004056222322058918, |
|
"grad_norm": 1.3481943607330322, |
|
"learning_rate": 2.2237894736842105e-05, |
|
"loss": 2.1099, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.004081894615236506, |
|
"grad_norm": 1.1967196464538574, |
|
"learning_rate": 2.170842105263158e-05, |
|
"loss": 1.7427, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.004107566908414094, |
|
"grad_norm": 1.6480809450149536, |
|
"learning_rate": 2.1178947368421053e-05, |
|
"loss": 2.2222, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.004133239201591682, |
|
"grad_norm": 1.4550552368164062, |
|
"learning_rate": 2.0649473684210527e-05, |
|
"loss": 1.5513, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.00415891149476927, |
|
"grad_norm": 1.5787893533706665, |
|
"learning_rate": 2.0120000000000004e-05, |
|
"loss": 2.0812, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.004184583787946858, |
|
"grad_norm": 1.6700499057769775, |
|
"learning_rate": 1.9590526315789474e-05, |
|
"loss": 2.0889, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.0042102560811244465, |
|
"grad_norm": 1.2719978094100952, |
|
"learning_rate": 1.9061052631578948e-05, |
|
"loss": 1.6051, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.004235928374302035, |
|
"grad_norm": 2.337200164794922, |
|
"learning_rate": 1.8531578947368422e-05, |
|
"loss": 2.2032, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.004261600667479623, |
|
"grad_norm": 2.0356595516204834, |
|
"learning_rate": 1.8002105263157896e-05, |
|
"loss": 2.3998, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.004287272960657211, |
|
"grad_norm": 1.9695961475372314, |
|
"learning_rate": 1.747263157894737e-05, |
|
"loss": 2.3592, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.004312945253834799, |
|
"grad_norm": 2.0183303356170654, |
|
"learning_rate": 1.6943157894736843e-05, |
|
"loss": 2.1365, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.004338617547012387, |
|
"grad_norm": 2.894932270050049, |
|
"learning_rate": 1.6413684210526317e-05, |
|
"loss": 2.4704, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.004364289840189975, |
|
"grad_norm": 1.7332582473754883, |
|
"learning_rate": 1.588421052631579e-05, |
|
"loss": 0.8739, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.004389962133367563, |
|
"grad_norm": 0.0018881710711866617, |
|
"learning_rate": 1.5354736842105264e-05, |
|
"loss": 0.0001, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.0044156344265451515, |
|
"grad_norm": 0.0019908491522073746, |
|
"learning_rate": 1.4825263157894736e-05, |
|
"loss": 0.0001, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.00444130671972274, |
|
"grad_norm": 0.0019480012124404311, |
|
"learning_rate": 1.4295789473684212e-05, |
|
"loss": 0.0001, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.004466979012900327, |
|
"grad_norm": 0.0019311723299324512, |
|
"learning_rate": 1.3766315789473686e-05, |
|
"loss": 0.0001, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.004492651306077915, |
|
"grad_norm": 0.0019914451986551285, |
|
"learning_rate": 1.3236842105263158e-05, |
|
"loss": 0.0001, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.004518323599255503, |
|
"grad_norm": 0.001993614248931408, |
|
"learning_rate": 1.2707368421052633e-05, |
|
"loss": 0.0001, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.004543995892433091, |
|
"grad_norm": 0.0019910179544240236, |
|
"learning_rate": 1.2177894736842107e-05, |
|
"loss": 0.0001, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.004569668185610679, |
|
"grad_norm": 0.002013832563534379, |
|
"learning_rate": 1.1648421052631579e-05, |
|
"loss": 0.0001, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.0045953404787882676, |
|
"grad_norm": 0.0020234170369803905, |
|
"learning_rate": 1.1118947368421053e-05, |
|
"loss": 0.0001, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.004621012771965856, |
|
"grad_norm": 0.0019844514317810535, |
|
"learning_rate": 1.0589473684210526e-05, |
|
"loss": 0.0001, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.004646685065143444, |
|
"grad_norm": 0.00206298241391778, |
|
"learning_rate": 1.0060000000000002e-05, |
|
"loss": 0.0001, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.004672357358321032, |
|
"grad_norm": 0.0020082283299416304, |
|
"learning_rate": 9.530526315789474e-06, |
|
"loss": 0.0001, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.00469802965149862, |
|
"grad_norm": 0.0020704076159745455, |
|
"learning_rate": 9.001052631578948e-06, |
|
"loss": 0.0001, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.004723701944676208, |
|
"grad_norm": 0.002045375294983387, |
|
"learning_rate": 8.471578947368422e-06, |
|
"loss": 0.0001, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.004749374237853796, |
|
"grad_norm": 0.0021098172292113304, |
|
"learning_rate": 7.942105263157895e-06, |
|
"loss": 0.0001, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0047750465310313845, |
|
"grad_norm": 0.0021194566506892443, |
|
"learning_rate": 7.412631578947368e-06, |
|
"loss": 0.0001, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.004800718824208973, |
|
"grad_norm": 0.0020953835919499397, |
|
"learning_rate": 6.883157894736843e-06, |
|
"loss": 0.0001, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.004826391117386561, |
|
"grad_norm": 0.0021119611337780952, |
|
"learning_rate": 6.3536842105263166e-06, |
|
"loss": 0.0001, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.004852063410564149, |
|
"grad_norm": 0.0021464722231030464, |
|
"learning_rate": 5.8242105263157895e-06, |
|
"loss": 0.0001, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.004877735703741737, |
|
"grad_norm": 0.0022363984026014805, |
|
"learning_rate": 5.294736842105263e-06, |
|
"loss": 0.0001, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.004903407996919325, |
|
"grad_norm": 0.002060087164863944, |
|
"learning_rate": 4.765263157894737e-06, |
|
"loss": 0.0001, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.004929080290096913, |
|
"grad_norm": 0.0019431081600487232, |
|
"learning_rate": 4.235789473684211e-06, |
|
"loss": 0.0001, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.004954752583274501, |
|
"grad_norm": 0.0021071808878332376, |
|
"learning_rate": 3.706315789473684e-06, |
|
"loss": 0.0001, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.0049804248764520895, |
|
"grad_norm": 0.0020984155125916004, |
|
"learning_rate": 3.1768421052631583e-06, |
|
"loss": 0.0001, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.005006097169629677, |
|
"grad_norm": 0.002161442069336772, |
|
"learning_rate": 2.6473684210526316e-06, |
|
"loss": 0.0001, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.005031769462807265, |
|
"grad_norm": 0.002335567260161042, |
|
"learning_rate": 2.1178947368421054e-06, |
|
"loss": 0.0001, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.005057441755984853, |
|
"grad_norm": 1.0611015558242798, |
|
"learning_rate": 1.5884210526315791e-06, |
|
"loss": 0.2117, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.005083114049162441, |
|
"grad_norm": 0.39899060130119324, |
|
"learning_rate": 1.0589473684210527e-06, |
|
"loss": 0.089, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.005108786342340029, |
|
"grad_norm": 3.193357467651367, |
|
"learning_rate": 5.294736842105263e-07, |
|
"loss": 1.4026, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.005134458635517617, |
|
"grad_norm": 3.7854483127593994, |
|
"learning_rate": 0.0, |
|
"loss": 1.5641, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.005134458635517617, |
|
"eval_loss": 0.4131671190261841, |
|
"eval_runtime": 1189.4105, |
|
"eval_samples_per_second": 13.79, |
|
"eval_steps_per_second": 3.448, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.795839081879962e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|