{
  "best_global_step": 11000,
  "best_metric": 1.5041238069534302,
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-11000",
  "epoch": 4.898065952414081,
  "eval_steps": 500,
  "global_step": 11000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.022262418255182968,
      "grad_norm": 0.8790799975395203,
      "learning_rate": 1.088888888888889e-05,
      "loss": 1.7866,
      "step": 50
    },
    {
      "epoch": 0.044524836510365935,
      "grad_norm": 0.8580410480499268,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 1.7931,
      "step": 100
    },
    {
      "epoch": 0.0667872547655489,
      "grad_norm": 1.150437355041504,
      "learning_rate": 3.311111111111112e-05,
      "loss": 1.7747,
      "step": 150
    },
    {
      "epoch": 0.08904967302073187,
      "grad_norm": 1.1797043085098267,
      "learning_rate": 4.422222222222222e-05,
      "loss": 1.7777,
      "step": 200
    },
    {
      "epoch": 0.11131209127591485,
      "grad_norm": 1.1714961528778076,
      "learning_rate": 5e-05,
      "loss": 1.7833,
      "step": 250
    },
    {
      "epoch": 0.1335745095310978,
      "grad_norm": 1.29172682762146,
      "learning_rate": 5e-05,
      "loss": 1.7813,
      "step": 300
    },
    {
      "epoch": 0.15583692778628078,
      "grad_norm": 1.0865519046783447,
      "learning_rate": 5e-05,
      "loss": 1.7737,
      "step": 350
    },
    {
      "epoch": 0.17809934604146374,
      "grad_norm": 1.1579242944717407,
      "learning_rate": 5e-05,
      "loss": 1.7716,
      "step": 400
    },
    {
      "epoch": 0.20036176429664673,
      "grad_norm": 1.1099216938018799,
      "learning_rate": 5e-05,
      "loss": 1.7877,
      "step": 450
    },
    {
      "epoch": 0.2226241825518297,
      "grad_norm": 1.1747430562973022,
      "learning_rate": 5e-05,
      "loss": 1.7839,
      "step": 500
    },
    {
      "epoch": 0.2226241825518297,
      "eval_loss": 1.6513175964355469,
      "eval_runtime": 41.1785,
      "eval_samples_per_second": 387.848,
      "eval_steps_per_second": 48.496,
      "step": 500
    },
    {
      "epoch": 0.24488660080701266,
      "grad_norm": 1.154510259628296,
      "learning_rate": 5e-05,
      "loss": 1.772,
      "step": 550
    },
    {
      "epoch": 0.2671490190621956,
      "grad_norm": 0.99709153175354,
      "learning_rate": 5e-05,
      "loss": 1.7802,
      "step": 600
    },
    {
      "epoch": 0.2894114373173786,
      "grad_norm": 1.3202115297317505,
      "learning_rate": 5e-05,
      "loss": 1.7565,
      "step": 650
    },
    {
      "epoch": 0.31167385557256155,
      "grad_norm": 1.3452224731445312,
      "learning_rate": 5e-05,
      "loss": 1.7612,
      "step": 700
    },
    {
      "epoch": 0.33393627382774455,
      "grad_norm": 1.1056386232376099,
      "learning_rate": 5e-05,
      "loss": 1.7696,
      "step": 750
    },
    {
      "epoch": 0.3561986920829275,
      "grad_norm": 1.0761163234710693,
      "learning_rate": 5e-05,
      "loss": 1.6937,
      "step": 800
    },
    {
      "epoch": 0.3784611103381105,
      "grad_norm": 0.9081959128379822,
      "learning_rate": 5e-05,
      "loss": 1.6766,
      "step": 850
    },
    {
      "epoch": 0.40072352859329347,
      "grad_norm": 0.9897329211235046,
      "learning_rate": 5e-05,
      "loss": 1.6804,
      "step": 900
    },
    {
      "epoch": 0.4229859468484764,
      "grad_norm": 0.992655873298645,
      "learning_rate": 5e-05,
      "loss": 1.6584,
      "step": 950
    },
    {
      "epoch": 0.4452483651036594,
      "grad_norm": 1.0000704526901245,
      "learning_rate": 5e-05,
      "loss": 1.6576,
      "step": 1000
    },
    {
      "epoch": 0.4452483651036594,
      "eval_loss": 1.6615262031555176,
      "eval_runtime": 40.7337,
      "eval_samples_per_second": 392.083,
      "eval_steps_per_second": 49.026,
      "step": 1000
    },
    {
      "epoch": 0.46751078335884233,
      "grad_norm": 0.9494450092315674,
      "learning_rate": 5e-05,
      "loss": 1.6604,
      "step": 1050
    },
    {
      "epoch": 0.4897732016140253,
      "grad_norm": 0.9924134612083435,
      "learning_rate": 5e-05,
      "loss": 1.6539,
      "step": 1100
    },
    {
      "epoch": 0.5120356198692083,
      "grad_norm": 1.0620170831680298,
      "learning_rate": 5e-05,
      "loss": 1.6552,
      "step": 1150
    },
    {
      "epoch": 0.5342980381243913,
      "grad_norm": 1.1163603067398071,
      "learning_rate": 5e-05,
      "loss": 1.6452,
      "step": 1200
    },
    {
      "epoch": 0.5565604563795742,
      "grad_norm": 1.025298833847046,
      "learning_rate": 5e-05,
      "loss": 1.6468,
      "step": 1250
    },
    {
      "epoch": 0.5788228746347572,
      "grad_norm": 0.9661399722099304,
      "learning_rate": 5e-05,
      "loss": 1.6377,
      "step": 1300
    },
    {
      "epoch": 0.6010852928899402,
      "grad_norm": 0.9570266008377075,
      "learning_rate": 5e-05,
      "loss": 1.6525,
      "step": 1350
    },
    {
      "epoch": 0.6233477111451231,
      "grad_norm": 0.9325594902038574,
      "learning_rate": 5e-05,
      "loss": 1.6443,
      "step": 1400
    },
    {
      "epoch": 0.6456101294003062,
      "grad_norm": 1.071475625038147,
      "learning_rate": 5e-05,
      "loss": 1.6418,
      "step": 1450
    },
    {
      "epoch": 0.6678725476554891,
      "grad_norm": 0.9684040546417236,
      "learning_rate": 5e-05,
      "loss": 1.6396,
      "step": 1500
    },
    {
      "epoch": 0.6678725476554891,
      "eval_loss": 1.6649832725524902,
      "eval_runtime": 40.9434,
      "eval_samples_per_second": 390.075,
      "eval_steps_per_second": 48.775,
      "step": 1500
    },
    {
      "epoch": 0.690134965910672,
      "grad_norm": 1.0452582836151123,
      "learning_rate": 5e-05,
      "loss": 1.6583,
      "step": 1550
    },
    {
      "epoch": 0.712397384165855,
      "grad_norm": 0.8643882274627686,
      "learning_rate": 5e-05,
      "loss": 1.6538,
      "step": 1600
    },
    {
      "epoch": 0.734659802421038,
      "grad_norm": 1.0304285287857056,
      "learning_rate": 5e-05,
      "loss": 1.6653,
      "step": 1650
    },
    {
      "epoch": 0.756922220676221,
      "grad_norm": 1.1433496475219727,
      "learning_rate": 5e-05,
      "loss": 1.6605,
      "step": 1700
    },
    {
      "epoch": 0.7791846389314039,
      "grad_norm": 0.9240351319313049,
      "learning_rate": 5e-05,
      "loss": 1.6696,
      "step": 1750
    },
    {
      "epoch": 0.8014470571865869,
      "grad_norm": 1.0242925882339478,
      "learning_rate": 5e-05,
      "loss": 1.6697,
      "step": 1800
    },
    {
      "epoch": 0.8237094754417699,
      "grad_norm": 0.9509591460227966,
      "learning_rate": 5e-05,
      "loss": 1.6859,
      "step": 1850
    },
    {
      "epoch": 0.8459718936969528,
      "grad_norm": 1.2701749801635742,
      "learning_rate": 5e-05,
      "loss": 1.6885,
      "step": 1900
    },
    {
      "epoch": 0.8682343119521359,
      "grad_norm": 1.4032883644104004,
      "learning_rate": 5e-05,
      "loss": 1.6935,
      "step": 1950
    },
    {
      "epoch": 0.8904967302073188,
      "grad_norm": 1.2004971504211426,
      "learning_rate": 5e-05,
      "loss": 1.7168,
      "step": 2000
    },
    {
      "epoch": 0.8904967302073188,
      "eval_loss": 1.631461262702942,
      "eval_runtime": 40.9613,
      "eval_samples_per_second": 389.905,
      "eval_steps_per_second": 48.753,
      "step": 2000
    },
    {
      "epoch": 0.9127591484625017,
      "grad_norm": 1.193617820739746,
      "learning_rate": 5e-05,
      "loss": 1.7404,
      "step": 2050
    },
    {
      "epoch": 0.9350215667176847,
      "grad_norm": 1.424216866493225,
      "learning_rate": 5e-05,
      "loss": 1.7444,
      "step": 2100
    },
    {
      "epoch": 0.9572839849728677,
      "grad_norm": 1.2657979726791382,
      "learning_rate": 5e-05,
      "loss": 1.7529,
      "step": 2150
    },
    {
      "epoch": 0.9795464032280506,
      "grad_norm": 1.1823986768722534,
      "learning_rate": 5e-05,
      "loss": 1.7524,
      "step": 2200
    },
    {
      "epoch": 1.0017809934604147,
      "grad_norm": 1.078079104423523,
      "learning_rate": 5e-05,
      "loss": 1.7432,
      "step": 2250
    },
    {
      "epoch": 1.0240434117155977,
      "grad_norm": 1.279813528060913,
      "learning_rate": 5e-05,
      "loss": 1.7372,
      "step": 2300
    },
    {
      "epoch": 1.0463058299707806,
      "grad_norm": 1.1668626070022583,
      "learning_rate": 5e-05,
      "loss": 1.7296,
      "step": 2350
    },
    {
      "epoch": 1.0685682482259635,
      "grad_norm": 1.0546634197235107,
      "learning_rate": 5e-05,
      "loss": 1.7324,
      "step": 2400
    },
    {
      "epoch": 1.0908306664811465,
      "grad_norm": 1.1601485013961792,
      "learning_rate": 5e-05,
      "loss": 1.7401,
      "step": 2450
    },
    {
      "epoch": 1.1130930847363294,
      "grad_norm": 1.463930368423462,
      "learning_rate": 5e-05,
      "loss": 1.7366,
      "step": 2500
    },
    {
      "epoch": 1.1130930847363294,
      "eval_loss": 1.6233899593353271,
      "eval_runtime": 41.069,
      "eval_samples_per_second": 388.882,
      "eval_steps_per_second": 48.626,
      "step": 2500
    },
    {
      "epoch": 1.1353555029915126,
      "grad_norm": 1.172264575958252,
      "learning_rate": 5e-05,
      "loss": 1.7348,
      "step": 2550
    },
    {
      "epoch": 1.1576179212466955,
      "grad_norm": 1.076794981956482,
      "learning_rate": 5e-05,
      "loss": 1.7463,
      "step": 2600
    },
    {
      "epoch": 1.1798803395018784,
      "grad_norm": 1.0754376649856567,
      "learning_rate": 5e-05,
      "loss": 1.7378,
      "step": 2650
    },
    {
      "epoch": 1.2021427577570614,
      "grad_norm": 1.3081718683242798,
      "learning_rate": 5e-05,
      "loss": 1.7251,
      "step": 2700
    },
    {
      "epoch": 1.2244051760122443,
      "grad_norm": 1.0483145713806152,
      "learning_rate": 5e-05,
      "loss": 1.7414,
      "step": 2750
    },
    {
      "epoch": 1.2466675942674272,
      "grad_norm": 1.2890243530273438,
      "learning_rate": 5e-05,
      "loss": 1.7254,
      "step": 2800
    },
    {
      "epoch": 1.2689300125226102,
      "grad_norm": 1.0999932289123535,
      "learning_rate": 5e-05,
      "loss": 1.7333,
      "step": 2850
    },
    {
      "epoch": 1.291192430777793,
      "grad_norm": 1.0996226072311401,
      "learning_rate": 5e-05,
      "loss": 1.7151,
      "step": 2900
    },
    {
      "epoch": 1.3134548490329763,
      "grad_norm": 1.3446428775787354,
      "learning_rate": 5e-05,
      "loss": 1.7088,
      "step": 2950
    },
    {
      "epoch": 1.3357172672881592,
      "grad_norm": 0.9657168388366699,
      "learning_rate": 5e-05,
      "loss": 1.7171,
      "step": 3000
    },
    {
      "epoch": 1.3357172672881592,
      "eval_loss": 1.6028199195861816,
      "eval_runtime": 41.2736,
      "eval_samples_per_second": 386.954,
      "eval_steps_per_second": 48.384,
      "step": 3000
    },
    {
      "epoch": 1.3579796855433421,
      "grad_norm": 0.904662549495697,
      "learning_rate": 5e-05,
      "loss": 1.6656,
      "step": 3050
    },
    {
      "epoch": 1.380242103798525,
      "grad_norm": 1.2054646015167236,
      "learning_rate": 5e-05,
      "loss": 1.6409,
      "step": 3100
    },
    {
      "epoch": 1.402504522053708,
      "grad_norm": 0.8623887300491333,
      "learning_rate": 5e-05,
      "loss": 1.6378,
      "step": 3150
    },
    {
      "epoch": 1.4247669403088912,
      "grad_norm": 0.931481659412384,
      "learning_rate": 5e-05,
      "loss": 1.6395,
      "step": 3200
    },
    {
      "epoch": 1.447029358564074,
      "grad_norm": 0.8971887826919556,
      "learning_rate": 5e-05,
      "loss": 1.6338,
      "step": 3250
    },
    {
      "epoch": 1.469291776819257,
      "grad_norm": 0.9754030704498291,
      "learning_rate": 5e-05,
      "loss": 1.6341,
      "step": 3300
    },
    {
      "epoch": 1.49155419507444,
      "grad_norm": 0.9373458027839661,
      "learning_rate": 5e-05,
      "loss": 1.6214,
      "step": 3350
    },
    {
      "epoch": 1.513816613329623,
      "grad_norm": 1.1765072345733643,
      "learning_rate": 5e-05,
      "loss": 1.622,
      "step": 3400
    },
    {
      "epoch": 1.5360790315848059,
      "grad_norm": 0.9341714382171631,
      "learning_rate": 5e-05,
      "loss": 1.6242,
      "step": 3450
    },
    {
      "epoch": 1.5583414498399888,
      "grad_norm": 0.8690816164016724,
      "learning_rate": 5e-05,
      "loss": 1.6238,
      "step": 3500
    },
    {
      "epoch": 1.5583414498399888,
      "eval_loss": 1.6130000352859497,
      "eval_runtime": 40.5463,
      "eval_samples_per_second": 393.896,
      "eval_steps_per_second": 49.252,
      "step": 3500
    },
    {
      "epoch": 1.5806038680951717,
      "grad_norm": 1.0579187870025635,
      "learning_rate": 5e-05,
      "loss": 1.611,
      "step": 3550
    },
    {
      "epoch": 1.6028662863503547,
      "grad_norm": 0.8839408159255981,
      "learning_rate": 5e-05,
      "loss": 1.6106,
      "step": 3600
    },
    {
      "epoch": 1.6251287046055378,
      "grad_norm": 1.048997402191162,
      "learning_rate": 5e-05,
      "loss": 1.6075,
      "step": 3650
    },
    {
      "epoch": 1.6473911228607208,
      "grad_norm": 1.201557993888855,
      "learning_rate": 5e-05,
      "loss": 1.621,
      "step": 3700
    },
    {
      "epoch": 1.6696535411159037,
      "grad_norm": 0.9804443717002869,
      "learning_rate": 5e-05,
      "loss": 1.6079,
      "step": 3750
    },
    {
      "epoch": 1.6919159593710866,
      "grad_norm": 0.9969685077667236,
      "learning_rate": 5e-05,
      "loss": 1.6281,
      "step": 3800
    },
    {
      "epoch": 1.7141783776262698,
      "grad_norm": 1.0730953216552734,
      "learning_rate": 5e-05,
      "loss": 1.6235,
      "step": 3850
    },
    {
      "epoch": 1.7364407958814527,
      "grad_norm": 1.1014162302017212,
      "learning_rate": 5e-05,
      "loss": 1.6349,
      "step": 3900
    },
    {
      "epoch": 1.7587032141366357,
      "grad_norm": 0.9518324732780457,
      "learning_rate": 5e-05,
      "loss": 1.6271,
      "step": 3950
    },
    {
      "epoch": 1.7809656323918186,
      "grad_norm": 1.0745582580566406,
      "learning_rate": 5e-05,
      "loss": 1.6217,
      "step": 4000
    },
    {
      "epoch": 1.7809656323918186,
      "eval_loss": 1.6218018531799316,
      "eval_runtime": 41.2037,
      "eval_samples_per_second": 387.611,
      "eval_steps_per_second": 48.466,
      "step": 4000
    },
    {
      "epoch": 1.8032280506470015,
      "grad_norm": 1.137293815612793,
      "learning_rate": 5e-05,
      "loss": 1.6288,
      "step": 4050
    },
    {
      "epoch": 1.8254904689021845,
      "grad_norm": 1.1091963052749634,
      "learning_rate": 5e-05,
      "loss": 1.6421,
      "step": 4100
    },
    {
      "epoch": 1.8477528871573674,
      "grad_norm": 1.0500215291976929,
      "learning_rate": 5e-05,
      "loss": 1.6594,
      "step": 4150
    },
    {
      "epoch": 1.8700153054125503,
      "grad_norm": 1.2211509943008423,
      "learning_rate": 5e-05,
      "loss": 1.658,
      "step": 4200
    },
    {
      "epoch": 1.8922777236677333,
      "grad_norm": 1.1174074411392212,
      "learning_rate": 5e-05,
      "loss": 1.6815,
      "step": 4250
    },
    {
      "epoch": 1.9145401419229162,
      "grad_norm": 1.1086102724075317,
      "learning_rate": 5e-05,
      "loss": 1.7094,
      "step": 4300
    },
    {
      "epoch": 1.9368025601780994,
      "grad_norm": 1.3630105257034302,
      "learning_rate": 5e-05,
      "loss": 1.7099,
      "step": 4350
    },
    {
      "epoch": 1.9590649784332823,
      "grad_norm": 1.2096022367477417,
      "learning_rate": 5e-05,
      "loss": 1.7082,
      "step": 4400
    },
    {
      "epoch": 1.9813273966884652,
      "grad_norm": 1.1671497821807861,
      "learning_rate": 5e-05,
      "loss": 1.7031,
      "step": 4450
    },
    {
      "epoch": 2.0035619869208294,
      "grad_norm": 1.090248465538025,
      "learning_rate": 5e-05,
      "loss": 1.7077,
      "step": 4500
    },
    {
      "epoch": 2.0035619869208294,
      "eval_loss": 1.5784235000610352,
      "eval_runtime": 41.0244,
      "eval_samples_per_second": 389.305,
      "eval_steps_per_second": 48.678,
      "step": 4500
    },
    {
      "epoch": 2.0258244051760124,
      "grad_norm": 1.096616506576538,
      "learning_rate": 5e-05,
      "loss": 1.7104,
      "step": 4550
    },
    {
      "epoch": 2.0480868234311953,
      "grad_norm": 1.1066138744354248,
      "learning_rate": 5e-05,
      "loss": 1.7085,
      "step": 4600
    },
    {
      "epoch": 2.0703492416863782,
      "grad_norm": 1.2357349395751953,
      "learning_rate": 5e-05,
      "loss": 1.7095,
      "step": 4650
    },
    {
      "epoch": 2.092611659941561,
      "grad_norm": 1.0187031030654907,
      "learning_rate": 5e-05,
      "loss": 1.6925,
      "step": 4700
    },
    {
      "epoch": 2.114874078196744,
      "grad_norm": 1.1060880422592163,
      "learning_rate": 5e-05,
      "loss": 1.6929,
      "step": 4750
    },
    {
      "epoch": 2.137136496451927,
      "grad_norm": 1.3188073635101318,
      "learning_rate": 5e-05,
      "loss": 1.7067,
      "step": 4800
    },
    {
      "epoch": 2.15939891470711,
      "grad_norm": 1.3043791055679321,
      "learning_rate": 5e-05,
      "loss": 1.7165,
      "step": 4850
    },
    {
      "epoch": 2.181661332962293,
      "grad_norm": 1.3332817554473877,
      "learning_rate": 5e-05,
      "loss": 1.7005,
      "step": 4900
    },
    {
      "epoch": 2.203923751217476,
      "grad_norm": 1.2902443408966064,
      "learning_rate": 5e-05,
      "loss": 1.6903,
      "step": 4950
    },
    {
      "epoch": 2.226186169472659,
      "grad_norm": 0.9684903621673584,
      "learning_rate": 5e-05,
      "loss": 1.7034,
      "step": 5000
    },
    {
      "epoch": 2.226186169472659,
      "eval_loss": 1.5792449712753296,
      "eval_runtime": 40.876,
      "eval_samples_per_second": 390.719,
      "eval_steps_per_second": 48.855,
      "step": 5000
    },
    {
      "epoch": 2.2484485877278417,
      "grad_norm": 1.1154942512512207,
      "learning_rate": 5e-05,
      "loss": 1.6964,
      "step": 5050
    },
    {
      "epoch": 2.270711005983025,
      "grad_norm": 1.117543339729309,
      "learning_rate": 5e-05,
      "loss": 1.6862,
      "step": 5100
    },
    {
      "epoch": 2.292973424238208,
      "grad_norm": 0.9821292161941528,
      "learning_rate": 5e-05,
      "loss": 1.6819,
      "step": 5150
    },
    {
      "epoch": 2.315235842493391,
      "grad_norm": 1.1892586946487427,
      "learning_rate": 5e-05,
      "loss": 1.6964,
      "step": 5200
    },
    {
      "epoch": 2.337498260748574,
      "grad_norm": 1.3049404621124268,
      "learning_rate": 5e-05,
      "loss": 1.682,
      "step": 5250
    },
    {
      "epoch": 2.359760679003757,
      "grad_norm": 1.0873595476150513,
      "learning_rate": 5e-05,
      "loss": 1.6399,
      "step": 5300
    },
    {
      "epoch": 2.38202309725894,
      "grad_norm": 1.0370205640792847,
      "learning_rate": 5e-05,
      "loss": 1.6153,
      "step": 5350
    },
    {
      "epoch": 2.4042855155141227,
      "grad_norm": 0.8503725528717041,
      "learning_rate": 5e-05,
      "loss": 1.6022,
      "step": 5400
    },
    {
      "epoch": 2.4265479337693057,
      "grad_norm": 0.9510111212730408,
      "learning_rate": 5e-05,
      "loss": 1.6106,
      "step": 5450
    },
    {
      "epoch": 2.4488103520244886,
      "grad_norm": 0.9935341477394104,
      "learning_rate": 5e-05,
      "loss": 1.6049,
      "step": 5500
    },
    {
      "epoch": 2.4488103520244886,
      "eval_loss": 1.586571455001831,
      "eval_runtime": 40.7387,
      "eval_samples_per_second": 392.036,
      "eval_steps_per_second": 49.02,
      "step": 5500
    },
    {
      "epoch": 2.4710727702796715,
      "grad_norm": 1.2289257049560547,
      "learning_rate": 5e-05,
      "loss": 1.5918,
      "step": 5550
    },
    {
      "epoch": 2.4933351885348545,
      "grad_norm": 1.0900951623916626,
      "learning_rate": 5e-05,
      "loss": 1.6005,
      "step": 5600
    },
    {
      "epoch": 2.5155976067900374,
      "grad_norm": 0.9930930137634277,
      "learning_rate": 5e-05,
      "loss": 1.6151,
      "step": 5650
    },
    {
      "epoch": 2.5378600250452203,
      "grad_norm": 0.9901494979858398,
      "learning_rate": 5e-05,
      "loss": 1.59,
      "step": 5700
    },
    {
      "epoch": 2.5601224433004033,
      "grad_norm": 0.9367809891700745,
      "learning_rate": 5e-05,
      "loss": 1.5844,
      "step": 5750
    },
    {
      "epoch": 2.582384861555586,
      "grad_norm": 1.0291093587875366,
      "learning_rate": 5e-05,
      "loss": 1.5841,
      "step": 5800
    },
    {
      "epoch": 2.6046472798107696,
      "grad_norm": 0.8904668688774109,
      "learning_rate": 5e-05,
      "loss": 1.5883,
      "step": 5850
    },
    {
      "epoch": 2.6269096980659525,
      "grad_norm": 0.9640474915504456,
      "learning_rate": 5e-05,
      "loss": 1.5855,
      "step": 5900
    },
    {
      "epoch": 2.6491721163211355,
      "grad_norm": 0.979326605796814,
      "learning_rate": 5e-05,
      "loss": 1.5798,
      "step": 5950
    },
    {
      "epoch": 2.6714345345763184,
      "grad_norm": 1.2588844299316406,
      "learning_rate": 5e-05,
      "loss": 1.6018,
      "step": 6000
    },
    {
      "epoch": 2.6714345345763184,
      "eval_loss": 1.5868676900863647,
      "eval_runtime": 40.9701,
      "eval_samples_per_second": 389.821,
      "eval_steps_per_second": 48.743,
      "step": 6000
    },
    {
      "epoch": 2.6936969528315013,
      "grad_norm": 1.070421814918518,
      "learning_rate": 5e-05,
      "loss": 1.6947,
      "step": 6050
    },
    {
      "epoch": 2.7159593710866843,
      "grad_norm": 0.9952645301818848,
      "learning_rate": 5e-05,
      "loss": 1.6907,
      "step": 6100
    },
    {
      "epoch": 2.738221789341867,
      "grad_norm": 1.2595455646514893,
      "learning_rate": 5e-05,
      "loss": 1.6954,
      "step": 6150
    },
    {
      "epoch": 2.76048420759705,
      "grad_norm": 0.9722006916999817,
      "learning_rate": 5e-05,
      "loss": 1.6832,
      "step": 6200
    },
    {
      "epoch": 2.782746625852233,
      "grad_norm": 1.2001519203186035,
      "learning_rate": 5e-05,
      "loss": 1.6832,
      "step": 6250
    },
    {
      "epoch": 2.805009044107416,
      "grad_norm": 1.316867709159851,
      "learning_rate": 5e-05,
      "loss": 1.6873,
      "step": 6300
    },
    {
      "epoch": 2.8272714623625994,
      "grad_norm": 1.2271651029586792,
      "learning_rate": 5e-05,
      "loss": 1.6865,
      "step": 6350
    },
    {
      "epoch": 2.8495338806177823,
      "grad_norm": 1.2443265914916992,
      "learning_rate": 5e-05,
      "loss": 1.6779,
      "step": 6400
    },
    {
      "epoch": 2.8717962988729653,
      "grad_norm": 1.1751494407653809,
      "learning_rate": 5e-05,
      "loss": 1.666,
      "step": 6450
    },
    {
      "epoch": 2.894058717128148,
      "grad_norm": 0.9704211950302124,
      "learning_rate": 5e-05,
      "loss": 1.6628,
      "step": 6500
    },
    {
      "epoch": 2.894058717128148,
      "eval_loss": 1.5628445148468018,
      "eval_runtime": 41.0455,
      "eval_samples_per_second": 389.105,
      "eval_steps_per_second": 48.653,
      "step": 6500
    },
    {
      "epoch": 2.916321135383331,
      "grad_norm": 1.0452390909194946,
      "learning_rate": 5e-05,
      "loss": 1.6794,
      "step": 6550
    },
    {
      "epoch": 2.938583553638514,
      "grad_norm": 1.338881254196167,
      "learning_rate": 5e-05,
      "loss": 1.6678,
      "step": 6600
    },
    {
      "epoch": 2.960845971893697,
      "grad_norm": 0.989860475063324,
      "learning_rate": 5e-05,
      "loss": 1.6753,
      "step": 6650
    },
    {
      "epoch": 2.98310839014888,
      "grad_norm": 1.1380687952041626,
      "learning_rate": 5e-05,
      "loss": 1.6639,
      "step": 6700
    },
    {
      "epoch": 3.0057882287463475,
      "grad_norm": 1.2292852401733398,
      "learning_rate": 5e-05,
      "loss": 1.697,
      "step": 6750
    },
    {
      "epoch": 3.0280506470015305,
      "grad_norm": 1.1919242143630981,
      "learning_rate": 5e-05,
      "loss": 1.6714,
      "step": 6800
    },
    {
      "epoch": 3.0503130652567134,
      "grad_norm": 1.1312869787216187,
      "learning_rate": 5e-05,
      "loss": 1.6641,
      "step": 6850
    },
    {
      "epoch": 3.0725754835118964,
      "grad_norm": 1.3589369058609009,
      "learning_rate": 5e-05,
      "loss": 1.655,
      "step": 6900
    },
    {
      "epoch": 3.0948379017670793,
      "grad_norm": 1.257063627243042,
      "learning_rate": 5e-05,
      "loss": 1.6661,
      "step": 6950
    },
    {
      "epoch": 3.1171003200222622,
      "grad_norm": 1.3228737115859985,
      "learning_rate": 5e-05,
      "loss": 1.653,
      "step": 7000
    },
    {
      "epoch": 3.1171003200222622,
      "eval_loss": 1.5605802536010742,
      "eval_runtime": 42.9947,
      "eval_samples_per_second": 371.464,
      "eval_steps_per_second": 46.448,
      "step": 7000
    },
    {
      "epoch": 3.1393627382774456,
      "grad_norm": 1.0204429626464844,
      "learning_rate": 5e-05,
      "loss": 1.6602,
      "step": 7050
    },
    {
      "epoch": 3.1616251565326285,
      "grad_norm": 0.9796785712242126,
      "learning_rate": 5e-05,
      "loss": 1.6508,
      "step": 7100
    },
    {
      "epoch": 3.1838875747878115,
      "grad_norm": 0.9721747040748596,
      "learning_rate": 5e-05,
      "loss": 1.6566,
      "step": 7150
    },
    {
      "epoch": 3.2061499930429944,
      "grad_norm": 1.1874974966049194,
      "learning_rate": 5e-05,
      "loss": 1.6501,
      "step": 7200
    },
    {
      "epoch": 3.2284124112981774,
      "grad_norm": 1.2861804962158203,
      "learning_rate": 5e-05,
      "loss": 1.6663,
      "step": 7250
    },
    {
      "epoch": 3.2506748295533603,
      "grad_norm": 0.9947218894958496,
      "learning_rate": 5e-05,
      "loss": 1.6732,
      "step": 7300
    },
    {
      "epoch": 3.2729372478085432,
      "grad_norm": 1.1224796772003174,
      "learning_rate": 5e-05,
      "loss": 1.6597,
      "step": 7350
    },
    {
      "epoch": 3.295199666063726,
      "grad_norm": 1.2262948751449585,
      "learning_rate": 5e-05,
      "loss": 1.6544,
      "step": 7400
    },
    {
      "epoch": 3.317462084318909,
      "grad_norm": 1.114092469215393,
      "learning_rate": 5e-05,
      "loss": 1.6532,
      "step": 7450
    },
    {
      "epoch": 3.339724502574092,
      "grad_norm": 1.0086640119552612,
      "learning_rate": 5e-05,
      "loss": 1.6575,
      "step": 7500
    },
    {
      "epoch": 3.339724502574092,
      "eval_loss": 1.5381077527999878,
      "eval_runtime": 43.9183,
      "eval_samples_per_second": 363.652,
      "eval_steps_per_second": 45.471,
      "step": 7500
    },
    {
      "epoch": 3.3615416724641713,
      "grad_norm": 1.4630149602890015,
      "learning_rate": 5e-05,
      "loss": 1.6543,
      "step": 7550
    },
    {
      "epoch": 3.3838040907193543,
      "grad_norm": 0.9978652596473694,
      "learning_rate": 5e-05,
      "loss": 1.6469,
      "step": 7600
    },
    {
      "epoch": 3.406066508974537,
      "grad_norm": 0.9942854046821594,
      "learning_rate": 5e-05,
      "loss": 1.6524,
      "step": 7650
    },
    {
      "epoch": 3.42832892722972,
      "grad_norm": 1.6113872528076172,
      "learning_rate": 5e-05,
      "loss": 1.6392,
      "step": 7700
    },
    {
      "epoch": 3.450591345484903,
      "grad_norm": 1.2430763244628906,
      "learning_rate": 5e-05,
      "loss": 1.6524,
      "step": 7750
    },
    {
      "epoch": 3.472853763740086,
      "grad_norm": 0.9973090887069702,
      "learning_rate": 5e-05,
      "loss": 1.6396,
      "step": 7800
    },
    {
      "epoch": 3.4951161819952694,
      "grad_norm": 1.3717776536941528,
      "learning_rate": 5e-05,
      "loss": 1.6463,
      "step": 7850
    },
    {
      "epoch": 3.5173786002504523,
      "grad_norm": 1.3711599111557007,
      "learning_rate": 5e-05,
      "loss": 1.644,
      "step": 7900
    },
    {
      "epoch": 3.5396410185056353,
      "grad_norm": 1.0126900672912598,
      "learning_rate": 5e-05,
      "loss": 1.6311,
      "step": 7950
    },
    {
      "epoch": 3.561903436760818,
      "grad_norm": 1.0467159748077393,
      "learning_rate": 5e-05,
      "loss": 1.64,
      "step": 8000
    },
    {
      "epoch": 3.561903436760818,
      "eval_loss": 1.539516806602478,
      "eval_runtime": 42.3362,
      "eval_samples_per_second": 377.242,
      "eval_steps_per_second": 47.17,
      "step": 8000
    },
    {
      "epoch": 3.584165855016001,
      "grad_norm": 1.1766951084136963,
      "learning_rate": 5e-05,
      "loss": 1.6552,
      "step": 8050
    },
    {
      "epoch": 3.606428273271184,
      "grad_norm": 1.0943933725357056,
      "learning_rate": 5e-05,
      "loss": 1.6385,
      "step": 8100
    },
    {
      "epoch": 3.628690691526367,
      "grad_norm": 1.2377898693084717,
      "learning_rate": 5e-05,
      "loss": 1.6288,
      "step": 8150
    },
    {
      "epoch": 3.65095310978155,
      "grad_norm": 0.939339280128479,
      "learning_rate": 5e-05,
      "loss": 1.6357,
      "step": 8200
    },
    {
      "epoch": 3.673215528036733,
      "grad_norm": 1.0802948474884033,
      "learning_rate": 5e-05,
      "loss": 1.6367,
      "step": 8250
    },
    {
      "epoch": 3.695477946291916,
      "grad_norm": 1.089154601097107,
      "learning_rate": 5e-05,
      "loss": 1.6434,
      "step": 8300
    },
    {
      "epoch": 3.7177403645470988,
      "grad_norm": 1.095510482788086,
      "learning_rate": 5e-05,
      "loss": 1.6445,
      "step": 8350
    },
    {
      "epoch": 3.740002782802282,
      "grad_norm": 1.2433582544326782,
      "learning_rate": 5e-05,
      "loss": 1.6521,
      "step": 8400
    },
    {
      "epoch": 3.762265201057465,
      "grad_norm": 1.3547347784042358,
      "learning_rate": 5e-05,
      "loss": 1.6363,
      "step": 8450
    },
    {
      "epoch": 3.784527619312648,
      "grad_norm": 1.224070429801941,
      "learning_rate": 5e-05,
      "loss": 1.6455,
      "step": 8500
    },
    {
      "epoch": 3.784527619312648,
      "eval_loss": 1.516330361366272,
      "eval_runtime": 40.388,
      "eval_samples_per_second": 395.439,
      "eval_steps_per_second": 49.445,
      "step": 8500
    },
    {
      "epoch": 3.806790037567831,
      "grad_norm": 1.3312312364578247,
      "learning_rate": 5e-05,
      "loss": 1.6493,
      "step": 8550
    },
    {
      "epoch": 3.829052455823014,
      "grad_norm": 1.275539517402649,
      "learning_rate": 5e-05,
      "loss": 1.6385,
      "step": 8600
    },
    {
      "epoch": 3.851314874078197,
      "grad_norm": 1.1481244564056396,
      "learning_rate": 5e-05,
      "loss": 1.6369,
      "step": 8650
    },
    {
      "epoch": 3.8735772923333798,
      "grad_norm": 1.039244532585144,
      "learning_rate": 5e-05,
      "loss": 1.6277,
      "step": 8700
    },
    {
      "epoch": 3.8958397105885627,
      "grad_norm": 1.0740258693695068,
      "learning_rate": 5e-05,
      "loss": 1.6294,
      "step": 8750
    },
    {
      "epoch": 3.9181021288437456,
      "grad_norm": 1.0660001039505005,
      "learning_rate": 5e-05,
      "loss": 1.6123,
      "step": 8800
    },
    {
      "epoch": 3.9403645470989286,
      "grad_norm": 1.036129117012024,
      "learning_rate": 5e-05,
      "loss": 1.5803,
      "step": 8850
    },
    {
      "epoch": 3.9626269653541115,
      "grad_norm": 0.9285004734992981,
      "learning_rate": 5e-05,
      "loss": 1.5905,
      "step": 8900
    },
    {
      "epoch": 3.9848893836092945,
      "grad_norm": 0.9074347019195557,
      "learning_rate": 5e-05,
      "loss": 1.5713,
      "step": 8950
    },
    {
      "epoch": 4.0075692222067625,
      "grad_norm": 1.1706671714782715,
      "learning_rate": 5e-05,
      "loss": 1.6308,
      "step": 9000
    },
    {
      "epoch": 4.0075692222067625,
      "eval_loss": 1.5311250686645508,
      "eval_runtime": 40.3535,
      "eval_samples_per_second": 395.778,
      "eval_steps_per_second": 49.488,
      "step": 9000
    },
    {
      "epoch": 4.029831640461945,
      "grad_norm": 1.2552838325500488,
      "learning_rate": 5e-05,
      "loss": 1.6371,
      "step": 9050
    },
    {
      "epoch": 4.052094058717128,
      "grad_norm": 1.3415331840515137,
      "learning_rate": 5e-05,
      "loss": 1.6345,
      "step": 9100
    },
    {
      "epoch": 4.074356476972311,
      "grad_norm": 1.029757022857666,
      "learning_rate": 5e-05,
      "loss": 1.629,
      "step": 9150
    },
    {
      "epoch": 4.096618895227494,
      "grad_norm": 1.1435120105743408,
      "learning_rate": 5e-05,
      "loss": 1.6287,
      "step": 9200
    },
    {
      "epoch": 4.118881313482677,
      "grad_norm": 1.385100245475769,
      "learning_rate": 5e-05,
      "loss": 1.6335,
      "step": 9250
    },
    {
      "epoch": 4.14114373173786,
      "grad_norm": 1.062818169593811,
      "learning_rate": 5e-05,
      "loss": 1.6184,
      "step": 9300
    },
    {
      "epoch": 4.163406149993043,
      "grad_norm": 1.3703244924545288,
      "learning_rate": 5e-05,
      "loss": 1.631,
      "step": 9350
    },
    {
      "epoch": 4.185668568248226,
      "grad_norm": 1.1130529642105103,
      "learning_rate": 5e-05,
      "loss": 1.6284,
      "step": 9400
    },
    {
      "epoch": 4.207930986503409,
      "grad_norm": 1.189207911491394,
      "learning_rate": 5e-05,
      "loss": 1.619,
      "step": 9450
    },
    {
      "epoch": 4.230193404758592,
      "grad_norm": 1.0979055166244507,
      "learning_rate": 5e-05,
      "loss": 1.6324,
      "step": 9500
    },
    {
      "epoch": 4.230193404758592,
      "eval_loss": 1.5118227005004883,
      "eval_runtime": 40.2303,
      "eval_samples_per_second": 396.989,
      "eval_steps_per_second": 49.639,
      "step": 9500
    },
    {
      "epoch": 4.252455823013775,
      "grad_norm": 1.1394270658493042,
      "learning_rate": 5e-05,
      "loss": 1.6229,
      "step": 9550
    },
    {
      "epoch": 4.274718241268958,
      "grad_norm": 1.0398465394973755,
      "learning_rate": 5e-05,
      "loss": 1.625,
      "step": 9600
    },
    {
      "epoch": 4.296980659524141,
      "grad_norm": 1.1344504356384277,
      "learning_rate": 5e-05,
      "loss": 1.6113,
      "step": 9650
    },
    {
      "epoch": 4.319243077779324,
      "grad_norm": 0.9889805316925049,
      "learning_rate": 5e-05,
      "loss": 1.6195,
      "step": 9700
    },
    {
      "epoch": 4.3415054960345065,
      "grad_norm": 1.2321630716323853,
      "learning_rate": 5e-05,
      "loss": 1.6133,
      "step": 9750
    },
    {
      "epoch": 4.3637679142896895,
      "grad_norm": 1.0766791105270386,
      "learning_rate": 5e-05,
      "loss": 1.587,
      "step": 9800
    },
    {
      "epoch": 4.386030332544872,
      "grad_norm": 0.9230866432189941,
      "learning_rate": 5e-05,
      "loss": 1.5747,
      "step": 9850
    },
    {
      "epoch": 4.408292750800055,
      "grad_norm": 1.036097526550293,
      "learning_rate": 5e-05,
      "loss": 1.5673,
      "step": 9900
    },
    {
      "epoch": 4.430555169055238,
      "grad_norm": 1.0321383476257324,
      "learning_rate": 5e-05,
      "loss": 1.5641,
      "step": 9950
    },
    {
      "epoch": 4.452817587310421,
      "grad_norm": 0.9865553379058838,
      "learning_rate": 5e-05,
      "loss": 1.5481,
      "step": 10000
    },
    {
      "epoch": 4.452817587310421,
      "eval_loss": 1.5091972351074219,
      "eval_runtime": 40.5505,
      "eval_samples_per_second": 393.855,
      "eval_steps_per_second": 49.247,
      "step": 10000
    },
    {
      "epoch": 4.475080005565605,
      "grad_norm": 1.0181940793991089,
      "learning_rate": 5e-05,
      "loss": 1.5594,
      "step": 10050
    },
    {
      "epoch": 4.497342423820788,
      "grad_norm": 1.0538172721862793,
      "learning_rate": 5e-05,
      "loss": 1.5523,
      "step": 10100
    },
    {
      "epoch": 4.519604842075971,
      "grad_norm": 0.936060905456543,
      "learning_rate": 5e-05,
      "loss": 1.547,
      "step": 10150
    },
    {
      "epoch": 4.541867260331154,
      "grad_norm": 1.225715160369873,
      "learning_rate": 5e-05,
      "loss": 1.5491,
      "step": 10200
    },
    {
      "epoch": 4.564129678586337,
      "grad_norm": 1.2574198246002197,
      "learning_rate": 5e-05,
      "loss": 1.5496,
      "step": 10250
    },
    {
      "epoch": 4.58639209684152,
      "grad_norm": 1.2122540473937988,
      "learning_rate": 5e-05,
      "loss": 1.5327,
      "step": 10300
    },
    {
      "epoch": 4.608654515096703,
      "grad_norm": 1.1094001531600952,
      "learning_rate": 5e-05,
      "loss": 1.5375,
      "step": 10350
    },
    {
      "epoch": 4.630916933351886,
      "grad_norm": 1.0384974479675293,
      "learning_rate": 5e-05,
      "loss": 1.555,
      "step": 10400
    },
    {
      "epoch": 4.6531793516070685,
      "grad_norm": 1.0797594785690308,
      "learning_rate": 5e-05,
      "loss": 1.5621,
      "step": 10450
    },
    {
      "epoch": 4.6754417698622515,
      "grad_norm": 1.0724256038665771,
      "learning_rate": 5e-05,
      "loss": 1.547,
      "step": 10500
    },
    {
      "epoch": 4.6754417698622515,
      "eval_loss": 1.5108764171600342,
      "eval_runtime": 40.4882,
      "eval_samples_per_second": 394.461,
      "eval_steps_per_second": 49.323,
      "step": 10500
    },
    {
      "epoch": 4.697704188117434,
      "grad_norm": 1.236370325088501,
      "learning_rate": 5e-05,
      "loss": 1.5426,
      "step": 10550
    },
    {
      "epoch": 4.719966606372617,
      "grad_norm": 1.1259009838104248,
      "learning_rate": 5e-05,
      "loss": 1.5701,
      "step": 10600
    },
    {
      "epoch": 4.7422290246278,
      "grad_norm": 1.0653769969940186,
      "learning_rate": 5e-05,
      "loss": 1.5543,
      "step": 10650
    },
    {
      "epoch": 4.764491442882983,
      "grad_norm": 1.1116371154785156,
      "learning_rate": 5e-05,
      "loss": 1.557,
      "step": 10700
    },
    {
      "epoch": 4.786753861138166,
      "grad_norm": 1.0332480669021606,
      "learning_rate": 5e-05,
      "loss": 1.5513,
      "step": 10750
    },
    {
      "epoch": 4.809016279393349,
      "grad_norm": 1.1142674684524536,
      "learning_rate": 5e-05,
      "loss": 1.5448,
      "step": 10800
    },
    {
      "epoch": 4.831278697648532,
      "grad_norm": 1.0316691398620605,
      "learning_rate": 5e-05,
      "loss": 1.5456,
      "step": 10850
    },
    {
      "epoch": 4.853541115903715,
      "grad_norm": 0.987628161907196,
      "learning_rate": 5e-05,
      "loss": 1.5535,
      "step": 10900
    },
    {
      "epoch": 4.875803534158898,
      "grad_norm": 1.125772476196289,
      "learning_rate": 5e-05,
      "loss": 1.5583,
      "step": 10950
    },
    {
      "epoch": 4.898065952414081,
      "grad_norm": 0.9541718363761902,
      "learning_rate": 5e-05,
      "loss": 1.5584,
      "step": 11000
    },
    {
      "epoch": 4.898065952414081,
      "eval_loss": 1.5041238069534302,
      "eval_runtime": 40.2547,
      "eval_samples_per_second": 396.749,
      "eval_steps_per_second": 49.609,
      "step": 11000
    }
  ],
  "logging_steps": 50,
  "max_steps": 11230,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.530938070076723e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}