|
,loss,learning_rate,epoch,step,eval_loss,eval_accuracy,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss |
|
0,0.6437,2.8185129462813098e-06,1.0,7500,,,,,,,,,,, |
|
1,,,1.0,7500,0.4621545076370239,0.8046666666666666,0.8049857966154991,4.091,733.314,45.954,,,,, |
|
2,0.4596,5.6377778970840395e-06,2.0,15000,,,,,,,,,,, |
|
3,,,2.0,15000,0.46744176745414734,0.818,0.8099413139021953,4.096,732.417,45.898,,,,, |
|
4,0.4051,8.456666845626058e-06,3.0,22500,,,,,,,,,,, |
|
5,,,3.0,22500,0.4205056428909302,0.8306666666666667,0.8266481080596656,4.0971,732.222,45.886,,,,, |
|
6,0.3529,1.127593179642879e-05,4.0,30000,,,,,,,,,,, |
|
7,,,4.0,30000,0.42838728427886963,0.8396666666666667,0.8387062437364164,4.0957,732.479,45.902,,,,, |
|
8,0.2958,1.40944447427101e-05,5.0,37500,,,,,,,,,,, |
|
9,,,5.0,37500,0.4620000720024109,0.8303333333333334,0.8304119496302145,4.0916,733.209,45.948,,,,, |
|
10,0.248,1.691370969351283e-05,6.0,45000,,,,,,,,,,, |
|
11,,,6.0,45000,0.5622704029083252,0.829,0.8285084230864971,4.0992,731.853,45.863,,,,, |
|
12,0.2172,1.9732598642054846e-05,7.0,52500,,,,,,,,,,, |
|
13,,,7.0,52500,0.6040271520614624,0.8243333333333334,0.8198857071905551,4.0953,732.547,45.906,,,,, |
|
14,,,7.0,52500,,,,,,,4427.9199,2710.076,169.38,8.609884212966826e+16,0.3746183896019345 |
|
|