|
{ |
|
"best_metric": 0.9820535451603413, |
|
"best_model_checkpoint": "wav2vec2-base-ft-keyword-spotting/checkpoint-1995", |
|
"epoch": 4.996869129618034, |
|
"eval_steps": 500, |
|
"global_step": 1995, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025046963055729492, |
|
"grad_norm": 1.2714543342590332, |
|
"learning_rate": 1.5e-06, |
|
"loss": 2.4697, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.050093926111458985, |
|
"grad_norm": 1.3721944093704224, |
|
"learning_rate": 3e-06, |
|
"loss": 2.454, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07514088916718847, |
|
"grad_norm": 1.3193830251693726, |
|
"learning_rate": 4.5e-06, |
|
"loss": 2.4043, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.10018785222291797, |
|
"grad_norm": 1.7374777793884277, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3255, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12523481527864747, |
|
"grad_norm": 2.109724760055542, |
|
"learning_rate": 7.5e-06, |
|
"loss": 2.216, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15028177833437695, |
|
"grad_norm": 2.4082186222076416, |
|
"learning_rate": 9e-06, |
|
"loss": 2.057, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17532874139010646, |
|
"grad_norm": 2.3386483192443848, |
|
"learning_rate": 1.05e-05, |
|
"loss": 1.8432, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.20037570444583594, |
|
"grad_norm": 2.037081241607666, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.7691, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22542266750156542, |
|
"grad_norm": 2.0078208446502686, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 1.6564, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25046963055729493, |
|
"grad_norm": 1.5335993766784668, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.6115, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27551659361302444, |
|
"grad_norm": 0.93914395570755, |
|
"learning_rate": 1.65e-05, |
|
"loss": 1.6253, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3005635566687539, |
|
"grad_norm": 1.5028908252716064, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.5118, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3256105197244834, |
|
"grad_norm": 0.5246427655220032, |
|
"learning_rate": 1.95e-05, |
|
"loss": 1.5028, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3506574827802129, |
|
"grad_norm": 1.8485475778579712, |
|
"learning_rate": 2.1e-05, |
|
"loss": 1.5388, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.37570444583594237, |
|
"grad_norm": 1.7445107698440552, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.4807, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4007514088916719, |
|
"grad_norm": 1.8197436332702637, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.4336, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4257983719474014, |
|
"grad_norm": 2.3059756755828857, |
|
"learning_rate": 2.55e-05, |
|
"loss": 1.4139, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.45084533500313084, |
|
"grad_norm": 2.768622398376465, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 1.3435, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.47589229805886035, |
|
"grad_norm": 2.9464476108551025, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"loss": 1.3293, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5009392611145899, |
|
"grad_norm": 4.094380855560303, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5259862241703194, |
|
"grad_norm": 3.006328582763672, |
|
"learning_rate": 2.9832869080779945e-05, |
|
"loss": 1.2165, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5510331872260489, |
|
"grad_norm": 2.451118230819702, |
|
"learning_rate": 2.9665738161559886e-05, |
|
"loss": 1.1742, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5760801502817783, |
|
"grad_norm": 5.200439929962158, |
|
"learning_rate": 2.9498607242339834e-05, |
|
"loss": 1.1409, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6011271133375078, |
|
"grad_norm": 4.16045618057251, |
|
"learning_rate": 2.933147632311978e-05, |
|
"loss": 1.1796, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6261740763932373, |
|
"grad_norm": 5.874985218048096, |
|
"learning_rate": 2.916434540389972e-05, |
|
"loss": 1.0734, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6512210394489668, |
|
"grad_norm": 3.755768060684204, |
|
"learning_rate": 2.8997214484679665e-05, |
|
"loss": 1.0903, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6762680025046963, |
|
"grad_norm": 4.306619644165039, |
|
"learning_rate": 2.8830083565459613e-05, |
|
"loss": 0.9576, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7013149655604258, |
|
"grad_norm": 4.983129978179932, |
|
"learning_rate": 2.8662952646239554e-05, |
|
"loss": 0.8808, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7263619286161553, |
|
"grad_norm": 3.138037919998169, |
|
"learning_rate": 2.84958217270195e-05, |
|
"loss": 0.8472, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7514088916718847, |
|
"grad_norm": 4.114541530609131, |
|
"learning_rate": 2.8328690807799443e-05, |
|
"loss": 0.7928, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7764558547276142, |
|
"grad_norm": 3.26853609085083, |
|
"learning_rate": 2.8161559888579388e-05, |
|
"loss": 0.8008, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8015028177833438, |
|
"grad_norm": 5.10264253616333, |
|
"learning_rate": 2.7994428969359332e-05, |
|
"loss": 0.7371, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8265497808390733, |
|
"grad_norm": 2.795498847961426, |
|
"learning_rate": 2.7827298050139277e-05, |
|
"loss": 0.6737, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8515967438948028, |
|
"grad_norm": 3.0811805725097656, |
|
"learning_rate": 2.7660167130919218e-05, |
|
"loss": 0.6421, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8766437069505323, |
|
"grad_norm": 3.24672794342041, |
|
"learning_rate": 2.7493036211699166e-05, |
|
"loss": 0.6054, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9016906700062617, |
|
"grad_norm": 2.978050947189331, |
|
"learning_rate": 2.732590529247911e-05, |
|
"loss": 0.5662, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9267376330619912, |
|
"grad_norm": 1.892045259475708, |
|
"learning_rate": 2.7158774373259055e-05, |
|
"loss": 0.5699, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9517845961177207, |
|
"grad_norm": 2.637104034423828, |
|
"learning_rate": 2.6991643454038996e-05, |
|
"loss": 0.5033, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9768315591734502, |
|
"grad_norm": 2.7039406299591064, |
|
"learning_rate": 2.6824512534818944e-05, |
|
"loss": 0.5147, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9993738259236068, |
|
"eval_accuracy": 0.9664607237422771, |
|
"eval_loss": 0.3694746196269989, |
|
"eval_runtime": 21.9401, |
|
"eval_samples_per_second": 309.844, |
|
"eval_steps_per_second": 9.708, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.0018785222291797, |
|
"grad_norm": 3.884601593017578, |
|
"learning_rate": 2.665738161559889e-05, |
|
"loss": 0.5237, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0269254852849092, |
|
"grad_norm": 3.843261957168579, |
|
"learning_rate": 2.649025069637883e-05, |
|
"loss": 0.4463, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0519724483406387, |
|
"grad_norm": 7.544467926025391, |
|
"learning_rate": 2.6323119777158774e-05, |
|
"loss": 0.4465, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0770194113963683, |
|
"grad_norm": 4.039328098297119, |
|
"learning_rate": 2.6155988857938722e-05, |
|
"loss": 0.4229, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1020663744520978, |
|
"grad_norm": 2.7726874351501465, |
|
"learning_rate": 2.5988857938718663e-05, |
|
"loss": 0.4179, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.127113337507827, |
|
"grad_norm": 1.9234434366226196, |
|
"learning_rate": 2.5838440111420614e-05, |
|
"loss": 0.3878, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1521603005635566, |
|
"grad_norm": 5.885218143463135, |
|
"learning_rate": 2.567130919220056e-05, |
|
"loss": 0.3734, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.177207263619286, |
|
"grad_norm": 4.204211235046387, |
|
"learning_rate": 2.55041782729805e-05, |
|
"loss": 0.3569, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2022542266750156, |
|
"grad_norm": 3.173961639404297, |
|
"learning_rate": 2.5337047353760448e-05, |
|
"loss": 0.3598, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.227301189730745, |
|
"grad_norm": 2.828176975250244, |
|
"learning_rate": 2.5169916434540392e-05, |
|
"loss": 0.3744, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.2523481527864746, |
|
"grad_norm": 3.787830352783203, |
|
"learning_rate": 2.5002785515320333e-05, |
|
"loss": 0.3437, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.277395115842204, |
|
"grad_norm": 2.385486364364624, |
|
"learning_rate": 2.4835654596100278e-05, |
|
"loss": 0.3273, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.3024420788979336, |
|
"grad_norm": 2.802664279937744, |
|
"learning_rate": 2.4668523676880226e-05, |
|
"loss": 0.3129, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.3274890419536631, |
|
"grad_norm": 4.155833721160889, |
|
"learning_rate": 2.4501392757660167e-05, |
|
"loss": 0.3631, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.3525360050093926, |
|
"grad_norm": 4.156424522399902, |
|
"learning_rate": 2.433426183844011e-05, |
|
"loss": 0.2902, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.3775829680651221, |
|
"grad_norm": 2.7472970485687256, |
|
"learning_rate": 2.4167130919220056e-05, |
|
"loss": 0.3067, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.4026299311208517, |
|
"grad_norm": 2.5559287071228027, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.2819, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.4276768941765812, |
|
"grad_norm": 2.6116209030151367, |
|
"learning_rate": 2.3832869080779945e-05, |
|
"loss": 0.2995, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.4527238572323107, |
|
"grad_norm": 1.80980384349823, |
|
"learning_rate": 2.366573816155989e-05, |
|
"loss": 0.2733, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.4777708202880402, |
|
"grad_norm": 3.6102912425994873, |
|
"learning_rate": 2.349860724233983e-05, |
|
"loss": 0.3052, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.5028177833437697, |
|
"grad_norm": 2.1520848274230957, |
|
"learning_rate": 2.333147632311978e-05, |
|
"loss": 0.2605, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.5278647463994992, |
|
"grad_norm": 1.7574571371078491, |
|
"learning_rate": 2.3164345403899723e-05, |
|
"loss": 0.2805, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.5529117094552287, |
|
"grad_norm": 4.775511741638184, |
|
"learning_rate": 2.2997214484679665e-05, |
|
"loss": 0.2532, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.577958672510958, |
|
"grad_norm": 3.570802688598633, |
|
"learning_rate": 2.283008356545961e-05, |
|
"loss": 0.2524, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.6030056355666875, |
|
"grad_norm": 2.678054094314575, |
|
"learning_rate": 2.2662952646239557e-05, |
|
"loss": 0.2355, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.628052598622417, |
|
"grad_norm": 2.604247808456421, |
|
"learning_rate": 2.2495821727019498e-05, |
|
"loss": 0.257, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.6530995616781465, |
|
"grad_norm": 2.318293809890747, |
|
"learning_rate": 2.2328690807799443e-05, |
|
"loss": 0.2606, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.678146524733876, |
|
"grad_norm": 3.355053186416626, |
|
"learning_rate": 2.2161559888579387e-05, |
|
"loss": 0.2644, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.7031934877896056, |
|
"grad_norm": 3.6672332286834717, |
|
"learning_rate": 2.1994428969359335e-05, |
|
"loss": 0.2521, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.7282404508453348, |
|
"grad_norm": 5.440925121307373, |
|
"learning_rate": 2.1827298050139276e-05, |
|
"loss": 0.2612, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.7532874139010644, |
|
"grad_norm": 2.679652214050293, |
|
"learning_rate": 2.166016713091922e-05, |
|
"loss": 0.2291, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7783343769567939, |
|
"grad_norm": 2.6038177013397217, |
|
"learning_rate": 2.1493036211699166e-05, |
|
"loss": 0.227, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.8033813400125234, |
|
"grad_norm": 3.784883499145508, |
|
"learning_rate": 2.1325905292479107e-05, |
|
"loss": 0.2404, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.8284283030682529, |
|
"grad_norm": 3.544004201889038, |
|
"learning_rate": 2.1158774373259055e-05, |
|
"loss": 0.2314, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.8534752661239824, |
|
"grad_norm": 1.186355471611023, |
|
"learning_rate": 2.0991643454039e-05, |
|
"loss": 0.2236, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.878522229179712, |
|
"grad_norm": 1.5770840644836426, |
|
"learning_rate": 2.082451253481894e-05, |
|
"loss": 0.2087, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.9035691922354414, |
|
"grad_norm": 2.307018995285034, |
|
"learning_rate": 2.0657381615598885e-05, |
|
"loss": 0.1937, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.928616155291171, |
|
"grad_norm": 3.615679979324341, |
|
"learning_rate": 2.0490250696378833e-05, |
|
"loss": 0.2061, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.9536631183469004, |
|
"grad_norm": 2.979759931564331, |
|
"learning_rate": 2.0323119777158774e-05, |
|
"loss": 0.2321, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.97871008140263, |
|
"grad_norm": 3.7641780376434326, |
|
"learning_rate": 2.015598885793872e-05, |
|
"loss": 0.2219, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.9987476518472134, |
|
"eval_accuracy": 0.9767578699617535, |
|
"eval_loss": 0.12758757174015045, |
|
"eval_runtime": 22.3439, |
|
"eval_samples_per_second": 304.245, |
|
"eval_steps_per_second": 9.533, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.0037570444583594, |
|
"grad_norm": 3.2667317390441895, |
|
"learning_rate": 1.9988857938718663e-05, |
|
"loss": 0.2024, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.028804007514089, |
|
"grad_norm": 2.963759422302246, |
|
"learning_rate": 1.9821727019498608e-05, |
|
"loss": 0.1808, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.0538509705698185, |
|
"grad_norm": 4.141463279724121, |
|
"learning_rate": 1.9654596100278552e-05, |
|
"loss": 0.194, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.078897933625548, |
|
"grad_norm": 4.703100681304932, |
|
"learning_rate": 1.9487465181058497e-05, |
|
"loss": 0.1963, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.1039448966812775, |
|
"grad_norm": 2.3061559200286865, |
|
"learning_rate": 1.9320334261838438e-05, |
|
"loss": 0.182, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.128991859737007, |
|
"grad_norm": 2.588878631591797, |
|
"learning_rate": 1.9153203342618386e-05, |
|
"loss": 0.197, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.1540388227927365, |
|
"grad_norm": 2.036632776260376, |
|
"learning_rate": 1.898607242339833e-05, |
|
"loss": 0.1939, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.179085785848466, |
|
"grad_norm": 2.999091625213623, |
|
"learning_rate": 1.8818941504178272e-05, |
|
"loss": 0.1776, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.2041327489041955, |
|
"grad_norm": 4.586688041687012, |
|
"learning_rate": 1.8651810584958216e-05, |
|
"loss": 0.2066, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.229179711959925, |
|
"grad_norm": 2.8724615573883057, |
|
"learning_rate": 1.8484679665738164e-05, |
|
"loss": 0.1814, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.254226675015654, |
|
"grad_norm": 3.051309585571289, |
|
"learning_rate": 1.8317548746518105e-05, |
|
"loss": 0.2035, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.279273638071384, |
|
"grad_norm": 2.697582721710205, |
|
"learning_rate": 1.815041782729805e-05, |
|
"loss": 0.2259, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.304320601127113, |
|
"grad_norm": 2.4961538314819336, |
|
"learning_rate": 1.7983286908077995e-05, |
|
"loss": 0.2069, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.329367564182843, |
|
"grad_norm": 4.160647392272949, |
|
"learning_rate": 1.781615598885794e-05, |
|
"loss": 0.1876, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.354414527238572, |
|
"grad_norm": 4.850248336791992, |
|
"learning_rate": 1.7649025069637884e-05, |
|
"loss": 0.1525, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.3794614902943017, |
|
"grad_norm": 2.7647998332977295, |
|
"learning_rate": 1.7481894150417828e-05, |
|
"loss": 0.1663, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.404508453350031, |
|
"grad_norm": 2.985328435897827, |
|
"learning_rate": 1.731476323119777e-05, |
|
"loss": 0.212, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.4295554164057607, |
|
"grad_norm": 2.1071982383728027, |
|
"learning_rate": 1.7147632311977717e-05, |
|
"loss": 0.1694, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.45460237946149, |
|
"grad_norm": 1.330075740814209, |
|
"learning_rate": 1.6980501392757662e-05, |
|
"loss": 0.1798, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.4796493425172197, |
|
"grad_norm": 3.0519397258758545, |
|
"learning_rate": 1.6813370473537606e-05, |
|
"loss": 0.1719, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.504696305572949, |
|
"grad_norm": 3.780168056488037, |
|
"learning_rate": 1.6646239554317548e-05, |
|
"loss": 0.174, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.5297432686286787, |
|
"grad_norm": 2.939603090286255, |
|
"learning_rate": 1.6479108635097496e-05, |
|
"loss": 0.1765, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.554790231684408, |
|
"grad_norm": 3.981560230255127, |
|
"learning_rate": 1.631197771587744e-05, |
|
"loss": 0.1951, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.5798371947401377, |
|
"grad_norm": 3.61944842338562, |
|
"learning_rate": 1.614484679665738e-05, |
|
"loss": 0.1394, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.6048841577958672, |
|
"grad_norm": 2.8106541633605957, |
|
"learning_rate": 1.5977715877437326e-05, |
|
"loss": 0.1767, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.6299311208515967, |
|
"grad_norm": 0.978993833065033, |
|
"learning_rate": 1.581058495821727e-05, |
|
"loss": 0.1438, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.6549780839073263, |
|
"grad_norm": 3.861020803451538, |
|
"learning_rate": 1.5643454038997215e-05, |
|
"loss": 0.1966, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.6800250469630558, |
|
"grad_norm": 6.368480682373047, |
|
"learning_rate": 1.547632311977716e-05, |
|
"loss": 0.1735, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.7050720100187853, |
|
"grad_norm": 2.9076144695281982, |
|
"learning_rate": 1.5309192200557104e-05, |
|
"loss": 0.1771, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.730118973074515, |
|
"grad_norm": 1.3361045122146606, |
|
"learning_rate": 1.5142061281337047e-05, |
|
"loss": 0.1594, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.7551659361302443, |
|
"grad_norm": 2.077012300491333, |
|
"learning_rate": 1.4974930362116992e-05, |
|
"loss": 0.1476, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.780212899185974, |
|
"grad_norm": 3.1275434494018555, |
|
"learning_rate": 1.4807799442896936e-05, |
|
"loss": 0.1965, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.8052598622417033, |
|
"grad_norm": 1.4369275569915771, |
|
"learning_rate": 1.464066852367688e-05, |
|
"loss": 0.1709, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.830306825297433, |
|
"grad_norm": 2.649810791015625, |
|
"learning_rate": 1.4473537604456825e-05, |
|
"loss": 0.1672, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.8553537883531623, |
|
"grad_norm": 2.733898401260376, |
|
"learning_rate": 1.4306406685236768e-05, |
|
"loss": 0.1614, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.8804007514088914, |
|
"grad_norm": 2.4066076278686523, |
|
"learning_rate": 1.4139275766016714e-05, |
|
"loss": 0.1798, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.9054477144646214, |
|
"grad_norm": 2.8622891902923584, |
|
"learning_rate": 1.3972144846796657e-05, |
|
"loss": 0.2037, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.9304946775203504, |
|
"grad_norm": 3.267059564590454, |
|
"learning_rate": 1.3805013927576602e-05, |
|
"loss": 0.1649, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.9555416405760804, |
|
"grad_norm": 1.7078909873962402, |
|
"learning_rate": 1.3637883008356546e-05, |
|
"loss": 0.1882, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.9805886036318094, |
|
"grad_norm": 4.093703746795654, |
|
"learning_rate": 1.3470752089136491e-05, |
|
"loss": 0.196, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.9981214777708205, |
|
"eval_accuracy": 0.980876728449544, |
|
"eval_loss": 0.09253185242414474, |
|
"eval_runtime": 23.1702, |
|
"eval_samples_per_second": 293.394, |
|
"eval_steps_per_second": 9.193, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 3.005635566687539, |
|
"grad_norm": 1.693821907043457, |
|
"learning_rate": 1.3303621169916434e-05, |
|
"loss": 0.1862, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.0306825297432685, |
|
"grad_norm": 3.7635231018066406, |
|
"learning_rate": 1.313649025069638e-05, |
|
"loss": 0.1663, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.055729492798998, |
|
"grad_norm": 1.3760706186294556, |
|
"learning_rate": 1.2969359331476323e-05, |
|
"loss": 0.1484, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.0807764558547275, |
|
"grad_norm": 3.305560350418091, |
|
"learning_rate": 1.2802228412256267e-05, |
|
"loss": 0.174, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.105823418910457, |
|
"grad_norm": 3.514143228530884, |
|
"learning_rate": 1.2635097493036212e-05, |
|
"loss": 0.1642, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.1308703819661865, |
|
"grad_norm": 2.355562210083008, |
|
"learning_rate": 1.2467966573816157e-05, |
|
"loss": 0.1697, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.155917345021916, |
|
"grad_norm": 1.1649140119552612, |
|
"learning_rate": 1.2300835654596101e-05, |
|
"loss": 0.173, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.1809643080776455, |
|
"grad_norm": 2.151714324951172, |
|
"learning_rate": 1.2133704735376046e-05, |
|
"loss": 0.1686, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.206011271133375, |
|
"grad_norm": 1.4168139696121216, |
|
"learning_rate": 1.1966573816155989e-05, |
|
"loss": 0.1604, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.2310582341891045, |
|
"grad_norm": 1.7652188539505005, |
|
"learning_rate": 1.1799442896935935e-05, |
|
"loss": 0.1311, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.256105197244834, |
|
"grad_norm": 3.6942124366760254, |
|
"learning_rate": 1.1632311977715878e-05, |
|
"loss": 0.1641, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.2811521603005636, |
|
"grad_norm": 3.0993881225585938, |
|
"learning_rate": 1.1465181058495822e-05, |
|
"loss": 0.1075, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.306199123356293, |
|
"grad_norm": 2.4146475791931152, |
|
"learning_rate": 1.1298050139275767e-05, |
|
"loss": 0.1708, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.3312460864120226, |
|
"grad_norm": 2.550797700881958, |
|
"learning_rate": 1.1130919220055711e-05, |
|
"loss": 0.1377, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.356293049467752, |
|
"grad_norm": 1.8913064002990723, |
|
"learning_rate": 1.0963788300835654e-05, |
|
"loss": 0.1567, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.3813400125234816, |
|
"grad_norm": 2.369048833847046, |
|
"learning_rate": 1.0796657381615599e-05, |
|
"loss": 0.1363, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.406386975579211, |
|
"grad_norm": 3.636960029602051, |
|
"learning_rate": 1.0629526462395543e-05, |
|
"loss": 0.1527, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.4314339386349406, |
|
"grad_norm": 3.4311366081237793, |
|
"learning_rate": 1.0462395543175486e-05, |
|
"loss": 0.1285, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.45648090169067, |
|
"grad_norm": 0.938562273979187, |
|
"learning_rate": 1.0295264623955432e-05, |
|
"loss": 0.175, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.4815278647463996, |
|
"grad_norm": 3.5569591522216797, |
|
"learning_rate": 1.0128133704735375e-05, |
|
"loss": 0.1461, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.506574827802129, |
|
"grad_norm": 1.755241870880127, |
|
"learning_rate": 9.961002785515322e-06, |
|
"loss": 0.1548, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.5316217908578587, |
|
"grad_norm": 3.8491158485412598, |
|
"learning_rate": 9.793871866295264e-06, |
|
"loss": 0.1507, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.5566687539135877, |
|
"grad_norm": 1.9301509857177734, |
|
"learning_rate": 9.626740947075209e-06, |
|
"loss": 0.1525, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.5817157169693177, |
|
"grad_norm": 3.393623113632202, |
|
"learning_rate": 9.459610027855154e-06, |
|
"loss": 0.1532, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.6067626800250467, |
|
"grad_norm": 3.466665267944336, |
|
"learning_rate": 9.292479108635098e-06, |
|
"loss": 0.1489, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.6318096430807767, |
|
"grad_norm": 0.6595897674560547, |
|
"learning_rate": 9.125348189415041e-06, |
|
"loss": 0.14, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.6568566061365058, |
|
"grad_norm": 1.5920480489730835, |
|
"learning_rate": 8.958217270194987e-06, |
|
"loss": 0.1535, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.6819035691922357, |
|
"grad_norm": 2.3759074211120605, |
|
"learning_rate": 8.79108635097493e-06, |
|
"loss": 0.1352, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.706950532247965, |
|
"grad_norm": 3.0922329425811768, |
|
"learning_rate": 8.623955431754875e-06, |
|
"loss": 0.1517, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.7319974953036943, |
|
"grad_norm": 6.3821210861206055, |
|
"learning_rate": 8.45682451253482e-06, |
|
"loss": 0.1335, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.757044458359424, |
|
"grad_norm": 3.3234517574310303, |
|
"learning_rate": 8.289693593314764e-06, |
|
"loss": 0.1547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.7820914214151533, |
|
"grad_norm": 1.0703407526016235, |
|
"learning_rate": 8.122562674094707e-06, |
|
"loss": 0.1488, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.807138384470883, |
|
"grad_norm": 4.2433905601501465, |
|
"learning_rate": 7.955431754874653e-06, |
|
"loss": 0.1546, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.8321853475266123, |
|
"grad_norm": 3.813833475112915, |
|
"learning_rate": 7.788300835654596e-06, |
|
"loss": 0.1178, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.857232310582342, |
|
"grad_norm": 2.304304599761963, |
|
"learning_rate": 7.621169916434541e-06, |
|
"loss": 0.1388, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.8822792736380713, |
|
"grad_norm": 2.777663230895996, |
|
"learning_rate": 7.454038997214485e-06, |
|
"loss": 0.1385, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.907326236693801, |
|
"grad_norm": 3.856813430786133, |
|
"learning_rate": 7.2869080779944286e-06, |
|
"loss": 0.1248, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.9323731997495304, |
|
"grad_norm": 1.7799324989318848, |
|
"learning_rate": 7.119777158774373e-06, |
|
"loss": 0.1573, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.95742016280526, |
|
"grad_norm": 2.822288751602173, |
|
"learning_rate": 6.952646239554318e-06, |
|
"loss": 0.1469, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.9824671258609894, |
|
"grad_norm": 2.2535672187805176, |
|
"learning_rate": 6.785515320334261e-06, |
|
"loss": 0.1388, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9788172992056487, |
|
"eval_loss": 0.09761285036802292, |
|
"eval_runtime": 22.1195, |
|
"eval_samples_per_second": 307.331, |
|
"eval_steps_per_second": 9.63, |
|
"step": 1597 |
|
}, |
|
{ |
|
"epoch": 4.007514088916719, |
|
"grad_norm": 2.963806390762329, |
|
"learning_rate": 6.618384401114206e-06, |
|
"loss": 0.1605, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.032561051972448, |
|
"grad_norm": 1.9512594938278198, |
|
"learning_rate": 6.4512534818941505e-06, |
|
"loss": 0.1419, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 4.057608015028178, |
|
"grad_norm": 1.9299558401107788, |
|
"learning_rate": 6.284122562674095e-06, |
|
"loss": 0.1437, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 4.082654978083907, |
|
"grad_norm": 3.1459672451019287, |
|
"learning_rate": 6.116991643454039e-06, |
|
"loss": 0.1376, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 4.107701941139637, |
|
"grad_norm": 0.6871834993362427, |
|
"learning_rate": 5.949860724233983e-06, |
|
"loss": 0.1167, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 4.132748904195366, |
|
"grad_norm": 3.4506194591522217, |
|
"learning_rate": 5.782729805013928e-06, |
|
"loss": 0.1589, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.157795867251096, |
|
"grad_norm": 2.1339712142944336, |
|
"learning_rate": 5.615598885793872e-06, |
|
"loss": 0.1517, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 4.182842830306825, |
|
"grad_norm": 2.481374502182007, |
|
"learning_rate": 5.448467966573816e-06, |
|
"loss": 0.1726, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 4.207889793362555, |
|
"grad_norm": 3.550483465194702, |
|
"learning_rate": 5.281337047353761e-06, |
|
"loss": 0.1422, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 4.232936756418284, |
|
"grad_norm": 2.749545097351074, |
|
"learning_rate": 5.114206128133705e-06, |
|
"loss": 0.1275, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 4.257983719474014, |
|
"grad_norm": 4.703949928283691, |
|
"learning_rate": 4.947075208913649e-06, |
|
"loss": 0.1421, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.283030682529743, |
|
"grad_norm": 2.5116734504699707, |
|
"learning_rate": 4.7799442896935936e-06, |
|
"loss": 0.1101, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 4.308077645585473, |
|
"grad_norm": 3.076810121536255, |
|
"learning_rate": 4.612813370473538e-06, |
|
"loss": 0.0948, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 4.333124608641202, |
|
"grad_norm": 2.641812324523926, |
|
"learning_rate": 4.445682451253482e-06, |
|
"loss": 0.1119, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 4.358171571696932, |
|
"grad_norm": 2.6420016288757324, |
|
"learning_rate": 4.278551532033426e-06, |
|
"loss": 0.124, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 4.383218534752661, |
|
"grad_norm": 2.386286973953247, |
|
"learning_rate": 4.111420612813371e-06, |
|
"loss": 0.1498, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.408265497808391, |
|
"grad_norm": 1.9823393821716309, |
|
"learning_rate": 3.944289693593315e-06, |
|
"loss": 0.1205, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 4.43331246086412, |
|
"grad_norm": 1.9473960399627686, |
|
"learning_rate": 3.7771587743732592e-06, |
|
"loss": 0.1333, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 4.45835942391985, |
|
"grad_norm": 3.5217528343200684, |
|
"learning_rate": 3.6100278551532034e-06, |
|
"loss": 0.1328, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 4.483406386975579, |
|
"grad_norm": 1.426268219947815, |
|
"learning_rate": 3.4428969359331475e-06, |
|
"loss": 0.101, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 4.508453350031308, |
|
"grad_norm": 1.8567904233932495, |
|
"learning_rate": 3.275766016713092e-06, |
|
"loss": 0.1341, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.533500313087038, |
|
"grad_norm": 2.341360330581665, |
|
"learning_rate": 3.108635097493036e-06, |
|
"loss": 0.1096, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 4.558547276142768, |
|
"grad_norm": 2.002462387084961, |
|
"learning_rate": 2.9415041782729803e-06, |
|
"loss": 0.1397, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 4.583594239198497, |
|
"grad_norm": 1.7507251501083374, |
|
"learning_rate": 2.774373259052925e-06, |
|
"loss": 0.1081, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 4.608641202254226, |
|
"grad_norm": 1.9063440561294556, |
|
"learning_rate": 2.607242339832869e-06, |
|
"loss": 0.1053, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 4.633688165309956, |
|
"grad_norm": 3.434494733810425, |
|
"learning_rate": 2.4401114206128136e-06, |
|
"loss": 0.1492, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 4.658735128365686, |
|
"grad_norm": 2.072505235671997, |
|
"learning_rate": 2.2729805013927577e-06, |
|
"loss": 0.1578, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 4.683782091421415, |
|
"grad_norm": 1.0633418560028076, |
|
"learning_rate": 2.1058495821727023e-06, |
|
"loss": 0.1165, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 4.708829054477144, |
|
"grad_norm": 1.8562265634536743, |
|
"learning_rate": 1.9387186629526464e-06, |
|
"loss": 0.1282, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 4.733876017532874, |
|
"grad_norm": 1.3848246335983276, |
|
"learning_rate": 1.7715877437325906e-06, |
|
"loss": 0.1274, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 4.758922980588603, |
|
"grad_norm": 3.5492846965789795, |
|
"learning_rate": 1.604456824512535e-06, |
|
"loss": 0.1334, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.783969943644333, |
|
"grad_norm": 3.0531601905822754, |
|
"learning_rate": 1.437325905292479e-06, |
|
"loss": 0.1341, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.809016906700062, |
|
"grad_norm": 2.4751946926116943, |
|
"learning_rate": 1.2701949860724234e-06, |
|
"loss": 0.0987, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.834063869755792, |
|
"grad_norm": 1.2282112836837769, |
|
"learning_rate": 1.1030640668523677e-06, |
|
"loss": 0.1427, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.859110832811521, |
|
"grad_norm": 0.9631487131118774, |
|
"learning_rate": 9.35933147632312e-07, |
|
"loss": 0.1296, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.884157795867251, |
|
"grad_norm": 1.5988798141479492, |
|
"learning_rate": 7.688022284122563e-07, |
|
"loss": 0.1258, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.90920475892298, |
|
"grad_norm": 2.9330708980560303, |
|
"learning_rate": 6.016713091922006e-07, |
|
"loss": 0.1258, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.93425172197871, |
|
"grad_norm": 2.6305460929870605, |
|
"learning_rate": 4.3454038997214486e-07, |
|
"loss": 0.1257, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.959298685034439, |
|
"grad_norm": 1.8302196264266968, |
|
"learning_rate": 2.6740947075208915e-07, |
|
"loss": 0.1186, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.984345648090169, |
|
"grad_norm": 3.9666271209716797, |
|
"learning_rate": 1.0027855153203343e-07, |
|
"loss": 0.1444, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.996869129618034, |
|
"eval_accuracy": 0.9820535451603413, |
|
"eval_loss": 0.0860319584608078, |
|
"eval_runtime": 22.6177, |
|
"eval_samples_per_second": 300.561, |
|
"eval_steps_per_second": 9.417, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 4.996869129618034, |
|
"step": 1995, |
|
"total_flos": 2.3180194781952e+18, |
|
"train_loss": 0.41004490450510106, |
|
"train_runtime": 2324.9845, |
|
"train_samples_per_second": 109.88, |
|
"train_steps_per_second": 0.858 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1995, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3180194781952e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|