{
  "best_global_step": 4000,
  "best_metric": 81.67647178155556,
  "best_model_checkpoint": "./working_area/output_model/checkpoint-4000",
  "epoch": 0.6469351447517386,
  "eval_steps": 1000,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004043344654698367,
      "grad_norm": 10.67656421661377,
      "learning_rate": 4.800000000000001e-07,
      "loss": 0.2728,
      "step": 25
    },
    {
      "epoch": 0.008086689309396733,
      "grad_norm": 0.34478363394737244,
      "learning_rate": 9.800000000000001e-07,
      "loss": 0.0358,
      "step": 50
    },
    {
      "epoch": 0.012130033964095099,
      "grad_norm": 0.26953190565109253,
      "learning_rate": 1.48e-06,
      "loss": 0.0261,
      "step": 75
    },
    {
      "epoch": 0.016173378618793467,
      "grad_norm": 0.2419331669807434,
      "learning_rate": 1.98e-06,
      "loss": 0.0251,
      "step": 100
    },
    {
      "epoch": 0.020216723273491832,
      "grad_norm": 0.29993513226509094,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 0.0247,
      "step": 125
    },
    {
      "epoch": 0.024260067928190198,
      "grad_norm": 0.2362569123506546,
      "learning_rate": 2.9800000000000003e-06,
      "loss": 0.0227,
      "step": 150
    },
    {
      "epoch": 0.028303412582888564,
      "grad_norm": 0.2520463466644287,
      "learning_rate": 3.48e-06,
      "loss": 0.0246,
      "step": 175
    },
    {
      "epoch": 0.03234675723758693,
      "grad_norm": 0.2729567885398865,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.0236,
      "step": 200
    },
    {
      "epoch": 0.036390101892285295,
      "grad_norm": 0.21665456891059875,
      "learning_rate": 4.48e-06,
      "loss": 0.0242,
      "step": 225
    },
    {
      "epoch": 0.040433446546983665,
      "grad_norm": 0.27291786670684814,
      "learning_rate": 4.980000000000001e-06,
      "loss": 0.0242,
      "step": 250
    },
    {
      "epoch": 0.044476791201682034,
      "grad_norm": 0.20698551833629608,
      "learning_rate": 5.480000000000001e-06,
      "loss": 0.0229,
      "step": 275
    },
    {
      "epoch": 0.048520135856380396,
      "grad_norm": 0.2506471276283264,
      "learning_rate": 5.98e-06,
      "loss": 0.0225,
      "step": 300
    },
    {
      "epoch": 0.052563480511078765,
      "grad_norm": 0.24540211260318756,
      "learning_rate": 6.480000000000001e-06,
      "loss": 0.0231,
      "step": 325
    },
    {
      "epoch": 0.05660682516577713,
      "grad_norm": 0.2298947423696518,
      "learning_rate": 6.98e-06,
      "loss": 0.0223,
      "step": 350
    },
    {
      "epoch": 0.0606501698204755,
      "grad_norm": 0.19595475494861603,
      "learning_rate": 7.48e-06,
      "loss": 0.022,
      "step": 375
    },
    {
      "epoch": 0.06469351447517387,
      "grad_norm": 0.2605821490287781,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.022,
      "step": 400
    },
    {
      "epoch": 0.06873685912987224,
      "grad_norm": 0.25346869230270386,
      "learning_rate": 8.48e-06,
      "loss": 0.0226,
      "step": 425
    },
    {
      "epoch": 0.07278020378457059,
      "grad_norm": 0.255500465631485,
      "learning_rate": 8.98e-06,
      "loss": 0.0216,
      "step": 450
    },
    {
      "epoch": 0.07682354843926896,
      "grad_norm": 0.24462725222110748,
      "learning_rate": 9.48e-06,
      "loss": 0.0234,
      "step": 475
    },
    {
      "epoch": 0.08086689309396733,
      "grad_norm": 0.23454348742961884,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.024,
      "step": 500
    },
    {
      "epoch": 0.0849102377486657,
      "grad_norm": 0.2627425193786621,
      "learning_rate": 9.946666666666667e-06,
      "loss": 0.0226,
      "step": 525
    },
    {
      "epoch": 0.08895358240336407,
      "grad_norm": 0.22754116356372833,
      "learning_rate": 9.891111111111113e-06,
      "loss": 0.023,
      "step": 550
    },
    {
      "epoch": 0.09299692705806242,
      "grad_norm": 0.26447415351867676,
      "learning_rate": 9.835555555555556e-06,
      "loss": 0.0215,
      "step": 575
    },
    {
      "epoch": 0.09704027171276079,
      "grad_norm": 0.27743828296661377,
      "learning_rate": 9.780000000000001e-06,
      "loss": 0.0215,
      "step": 600
    },
    {
      "epoch": 0.10108361636745916,
      "grad_norm": 0.23157595098018646,
      "learning_rate": 9.724444444444445e-06,
      "loss": 0.023,
      "step": 625
    },
    {
      "epoch": 0.10512696102215753,
      "grad_norm": 0.2561393082141876,
      "learning_rate": 9.66888888888889e-06,
      "loss": 0.0209,
      "step": 650
    },
    {
      "epoch": 0.1091703056768559,
      "grad_norm": 0.2518314719200134,
      "learning_rate": 9.613333333333335e-06,
      "loss": 0.0235,
      "step": 675
    },
    {
      "epoch": 0.11321365033155426,
      "grad_norm": 0.2346208244562149,
      "learning_rate": 9.557777777777777e-06,
      "loss": 0.0222,
      "step": 700
    },
    {
      "epoch": 0.11725699498625262,
      "grad_norm": 0.23254457116127014,
      "learning_rate": 9.502222222222223e-06,
      "loss": 0.0216,
      "step": 725
    },
    {
      "epoch": 0.121300339640951,
      "grad_norm": 0.2728439271450043,
      "learning_rate": 9.446666666666667e-06,
      "loss": 0.0208,
      "step": 750
    },
    {
      "epoch": 0.12534368429564935,
      "grad_norm": 0.24401770532131195,
      "learning_rate": 9.391111111111111e-06,
      "loss": 0.0222,
      "step": 775
    },
    {
      "epoch": 0.12938702895034773,
      "grad_norm": 0.2546014189720154,
      "learning_rate": 9.335555555555557e-06,
      "loss": 0.0223,
      "step": 800
    },
    {
      "epoch": 0.1334303736050461,
      "grad_norm": 0.20592975616455078,
      "learning_rate": 9.280000000000001e-06,
      "loss": 0.0214,
      "step": 825
    },
    {
      "epoch": 0.13747371825974447,
      "grad_norm": 0.2854577898979187,
      "learning_rate": 9.224444444444445e-06,
      "loss": 0.021,
      "step": 850
    },
    {
      "epoch": 0.14151706291444283,
      "grad_norm": 0.2555174231529236,
      "learning_rate": 9.168888888888889e-06,
      "loss": 0.0216,
      "step": 875
    },
    {
      "epoch": 0.14556040756914118,
      "grad_norm": 0.22724099457263947,
      "learning_rate": 9.113333333333335e-06,
      "loss": 0.0226,
      "step": 900
    },
    {
      "epoch": 0.14960375222383956,
      "grad_norm": 0.24628663063049316,
      "learning_rate": 9.057777777777779e-06,
      "loss": 0.021,
      "step": 925
    },
    {
      "epoch": 0.15364709687853792,
      "grad_norm": 0.22158333659172058,
      "learning_rate": 9.002222222222223e-06,
      "loss": 0.0221,
      "step": 950
    },
    {
      "epoch": 0.1576904415332363,
      "grad_norm": 0.23985975980758667,
      "learning_rate": 8.946666666666669e-06,
      "loss": 0.0227,
      "step": 975
    },
    {
      "epoch": 0.16173378618793466,
      "grad_norm": 0.24263052642345428,
      "learning_rate": 8.891111111111111e-06,
      "loss": 0.021,
      "step": 1000
    },
    {
      "epoch": 0.16173378618793466,
      "eval_loss": 0.026818539947271347,
      "eval_runtime": 15594.6052,
      "eval_samples_per_second": 7.9,
      "eval_steps_per_second": 0.494,
      "eval_wer": 84.01915040370025,
      "step": 1000
    },
    {
      "epoch": 0.165777130842633,
      "grad_norm": 0.24622474610805511,
      "learning_rate": 8.835555555555557e-06,
      "loss": 0.0215,
      "step": 1025
    },
    {
      "epoch": 0.1698204754973314,
      "grad_norm": 0.218623086810112,
      "learning_rate": 8.78e-06,
      "loss": 0.0209,
      "step": 1050
    },
    {
      "epoch": 0.17386382015202975,
      "grad_norm": 0.2270510345697403,
      "learning_rate": 8.724444444444445e-06,
      "loss": 0.0216,
      "step": 1075
    },
    {
      "epoch": 0.17790716480672814,
      "grad_norm": 0.2207878828048706,
      "learning_rate": 8.66888888888889e-06,
      "loss": 0.022,
      "step": 1100
    },
    {
      "epoch": 0.1819505094614265,
      "grad_norm": 0.18590456247329712,
      "learning_rate": 8.613333333333333e-06,
      "loss": 0.022,
      "step": 1125
    },
    {
      "epoch": 0.18599385411612485,
      "grad_norm": 0.1838596612215042,
      "learning_rate": 8.557777777777778e-06,
      "loss": 0.0215,
      "step": 1150
    },
    {
      "epoch": 0.19003719877082323,
      "grad_norm": 0.31669357419013977,
      "learning_rate": 8.502222222222223e-06,
      "loss": 0.0223,
      "step": 1175
    },
    {
      "epoch": 0.19408054342552158,
      "grad_norm": 0.21895891427993774,
      "learning_rate": 8.446666666666668e-06,
      "loss": 0.0209,
      "step": 1200
    },
    {
      "epoch": 0.19812388808021997,
      "grad_norm": 0.20323023200035095,
      "learning_rate": 8.391111111111112e-06,
      "loss": 0.021,
      "step": 1225
    },
    {
      "epoch": 0.20216723273491832,
      "grad_norm": 0.25468146800994873,
      "learning_rate": 8.335555555555556e-06,
      "loss": 0.0224,
      "step": 1250
    },
    {
      "epoch": 0.20621057738961668,
      "grad_norm": 0.20973573625087738,
      "learning_rate": 8.28e-06,
      "loss": 0.0211,
      "step": 1275
    },
    {
      "epoch": 0.21025392204431506,
      "grad_norm": 0.26390501856803894,
      "learning_rate": 8.224444444444444e-06,
      "loss": 0.0208,
      "step": 1300
    },
    {
      "epoch": 0.21429726669901342,
      "grad_norm": 0.2301158905029297,
      "learning_rate": 8.16888888888889e-06,
      "loss": 0.0205,
      "step": 1325
    },
    {
      "epoch": 0.2183406113537118,
      "grad_norm": 0.24158801138401031,
      "learning_rate": 8.113333333333334e-06,
      "loss": 0.0206,
      "step": 1350
    },
    {
      "epoch": 0.22238395600841016,
      "grad_norm": 0.21493862569332123,
      "learning_rate": 8.057777777777778e-06,
      "loss": 0.0208,
      "step": 1375
    },
    {
      "epoch": 0.2264273006631085,
      "grad_norm": 0.2373913675546646,
      "learning_rate": 8.002222222222222e-06,
      "loss": 0.0198,
      "step": 1400
    },
    {
      "epoch": 0.2304706453178069,
      "grad_norm": 0.2382635623216629,
      "learning_rate": 7.946666666666666e-06,
      "loss": 0.0198,
      "step": 1425
    },
    {
      "epoch": 0.23451398997250525,
      "grad_norm": 0.256558895111084,
      "learning_rate": 7.891111111111112e-06,
      "loss": 0.0206,
      "step": 1450
    },
    {
      "epoch": 0.23855733462720363,
      "grad_norm": 0.23155981302261353,
      "learning_rate": 7.835555555555556e-06,
      "loss": 0.0182,
      "step": 1475
    },
    {
      "epoch": 0.242600679281902,
      "grad_norm": 0.24832689762115479,
      "learning_rate": 7.78e-06,
      "loss": 0.02,
      "step": 1500
    },
    {
      "epoch": 0.24664402393660034,
      "grad_norm": 0.22871288657188416,
      "learning_rate": 7.724444444444446e-06,
      "loss": 0.0193,
      "step": 1525
    },
    {
      "epoch": 0.2506873685912987,
      "grad_norm": 0.2521936297416687,
      "learning_rate": 7.66888888888889e-06,
      "loss": 0.0197,
      "step": 1550
    },
    {
      "epoch": 0.2547307132459971,
      "grad_norm": 0.24429504573345184,
      "learning_rate": 7.613333333333334e-06,
      "loss": 0.0221,
      "step": 1575
    },
    {
      "epoch": 0.25877405790069546,
      "grad_norm": 0.23084107041358948,
      "learning_rate": 7.557777777777779e-06,
      "loss": 0.02,
      "step": 1600
    },
    {
      "epoch": 0.2628174025553938,
      "grad_norm": 0.2697436809539795,
      "learning_rate": 7.502222222222223e-06,
      "loss": 0.0211,
      "step": 1625
    },
    {
      "epoch": 0.2668607472100922,
      "grad_norm": 0.23871037364006042,
      "learning_rate": 7.446666666666668e-06,
      "loss": 0.0214,
      "step": 1650
    },
    {
      "epoch": 0.27090409186479053,
      "grad_norm": 0.2677021920681,
      "learning_rate": 7.3911111111111125e-06,
      "loss": 0.0203,
      "step": 1675
    },
    {
      "epoch": 0.27494743651948894,
      "grad_norm": 0.1928839236497879,
      "learning_rate": 7.335555555555556e-06,
      "loss": 0.0203,
      "step": 1700
    },
    {
      "epoch": 0.2789907811741873,
      "grad_norm": 0.26213887333869934,
      "learning_rate": 7.280000000000001e-06,
      "loss": 0.0205,
      "step": 1725
    },
    {
      "epoch": 0.28303412582888565,
      "grad_norm": 0.23492328822612762,
      "learning_rate": 7.224444444444445e-06,
      "loss": 0.0201,
      "step": 1750
    },
    {
      "epoch": 0.287077470483584,
      "grad_norm": 0.23946373164653778,
      "learning_rate": 7.1688888888888895e-06,
      "loss": 0.0195,
      "step": 1775
    },
    {
      "epoch": 0.29112081513828236,
      "grad_norm": 0.20817314088344574,
      "learning_rate": 7.113333333333334e-06,
      "loss": 0.0211,
      "step": 1800
    },
    {
      "epoch": 0.2951641597929808,
      "grad_norm": 0.22937080264091492,
      "learning_rate": 7.057777777777778e-06,
      "loss": 0.0203,
      "step": 1825
    },
    {
      "epoch": 0.29920750444767913,
      "grad_norm": 0.23442518711090088,
      "learning_rate": 7.0022222222222225e-06,
      "loss": 0.0197,
      "step": 1850
    },
    {
      "epoch": 0.3032508491023775,
      "grad_norm": 0.24289068579673767,
      "learning_rate": 6.946666666666667e-06,
      "loss": 0.0198,
      "step": 1875
    },
    {
      "epoch": 0.30729419375707584,
      "grad_norm": 0.24261179566383362,
      "learning_rate": 6.891111111111111e-06,
      "loss": 0.0211,
      "step": 1900
    },
    {
      "epoch": 0.3113375384117742,
      "grad_norm": 0.21574310958385468,
      "learning_rate": 6.835555555555556e-06,
      "loss": 0.0196,
      "step": 1925
    },
    {
      "epoch": 0.3153808830664726,
      "grad_norm": 0.2648760676383972,
      "learning_rate": 6.780000000000001e-06,
      "loss": 0.021,
      "step": 1950
    },
    {
      "epoch": 0.31942422772117096,
      "grad_norm": 0.22739122807979584,
      "learning_rate": 6.724444444444444e-06,
      "loss": 0.019,
      "step": 1975
    },
    {
      "epoch": 0.3234675723758693,
      "grad_norm": 0.2167060226202011,
      "learning_rate": 6.668888888888889e-06,
      "loss": 0.0208,
      "step": 2000
    },
    {
      "epoch": 0.3234675723758693,
      "eval_loss": 0.025321291759610176,
      "eval_runtime": 14275.0619,
      "eval_samples_per_second": 8.63,
      "eval_steps_per_second": 0.539,
      "eval_wer": 82.5098389256299,
      "step": 2000
    },
    {
      "epoch": 0.32751091703056767,
      "grad_norm": 0.2261808067560196,
      "learning_rate": 6.613333333333334e-06,
      "loss": 0.0192,
      "step": 2025
    },
    {
      "epoch": 0.331554261685266,
      "grad_norm": 0.26066452264785767,
      "learning_rate": 6.557777777777778e-06,
      "loss": 0.0222,
      "step": 2050
    },
    {
      "epoch": 0.33559760633996444,
      "grad_norm": 0.22417497634887695,
      "learning_rate": 6.502222222222223e-06,
      "loss": 0.0204,
      "step": 2075
    },
    {
      "epoch": 0.3396409509946628,
      "grad_norm": 0.20988379418849945,
      "learning_rate": 6.446666666666668e-06,
      "loss": 0.0191,
      "step": 2100
    },
    {
      "epoch": 0.34368429564936115,
      "grad_norm": 0.22671236097812653,
      "learning_rate": 6.391111111111111e-06,
      "loss": 0.0211,
      "step": 2125
    },
    {
      "epoch": 0.3477276403040595,
      "grad_norm": 0.2695440351963043,
      "learning_rate": 6.335555555555556e-06,
      "loss": 0.0197,
      "step": 2150
    },
    {
      "epoch": 0.35177098495875786,
      "grad_norm": 0.20911026000976562,
      "learning_rate": 6.280000000000001e-06,
      "loss": 0.0191,
      "step": 2175
    },
    {
      "epoch": 0.35581432961345627,
      "grad_norm": 0.21661245822906494,
      "learning_rate": 6.224444444444445e-06,
      "loss": 0.0199,
      "step": 2200
    },
    {
      "epoch": 0.3598576742681546,
      "grad_norm": 0.222182035446167,
      "learning_rate": 6.16888888888889e-06,
      "loss": 0.0195,
      "step": 2225
    },
    {
      "epoch": 0.363901018922853,
      "grad_norm": 0.2006169855594635,
      "learning_rate": 6.113333333333333e-06,
      "loss": 0.0199,
      "step": 2250
    },
    {
      "epoch": 0.36794436357755134,
      "grad_norm": 0.22298727929592133,
      "learning_rate": 6.057777777777778e-06,
      "loss": 0.02,
      "step": 2275
    },
    {
      "epoch": 0.3719877082322497,
      "grad_norm": 0.19001857936382294,
      "learning_rate": 6.002222222222223e-06,
      "loss": 0.0197,
      "step": 2300
    },
    {
      "epoch": 0.3760310528869481,
      "grad_norm": 0.2434571385383606,
      "learning_rate": 5.946666666666668e-06,
      "loss": 0.021,
      "step": 2325
    },
    {
      "epoch": 0.38007439754164646,
      "grad_norm": 0.21516510844230652,
      "learning_rate": 5.891111111111112e-06,
      "loss": 0.0193,
      "step": 2350
    },
    {
      "epoch": 0.3841177421963448,
      "grad_norm": 0.24581994116306305,
      "learning_rate": 5.8355555555555565e-06,
      "loss": 0.0199,
      "step": 2375
    },
    {
      "epoch": 0.38816108685104317,
      "grad_norm": 0.23675447702407837,
      "learning_rate": 5.78e-06,
      "loss": 0.0195,
      "step": 2400
    },
    {
      "epoch": 0.3922044315057415,
      "grad_norm": 0.21948282420635223,
      "learning_rate": 5.724444444444445e-06,
      "loss": 0.0194,
      "step": 2425
    },
    {
      "epoch": 0.39624777616043994,
      "grad_norm": 0.21700553596019745,
      "learning_rate": 5.6688888888888895e-06,
      "loss": 0.0188,
      "step": 2450
    },
    {
      "epoch": 0.4002911208151383,
      "grad_norm": 0.24019096791744232,
      "learning_rate": 5.613333333333334e-06,
      "loss": 0.0211,
      "step": 2475
    },
    {
      "epoch": 0.40433446546983665,
      "grad_norm": 0.21174991130828857,
      "learning_rate": 5.557777777777778e-06,
      "loss": 0.0207,
      "step": 2500
    },
    {
      "epoch": 0.408377810124535,
      "grad_norm": 0.2548556625843048,
      "learning_rate": 5.5022222222222224e-06,
      "loss": 0.0202,
      "step": 2525
    },
    {
      "epoch": 0.41242115477923336,
      "grad_norm": 0.2106471061706543,
      "learning_rate": 5.4466666666666665e-06,
      "loss": 0.0206,
      "step": 2550
    },
    {
      "epoch": 0.41646449943393177,
      "grad_norm": 0.20076259970664978,
      "learning_rate": 5.391111111111111e-06,
      "loss": 0.0188,
      "step": 2575
    },
    {
      "epoch": 0.4205078440886301,
      "grad_norm": 0.20281557738780975,
      "learning_rate": 5.335555555555556e-06,
      "loss": 0.0189,
      "step": 2600
    },
    {
      "epoch": 0.4245511887433285,
      "grad_norm": 0.23585236072540283,
      "learning_rate": 5.28e-06,
      "loss": 0.0193,
      "step": 2625
    },
    {
      "epoch": 0.42859453339802683,
      "grad_norm": 0.20076635479927063,
      "learning_rate": 5.224444444444445e-06,
      "loss": 0.0188,
      "step": 2650
    },
    {
      "epoch": 0.4326378780527252,
      "grad_norm": 0.20363172888755798,
      "learning_rate": 5.168888888888889e-06,
      "loss": 0.0188,
      "step": 2675
    },
    {
      "epoch": 0.4366812227074236,
      "grad_norm": 0.20968079566955566,
      "learning_rate": 5.113333333333333e-06,
      "loss": 0.0198,
      "step": 2700
    },
    {
      "epoch": 0.44072456736212196,
      "grad_norm": 0.2075379192829132,
      "learning_rate": 5.057777777777778e-06,
      "loss": 0.0192,
      "step": 2725
    },
    {
      "epoch": 0.4447679120168203,
      "grad_norm": 0.19762763381004333,
      "learning_rate": 5.002222222222223e-06,
      "loss": 0.0192,
      "step": 2750
    },
    {
      "epoch": 0.44881125667151867,
      "grad_norm": 0.2090006172657013,
      "learning_rate": 4.946666666666667e-06,
      "loss": 0.0196,
      "step": 2775
    },
    {
      "epoch": 0.452854601326217,
      "grad_norm": 0.20064634084701538,
      "learning_rate": 4.891111111111111e-06,
      "loss": 0.0187,
      "step": 2800
    },
    {
      "epoch": 0.45689794598091543,
      "grad_norm": 0.20843106508255005,
      "learning_rate": 4.835555555555556e-06,
      "loss": 0.0192,
      "step": 2825
    },
    {
      "epoch": 0.4609412906356138,
      "grad_norm": 0.22503884136676788,
      "learning_rate": 4.78e-06,
      "loss": 0.0195,
      "step": 2850
    },
    {
      "epoch": 0.46498463529031214,
      "grad_norm": 0.22823262214660645,
      "learning_rate": 4.724444444444445e-06,
      "loss": 0.0199,
      "step": 2875
    },
    {
      "epoch": 0.4690279799450105,
      "grad_norm": 0.24186566472053528,
      "learning_rate": 4.66888888888889e-06,
      "loss": 0.0194,
      "step": 2900
    },
    {
      "epoch": 0.47307132459970885,
      "grad_norm": 0.27571889758110046,
      "learning_rate": 4.613333333333334e-06,
      "loss": 0.0194,
      "step": 2925
    },
    {
      "epoch": 0.47711466925440726,
      "grad_norm": 0.22955967485904694,
      "learning_rate": 4.557777777777778e-06,
      "loss": 0.0201,
      "step": 2950
    },
    {
      "epoch": 0.4811580139091056,
      "grad_norm": 0.21334044635295868,
      "learning_rate": 4.502222222222223e-06,
      "loss": 0.0204,
      "step": 2975
    },
    {
      "epoch": 0.485201358563804,
      "grad_norm": 0.20334798097610474,
      "learning_rate": 4.446666666666667e-06,
      "loss": 0.0207,
      "step": 3000
    },
    {
      "epoch": 0.485201358563804,
      "eval_loss": 0.024814918637275696,
      "eval_runtime": 14244.4286,
      "eval_samples_per_second": 8.649,
      "eval_steps_per_second": 0.541,
      "eval_wer": 81.96210492149146,
      "step": 3000
    },
    {
      "epoch": 0.48924470321850233,
      "grad_norm": 0.23993884027004242,
      "learning_rate": 4.391111111111112e-06,
      "loss": 0.0192,
      "step": 3025
    },
    {
      "epoch": 0.4932880478732007,
      "grad_norm": 0.19718846678733826,
      "learning_rate": 4.3355555555555565e-06,
      "loss": 0.0202,
      "step": 3050
    },
    {
      "epoch": 0.4973313925278991,
      "grad_norm": 0.21749120950698853,
      "learning_rate": 4.2800000000000005e-06,
      "loss": 0.0191,
      "step": 3075
    },
    {
      "epoch": 0.5013747371825974,
      "grad_norm": 0.22544993460178375,
      "learning_rate": 4.2244444444444446e-06,
      "loss": 0.0189,
      "step": 3100
    },
    {
      "epoch": 0.5054180818372959,
      "grad_norm": 0.20528769493103027,
      "learning_rate": 4.168888888888889e-06,
      "loss": 0.0195,
      "step": 3125
    },
    {
      "epoch": 0.5094614264919942,
      "grad_norm": 0.2464672178030014,
      "learning_rate": 4.1133333333333335e-06,
      "loss": 0.0184,
      "step": 3150
    },
    {
      "epoch": 0.5135047711466926,
      "grad_norm": 0.22547754645347595,
      "learning_rate": 4.057777777777778e-06,
      "loss": 0.0202,
      "step": 3175
    },
    {
      "epoch": 0.5175481158013909,
      "grad_norm": 0.1981554925441742,
      "learning_rate": 4.002222222222222e-06,
      "loss": 0.0196,
      "step": 3200
    },
    {
      "epoch": 0.5215914604560893,
      "grad_norm": 0.23502561450004578,
      "learning_rate": 3.946666666666667e-06,
      "loss": 0.0188,
      "step": 3225
    },
    {
      "epoch": 0.5256348051107876,
      "grad_norm": 0.23244182765483856,
      "learning_rate": 3.891111111111111e-06,
      "loss": 0.0194,
      "step": 3250
    },
    {
      "epoch": 0.529678149765486,
      "grad_norm": 0.23334389925003052,
      "learning_rate": 3.835555555555555e-06,
      "loss": 0.019,
      "step": 3275
    },
    {
      "epoch": 0.5337214944201844,
      "grad_norm": 0.24255788326263428,
      "learning_rate": 3.7800000000000002e-06,
      "loss": 0.019,
      "step": 3300
    },
    {
      "epoch": 0.5377648390748827,
      "grad_norm": 0.19664430618286133,
      "learning_rate": 3.724444444444445e-06,
      "loss": 0.0186,
      "step": 3325
    },
    {
      "epoch": 0.5418081837295811,
      "grad_norm": 0.23343808948993683,
      "learning_rate": 3.668888888888889e-06,
      "loss": 0.0185,
      "step": 3350
    },
    {
      "epoch": 0.5458515283842795,
      "grad_norm": 0.22261999547481537,
      "learning_rate": 3.6133333333333336e-06,
      "loss": 0.0185,
      "step": 3375
    },
    {
      "epoch": 0.5498948730389779,
      "grad_norm": 0.26387473940849304,
      "learning_rate": 3.5577777777777785e-06,
      "loss": 0.0191,
      "step": 3400
    },
    {
      "epoch": 0.5539382176936762,
      "grad_norm": 0.2524121105670929,
      "learning_rate": 3.5022222222222225e-06,
      "loss": 0.019,
      "step": 3425
    },
    {
      "epoch": 0.5579815623483746,
      "grad_norm": 0.21356618404388428,
      "learning_rate": 3.446666666666667e-06,
      "loss": 0.0202,
      "step": 3450
    },
    {
      "epoch": 0.562024907003073,
      "grad_norm": 0.20150095224380493,
      "learning_rate": 3.391111111111111e-06,
      "loss": 0.0199,
      "step": 3475
    },
    {
      "epoch": 0.5660682516577713,
      "grad_norm": 0.20904186367988586,
      "learning_rate": 3.335555555555556e-06,
      "loss": 0.0187,
      "step": 3500
    },
    {
      "epoch": 0.5701115963124697,
      "grad_norm": 0.2027483582496643,
      "learning_rate": 3.2800000000000004e-06,
      "loss": 0.0184,
      "step": 3525
    },
    {
      "epoch": 0.574154940967168,
      "grad_norm": 0.23289534449577332,
      "learning_rate": 3.2244444444444444e-06,
      "loss": 0.0196,
      "step": 3550
    },
    {
      "epoch": 0.5781982856218664,
      "grad_norm": 0.23588407039642334,
      "learning_rate": 3.1688888888888893e-06,
      "loss": 0.0179,
      "step": 3575
    },
    {
      "epoch": 0.5822416302765647,
      "grad_norm": 0.2624644935131073,
      "learning_rate": 3.1133333333333337e-06,
      "loss": 0.0186,
      "step": 3600
    },
    {
      "epoch": 0.5862849749312632,
      "grad_norm": 0.21287862956523895,
      "learning_rate": 3.0577777777777778e-06,
      "loss": 0.0192,
      "step": 3625
    },
    {
      "epoch": 0.5903283195859615,
      "grad_norm": 0.24328123033046722,
      "learning_rate": 3.0022222222222227e-06,
      "loss": 0.0191,
      "step": 3650
    },
    {
      "epoch": 0.5943716642406599,
      "grad_norm": 0.1968221366405487,
      "learning_rate": 2.946666666666667e-06,
      "loss": 0.0189,
      "step": 3675
    },
    {
      "epoch": 0.5984150088953583,
      "grad_norm": 0.22048558294773102,
      "learning_rate": 2.891111111111111e-06,
      "loss": 0.0193,
      "step": 3700
    },
    {
      "epoch": 0.6024583535500566,
      "grad_norm": 0.23322191834449768,
      "learning_rate": 2.835555555555556e-06,
      "loss": 0.0203,
      "step": 3725
    },
    {
      "epoch": 0.606501698204755,
      "grad_norm": 0.2503873407840729,
      "learning_rate": 2.7800000000000005e-06,
      "loss": 0.0187,
      "step": 3750
    },
    {
      "epoch": 0.6105450428594533,
      "grad_norm": 0.23583608865737915,
      "learning_rate": 2.7244444444444445e-06,
      "loss": 0.0182,
      "step": 3775
    },
    {
      "epoch": 0.6145883875141517,
      "grad_norm": 0.21786954998970032,
      "learning_rate": 2.6688888888888894e-06,
      "loss": 0.0187,
      "step": 3800
    },
    {
      "epoch": 0.61863173216885,
      "grad_norm": 0.21708305180072784,
      "learning_rate": 2.6133333333333334e-06,
      "loss": 0.0207,
      "step": 3825
    },
    {
      "epoch": 0.6226750768235484,
      "grad_norm": 0.21468117833137512,
      "learning_rate": 2.557777777777778e-06,
      "loss": 0.0206,
      "step": 3850
    },
    {
      "epoch": 0.6267184214782469,
      "grad_norm": 0.2351790964603424,
      "learning_rate": 2.5022222222222224e-06,
      "loss": 0.0184,
      "step": 3875
    },
    {
      "epoch": 0.6307617661329452,
      "grad_norm": 0.20362141728401184,
      "learning_rate": 2.446666666666667e-06,
      "loss": 0.0181,
      "step": 3900
    },
    {
      "epoch": 0.6348051107876436,
      "grad_norm": 0.22791029512882233,
      "learning_rate": 2.3911111111111113e-06,
      "loss": 0.0176,
      "step": 3925
    },
    {
      "epoch": 0.6388484554423419,
      "grad_norm": 0.23184864223003387,
      "learning_rate": 2.3355555555555557e-06,
      "loss": 0.0201,
      "step": 3950
    },
    {
      "epoch": 0.6428918000970403,
      "grad_norm": 0.213025763630867,
      "learning_rate": 2.28e-06,
      "loss": 0.0178,
      "step": 3975
    },
    {
      "epoch": 0.6469351447517386,
      "grad_norm": 0.2062954306602478,
      "learning_rate": 2.2244444444444447e-06,
      "loss": 0.0198,
      "step": 4000
    },
    {
      "epoch": 0.6469351447517386,
      "eval_loss": 0.02430112473666668,
      "eval_runtime": 14235.4068,
      "eval_samples_per_second": 8.654,
      "eval_steps_per_second": 0.541,
      "eval_wer": 81.67647178155556,
      "step": 4000
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.693893124096e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}