|
{
  "best_metric": 0.15105250983113577,
  "best_model_checkpoint": "xlsr_Paiwan/checkpoint-19740",
  "epoch": 30.0,
  "global_step": 19740,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.76,
      "learning_rate": 0.0002924620060790273,
      "loss": 2.0437,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.43691062927246094,
      "eval_runtime": 54.5716,
      "eval_samples_per_second": 54.79,
      "eval_steps_per_second": 6.853,
      "eval_wer": 0.57050328806054,
      "step": 658
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.00028486322188449846,
      "loss": 0.4605,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.3218730688095093,
      "eval_runtime": 53.5979,
      "eval_samples_per_second": 55.786,
      "eval_steps_per_second": 6.978,
      "eval_wer": 0.424407653415287,
      "step": 1316
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.0002772644376899696,
      "loss": 0.3486,
      "step": 1500
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.2743721604347229,
      "eval_runtime": 52.9623,
      "eval_samples_per_second": 56.455,
      "eval_steps_per_second": 7.062,
      "eval_wer": 0.3111265325005783,
      "step": 1974
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.0002696656534954407,
      "loss": 0.3005,
      "step": 2000
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.00026206686930091183,
      "loss": 0.243,
      "step": 2500
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.2703934609889984,
      "eval_runtime": 52.6582,
      "eval_samples_per_second": 56.781,
      "eval_steps_per_second": 7.102,
      "eval_wer": 0.3132745117477942,
      "step": 2632
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.000254468085106383,
      "loss": 0.2074,
      "step": 3000
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.2540125250816345,
      "eval_runtime": 51.8815,
      "eval_samples_per_second": 57.631,
      "eval_steps_per_second": 7.209,
      "eval_wer": 0.29308350682396483,
      "step": 3290
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.0002468693009118541,
      "loss": 0.1827,
      "step": 3500
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.24963043630123138,
      "eval_runtime": 52.0611,
      "eval_samples_per_second": 57.433,
      "eval_steps_per_second": 7.184,
      "eval_wer": 0.24047453818446185,
      "step": 3948
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.0002392705167173252,
      "loss": 0.1649,
      "step": 4000
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.00023167173252279633,
      "loss": 0.1371,
      "step": 4500
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.24666966497898102,
      "eval_runtime": 52.0071,
      "eval_samples_per_second": 57.492,
      "eval_steps_per_second": 7.191,
      "eval_wer": 0.23730213806549685,
      "step": 4606
    },
    {
      "epoch": 7.6,
      "learning_rate": 0.00022407294832826744,
      "loss": 0.1258,
      "step": 5000
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.25583532452583313,
      "eval_runtime": 52.0669,
      "eval_samples_per_second": 57.426,
      "eval_steps_per_second": 7.183,
      "eval_wer": 0.22309242919929942,
      "step": 5264
    },
    {
      "epoch": 8.36,
      "learning_rate": 0.00021647416413373858,
      "loss": 0.1078,
      "step": 5500
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.2600269019603729,
      "eval_runtime": 52.0714,
      "eval_samples_per_second": 57.421,
      "eval_steps_per_second": 7.182,
      "eval_wer": 0.213310862165824,
      "step": 5922
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.00020887537993920973,
      "loss": 0.1038,
      "step": 6000
    },
    {
      "epoch": 9.88,
      "learning_rate": 0.00020127659574468082,
      "loss": 0.0906,
      "step": 6500
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.25083455443382263,
      "eval_runtime": 52.0294,
      "eval_samples_per_second": 57.467,
      "eval_steps_per_second": 7.188,
      "eval_wer": 0.20997323287399625,
      "step": 6580
    },
    {
      "epoch": 10.64,
      "learning_rate": 0.00019367781155015196,
      "loss": 0.0843,
      "step": 7000
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.2840941548347473,
      "eval_runtime": 52.1825,
      "eval_samples_per_second": 57.299,
      "eval_steps_per_second": 7.167,
      "eval_wer": 0.19976206999107762,
      "step": 7238
    },
    {
      "epoch": 11.4,
      "learning_rate": 0.0001860790273556231,
      "loss": 0.0765,
      "step": 7500
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.26433253288269043,
      "eval_runtime": 52.2172,
      "eval_samples_per_second": 57.261,
      "eval_steps_per_second": 7.162,
      "eval_wer": 0.19328508641485742,
      "step": 7896
    },
    {
      "epoch": 12.16,
      "learning_rate": 0.00017848024316109422,
      "loss": 0.0699,
      "step": 8000
    },
    {
      "epoch": 12.92,
      "learning_rate": 0.00017088145896656533,
      "loss": 0.0671,
      "step": 8500
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.2808719873428345,
      "eval_runtime": 52.2538,
      "eval_samples_per_second": 57.221,
      "eval_steps_per_second": 7.157,
      "eval_wer": 0.18816298205611182,
      "step": 8554
    },
    {
      "epoch": 13.68,
      "learning_rate": 0.00016329787234042553,
      "loss": 0.0589,
      "step": 9000
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.2795076072216034,
      "eval_runtime": 52.106,
      "eval_samples_per_second": 57.383,
      "eval_steps_per_second": 7.178,
      "eval_wer": 0.18647764449291168,
      "step": 9212
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.00015569908814589664,
      "loss": 0.0566,
      "step": 9500
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.27890855073928833,
      "eval_runtime": 52.1531,
      "eval_samples_per_second": 57.331,
      "eval_steps_per_second": 7.171,
      "eval_wer": 0.18158686097617396,
      "step": 9870
    },
    {
      "epoch": 15.2,
      "learning_rate": 0.00014810030395136776,
      "loss": 0.0514,
      "step": 10000
    },
    {
      "epoch": 15.96,
      "learning_rate": 0.0001405015197568389,
      "loss": 0.0507,
      "step": 10500
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.28275853395462036,
      "eval_runtime": 52.1154,
      "eval_samples_per_second": 57.373,
      "eval_steps_per_second": 7.176,
      "eval_wer": 0.17676216912858134,
      "step": 10528
    },
    {
      "epoch": 16.72,
      "learning_rate": 0.00013291793313069907,
      "loss": 0.0452,
      "step": 11000
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.2956693470478058,
      "eval_runtime": 56.2891,
      "eval_samples_per_second": 53.119,
      "eval_steps_per_second": 6.644,
      "eval_wer": 0.1839661610653977,
      "step": 11186
    },
    {
      "epoch": 17.48,
      "learning_rate": 0.0001253191489361702,
      "loss": 0.0414,
      "step": 11500
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.272775262594223,
      "eval_runtime": 52.2587,
      "eval_samples_per_second": 57.215,
      "eval_steps_per_second": 7.157,
      "eval_wer": 0.17431677737021248,
      "step": 11844
    },
    {
      "epoch": 18.24,
      "learning_rate": 0.00011772036474164133,
      "loss": 0.0373,
      "step": 12000
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.00011012158054711246,
      "loss": 0.0362,
      "step": 12500
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.2982702851295471,
      "eval_runtime": 52.2158,
      "eval_samples_per_second": 57.262,
      "eval_steps_per_second": 7.163,
      "eval_wer": 0.17220184395756916,
      "step": 12502
    },
    {
      "epoch": 19.76,
      "learning_rate": 0.00010252279635258359,
      "loss": 0.0348,
      "step": 13000
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.2988567054271698,
      "eval_runtime": 52.245,
      "eval_samples_per_second": 57.23,
      "eval_steps_per_second": 7.159,
      "eval_wer": 0.1638082019761409,
      "step": 13160
    },
    {
      "epoch": 20.52,
      "learning_rate": 9.493920972644377e-05,
      "loss": 0.0324,
      "step": 13500
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.3093124032020569,
      "eval_runtime": 52.8274,
      "eval_samples_per_second": 56.599,
      "eval_steps_per_second": 7.08,
      "eval_wer": 0.16509698952447044,
      "step": 13818
    },
    {
      "epoch": 21.28,
      "learning_rate": 8.734042553191488e-05,
      "loss": 0.0303,
      "step": 14000
    },
    {
      "epoch": 22.0,
      "eval_loss": 0.3057407736778259,
      "eval_runtime": 52.5941,
      "eval_samples_per_second": 56.851,
      "eval_steps_per_second": 7.111,
      "eval_wer": 0.16489871451703514,
      "step": 14476
    },
    {
      "epoch": 22.04,
      "learning_rate": 7.974164133738601e-05,
      "loss": 0.0293,
      "step": 14500
    },
    {
      "epoch": 22.8,
      "learning_rate": 7.214285714285714e-05,
      "loss": 0.0267,
      "step": 15000
    },
    {
      "epoch": 23.0,
      "eval_loss": 0.3179123103618622,
      "eval_runtime": 52.2627,
      "eval_samples_per_second": 57.211,
      "eval_steps_per_second": 7.156,
      "eval_wer": 0.1582895476025247,
      "step": 15134
    },
    {
      "epoch": 23.56,
      "learning_rate": 6.455927051671732e-05,
      "loss": 0.026,
      "step": 15500
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.3296581506729126,
      "eval_runtime": 52.0664,
      "eval_samples_per_second": 57.427,
      "eval_steps_per_second": 7.183,
      "eval_wer": 0.15799213509137172,
      "step": 15792
    },
    {
      "epoch": 24.32,
      "learning_rate": 5.696048632218844e-05,
      "loss": 0.0246,
      "step": 16000
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.3182116746902466,
      "eval_runtime": 51.9316,
      "eval_samples_per_second": 57.576,
      "eval_steps_per_second": 7.202,
      "eval_wer": 0.15541455999471265,
      "step": 16450
    },
    {
      "epoch": 25.08,
      "learning_rate": 4.937689969604863e-05,
      "loss": 0.0235,
      "step": 16500
    },
    {
      "epoch": 25.84,
      "learning_rate": 4.179331306990881e-05,
      "loss": 0.0215,
      "step": 17000
    },
    {
      "epoch": 26.0,
      "eval_loss": 0.3294830620288849,
      "eval_runtime": 52.2884,
      "eval_samples_per_second": 57.183,
      "eval_steps_per_second": 7.153,
      "eval_wer": 0.15541455999471265,
      "step": 17108
    },
    {
      "epoch": 26.6,
      "learning_rate": 3.419452887537994e-05,
      "loss": 0.0199,
      "step": 17500
    },
    {
      "epoch": 27.0,
      "eval_loss": 0.33707642555236816,
      "eval_runtime": 51.7426,
      "eval_samples_per_second": 57.786,
      "eval_steps_per_second": 7.228,
      "eval_wer": 0.1535309474240772,
      "step": 17766
    },
    {
      "epoch": 27.36,
      "learning_rate": 2.6595744680851064e-05,
      "loss": 0.019,
      "step": 18000
    },
    {
      "epoch": 28.0,
      "eval_loss": 0.34009498357772827,
      "eval_runtime": 51.768,
      "eval_samples_per_second": 57.758,
      "eval_steps_per_second": 7.225,
      "eval_wer": 0.15174647235715938,
      "step": 18424
    },
    {
      "epoch": 28.12,
      "learning_rate": 1.8996960486322186e-05,
      "loss": 0.0185,
      "step": 18500
    },
    {
      "epoch": 28.88,
      "learning_rate": 1.139817629179331e-05,
      "loss": 0.0172,
      "step": 19000
    },
    {
      "epoch": 29.0,
      "eval_loss": 0.34455737471580505,
      "eval_runtime": 51.8182,
      "eval_samples_per_second": 57.702,
      "eval_steps_per_second": 7.218,
      "eval_wer": 0.15108555566570833,
      "step": 19082
    },
    {
      "epoch": 29.64,
      "learning_rate": 3.7993920972644374e-06,
      "loss": 0.0166,
      "step": 19500
    },
    {
      "epoch": 30.0,
      "eval_loss": 0.3466561436653137,
      "eval_runtime": 51.8971,
      "eval_samples_per_second": 57.614,
      "eval_steps_per_second": 7.207,
      "eval_wer": 0.15105250983113577,
      "step": 19740
    }
  ],
  "max_steps": 19740,
  "num_train_epochs": 30,
  "total_flos": 7.422835365656386e+19,
  "trial_name": null,
  "trial_params": null
}
|
|