|
{ |
|
"best_metric": 2.1115864203149175, |
|
"best_model_checkpoint": "./checkpoint-1000", |
|
"epoch": 33.333333333333336, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 1.715, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.6869975328445435, |
|
"eval_runtime": 192.3622, |
|
"eval_samples_per_second": 1.242, |
|
"eval_steps_per_second": 0.156, |
|
"eval_wer": 97.23309365613908, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 1.3716, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 1.2431073188781738, |
|
"eval_runtime": 160.0584, |
|
"eval_samples_per_second": 1.493, |
|
"eval_steps_per_second": 0.187, |
|
"eval_wer": 93.12824246837171, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.42e-06, |
|
"loss": 0.969, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.8620495796203613, |
|
"eval_runtime": 184.7193, |
|
"eval_samples_per_second": 1.294, |
|
"eval_steps_per_second": 0.162, |
|
"eval_wer": 90.79821607354145, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 0.6238, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 0.4657336473464966, |
|
"eval_runtime": 195.5848, |
|
"eval_samples_per_second": 1.222, |
|
"eval_steps_per_second": 0.153, |
|
"eval_wer": 72.78601984163102, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 2.42e-06, |
|
"loss": 0.3129, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 0.283780962228775, |
|
"eval_runtime": 184.0918, |
|
"eval_samples_per_second": 1.298, |
|
"eval_steps_per_second": 0.163, |
|
"eval_wer": 34.486210976608724, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.92e-06, |
|
"loss": 0.2271, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.23963618278503418, |
|
"eval_runtime": 168.6349, |
|
"eval_samples_per_second": 1.417, |
|
"eval_steps_per_second": 0.178, |
|
"eval_wer": 21.498134158551014, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 0.1854, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_loss": 0.21348817646503448, |
|
"eval_runtime": 131.9979, |
|
"eval_samples_per_second": 1.811, |
|
"eval_steps_per_second": 0.227, |
|
"eval_wer": 7.618094111222354, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 0.1527, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 0.19052761793136597, |
|
"eval_runtime": 122.4712, |
|
"eval_samples_per_second": 1.951, |
|
"eval_steps_per_second": 0.245, |
|
"eval_wer": 6.189132611267862, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 4.42e-06, |
|
"loss": 0.1214, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.17034465074539185, |
|
"eval_runtime": 130.0187, |
|
"eval_samples_per_second": 1.838, |
|
"eval_steps_per_second": 0.231, |
|
"eval_wer": 5.242559388368071, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.92e-06, |
|
"loss": 0.0973, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 0.1484680473804474, |
|
"eval_runtime": 113.2953, |
|
"eval_samples_per_second": 2.11, |
|
"eval_steps_per_second": 0.265, |
|
"eval_wer": 2.639483025393647, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 5.420000000000001e-06, |
|
"loss": 0.0669, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"eval_loss": 0.12224661558866501, |
|
"eval_runtime": 111.1904, |
|
"eval_samples_per_second": 2.149, |
|
"eval_steps_per_second": 0.27, |
|
"eval_wer": 1.7202147993082735, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.92e-06, |
|
"loss": 0.0291, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.09692100435495377, |
|
"eval_runtime": 110.9954, |
|
"eval_samples_per_second": 2.153, |
|
"eval_steps_per_second": 0.27, |
|
"eval_wer": 2.0478747610812778, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 6.42e-06, |
|
"loss": 0.0122, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"eval_loss": 0.10275180637836456, |
|
"eval_runtime": 110.9654, |
|
"eval_samples_per_second": 2.154, |
|
"eval_steps_per_second": 0.27, |
|
"eval_wer": 1.8112314553563305, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 6.92e-06, |
|
"loss": 0.0087, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"eval_loss": 0.1043357104063034, |
|
"eval_runtime": 110.8965, |
|
"eval_samples_per_second": 2.155, |
|
"eval_steps_per_second": 0.271, |
|
"eval_wer": 2.1206880859197232, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 0.005, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_loss": 0.11149879544973373, |
|
"eval_runtime": 110.87, |
|
"eval_samples_per_second": 2.156, |
|
"eval_steps_per_second": 0.271, |
|
"eval_wer": 2.1661964139437515, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 7.92e-06, |
|
"loss": 0.0041, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_loss": 0.11916878074407578, |
|
"eval_runtime": 110.7405, |
|
"eval_samples_per_second": 2.158, |
|
"eval_steps_per_second": 0.271, |
|
"eval_wer": 1.929553108218804, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 8.42e-06, |
|
"loss": 0.0036, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"eval_loss": 0.11888804286718369, |
|
"eval_runtime": 113.1767, |
|
"eval_samples_per_second": 2.112, |
|
"eval_steps_per_second": 0.265, |
|
"eval_wer": 2.694093019022481, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 0.003, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.1166142076253891, |
|
"eval_runtime": 113.7728, |
|
"eval_samples_per_second": 2.101, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.557568034950396, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 9.42e-06, |
|
"loss": 0.0025, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"eval_loss": 0.11832274496555328, |
|
"eval_runtime": 113.8233, |
|
"eval_samples_per_second": 2.1, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 3.1855829616819875, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 0.0022, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_loss": 0.11866947263479233, |
|
"eval_runtime": 113.9373, |
|
"eval_samples_per_second": 2.098, |
|
"eval_steps_per_second": 0.263, |
|
"eval_wer": 3.34941294256849, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 9.58e-06, |
|
"loss": 0.0019, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"eval_loss": 0.12195830047130585, |
|
"eval_runtime": 110.795, |
|
"eval_samples_per_second": 2.157, |
|
"eval_steps_per_second": 0.271, |
|
"eval_wer": 1.7384181305178845, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 9.080000000000001e-06, |
|
"loss": 0.0014, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"eval_loss": 0.12314373254776001, |
|
"eval_runtime": 113.6372, |
|
"eval_samples_per_second": 2.103, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.102484754710112, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 19.17, |
|
"learning_rate": 8.580000000000001e-06, |
|
"loss": 0.0014, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 19.17, |
|
"eval_loss": 0.12057469040155411, |
|
"eval_runtime": 113.9208, |
|
"eval_samples_per_second": 2.098, |
|
"eval_steps_per_second": 0.263, |
|
"eval_wer": 2.1935014107581687, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 8.08e-06, |
|
"loss": 0.0011, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.12358372658491135, |
|
"eval_runtime": 113.6203, |
|
"eval_samples_per_second": 2.103, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.1115864203149175, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 7.58e-06, |
|
"loss": 0.0008, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"eval_loss": 0.12533117830753326, |
|
"eval_runtime": 113.505, |
|
"eval_samples_per_second": 2.106, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.1752980795485577, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 7.08e-06, |
|
"loss": 0.0008, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"eval_loss": 0.12367217987775803, |
|
"eval_runtime": 113.5595, |
|
"eval_samples_per_second": 2.105, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.102484754710112, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 6.5800000000000005e-06, |
|
"loss": 0.0008, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"eval_loss": 0.12699657678604126, |
|
"eval_runtime": 113.7775, |
|
"eval_samples_per_second": 2.101, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.102484754710112, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 6.08e-06, |
|
"loss": 0.0008, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"eval_loss": 0.12633280456066132, |
|
"eval_runtime": 113.4116, |
|
"eval_samples_per_second": 2.107, |
|
"eval_steps_per_second": 0.265, |
|
"eval_wer": 2.093383089105306, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"learning_rate": 5.580000000000001e-06, |
|
"loss": 0.0007, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"eval_loss": 0.12679414451122284, |
|
"eval_runtime": 113.4198, |
|
"eval_samples_per_second": 2.107, |
|
"eval_steps_per_second": 0.265, |
|
"eval_wer": 2.093383089105306, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 5.0800000000000005e-06, |
|
"loss": 0.0006, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.1280515044927597, |
|
"eval_runtime": 113.5855, |
|
"eval_samples_per_second": 2.104, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.220806407572586, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 25.83, |
|
"learning_rate": 4.58e-06, |
|
"loss": 0.0005, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 25.83, |
|
"eval_loss": 0.12817054986953735, |
|
"eval_runtime": 113.5145, |
|
"eval_samples_per_second": 2.105, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.093383089105306, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 4.08e-06, |
|
"loss": 0.0007, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"eval_loss": 0.12809617817401886, |
|
"eval_runtime": 113.5669, |
|
"eval_samples_per_second": 2.104, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.1661964139437515, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 3.58e-06, |
|
"loss": 0.0006, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"eval_loss": 0.12927967309951782, |
|
"eval_runtime": 113.4285, |
|
"eval_samples_per_second": 2.107, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.1115864203149175, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 3.08e-06, |
|
"loss": 0.0006, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"eval_loss": 0.1292882263660431, |
|
"eval_runtime": 113.3979, |
|
"eval_samples_per_second": 2.108, |
|
"eval_steps_per_second": 0.265, |
|
"eval_wer": 2.2026030763629745, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"learning_rate": 2.5800000000000003e-06, |
|
"loss": 0.0006, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"eval_loss": 0.12911662459373474, |
|
"eval_runtime": 113.514, |
|
"eval_samples_per_second": 2.105, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.1206880859197232, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2.08e-06, |
|
"loss": 0.0006, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.12980858981609344, |
|
"eval_runtime": 113.4833, |
|
"eval_samples_per_second": 2.106, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.184399745153363, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 30.83, |
|
"learning_rate": 1.5800000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 30.83, |
|
"eval_loss": 0.13006018102169037, |
|
"eval_runtime": 113.4467, |
|
"eval_samples_per_second": 2.107, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.1115864203149175, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 31.67, |
|
"learning_rate": 1.08e-06, |
|
"loss": 0.0005, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 31.67, |
|
"eval_loss": 0.13012319803237915, |
|
"eval_runtime": 113.2847, |
|
"eval_samples_per_second": 2.11, |
|
"eval_steps_per_second": 0.265, |
|
"eval_wer": 2.1115864203149175, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 5.800000000000001e-07, |
|
"loss": 0.0005, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"eval_loss": 0.13053424656391144, |
|
"eval_runtime": 113.4539, |
|
"eval_samples_per_second": 2.107, |
|
"eval_steps_per_second": 0.264, |
|
"eval_wer": 2.1115864203149175, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 8e-08, |
|
"loss": 0.0005, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 0.13054169714450836, |
|
"eval_runtime": 113.3817, |
|
"eval_samples_per_second": 2.108, |
|
"eval_steps_per_second": 0.265, |
|
"eval_wer": 2.1115864203149175, |
|
"step": 1000 |
|
} |
|
], |
|
"max_steps": 1000, |
|
"num_train_epochs": 34, |
|
"total_flos": 9.19663953739776e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|