|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.98e-05, |
|
"loss": 1.4142, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 1.2006, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.94e-05, |
|
"loss": 1.1483, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 1.1203, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.9e-05, |
|
"loss": 1.1042, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.88e-05, |
|
"loss": 1.0782, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.86e-05, |
|
"loss": 1.0579, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 1.0638, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.8200000000000002e-05, |
|
"loss": 1.0532, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.0355, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 9.6717, |
|
"eval_loss": 0.9534130692481995, |
|
"eval_rouge1": 11.1402, |
|
"eval_rouge2": 2.3949, |
|
"eval_rougeL": 11.0659, |
|
"eval_rougeLsum": 11.1028, |
|
"eval_runtime": 604.1787, |
|
"eval_samples_per_second": 16.551, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.7800000000000002e-05, |
|
"loss": 1.0246, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.76e-05, |
|
"loss": 1.0355, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7400000000000003e-05, |
|
"loss": 1.0076, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.72e-05, |
|
"loss": 0.9938, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.7e-05, |
|
"loss": 0.9928, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 1.0041, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.66e-05, |
|
"loss": 0.9988, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.64e-05, |
|
"loss": 0.9949, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.62e-05, |
|
"loss": 0.9879, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.0008, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 10.509, |
|
"eval_loss": 0.9204540252685547, |
|
"eval_rouge1": 11.224, |
|
"eval_rouge2": 2.672, |
|
"eval_rougeL": 11.1021, |
|
"eval_rougeLsum": 11.1328, |
|
"eval_runtime": 596.5212, |
|
"eval_samples_per_second": 16.764, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.58e-05, |
|
"loss": 0.9723, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.5600000000000003e-05, |
|
"loss": 0.9784, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.54e-05, |
|
"loss": 0.9629, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 0.9719, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.9644, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.48e-05, |
|
"loss": 0.9847, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.46e-05, |
|
"loss": 0.9717, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"loss": 0.9644, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.4200000000000001e-05, |
|
"loss": 0.9618, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.9602, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 9.474, |
|
"eval_loss": 0.9030117392539978, |
|
"eval_rouge1": 12.4503, |
|
"eval_rouge2": 2.8721, |
|
"eval_rougeL": 12.354, |
|
"eval_rougeLsum": 12.3602, |
|
"eval_runtime": 585.0965, |
|
"eval_samples_per_second": 17.091, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.38e-05, |
|
"loss": 0.9477, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 0.9582, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.3400000000000002e-05, |
|
"loss": 0.9391, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.3200000000000002e-05, |
|
"loss": 0.9626, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.951, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 0.9472, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.2600000000000001e-05, |
|
"loss": 0.9519, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.2400000000000002e-05, |
|
"loss": 0.952, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.22e-05, |
|
"loss": 0.945, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.9381, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 9.4554, |
|
"eval_loss": 0.8888181447982788, |
|
"eval_rouge1": 12.4117, |
|
"eval_rouge2": 2.9965, |
|
"eval_rougeL": 12.3285, |
|
"eval_rougeLsum": 12.3327, |
|
"eval_runtime": 592.8034, |
|
"eval_samples_per_second": 16.869, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.18e-05, |
|
"loss": 0.9535, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.16e-05, |
|
"loss": 0.9458, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.14e-05, |
|
"loss": 0.9321, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 0.9446, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.9231, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1.0800000000000002e-05, |
|
"loss": 0.9285, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.0600000000000002e-05, |
|
"loss": 0.9367, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.9361, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.02e-05, |
|
"loss": 0.9194, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9288, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 9.6762, |
|
"eval_loss": 0.8804787993431091, |
|
"eval_rouge1": 12.6469, |
|
"eval_rouge2": 2.9175, |
|
"eval_rougeL": 12.5557, |
|
"eval_rougeLsum": 12.5805, |
|
"eval_runtime": 595.0767, |
|
"eval_samples_per_second": 16.805, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.9299, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.9313, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.9239, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.9238, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 9e-06, |
|
"loss": 0.9291, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.9207, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 8.6e-06, |
|
"loss": 0.9256, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.9199, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 8.2e-06, |
|
"loss": 0.9113, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.9243, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 10.0823, |
|
"eval_loss": 0.8752232193946838, |
|
"eval_rouge1": 14.0898, |
|
"eval_rouge2": 3.4389, |
|
"eval_rougeL": 13.9627, |
|
"eval_rougeLsum": 13.9793, |
|
"eval_runtime": 599.6266, |
|
"eval_samples_per_second": 16.677, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 0.9268, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.9144, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.9152, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.9016, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7e-06, |
|
"loss": 0.9119, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.916, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 0.9253, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.9199, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 0.9168, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9087, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 9.7588, |
|
"eval_loss": 0.8731149435043335, |
|
"eval_rouge1": 13.9046, |
|
"eval_rouge2": 3.2515, |
|
"eval_rougeL": 13.7955, |
|
"eval_rougeLsum": 13.8149, |
|
"eval_runtime": 600.5144, |
|
"eval_samples_per_second": 16.652, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 5.8e-06, |
|
"loss": 0.905, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.9005, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.9214, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.9152, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9089, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.9061, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 0.918, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.9055, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.9067, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.9146, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 9.8881, |
|
"eval_loss": 0.871009886264801, |
|
"eval_rouge1": 14.9652, |
|
"eval_rouge2": 3.5015, |
|
"eval_rougeL": 14.8443, |
|
"eval_rougeLsum": 14.8507, |
|
"eval_runtime": 606.6389, |
|
"eval_samples_per_second": 16.484, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 0.9119, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.8942, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.9066, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.92, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 3e-06, |
|
"loss": 0.9046, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.9027, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.6e-06, |
|
"loss": 0.9019, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.9056, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.899, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.9034, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 9.8936, |
|
"eval_loss": 0.8681530952453613, |
|
"eval_rouge1": 14.9995, |
|
"eval_rouge2": 3.5176, |
|
"eval_rougeL": 14.8747, |
|
"eval_rougeLsum": 14.8748, |
|
"eval_runtime": 601.5997, |
|
"eval_samples_per_second": 16.622, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 0.9079, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.9027, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 0.8944, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.8955, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.8949, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.9037, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.9074, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.9145, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 0.9035, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.9147, |
|
"step": 50000 |
|
} |
|
], |
|
"max_steps": 50000, |
|
"num_train_epochs": 10, |
|
"total_flos": 6.850590755050291e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|