{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 248,
  "global_step": 4960,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 3e-05,
      "loss": 2.8428,
      "step": 50
    },
    {
      "epoch": 0.4,
      "learning_rate": 3e-05,
      "loss": 2.1589,
      "step": 100
    },
    {
      "epoch": 0.6,
      "learning_rate": 3e-05,
      "loss": 2.0527,
      "step": 150
    },
    {
      "epoch": 0.81,
      "learning_rate": 3e-05,
      "loss": 1.9823,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6398457059679767,
      "eval_loss": 1.6149228811264038,
      "eval_runtime": 17.4504,
      "eval_samples_per_second": 28.653,
      "eval_steps_per_second": 1.834,
      "step": 248
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 9.0,
      "eval_f1": 13.45666666666666,
      "eval_qa_bleu": 7.063432441196736,
      "eval_qa_exact_match": 0.09,
      "eval_recite_bleu": 15.344658855358656,
      "eval_recite_exact_match": 0.0,
      "step": 248
    },
    {
      "epoch": 1.01,
      "learning_rate": 3e-05,
      "loss": 1.9153,
      "step": 250
    },
    {
      "epoch": 1.21,
      "learning_rate": 3e-05,
      "loss": 1.49,
      "step": 300
    },
    {
      "epoch": 1.41,
      "learning_rate": 3e-05,
      "loss": 1.4406,
      "step": 350
    },
    {
      "epoch": 1.61,
      "learning_rate": 3e-05,
      "loss": 1.4161,
      "step": 400
    },
    {
      "epoch": 1.81,
      "learning_rate": 3e-05,
      "loss": 1.3868,
      "step": 450
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6700320232896652,
      "eval_loss": 1.1928881406784058,
      "eval_runtime": 15.4486,
      "eval_samples_per_second": 32.365,
      "eval_steps_per_second": 2.071,
      "step": 496
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 9.2,
      "eval_f1": 14.735098500533276,
      "eval_qa_bleu": 7.255412763224127,
      "eval_qa_exact_match": 0.088,
      "eval_recite_bleu": 17.979965007898493,
      "eval_recite_exact_match": 0.0,
      "step": 496
    },
    {
      "epoch": 2.02,
      "learning_rate": 3e-05,
      "loss": 1.309,
      "step": 500
    },
    {
      "epoch": 2.22,
      "learning_rate": 3e-05,
      "loss": 0.9401,
      "step": 550
    },
    {
      "epoch": 2.42,
      "learning_rate": 3e-05,
      "loss": 0.9429,
      "step": 600
    },
    {
      "epoch": 2.62,
      "learning_rate": 3e-05,
      "loss": 0.9115,
      "step": 650
    },
    {
      "epoch": 2.82,
      "learning_rate": 3e-05,
      "loss": 0.9102,
      "step": 700
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6992489082969432,
      "eval_loss": 0.8513368368148804,
      "eval_runtime": 15.7421,
      "eval_samples_per_second": 31.762,
      "eval_steps_per_second": 2.033,
      "step": 744
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 10.2,
      "eval_f1": 15.888450046685344,
      "eval_qa_bleu": 7.2379640900667646,
      "eval_qa_exact_match": 0.098,
      "eval_recite_bleu": 20.876580135938546,
      "eval_recite_exact_match": 0.0,
      "step": 744
    },
    {
      "epoch": 3.02,
      "learning_rate": 3e-05,
      "loss": 0.8302,
      "step": 750
    },
    {
      "epoch": 3.23,
      "learning_rate": 3e-05,
      "loss": 0.5813,
      "step": 800
    },
    {
      "epoch": 3.43,
      "learning_rate": 3e-05,
      "loss": 0.5844,
      "step": 850
    },
    {
      "epoch": 3.63,
      "learning_rate": 3e-05,
      "loss": 0.5776,
      "step": 900
    },
    {
      "epoch": 3.83,
      "learning_rate": 3e-05,
      "loss": 0.5869,
      "step": 950
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.721787481804949,
      "eval_loss": 0.6180558204650879,
      "eval_runtime": 15.5183,
      "eval_samples_per_second": 32.22,
      "eval_steps_per_second": 2.062,
      "step": 992
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 13.6,
      "eval_f1": 19.457685255920552,
      "eval_qa_bleu": 7.372812651620524,
      "eval_qa_exact_match": 0.126,
      "eval_recite_bleu": 24.119469757278573,
      "eval_recite_exact_match": 0.0,
      "step": 992
    },
    {
      "epoch": 4.03,
      "learning_rate": 3e-05,
      "loss": 0.5394,
      "step": 1000
    },
    {
      "epoch": 4.23,
      "learning_rate": 3e-05,
      "loss": 0.3787,
      "step": 1050
    },
    {
      "epoch": 4.44,
      "learning_rate": 3e-05,
      "loss": 0.3865,
      "step": 1100
    },
    {
      "epoch": 4.64,
      "learning_rate": 3e-05,
      "loss": 0.3734,
      "step": 1150
    },
    {
      "epoch": 4.84,
      "learning_rate": 3e-05,
      "loss": 0.364,
      "step": 1200
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7352052401746725,
      "eval_loss": 0.49626532196998596,
      "eval_runtime": 15.762,
      "eval_samples_per_second": 31.722,
      "eval_steps_per_second": 2.03,
      "step": 1240
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 11.6,
      "eval_f1": 17.050505050505052,
      "eval_qa_bleu": 9.3668737780184,
      "eval_qa_exact_match": 0.114,
      "eval_recite_bleu": 28.6471128712922,
      "eval_recite_exact_match": 0.004,
      "step": 1240
    },
    {
      "epoch": 5.04,
      "learning_rate": 3e-05,
      "loss": 0.345,
      "step": 1250
    },
    {
      "epoch": 5.24,
      "learning_rate": 3e-05,
      "loss": 0.2508,
      "step": 1300
    },
    {
      "epoch": 5.44,
      "learning_rate": 3e-05,
      "loss": 0.26,
      "step": 1350
    },
    {
      "epoch": 5.65,
      "learning_rate": 3e-05,
      "loss": 0.2618,
      "step": 1400
    },
    {
      "epoch": 5.85,
      "learning_rate": 3e-05,
      "loss": 0.2706,
      "step": 1450
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7419825327510917,
      "eval_loss": 0.441346138715744,
      "eval_runtime": 15.4217,
      "eval_samples_per_second": 32.422,
      "eval_steps_per_second": 2.075,
      "step": 1488
    },
    {
      "epoch": 6.0,
      "eval_exact_match": 17.6,
      "eval_f1": 23.964126984126988,
      "eval_qa_bleu": 13.1735935431327,
      "eval_qa_exact_match": 0.164,
      "eval_recite_bleu": 35.20769002447571,
      "eval_recite_exact_match": 0.008,
      "step": 1488
    },
    {
      "epoch": 6.05,
      "learning_rate": 3e-05,
      "loss": 0.2459,
      "step": 1500
    },
    {
      "epoch": 6.25,
      "learning_rate": 3e-05,
      "loss": 0.1802,
      "step": 1550
    },
    {
      "epoch": 6.45,
      "learning_rate": 3e-05,
      "loss": 0.1853,
      "step": 1600
    },
    {
      "epoch": 6.65,
      "learning_rate": 3e-05,
      "loss": 0.1905,
      "step": 1650
    },
    {
      "epoch": 6.85,
      "learning_rate": 3e-05,
      "loss": 0.1906,
      "step": 1700
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7457903930131005,
      "eval_loss": 0.4084840714931488,
      "eval_runtime": 15.7338,
      "eval_samples_per_second": 31.779,
      "eval_steps_per_second": 2.034,
      "step": 1736
    },
    {
      "epoch": 7.0,
      "eval_exact_match": 20.2,
      "eval_f1": 25.946536796536805,
      "eval_qa_bleu": 13.109935567750034,
      "eval_qa_exact_match": 0.198,
      "eval_recite_bleu": 41.370445416956926,
      "eval_recite_exact_match": 0.018,
      "step": 1736
    },
    {
      "epoch": 7.06,
      "learning_rate": 3e-05,
      "loss": 0.1735,
      "step": 1750
    },
    {
      "epoch": 7.26,
      "learning_rate": 3e-05,
      "loss": 0.1402,
      "step": 1800
    },
    {
      "epoch": 7.46,
      "learning_rate": 3e-05,
      "loss": 0.1423,
      "step": 1850
    },
    {
      "epoch": 7.66,
      "learning_rate": 3e-05,
      "loss": 0.1468,
      "step": 1900
    },
    {
      "epoch": 7.86,
      "learning_rate": 3e-05,
      "loss": 0.1474,
      "step": 1950
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7476826783114993,
      "eval_loss": 0.4009522497653961,
      "eval_runtime": 15.0512,
      "eval_samples_per_second": 33.22,
      "eval_steps_per_second": 2.126,
      "step": 1984
    },
    {
      "epoch": 8.0,
      "eval_exact_match": 17.4,
      "eval_f1": 24.613044733044738,
      "eval_qa_bleu": 11.328428780013391,
      "eval_qa_exact_match": 0.162,
      "eval_recite_bleu": 45.62279446830165,
      "eval_recite_exact_match": 0.036,
      "step": 1984
    },
    {
      "epoch": 8.06,
      "learning_rate": 3e-05,
      "loss": 0.1383,
      "step": 2000
    },
    {
      "epoch": 8.27,
      "learning_rate": 3e-05,
      "loss": 0.1181,
      "step": 2050
    },
    {
      "epoch": 8.47,
      "learning_rate": 3e-05,
      "loss": 0.1212,
      "step": 2100
    },
    {
      "epoch": 8.67,
      "learning_rate": 3e-05,
      "loss": 0.118,
      "step": 2150
    },
    {
      "epoch": 8.87,
      "learning_rate": 3e-05,
      "loss": 0.1264,
      "step": 2200
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7484366812227075,
      "eval_loss": 0.39788317680358887,
      "eval_runtime": 15.8038,
      "eval_samples_per_second": 31.638,
      "eval_steps_per_second": 2.025,
      "step": 2232
    },
    {
      "epoch": 9.0,
      "eval_exact_match": 16.4,
      "eval_f1": 23.748124098124105,
      "eval_qa_bleu": 10.821376307326117,
      "eval_qa_exact_match": 0.148,
      "eval_recite_bleu": 46.63269966666572,
      "eval_recite_exact_match": 0.04,
      "step": 2232
    },
    {
      "epoch": 9.07,
      "learning_rate": 3e-05,
      "loss": 0.121,
      "step": 2250
    },
    {
      "epoch": 9.27,
      "learning_rate": 3e-05,
      "loss": 0.1036,
      "step": 2300
    },
    {
      "epoch": 9.48,
      "learning_rate": 3e-05,
      "loss": 0.1053,
      "step": 2350
    },
    {
      "epoch": 9.68,
      "learning_rate": 3e-05,
      "loss": 0.1129,
      "step": 2400
    },
    {
      "epoch": 9.88,
      "learning_rate": 3e-05,
      "loss": 0.1091,
      "step": 2450
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7487336244541485,
      "eval_loss": 0.4059823751449585,
      "eval_runtime": 15.4716,
      "eval_samples_per_second": 32.317,
      "eval_steps_per_second": 2.068,
      "step": 2480
    },
    {
      "epoch": 10.0,
      "eval_exact_match": 16.6,
      "eval_f1": 24.02516483516484,
      "eval_qa_bleu": 9.045796919947849,
      "eval_qa_exact_match": 0.156,
      "eval_recite_bleu": 50.48708434203323,
      "eval_recite_exact_match": 0.042,
      "step": 2480
    },
    {
      "epoch": 10.08,
      "learning_rate": 3e-05,
      "loss": 0.1062,
      "step": 2500
    },
    {
      "epoch": 10.28,
      "learning_rate": 3e-05,
      "loss": 0.098,
      "step": 2550
    },
    {
      "epoch": 10.48,
      "learning_rate": 3e-05,
      "loss": 0.103,
      "step": 2600
    },
    {
      "epoch": 10.69,
      "learning_rate": 3e-05,
      "loss": 0.1015,
      "step": 2650
    },
    {
      "epoch": 10.89,
      "learning_rate": 3e-05,
      "loss": 0.1072,
      "step": 2700
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.749018922852984,
      "eval_loss": 0.4050058424472809,
      "eval_runtime": 15.8782,
      "eval_samples_per_second": 31.49,
      "eval_steps_per_second": 2.015,
      "step": 2728
    },
    {
      "epoch": 11.0,
      "eval_exact_match": 17.0,
      "eval_f1": 23.9401875901876,
      "eval_qa_bleu": 11.704401309760495,
      "eval_qa_exact_match": 0.154,
      "eval_recite_bleu": 49.91003032025176,
      "eval_recite_exact_match": 0.054,
      "step": 2728
    },
    {
      "epoch": 11.09,
      "learning_rate": 3e-05,
      "loss": 0.1013,
      "step": 2750
    },
    {
      "epoch": 11.29,
      "learning_rate": 3e-05,
      "loss": 0.0969,
      "step": 2800
    },
    {
      "epoch": 11.49,
      "learning_rate": 3e-05,
      "loss": 0.0971,
      "step": 2850
    },
    {
      "epoch": 11.69,
      "learning_rate": 3e-05,
      "loss": 0.0984,
      "step": 2900
    },
    {
      "epoch": 11.9,
      "learning_rate": 3e-05,
      "loss": 0.0969,
      "step": 2950
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7491644832605532,
      "eval_loss": 0.4080793559551239,
      "eval_runtime": 15.5323,
      "eval_samples_per_second": 32.191,
      "eval_steps_per_second": 2.06,
      "step": 2976
    },
    {
      "epoch": 12.0,
      "eval_exact_match": 18.2,
      "eval_f1": 25.5360361860362,
      "eval_qa_bleu": 14.03126374073827,
      "eval_qa_exact_match": 0.176,
      "eval_recite_bleu": 49.89862429102314,
      "eval_recite_exact_match": 0.064,
      "step": 2976
    },
    {
      "epoch": 12.1,
      "learning_rate": 3e-05,
      "loss": 0.0977,
      "step": 3000
    },
    {
      "epoch": 12.3,
      "learning_rate": 3e-05,
      "loss": 0.0917,
      "step": 3050
    },
    {
      "epoch": 12.5,
      "learning_rate": 3e-05,
      "loss": 0.0927,
      "step": 3100
    },
    {
      "epoch": 12.7,
      "learning_rate": 3e-05,
      "loss": 0.0963,
      "step": 3150
    },
    {
      "epoch": 12.9,
      "learning_rate": 3e-05,
      "loss": 0.0935,
      "step": 3200
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7494788937409025,
      "eval_loss": 0.4145064055919647,
      "eval_runtime": 15.6596,
      "eval_samples_per_second": 31.929,
      "eval_steps_per_second": 2.043,
      "step": 3224
    },
    {
      "epoch": 13.0,
      "eval_exact_match": 19.4,
      "eval_f1": 26.85320346320347,
      "eval_qa_bleu": 12.354122306398153,
      "eval_qa_exact_match": 0.18,
      "eval_recite_bleu": 51.76883509669797,
      "eval_recite_exact_match": 0.068,
      "step": 3224
    },
    {
      "epoch": 13.1,
      "learning_rate": 3e-05,
      "loss": 0.0897,
      "step": 3250
    },
    {
      "epoch": 13.31,
      "learning_rate": 3e-05,
      "loss": 0.0905,
      "step": 3300
    },
    {
      "epoch": 13.51,
      "learning_rate": 3e-05,
      "loss": 0.0906,
      "step": 3350
    },
    {
      "epoch": 13.71,
      "learning_rate": 3e-05,
      "loss": 0.0929,
      "step": 3400
    },
    {
      "epoch": 13.91,
      "learning_rate": 3e-05,
      "loss": 0.0932,
      "step": 3450
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7494177583697235,
      "eval_loss": 0.4077925384044647,
      "eval_runtime": 15.5414,
      "eval_samples_per_second": 32.172,
      "eval_steps_per_second": 2.059,
      "step": 3472
    },
    {
      "epoch": 14.0,
      "eval_exact_match": 20.0,
      "eval_f1": 27.0681240981241,
      "eval_qa_bleu": 11.488299016816185,
      "eval_qa_exact_match": 0.182,
      "eval_recite_bleu": 50.751064727490615,
      "eval_recite_exact_match": 0.072,
      "step": 3472
    },
    {
      "epoch": 14.11,
      "learning_rate": 3e-05,
      "loss": 0.0918,
      "step": 3500
    },
    {
      "epoch": 14.31,
      "learning_rate": 3e-05,
      "loss": 0.0855,
      "step": 3550
    },
    {
      "epoch": 14.52,
      "learning_rate": 3e-05,
      "loss": 0.0918,
      "step": 3600
    },
    {
      "epoch": 14.72,
      "learning_rate": 3e-05,
      "loss": 0.0951,
      "step": 3650
    },
    {
      "epoch": 14.92,
      "learning_rate": 3e-05,
      "loss": 0.0929,
      "step": 3700
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7494439592430859,
      "eval_loss": 0.41397154331207275,
      "eval_runtime": 15.6399,
      "eval_samples_per_second": 31.969,
      "eval_steps_per_second": 2.046,
      "step": 3720
    },
    {
      "epoch": 15.0,
      "eval_exact_match": 21.4,
      "eval_f1": 28.090981240981243,
      "eval_qa_bleu": 12.708765778160004,
      "eval_qa_exact_match": 0.204,
      "eval_recite_bleu": 51.72639724879611,
      "eval_recite_exact_match": 0.068,
      "step": 3720
    },
    {
      "epoch": 15.12,
      "learning_rate": 3e-05,
      "loss": 0.0875,
      "step": 3750
    },
    {
      "epoch": 15.32,
      "learning_rate": 3e-05,
      "loss": 0.0889,
      "step": 3800
    },
    {
      "epoch": 15.52,
      "learning_rate": 3e-05,
      "loss": 0.0871,
      "step": 3850
    },
    {
      "epoch": 15.73,
      "learning_rate": 3e-05,
      "loss": 0.0918,
      "step": 3900
    },
    {
      "epoch": 15.93,
      "learning_rate": 3e-05,
      "loss": 0.0951,
      "step": 3950
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7494730713245997,
      "eval_loss": 0.4144821763038635,
      "eval_runtime": 15.6836,
      "eval_samples_per_second": 31.88,
      "eval_steps_per_second": 2.04,
      "step": 3968
    },
    {
      "epoch": 16.0,
      "eval_exact_match": 19.8,
      "eval_f1": 27.501433011433015,
      "eval_qa_bleu": 16.46590504145556,
      "eval_qa_exact_match": 0.186,
      "eval_recite_bleu": 52.1066910776491,
      "eval_recite_exact_match": 0.074,
      "step": 3968
    },
    {
      "epoch": 16.13,
      "learning_rate": 3e-05,
      "loss": 0.0818,
      "step": 4000
    },
    {
      "epoch": 16.33,
      "learning_rate": 3e-05,
      "loss": 0.0894,
      "step": 4050
    },
    {
      "epoch": 16.53,
      "learning_rate": 3e-05,
      "loss": 0.0897,
      "step": 4100
    },
    {
      "epoch": 16.73,
      "learning_rate": 3e-05,
      "loss": 0.0914,
      "step": 4150
    },
    {
      "epoch": 16.94,
      "learning_rate": 3e-05,
      "loss": 0.0926,
      "step": 4200
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7494847161572052,
      "eval_loss": 0.4133778512477875,
      "eval_runtime": 15.6296,
      "eval_samples_per_second": 31.991,
      "eval_steps_per_second": 2.047,
      "step": 4216
    },
    {
      "epoch": 17.0,
      "eval_exact_match": 19.2,
      "eval_f1": 26.86869424692955,
      "eval_qa_bleu": 15.6053335659073,
      "eval_qa_exact_match": 0.178,
      "eval_recite_bleu": 52.354652374608186,
      "eval_recite_exact_match": 0.068,
      "step": 4216
    },
    {
      "epoch": 17.14,
      "learning_rate": 3e-05,
      "loss": 0.0884,
      "step": 4250
    },
    {
      "epoch": 17.34,
      "learning_rate": 3e-05,
      "loss": 0.085,
      "step": 4300
    },
    {
      "epoch": 17.54,
      "learning_rate": 3e-05,
      "loss": 0.0908,
      "step": 4350
    },
    {
      "epoch": 17.74,
      "learning_rate": 3e-05,
      "loss": 0.0898,
      "step": 4400
    },
    {
      "epoch": 17.94,
      "learning_rate": 3e-05,
      "loss": 0.0946,
      "step": 4450
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7493420669577875,
      "eval_loss": 0.42565715312957764,
      "eval_runtime": 15.8896,
      "eval_samples_per_second": 31.467,
      "eval_steps_per_second": 2.014,
      "step": 4464
    },
    {
      "epoch": 18.0,
      "eval_exact_match": 21.2,
      "eval_f1": 29.34194877018406,
      "eval_qa_bleu": 13.32335745765551,
      "eval_qa_exact_match": 0.196,
      "eval_recite_bleu": 53.51642016865907,
      "eval_recite_exact_match": 0.074,
      "step": 4464
    },
    {
      "epoch": 18.15,
      "learning_rate": 3e-05,
      "loss": 0.09,
      "step": 4500
    },
    {
      "epoch": 18.35,
      "learning_rate": 3e-05,
      "loss": 0.0883,
      "step": 4550
    },
    {
      "epoch": 18.55,
      "learning_rate": 3e-05,
      "loss": 0.0866,
      "step": 4600
    },
    {
      "epoch": 18.75,
      "learning_rate": 3e-05,
      "loss": 0.0901,
      "step": 4650
    },
    {
      "epoch": 18.95,
      "learning_rate": 3e-05,
      "loss": 0.0897,
      "step": 4700
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7495778748180495,
      "eval_loss": 0.41643163561820984,
      "eval_runtime": 15.7007,
      "eval_samples_per_second": 31.846,
      "eval_steps_per_second": 2.038,
      "step": 4712
    },
    {
      "epoch": 19.0,
      "eval_exact_match": 20.2,
      "eval_f1": 27.71383838383839,
      "eval_qa_bleu": 14.36873555788707,
      "eval_qa_exact_match": 0.192,
      "eval_recite_bleu": 50.76206757901549,
      "eval_recite_exact_match": 0.072,
      "step": 4712
    },
    {
      "epoch": 19.15,
      "learning_rate": 3e-05,
      "loss": 0.0858,
      "step": 4750
    },
    {
      "epoch": 19.35,
      "learning_rate": 3e-05,
      "loss": 0.086,
      "step": 4800
    },
    {
      "epoch": 19.56,
      "learning_rate": 3e-05,
      "loss": 0.0862,
      "step": 4850
    },
    {
      "epoch": 19.76,
      "learning_rate": 3e-05,
      "loss": 0.0876,
      "step": 4900
    },
    {
      "epoch": 19.96,
      "learning_rate": 3e-05,
      "loss": 0.092,
      "step": 4950
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7496011644832605,
      "eval_loss": 0.4180622398853302,
      "eval_runtime": 15.5975,
      "eval_samples_per_second": 32.056,
      "eval_steps_per_second": 2.052,
      "step": 4960
    },
    {
      "epoch": 20.0,
      "eval_exact_match": 21.4,
      "eval_f1": 29.01558441558442,
      "eval_qa_bleu": 14.570041032641159,
      "eval_qa_exact_match": 0.2,
      "eval_recite_bleu": 53.657481681296964,
      "eval_recite_exact_match": 0.072,
      "step": 4960
    },
    {
      "epoch": 20.0,
      "step": 4960,
      "total_flos": 2.30297745517824e+17,
      "train_loss": 0.3613291465947705,
      "train_runtime": 9979.1636,
      "train_samples_per_second": 7.935,
      "train_steps_per_second": 0.497
    }
  ],
  "logging_steps": 50,
  "max_steps": 4960,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 2.30297745517824e+17,
  "trial_name": null,
  "trial_params": null
}